All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] fetch —object-info-format: client option for object-info
@ 2022-02-08 23:19 Calvin Wan
  2022-02-08 23:56 ` [PATCH v2] " Calvin Wan
  2022-02-09 19:09 ` [PATCH] fetch —object-info-format: client option for object-info John Cai
  0 siblings, 2 replies; 103+ messages in thread
From: Calvin Wan @ 2022-02-08 23:19 UTC (permalink / raw)
  To: git; +Cc: Calvin Wan, Jonathan Tan

Add ‘—object-info-format’ option to fetch. This option allows
the client to make an object-info [1] command request to a server
that supports protocol v2.

The transport implementation uses vtables [2], similar to how Git
fetches refs, to determine whether a process needs to be taken over
before sending the object-info request. Different protocols
require different setups for making requests.

[1] https://lore.kernel.org/git/20210420233830.2181153-1-bga@google.com/
[2] https://lore.kernel.org/git/26f276956001a120cc9105b0071762c2fd4a45c5.1513287544.git.jonathantanmy@google.com/

Helped-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Calvin Wan <calvinwan@google.com>

---
 builtin/fetch.c              | 80 ++++++++++++++++++++----------
 fetch-pack.c                 | 53 ++++++++++++++++++++
 fetch-pack.h                 |  7 +++
 t/t5583-fetch-object-info.sh | 95 ++++++++++++++++++++++++++++++++++++
 transport-helper.c           | 12 +++++
 transport-internal.h         |  1 +
 transport.c                  | 63 ++++++++++++++++++++++++
 transport.h                  |  1 +
 8 files changed, 286 insertions(+), 26 deletions(-)
 create mode 100755 t/t5583-fetch-object-info.sh

diff --git a/builtin/fetch.c b/builtin/fetch.c
index 5f06b21f8e..991063072b 100644
--- a/builtin/fetch.c
+++ b/builtin/fetch.c
@@ -29,6 +29,9 @@
 #include "commit-graph.h"
 #include "shallow.h"
 #include "worktree.h"
+#include "protocol.h"
+#include "pkt-line.h"
+#include "connect.h"
 
 #define FORCED_UPDATES_DELAY_WARNING_IN_MS (10 * 1000)
 
@@ -37,6 +40,7 @@ static const char * const builtin_fetch_usage[] = {
 	N_("git fetch [<options>] <group>"),
 	N_("git fetch --multiple [<options>] [(<repository> | <group>)...]"),
 	N_("git fetch --all [<options>]"),
+	N_("git fetch --object-info-format=[<arguments>] <remote> [<object-ids>]"),
 	NULL
 };
 
@@ -85,6 +89,7 @@ static struct string_list negotiation_tip = STRING_LIST_INIT_NODUP;
 static int fetch_write_commit_graph = -1;
 static int stdin_refspecs = 0;
 static int negotiate_only;
+static struct string_list object_info_format = STRING_LIST_INIT_NODUP;
 
 static int git_fetch_config(const char *k, const char *v, void *cb)
 {
@@ -220,6 +225,8 @@ static struct option builtin_fetch_options[] = {
 		 N_("write the commit-graph after fetching")),
 	OPT_BOOL(0, "stdin", &stdin_refspecs,
 		 N_("accept refspecs from stdin")),
+	OPT_STRING_LIST(0, "object-info-format", &object_info_format, N_("option"),
+		 N_("command request arguments")),
 	OPT_END()
 };
 
@@ -2000,6 +2007,8 @@ int cmd_fetch(int argc, const char **argv, const char *prefix)
 	struct remote *remote = NULL;
 	int result = 0;
 	int prune_tags_ok = 1;
+	struct oid_array oids = OID_ARRAY_INIT;
+	struct object_id oid;
 
 	packet_trace_identity("fetch");
 
@@ -2019,6 +2028,7 @@ int cmd_fetch(int argc, const char **argv, const char *prefix)
 
 	argc = parse_options(argc, argv, prefix,
 			     builtin_fetch_options, builtin_fetch_usage, 0);
+
 	if (recurse_submodules != RECURSE_SUBMODULES_OFF) {
 		int *sfjc = submodule_fetch_jobs_config == -1
 			    ? &submodule_fetch_jobs_config : NULL;
@@ -2057,34 +2067,52 @@ int cmd_fetch(int argc, const char **argv, const char *prefix)
 	if (dry_run)
 		write_fetch_head = 0;
 
-	if (all) {
-		if (argc == 1)
-			die(_("fetch --all does not take a repository argument"));
-		else if (argc > 1)
-			die(_("fetch --all does not make sense with refspecs"));
-		(void) for_each_remote(get_one_remote_for_fetch, &list);
-	} else if (argc == 0) {
-		/* No arguments -- use default remote */
-		remote = remote_get(NULL);
-	} else if (multiple) {
-		/* All arguments are assumed to be remotes or groups */
-		for (i = 0; i < argc; i++)
-			if (!add_remote_or_group(argv[i], &list))
-				die(_("no such remote or remote group: %s"),
-				    argv[i]);
-	} else {
-		/* Single remote or group */
-		(void) add_remote_or_group(argv[0], &list);
-		if (list.nr > 1) {
-			/* More than one remote */
-			if (argc > 1)
-				die(_("fetching a group and specifying refspecs does not make sense"));
+	if (object_info_format.nr > 0) {
+		if (argc == 0 || argc == 1) {
+			die(_("must supply remote and object ids when using --object-info-format"));
 		} else {
-			/* Zero or one remotes */
 			remote = remote_get(argv[0]);
-			prune_tags_ok = (argc == 1);
-			argc--;
-			argv++;
+			for (i = 1; i < argc; i++) {
+				if (get_oid(argv[i], &oid))
+					return error(_("malformed object name '%s'"), argv[i]);
+				oid_array_append(&oids, &oid);
+			}
+		}
+		gtransport = prepare_transport(remote, 0);
+		gtransport->server_options = &object_info_format;
+		result = transport_fetch_object_info(gtransport, &oids);
+
+		return result;	
+	} else {
+		if (all) {
+			if (argc == 1)
+				die(_("fetch --all does not take a repository argument"));
+			else if (argc > 1)
+				die(_("fetch --all does not make sense with refspecs"));
+			(void) for_each_remote(get_one_remote_for_fetch, &list);
+		} else if (argc == 0) {
+			/* No arguments -- use default remote */
+			remote = remote_get(NULL);
+		} else if (multiple) {
+			/* All arguments are assumed to be remotes or groups */
+			for (i = 0; i < argc; i++)
+				if (!add_remote_or_group(argv[i], &list))
+					die(_("no such remote or remote group: %s"),
+					    argv[i]);
+		} else {
+			/* Single remote or group */
+			(void) add_remote_or_group(argv[0], &list);
+			if (list.nr > 1) {
+				/* More than one remote */
+				if (argc > 1)
+					die(_("fetching a group and specifying refspecs does not make sense"));
+			} else {
+				/* Zero or one remotes */
+				remote = remote_get(argv[0]);
+				prune_tags_ok = (argc == 1);
+				argc--;
+				argv++;
+			}
 		}
 	}
 
diff --git a/fetch-pack.c b/fetch-pack.c
index dd6ec449f2..d1c5254aa8 100644
--- a/fetch-pack.c
+++ b/fetch-pack.c
@@ -1265,6 +1265,59 @@ static void write_fetch_command_and_capabilities(struct strbuf *req_buf,
 	packet_buf_delim(req_buf);
 }
 
+static void write_object_info_command_and_capabilities(struct strbuf *req_buf,
+						 const struct string_list *server_options)
+{
+	const char *hash_name;
+
+	if (server_supports_v2("object-info", 1))
+		packet_buf_write(req_buf, "command=object-info");
+	if (server_supports_v2("agent", 0))
+		packet_buf_write(req_buf, "agent=%s", git_user_agent_sanitized());
+	if (advertise_sid && server_supports_v2("session-id", 0))
+		packet_buf_write(req_buf, "session-id=%s", trace2_session_id());
+	if (server_options && server_options->nr &&
+	    server_supports_v2("server-option", 1)) {
+		int i;
+		for (i = 0; i < server_options->nr; i++)
+			packet_buf_write(req_buf, "server-option=%s",
+					 server_options->items[i].string);
+	}
+
+	if (server_feature_v2("object-format", &hash_name)) {
+		int hash_algo = hash_algo_by_name(hash_name);
+		if (hash_algo_by_ptr(the_hash_algo) != hash_algo)
+			die(_("mismatched algorithms: client %s; server %s"),
+			    the_hash_algo->name, hash_name);
+		packet_buf_write(req_buf, "object-format=%s", the_hash_algo->name);
+	} else if (hash_algo_by_ptr(the_hash_algo) != GIT_HASH_SHA1) {
+		die(_("the server does not support algorithm '%s'"),
+		    the_hash_algo->name);
+	}
+	packet_buf_delim(req_buf);
+}
+
+void send_object_info_request(int fd_out, struct object_info_args *args)
+{
+	struct strbuf req_buf = STRBUF_INIT;
+	int i;
+
+	write_object_info_command_and_capabilities(&req_buf, args->server_options);
+
+	if (string_list_has_string(args->server_options, "size"))
+		packet_buf_write(&req_buf, "size");
+
+	for (i = 0; i < args->oids->nr; i++) {
+		packet_buf_write(&req_buf, "oid %s\n", oid_to_hex(&args->oids->oid[i]));
+	}
+
+	packet_buf_flush(&req_buf);
+	if (write_in_full(fd_out, req_buf.buf, req_buf.len) < 0)
+		die_errno(_("unable to write request to remote"));
+
+	strbuf_release(&req_buf);
+}
+
 static int send_fetch_request(struct fetch_negotiator *negotiator, int fd_out,
 			      struct fetch_pack_args *args,
 			      const struct ref *wants, struct oidset *common,
diff --git a/fetch-pack.h b/fetch-pack.h
index 7f94a2a583..2ad5ec5c64 100644
--- a/fetch-pack.h
+++ b/fetch-pack.h
@@ -68,6 +68,11 @@ struct fetch_pack_args {
 	unsigned connectivity_checked:1;
 };
 
+struct object_info_args {
+	const struct string_list *server_options;
+	const struct oid_array *oids;
+};
+
 /*
  * sought represents remote references that should be updated from.
  * On return, the names that were found on the remote will have been
@@ -101,4 +106,6 @@ void negotiate_using_fetch(const struct oid_array *negotiation_tips,
  */
 int report_unmatched_refs(struct ref **sought, int nr_sought);
 
+void send_object_info_request(int fd_out, struct object_info_args *args);
+
 #endif
diff --git a/t/t5583-fetch-object-info.sh b/t/t5583-fetch-object-info.sh
new file mode 100755
index 0000000000..93b09da3b1
--- /dev/null
+++ b/t/t5583-fetch-object-info.sh
@@ -0,0 +1,95 @@
+#!/bin/sh
+
+test_description='test git fetch object-info version 2'
+
+TEST_NO_CREATE_REPO=1
+
+GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME=main
+export GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME
+
+. ./test-lib.sh
+
+initial_directory=$pwd
+
+# Test fetch object-info with 'git://' transport
+#
+. "$TEST_DIRECTORY"/lib-git-daemon.sh
+start_git_daemon --export-all --enable=receive-pack
+daemon_parent=$GIT_DAEMON_DOCUMENT_ROOT_PATH/parent
+
+
+test_expect_success 'create repo to be served by git-daemon' '
+	git init "$daemon_parent" &&
+	test_commit -C "$daemon_parent" message1 a.txt
+'
+
+test_expect_success 'fetch object-info with git:// using protocol v2' '
+	test_when_finished "cd '$initial_directory'" &&
+
+	cd "$daemon_parent" &&
+
+	cat >actual <<-EOF &&
+	$(GIT_TRACE_PACKET=1 git -c protocol.version=2 fetch --object-info-format=size "$GIT_DAEMON_URL/parent" $(git rev-parse message1:a.txt))
+	EOF
+
+	cat >expect <<-EOF &&
+	$(git rev-parse message1:a.txt) $(wc -c <a.txt | xargs)
+	EOF
+
+	test_cmp expect actual
+'
+stop_git_daemon
+
+# Test protocol v2 with 'http://' transport
+#
+. "$TEST_DIRECTORY"/lib-httpd.sh
+start_httpd
+
+test_expect_success 'create repo to be served by http:// transport' '
+	git init "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
+	git -C "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" config http.receivepack true &&
+	test_commit -C "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" message1 a.txt
+'
+
+test_expect_success 'fetch object-info with http:// using protocol v2' '
+	test_when_finished "cd '$initial_directory'" &&
+
+	cd "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
+
+	cat >actual <<-EOF &&
+	$(git -c protocol.version=2 fetch --object-info-format=size "$HTTPD_URL/smart/http_parent" $(git rev-parse message1:a.txt))
+	EOF
+
+	cat >expect <<-EOF &&
+	$(git rev-parse message1:a.txt) $(wc -c <a.txt | xargs)
+	EOF
+
+	test_cmp expect actual
+'
+
+# Test fetch object-info with 'file://' transport
+#
+
+test_expect_success 'create repo to be serbed by file:// transport' '
+	git init server &&
+	test_commit -C server message1 a.txt &&
+	git -C server config protocol.version 2
+'
+
+test_expect_success 'fetch object-info with file:// using protocol v2' '
+	test_when_finished "cd '$initial_directory'" &&
+
+	cd server &&
+
+	cat >actual <<-EOF &&
+	$(git fetch --object-info-format=size "file://$(pwd)" $(git rev-parse message1:a.txt))
+	EOF
+
+	cat >expect <<-EOF &&
+	$(git rev-parse message1:a.txt) $(wc -c <a.txt | xargs)
+	EOF
+
+	test_cmp expect actual
+'
+
+test_done
\ No newline at end of file
diff --git a/transport-helper.c b/transport-helper.c
index a0297b0986..9ecda03dde 100644
--- a/transport-helper.c
+++ b/transport-helper.c
@@ -671,6 +671,17 @@ static int connect_helper(struct transport *transport, const char *name,
 static struct ref *get_refs_list_using_list(struct transport *transport,
 					    int for_push);
 
+static int fetch_object_info(struct transport *transport, struct oid_array *oids)
+{
+	get_helper(transport);
+
+	if (process_connect(transport, 0)) {
+		do_take_over(transport);
+		return transport->vtable->fetch_object_info(transport, oids);
+	}
+	return -1;
+}
+
 static int fetch_refs(struct transport *transport,
 		      int nr_heads, struct ref **to_fetch)
 {
@@ -1269,6 +1280,7 @@ static struct transport_vtable vtable = {
 	.get_refs_list	= get_refs_list,
 	.fetch_refs	= fetch_refs,
 	.push_refs	= push_refs,
+	.fetch_object_info = fetch_object_info,
 	.connect	= connect_helper,
 	.disconnect	= release_helper
 };
diff --git a/transport-internal.h b/transport-internal.h
index c4ca0b733a..04fa015011 100644
--- a/transport-internal.h
+++ b/transport-internal.h
@@ -59,6 +59,7 @@ struct transport_vtable {
 	 * use. disconnect() releases these resources.
 	 **/
 	int (*disconnect)(struct transport *connection);
+	int (*fetch_object_info)(struct transport *transport, struct oid_array *oids);
 };
 
 #endif
diff --git a/transport.c b/transport.c
index 2a3e324154..0e1bf3c4cd 100644
--- a/transport.c
+++ b/transport.c
@@ -445,6 +445,62 @@ static int fetch_refs_via_pack(struct transport *transport,
 	return ret;
 }
 
+static int fetch_object_info(struct transport *transport, struct oid_array *oids)
+{
+	int ret = 0;
+	struct git_transport_data *data = transport->data;
+	struct object_info_args args;
+	struct packet_reader reader;
+
+	memset(&args, 0, sizeof(args));
+	args.server_options = transport->server_options;
+	args.oids = oids;
+
+	connect_setup(transport, 0);
+	packet_reader_init(&reader, data->fd[0], NULL, 0,
+			PACKET_READ_CHOMP_NEWLINE |
+			PACKET_READ_GENTLE_ON_EOF |
+			PACKET_READ_DIE_ON_ERR_PACKET);
+	data->version = discover_version(&reader);
+
+	if (data->version == protocol_unknown_version)
+		BUG("unknown protocol version");
+	else if (data->version <= protocol_v1)
+		die_if_server_options(transport);
+
+	switch (data->version) {
+	case protocol_v2:
+		send_object_info_request(data->fd[1], &args);
+		break;
+	case protocol_v1:
+	case protocol_v0:
+		die(_("wrong protocol version. expected v2"));
+	case protocol_unknown_version:
+		BUG("unknown protocol version");
+	}
+
+	if (packet_reader_read(&reader) != PACKET_READ_NORMAL) {
+		die(_("error reading object info header"));
+	}
+	if (strcmp(reader.line, "size")) {
+		die(_("expected 'size', received '%s'"), reader.line);
+	}
+	while (packet_reader_read(&reader) == PACKET_READ_NORMAL) {
+		printf("%s\n", reader.line);
+	}
+	
+	close(data->fd[0]);
+
+	if (data->fd[1] >= 0) {
+		close(data->fd[1]);
+	}
+	if (finish_connect(data->conn))
+		ret = -1;
+	data->conn = NULL;
+
+	return ret;
+}
+
 static int push_had_errors(struct ref *ref)
 {
 	for (; ref; ref = ref->next) {
@@ -890,6 +946,7 @@ static struct transport_vtable taken_over_vtable = {
 	.get_refs_list	= get_refs_via_connect,
 	.fetch_refs	= fetch_refs_via_pack,
 	.push_refs	= git_transport_push,
+	.fetch_object_info = fetch_object_info,
 	.disconnect	= disconnect_git
 };
 
@@ -1043,6 +1100,7 @@ static struct transport_vtable builtin_smart_vtable = {
 	.get_refs_list	= get_refs_via_connect,
 	.fetch_refs	= fetch_refs_via_pack,
 	.push_refs	= git_transport_push,
+	.fetch_object_info = fetch_object_info,
 	.connect	= connect_git,
 	.disconnect	= disconnect_git
 };
@@ -1420,6 +1478,11 @@ const struct ref *transport_get_remote_refs(struct transport *transport,
 	return transport->remote_refs;
 }
 
+int transport_fetch_object_info(struct transport *transport, struct oid_array *oids) {
+	transport->vtable->fetch_object_info(transport, oids);
+	return 1;
+}
+
 int transport_fetch_refs(struct transport *transport, struct ref *refs)
 {
 	int rc;
diff --git a/transport.h b/transport.h
index 3f16e50c19..1c807591de 100644
--- a/transport.h
+++ b/transport.h
@@ -278,6 +278,7 @@ const struct ref *transport_get_remote_refs(struct transport *transport,
  * This can only be called after fetching the remote refs.
  */
 const struct git_hash_algo *transport_get_hash_algo(struct transport *transport);
+int transport_fetch_object_info(struct transport *transport, struct oid_array *oids);
 int transport_fetch_refs(struct transport *transport, struct ref *refs);
 
 /*

base-commit: b23dac905bde28da47543484320db16312c87551
-- 
2.33.0.664.g0785eb7698


^ permalink raw reply related	[flat|nested] 103+ messages in thread

* [PATCH v2] fetch —object-info-format: client option for object-info
  2022-02-08 23:19 [PATCH] fetch —object-info-format: client option for object-info Calvin Wan
@ 2022-02-08 23:56 ` Calvin Wan
  2022-02-09 12:48   ` Philip Oakley
                     ` (2 more replies)
  2022-02-09 19:09 ` [PATCH] fetch —object-info-format: client option for object-info John Cai
  1 sibling, 3 replies; 103+ messages in thread
From: Calvin Wan @ 2022-02-08 23:56 UTC (permalink / raw)
  To: git; +Cc: Calvin Wan, Jonathan Tan

Add ‘—object-info-format’ option to fetch. This option allows
the client to make an object-info [1] command request to a server
that supports protocol v2.

The transport implementation uses vtables [2], similar to how Git
fetches refs, to determine whether a process needs to be taken over
before sending the object-info request. Different protocols
require different setups for making requests.

[1] https://lore.kernel.org/git/20210420233830.2181153-1-bga@google.com/
[2] https://lore.kernel.org/git/26f276956001a120cc9105b0071762c2fd4a45c5.15=
13287544.git.jonathantanmy@google.com/

Helped-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Calvin Wan <calvinwan@google.com>

---
Please ignore the patch above. It was sent with a stale patch message.
 builtin/fetch.c              | 26 ++++++++++++
 fetch-pack.c                 | 53 +++++++++++++++++++++++
 fetch-pack.h                 |  7 ++++
 t/t5583-fetch-object-info.sh | 81 ++++++++++++++++++++++++++++++++++++
 transport-helper.c           | 12 ++++++
 transport-internal.h         |  1 +
 transport.c                  | 59 ++++++++++++++++++++++++++
 transport.h                  |  1 +
 8 files changed, 240 insertions(+)
 create mode 100755 t/t5583-fetch-object-info.sh

diff --git a/builtin/fetch.c b/builtin/fetch.c
index 5f06b21f8e..b48d9e93d0 100644
--- a/builtin/fetch.c
+++ b/builtin/fetch.c
@@ -29,6 +29,9 @@
 #include "commit-graph.h"
 #include "shallow.h"
 #include "worktree.h"
+#include "protocol.h"
+#include "pkt-line.h"
+#include "connect.h"
 
 #define FORCED_UPDATES_DELAY_WARNING_IN_MS (10 * 1000)
 
@@ -37,6 +40,7 @@ static const char * const builtin_fetch_usage[] = {
 	N_("git fetch [<options>] <group>"),
 	N_("git fetch --multiple [<options>] [(<repository> | <group>)...]"),
 	N_("git fetch --all [<options>]"),
+	N_("git fetch --object-info-format=[<arguments>] <remote> [<object-ids>]"),
 	NULL
 };
 
@@ -85,6 +89,7 @@ static struct string_list negotiation_tip = STRING_LIST_INIT_NODUP;
 static int fetch_write_commit_graph = -1;
 static int stdin_refspecs = 0;
 static int negotiate_only;
+static struct string_list object_info_format = STRING_LIST_INIT_NODUP;
 
 static int git_fetch_config(const char *k, const char *v, void *cb)
 {
@@ -220,6 +225,8 @@ static struct option builtin_fetch_options[] = {
 		 N_("write the commit-graph after fetching")),
 	OPT_BOOL(0, "stdin", &stdin_refspecs,
 		 N_("accept refspecs from stdin")),
+	OPT_STRING_LIST(0, "object-info-format", &object_info_format, N_("option"),
+		 N_("command request arguments")),
 	OPT_END()
 };
 
@@ -2000,6 +2007,8 @@ int cmd_fetch(int argc, const char **argv, const char *prefix)
 	struct remote *remote = NULL;
 	int result = 0;
 	int prune_tags_ok = 1;
+	struct oid_array oids = OID_ARRAY_INIT;
+	struct object_id oid;
 
 	packet_trace_identity("fetch");
 
@@ -2057,6 +2066,23 @@ int cmd_fetch(int argc, const char **argv, const char *prefix)
 	if (dry_run)
 		write_fetch_head = 0;
 
+	if (object_info_format.nr > 0) {
+		if (argc == 0 || argc == 1) {
+			die(_("must supply remote and object ids when using --object-info-format"));
+		} else {
+			remote = remote_get(argv[0]);
+			for (i = 1; i < argc; i++) {
+				if (get_oid(argv[i], &oid))
+					return error(_("malformed object name '%s'"), argv[i]);
+				oid_array_append(&oids, &oid);
+			}
+		}
+		gtransport = prepare_transport(remote, 0);
+		gtransport->server_options = &object_info_format;
+		result = transport_fetch_object_info(gtransport, &oids);
+		return result;
+	}
+
 	if (all) {
 		if (argc == 1)
 			die(_("fetch --all does not take a repository argument"));
diff --git a/fetch-pack.c b/fetch-pack.c
index dd6ec449f2..d1c5254aa8 100644
--- a/fetch-pack.c
+++ b/fetch-pack.c
@@ -1265,6 +1265,59 @@ static void write_fetch_command_and_capabilities(struct strbuf *req_buf,
 	packet_buf_delim(req_buf);
 }
 
+static void write_object_info_command_and_capabilities(struct strbuf *req_buf,
+						 const struct string_list *server_options)
+{
+	const char *hash_name;
+
+	if (server_supports_v2("object-info", 1))
+		packet_buf_write(req_buf, "command=object-info");
+	if (server_supports_v2("agent", 0))
+		packet_buf_write(req_buf, "agent=%s", git_user_agent_sanitized());
+	if (advertise_sid && server_supports_v2("session-id", 0))
+		packet_buf_write(req_buf, "session-id=%s", trace2_session_id());
+	if (server_options && server_options->nr &&
+	    server_supports_v2("server-option", 1)) {
+		int i;
+		for (i = 0; i < server_options->nr; i++)
+			packet_buf_write(req_buf, "server-option=%s",
+					 server_options->items[i].string);
+	}
+
+	if (server_feature_v2("object-format", &hash_name)) {
+		int hash_algo = hash_algo_by_name(hash_name);
+		if (hash_algo_by_ptr(the_hash_algo) != hash_algo)
+			die(_("mismatched algorithms: client %s; server %s"),
+			    the_hash_algo->name, hash_name);
+		packet_buf_write(req_buf, "object-format=%s", the_hash_algo->name);
+	} else if (hash_algo_by_ptr(the_hash_algo) != GIT_HASH_SHA1) {
+		die(_("the server does not support algorithm '%s'"),
+		    the_hash_algo->name);
+	}
+	packet_buf_delim(req_buf);
+}
+
+void send_object_info_request(int fd_out, struct object_info_args *args)
+{
+	struct strbuf req_buf = STRBUF_INIT;
+	int i;
+
+	write_object_info_command_and_capabilities(&req_buf, args->server_options);
+
+	if (string_list_has_string(args->server_options, "size"))
+		packet_buf_write(&req_buf, "size");
+
+	for (i = 0; i < args->oids->nr; i++) {
+		packet_buf_write(&req_buf, "oid %s\n", oid_to_hex(&args->oids->oid[i]));
+	}
+
+	packet_buf_flush(&req_buf);
+	if (write_in_full(fd_out, req_buf.buf, req_buf.len) < 0)
+		die_errno(_("unable to write request to remote"));
+
+	strbuf_release(&req_buf);
+}
+
 static int send_fetch_request(struct fetch_negotiator *negotiator, int fd_out,
 			      struct fetch_pack_args *args,
 			      const struct ref *wants, struct oidset *common,
diff --git a/fetch-pack.h b/fetch-pack.h
index 7f94a2a583..2ad5ec5c64 100644
--- a/fetch-pack.h
+++ b/fetch-pack.h
@@ -68,6 +68,11 @@ struct fetch_pack_args {
 	unsigned connectivity_checked:1;
 };
 
+struct object_info_args {
+	const struct string_list *server_options;
+	const struct oid_array *oids;
+};
+
 /*
  * sought represents remote references that should be updated from.
  * On return, the names that were found on the remote will have been
@@ -101,4 +106,6 @@ void negotiate_using_fetch(const struct oid_array *negotiation_tips,
  */
 int report_unmatched_refs(struct ref **sought, int nr_sought);
 
+void send_object_info_request(int fd_out, struct object_info_args *args);
+
 #endif
diff --git a/t/t5583-fetch-object-info.sh b/t/t5583-fetch-object-info.sh
new file mode 100755
index 0000000000..942426b3ca
--- /dev/null
+++ b/t/t5583-fetch-object-info.sh
@@ -0,0 +1,81 @@
+#!/bin/sh
+
+test_description='test git fetch object-info version 2'
+
+TEST_NO_CREATE_REPO=1
+
+GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME=main
+export GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME
+
+. ./test-lib.sh
+
+# Test fetch object-info with 'git://' transport
+
+. "$TEST_DIRECTORY"/lib-git-daemon.sh
+start_git_daemon --export-all --enable=receive-pack
+daemon_parent=$GIT_DAEMON_DOCUMENT_ROOT_PATH/parent
+
+
+test_expect_success 'create repo to be served by git-daemon' '
+	git init "$daemon_parent" &&
+	test_commit -C "$daemon_parent" message1 a.txt
+'
+
+test_expect_success 'fetch object-info with git:// using protocol v2' '
+	(
+		cd "$daemon_parent" &&
+		object_id=$(git rev-parse message1:a.txt) &&
+		length=$(wc -c <a.txt) &&
+
+		printf "%s %d\n" "$object_id" "$length" >expect &&
+		git -c protocol.version=2 fetch --object-info-format=size "$GIT_DAEMON_URL/parent" "$object_id" >actual &&
+		test_cmp expect actual
+	)
+'
+stop_git_daemon
+
+# Test protocol v2 with 'http://' transport
+#
+. "$TEST_DIRECTORY"/lib-httpd.sh
+start_httpd
+  
+test_expect_success 'create repo to be served by http:// transport' '
+	git init "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
+	git -C "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" config http.receivepack true &&
+	test_commit -C "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" message1 a.txt
+'
+
+test_expect_success 'fetch object-info with http:// using protocol v2' '
+	(
+		cd "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
+		object_id=$(git rev-parse message1:a.txt) &&
+		length=$(wc -c <a.txt) &&
+
+		printf "%s %d\n" "$object_id" "$length" >expect &&
+		git -c protocol.version=2 fetch --object-info-format=size "$HTTPD_URL/smart/http_parent" "$object_id" >actual &&
+		test_cmp expect actual
+	)
+'
+
+# Test fetch object-info with 'file://' transport
+#
+
+test_expect_success 'create repo to be served by file:// transport' '
+	git init server &&
+	test_commit -C server message1 a.txt &&
+	git -C server config protocol.version 2
+'
+
+test_expect_success 'fetch object-info with file:// using protocol v2' '
+	(
+		cd server &&
+		object_id=$(git rev-parse message1:a.txt) &&
+		length=$(wc -c <a.txt) &&
+
+		printf "%s %d\n" "$object_id" "$length" >expect &&
+		git fetch --object-info-format=size "file://$(pwd)" "$object_id" >actual &&
+		test_cmp expect actual
+	)
+'
+
+test_done
diff --git a/transport-helper.c b/transport-helper.c
index a0297b0986..9ecda03dde 100644
--- a/transport-helper.c
+++ b/transport-helper.c
@@ -671,6 +671,17 @@ static int connect_helper(struct transport *transport, const char *name,
 static struct ref *get_refs_list_using_list(struct transport *transport,
 					    int for_push);
 
+static int fetch_object_info(struct transport *transport, struct oid_array *oids)
+{
+	get_helper(transport);
+
+	if (process_connect(transport, 0)) {
+		do_take_over(transport);
+		return transport->vtable->fetch_object_info(transport, oids);
+	}
+	return -1;
+}
+
 static int fetch_refs(struct transport *transport,
 		      int nr_heads, struct ref **to_fetch)
 {
@@ -1269,6 +1280,7 @@ static struct transport_vtable vtable = {
 	.get_refs_list	= get_refs_list,
 	.fetch_refs	= fetch_refs,
 	.push_refs	= push_refs,
+	.fetch_object_info = fetch_object_info,
 	.connect	= connect_helper,
 	.disconnect	= release_helper
 };
diff --git a/transport-internal.h b/transport-internal.h
index c4ca0b733a..04fa015011 100644
--- a/transport-internal.h
+++ b/transport-internal.h
@@ -59,6 +59,7 @@ struct transport_vtable {
 	 * use. disconnect() releases these resources.
 	 **/
 	int (*disconnect)(struct transport *connection);
+	int (*fetch_object_info)(struct transport *transport, struct oid_array *oids);
 };
 
 #endif
diff --git a/transport.c b/transport.c
index 2a3e324154..87cd14c99f 100644
--- a/transport.c
+++ b/transport.c
@@ -445,6 +445,59 @@ static int fetch_refs_via_pack(struct transport *transport,
 	return ret;
 }
 
+static int fetch_object_info(struct transport *transport, struct oid_array *oids)
+{
+	int ret = 0;
+	struct git_transport_data *data = transport->data;
+	struct object_info_args args;
+	struct packet_reader reader;
+
+	memset(&args, 0, sizeof(args));
+	args.server_options = transport->server_options;
+	args.oids = oids;
+
+	connect_setup(transport, 0);
+	packet_reader_init(&reader, data->fd[0], NULL, 0,
+			PACKET_READ_CHOMP_NEWLINE |
+			PACKET_READ_DIE_ON_ERR_PACKET);
+	data->version = discover_version(&reader);
+
+	if (data->version == protocol_unknown_version)
+		BUG("unknown protocol version");
+	else if (data->version <= protocol_v1)
+		die_if_server_options(transport);
+
+	switch (data->version) {
+	case protocol_v2:
+		send_object_info_request(data->fd[1], &args);
+		break;
+	case protocol_v1:
+	case protocol_v0:
+		die(_("wrong protocol version. expected v2"));
+	case protocol_unknown_version:
+		BUG("unknown protocol version");
+	}
+
+	if (packet_reader_read(&reader) != PACKET_READ_NORMAL) {
+		die(_("error reading object info header"));
+	}
+	if (strcmp(reader.line, "size")) {
+		die(_("expected 'size', received '%s'"), reader.line);
+	}
+	while (packet_reader_read(&reader) == PACKET_READ_NORMAL) {
+		printf("%s\n", reader.line);
+	}
+	check_stateless_delimiter(transport->stateless_rpc, &reader, "stateless delimiter expected");
+	close(data->fd[0]);
+	if (data->fd[1] >= 0)
+		close(data->fd[1]);
+	if (finish_connect(data->conn))
+		ret = -1;
+	data->conn = NULL;
+
+	return ret;
+}
+
 static int push_had_errors(struct ref *ref)
 {
 	for (; ref; ref = ref->next) {
@@ -890,6 +943,7 @@ static struct transport_vtable taken_over_vtable = {
 	.get_refs_list	= get_refs_via_connect,
 	.fetch_refs	= fetch_refs_via_pack,
 	.push_refs	= git_transport_push,
+	.fetch_object_info = fetch_object_info,
 	.disconnect	= disconnect_git
 };
 
@@ -1043,6 +1097,7 @@ static struct transport_vtable builtin_smart_vtable = {
 	.get_refs_list	= get_refs_via_connect,
 	.fetch_refs	= fetch_refs_via_pack,
 	.push_refs	= git_transport_push,
+	.fetch_object_info = fetch_object_info,
 	.connect	= connect_git,
 	.disconnect	= disconnect_git
 };
@@ -1420,6 +1475,10 @@ const struct ref *transport_get_remote_refs(struct transport *transport,
 	return transport->remote_refs;
 }
 
+int transport_fetch_object_info(struct transport *transport, struct oid_array *oids) {
+	return transport->vtable->fetch_object_info(transport, oids);
+}
+
 int transport_fetch_refs(struct transport *transport, struct ref *refs)
 {
 	int rc;
diff --git a/transport.h b/transport.h
index 3f16e50c19..1c807591de 100644
--- a/transport.h
+++ b/transport.h
@@ -278,6 +278,7 @@ const struct ref *transport_get_remote_refs(struct transport *transport,
  * This can only be called after fetching the remote refs.
  */
 const struct git_hash_algo *transport_get_hash_algo(struct transport *transport);
+int transport_fetch_object_info(struct transport *transport, struct oid_array *oids);
 int transport_fetch_refs(struct transport *transport, struct ref *refs);
 
 /*

base-commit: b23dac905bde28da47543484320db16312c87551
-- 
2.33.0.664.g0785eb7698


^ permalink raw reply related	[flat|nested] 103+ messages in thread

* Re: [PATCH v2] fetch —object-info-format: client option for object-info
  2022-02-08 23:56 ` [PATCH v2] " Calvin Wan
@ 2022-02-09 12:48   ` Philip Oakley
  2022-02-10 22:32     ` Calvin Wan
  2022-02-09 20:41   ` [PATCH v2] fetch object-info-format: " Jonathan Tan
  2022-03-28 19:11   ` [PATCH v3 0/3] object-info: add option for retrieving object info Calvin Wan
  2 siblings, 1 reply; 103+ messages in thread
From: Philip Oakley @ 2022-02-09 12:48 UTC (permalink / raw)
  To: Calvin Wan, git; +Cc: Jonathan Tan

On 08/02/2022 23:56, Calvin Wan wrote:
> Add ‘—object-info-format’ option to fetch. This option allows
> the client to make an object-info [1] command request to a server
> that supports protocol v2.
>
> The transport implementation uses vtables [2], similar to how Git
> fetches refs, to determine whether a process needs to be taken over
> before sending the object-info request. Different protocols
> require different setups for making requests.
>
> [1] https://lore.kernel.org/git/20210420233830.2181153-1-bga@google.com/
> [2] https://lore.kernel.org/git/26f276956001a120cc9105b0071762c2fd4a45c5.15=
> 13287544.git.jonathantanmy@google.com/
>
> Helped-by: Jonathan Tan <jonathantanmy@google.com>
> Signed-off-by: Calvin Wan <calvinwan@google.com>
>
> ---
> Please ignore the patch above. It was sent with a stale patch message.
>  builtin/fetch.c              | 26 ++++++++++++
>  fetch-pack.c                 | 53 +++++++++++++++++++++++
>  fetch-pack.h                 |  7 ++++
>  t/t5583-fetch-object-info.sh | 81 ++++++++++++++++++++++++++++++++++++
>  transport-helper.c           | 12 ++++++
>  transport-internal.h         |  1 +
>  transport.c                  | 59 ++++++++++++++++++++++++++
>  transport.h                  |  1 +
>  8 files changed, 240 insertions(+)
>  create mode 100755 t/t5583-fetch-object-info.sh
>
> diff --git a/builtin/fetch.c b/builtin/fetch.c
> index 5f06b21f8e..b48d9e93d0 100644
> --- a/builtin/fetch.c
> +++ b/builtin/fetch.c
> @@ -29,6 +29,9 @@
>  #include "commit-graph.h"
>  #include "shallow.h"
>  #include "worktree.h"
> +#include "protocol.h"
> +#include "pkt-line.h"
> +#include "connect.h"
>  
>  #define FORCED_UPDATES_DELAY_WARNING_IN_MS (10 * 1000)
>  
> @@ -37,6 +40,7 @@ static const char * const builtin_fetch_usage[] = {
>  	N_("git fetch [<options>] <group>"),
>  	N_("git fetch --multiple [<options>] [(<repository> | <group>)...]"),
>  	N_("git fetch --all [<options>]"),
> +	N_("git fetch --object-info-format=[<arguments>] <remote> [<object-ids>]"),
>  	NULL
>  };

Doesn't this also need a matching Documentarian update for the option?
--
Philip

^ permalink raw reply	[flat|nested] 103+ messages in thread

* Re: [PATCH] fetch —object-info-format: client option for object-info
  2022-02-08 23:19 [PATCH] fetch —object-info-format: client option for object-info Calvin Wan
  2022-02-08 23:56 ` [PATCH v2] " Calvin Wan
@ 2022-02-09 19:09 ` John Cai
  2022-02-10 22:49   ` Calvin Wan
  1 sibling, 1 reply; 103+ messages in thread
From: John Cai @ 2022-02-09 19:09 UTC (permalink / raw)
  To: Calvin Wan; +Cc: git, Jonathan Tan

Hi Calvin,

On 8 Feb 2022, at 18:19, Calvin Wan wrote:

> Add ‘—object-info-format’ option to fetch. This option allows
> the client to make an object-info [1] command request to a server
> that supports protocol v2.
>
> The transport implementation uses vtables [2], similar to how Git
> fetches refs, to determine whether a process needs to be taken over
> before sending the object-info request. Different protocols
> require different setups for making requests.
>
> [1] https://lore.kernel.org/git/20210420233830.2181153-1-bga@google.com/
> [2] https://lore.kernel.org/git/26f276956001a120cc9105b0071762c2fd4a45c5.1513287544.git.jonathantanmy@google.com/
>
> Helped-by: Jonathan Tan <jonathantanmy@google.com>
> Signed-off-by: Calvin Wan <calvinwan@google.com>
>
> ---
>  builtin/fetch.c              | 80 ++++++++++++++++++++----------
>  fetch-pack.c                 | 53 ++++++++++++++++++++
>  fetch-pack.h                 |  7 +++
>  t/t5583-fetch-object-info.sh | 95 ++++++++++++++++++++++++++++++++++++
>  transport-helper.c           | 12 +++++
>  transport-internal.h         |  1 +
>  transport.c                  | 63 ++++++++++++++++++++++++
>  transport.h                  |  1 +
>  8 files changed, 286 insertions(+), 26 deletions(-)
>  create mode 100755 t/t5583-fetch-object-info.sh
>
> diff --git a/builtin/fetch.c b/builtin/fetch.c
> index 5f06b21f8e..991063072b 100644
> --- a/builtin/fetch.c
> +++ b/builtin/fetch.c
> @@ -29,6 +29,9 @@
>  #include "commit-graph.h"
>  #include "shallow.h"
>  #include "worktree.h"
> +#include "protocol.h"
> +#include "pkt-line.h"
> +#include "connect.h"
>
>  #define FORCED_UPDATES_DELAY_WARNING_IN_MS (10 * 1000)
>
> @@ -37,6 +40,7 @@ static const char * const builtin_fetch_usage[] = {
>  	N_("git fetch [<options>] <group>"),
>  	N_("git fetch --multiple [<options>] [(<repository> | <group>)...]"),
>  	N_("git fetch --all [<options>]"),
> +	N_("git fetch --object-info-format=[<arguments>] <remote> [<object-ids>]"),

--object-info-format seems to be more like a command, rather than a formatting
option. When a flag ends with "format", it feels like it is specifying how to
transform the output kind of like git for-each-ref --format. So --object-info-format looks
feels like it's meant to transform the object-info output. Maybe just --object-info
and then provide a different flag for the <arguments> perhaps?

Would be interested in hearing what others think from a UI perspective.

>  	NULL
>  };
>
> @@ -85,6 +89,7 @@ static struct string_list negotiation_tip = STRING_LIST_INIT_NODUP;
>  static int fetch_write_commit_graph = -1;
>  static int stdin_refspecs = 0;
>  static int negotiate_only;
> +static struct string_list object_info_format = STRING_LIST_INIT_NODUP;

should object_info_format be cleared at some point?

>
>  static int git_fetch_config(const char *k, const char *v, void *cb)
>  {
> @@ -220,6 +225,8 @@ static struct option builtin_fetch_options[] = {
>  		 N_("write the commit-graph after fetching")),
>  	OPT_BOOL(0, "stdin", &stdin_refspecs,
>  		 N_("accept refspecs from stdin")),
> +	OPT_STRING_LIST(0, "object-info-format", &object_info_format, N_("option"),
> +		 N_("command request arguments")),
>  	OPT_END()
>  };
>
> @@ -2000,6 +2007,8 @@ int cmd_fetch(int argc, const char **argv, const char *prefix)
>  	struct remote *remote = NULL;
>  	int result = 0;
>  	int prune_tags_ok = 1;
> +	struct oid_array oids = OID_ARRAY_INIT;
> +	struct object_id oid;
>
>  	packet_trace_identity("fetch");
>
> @@ -2019,6 +2028,7 @@ int cmd_fetch(int argc, const char **argv, const char *prefix)
>
>  	argc = parse_options(argc, argv, prefix,
>  			     builtin_fetch_options, builtin_fetch_usage, 0);
> +
>  	if (recurse_submodules != RECURSE_SUBMODULES_OFF) {
>  		int *sfjc = submodule_fetch_jobs_config == -1
>  			    ? &submodule_fetch_jobs_config : NULL;
> @@ -2057,34 +2067,52 @@ int cmd_fetch(int argc, const char **argv, const char *prefix)
>  	if (dry_run)
>  		write_fetch_head = 0;
>
> -	if (all) {
> -		if (argc == 1)
> -			die(_("fetch --all does not take a repository argument"));
> -		else if (argc > 1)
> -			die(_("fetch --all does not make sense with refspecs"));
> -		(void) for_each_remote(get_one_remote_for_fetch, &list);
> -	} else if (argc == 0) {
> -		/* No arguments -- use default remote */
> -		remote = remote_get(NULL);
> -	} else if (multiple) {
> -		/* All arguments are assumed to be remotes or groups */
> -		for (i = 0; i < argc; i++)
> -			if (!add_remote_or_group(argv[i], &list))
> -				die(_("no such remote or remote group: %s"),
> -				    argv[i]);
> -	} else {
> -		/* Single remote or group */
> -		(void) add_remote_or_group(argv[0], &list);
> -		if (list.nr > 1) {
> -			/* More than one remote */
> -			if (argc > 1)
> -				die(_("fetching a group and specifying refspecs does not make sense"));
> +	if (object_info_format.nr > 0) {
> +		if (argc == 0 || argc == 1) {
> +			die(_("must supply remote and object ids when using --object-info-format"));
>  		} else {
> -			/* Zero or one remotes */
>  			remote = remote_get(argv[0]);
> -			prune_tags_ok = (argc == 1);
> -			argc--;
> -			argv++;
> +			for (i = 1; i < argc; i++) {
> +				if (get_oid(argv[i], &oid))
> +					return error(_("malformed object name '%s'"), argv[i]);
> +				oid_array_append(&oids, &oid);
> +			}
> +		}
> +		gtransport = prepare_transport(remote, 0);
> +		gtransport->server_options = &object_info_format;
> +		result = transport_fetch_object_info(gtransport, &oids);
> +
> +		return result;	
> +	} else {
> +		if (all) {
> +			if (argc == 1)
> +				die(_("fetch --all does not take a repository argument"));
> +			else if (argc > 1)
> +				die(_("fetch --all does not make sense with refspecs"));
> +			(void) for_each_remote(get_one_remote_for_fetch, &list);
> +		} else if (argc == 0) {
> +			/* No arguments -- use default remote */
> +			remote = remote_get(NULL);
> +		} else if (multiple) {
> +			/* All arguments are assumed to be remotes or groups */
> +			for (i = 0; i < argc; i++)
> +				if (!add_remote_or_group(argv[i], &list))
> +					die(_("no such remote or remote group: %s"),
> +					    argv[i]);
> +		} else {
> +			/* Single remote or group */
> +			(void) add_remote_or_group(argv[0], &list);
> +			if (list.nr > 1) {
> +				/* More than one remote */
> +				if (argc > 1)
> +					die(_("fetching a group and specifying refspecs does not make sense"));
> +			} else {
> +				/* Zero or one remotes */
> +				remote = remote_get(argv[0]);
> +				prune_tags_ok = (argc == 1);
> +				argc--;
> +				argv++;
> +			}
>  		}
>  	}
>
> diff --git a/fetch-pack.c b/fetch-pack.c
> index dd6ec449f2..d1c5254aa8 100644
> --- a/fetch-pack.c
> +++ b/fetch-pack.c
> @@ -1265,6 +1265,59 @@ static void write_fetch_command_and_capabilities(struct strbuf *req_buf,
>  	packet_buf_delim(req_buf);
>  }
>
> +static void write_object_info_command_and_capabilities(struct strbuf *req_buf,
> +						 const struct string_list *server_options)
> +{
> +	const char *hash_name;
> +
> +	if (server_supports_v2("object-info", 1))
> +		packet_buf_write(req_buf, "command=object-info");
> +	if (server_supports_v2("agent", 0))
> +		packet_buf_write(req_buf, "agent=%s", git_user_agent_sanitized());
> +	if (advertise_sid && server_supports_v2("session-id", 0))
> +		packet_buf_write(req_buf, "session-id=%s", trace2_session_id());
> +	if (server_options && server_options->nr &&
> +	    server_supports_v2("server-option", 1)) {
> +		int i;
> +		for (i = 0; i < server_options->nr; i++)
> +			packet_buf_write(req_buf, "server-option=%s",
> +					 server_options->items[i].string);
> +	}
> +
> +	if (server_feature_v2("object-format", &hash_name)) {
> +		int hash_algo = hash_algo_by_name(hash_name);
> +		if (hash_algo_by_ptr(the_hash_algo) != hash_algo)
> +			die(_("mismatched algorithms: client %s; server %s"),
> +			    the_hash_algo->name, hash_name);
> +		packet_buf_write(req_buf, "object-format=%s", the_hash_algo->name);
> +	} else if (hash_algo_by_ptr(the_hash_algo) != GIT_HASH_SHA1) {
> +		die(_("the server does not support algorithm '%s'"),
> +		    the_hash_algo->name);
> +	}
> +	packet_buf_delim(req_buf);
> +}
> +
> +void send_object_info_request(int fd_out, struct object_info_args *args)
> +{
> +	struct strbuf req_buf = STRBUF_INIT;
> +	int i;
> +
> +	write_object_info_command_and_capabilities(&req_buf, args->server_options);
> +
> +	if (string_list_has_string(args->server_options, "size"))
> +		packet_buf_write(&req_buf, "size");
> +
> +	for (i = 0; i < args->oids->nr; i++) {
> +		packet_buf_write(&req_buf, "oid %s\n", oid_to_hex(&args->oids->oid[i]));
> +	}
> +
> +	packet_buf_flush(&req_buf);
> +	if (write_in_full(fd_out, req_buf.buf, req_buf.len) < 0)
> +		die_errno(_("unable to write request to remote"));
> +
> +	strbuf_release(&req_buf);
> +}
> +
>  static int send_fetch_request(struct fetch_negotiator *negotiator, int fd_out,
>  			      struct fetch_pack_args *args,
>  			      const struct ref *wants, struct oidset *common,
> diff --git a/fetch-pack.h b/fetch-pack.h
> index 7f94a2a583..2ad5ec5c64 100644
> --- a/fetch-pack.h
> +++ b/fetch-pack.h
> @@ -68,6 +68,11 @@ struct fetch_pack_args {
>  	unsigned connectivity_checked:1;
>  };
>
> +struct object_info_args {
> +	const struct string_list *server_options;
> +	const struct oid_array *oids;
> +};
> +
>  /*
>   * sought represents remote references that should be updated from.
>   * On return, the names that were found on the remote will have been
> @@ -101,4 +106,6 @@ void negotiate_using_fetch(const struct oid_array *negotiation_tips,
>   */
>  int report_unmatched_refs(struct ref **sought, int nr_sought);
>
> +void send_object_info_request(int fd_out, struct object_info_args *args);
> +
>  #endif
> diff --git a/t/t5583-fetch-object-info.sh b/t/t5583-fetch-object-info.sh
> new file mode 100755
> index 0000000000..93b09da3b1
> --- /dev/null
> +++ b/t/t5583-fetch-object-info.sh
> @@ -0,0 +1,95 @@
> +#!/bin/sh
> +
> +test_description='test git fetch object-info version 2'
> +
> +TEST_NO_CREATE_REPO=1
> +
> +GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME=main
> +export GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME
> +
> +. ./test-lib.sh
> +
> +initial_directory=$pwd
> +
> +# Test fetch object-info with 'git://' transport
> +#
> +. "$TEST_DIRECTORY"/lib-git-daemon.sh
> +start_git_daemon --export-all --enable=receive-pack
> +daemon_parent=$GIT_DAEMON_DOCUMENT_ROOT_PATH/parent
> +
> +
> +test_expect_success 'create repo to be served by git-daemon' '
> +	git init "$daemon_parent" &&
> +	test_commit -C "$daemon_parent" message1 a.txt
> +'
> +
> +test_expect_success 'fetch object-info with git:// using protocol v2' '
> +	test_when_finished "cd '$initial_directory'" &&
> +
> +	cd "$daemon_parent" &&
> +
> +	cat >actual <<-EOF &&
> +	$(GIT_TRACE_PACKET=1 git -c protocol.version=2 fetch --object-info-format=size "$GIT_DAEMON_URL/parent" $(git rev-parse message1:a.txt))
> +	EOF
> +
> +	cat >expect <<-EOF &&
> +	$(git rev-parse message1:a.txt) $(wc -c <a.txt | xargs)
> +	EOF
> +
> +	test_cmp expect actual
> +'
> +stop_git_daemon
> +
> +# Test protocol v2 with 'http://' transport
> +#
> +. "$TEST_DIRECTORY"/lib-httpd.sh
> +start_httpd
> +
> +test_expect_success 'create repo to be served by http:// transport' '
> +	git init "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
> +	git -C "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" config http.receivepack true &&
> +	test_commit -C "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" message1 a.txt
> +'
> +
> +test_expect_success 'fetch object-info with http:// using protocol v2' '
> +	test_when_finished "cd '$initial_directory'" &&
> +
> +	cd "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
> +
> +	cat >actual <<-EOF &&
> +	$(git -c protocol.version=2 fetch --object-info-format=size "$HTTPD_URL/smart/http_parent" $(git rev-parse message1:a.txt))
> +	EOF
> +
> +	cat >expect <<-EOF &&
> +	$(git rev-parse message1:a.txt) $(wc -c <a.txt | xargs)
> +	EOF
> +
> +	test_cmp expect actual
> +'
> +
> +# Test fetch object-info with 'file://' transport
> +#
> +
> +test_expect_success 'create repo to be serbed by file:// transport' '
> +	git init server &&
> +	test_commit -C server message1 a.txt &&
> +	git -C server config protocol.version 2
> +'
> +
> +test_expect_success 'fetch object-info with file:// using protocol v2' '
> +	test_when_finished "cd '$initial_directory'" &&
> +
> +	cd server &&
> +
> +	cat >actual <<-EOF &&
> +	$(git fetch --object-info-format=size "file://$(pwd)" $(git rev-parse message1:a.txt))
> +	EOF
> +
> +	cat >expect <<-EOF &&
> +	$(git rev-parse message1:a.txt) $(wc -c <a.txt | xargs)
> +	EOF
> +
> +	test_cmp expect actual
> +'
> +
> +test_done
> \ No newline at end of file
> diff --git a/transport-helper.c b/transport-helper.c
> index a0297b0986..9ecda03dde 100644
> --- a/transport-helper.c
> +++ b/transport-helper.c
> @@ -671,6 +671,17 @@ static int connect_helper(struct transport *transport, const char *name,
>  static struct ref *get_refs_list_using_list(struct transport *transport,
>  					    int for_push);
>
> +static int fetch_object_info(struct transport *transport, struct oid_array *oids)
> +{
> +	get_helper(transport);
> +
> +	if (process_connect(transport, 0)) {
> +		do_take_over(transport);
> +		return transport->vtable->fetch_object_info(transport, oids);
> +	}
> +	return -1;
> +}
> +
>  static int fetch_refs(struct transport *transport,
>  		      int nr_heads, struct ref **to_fetch)
>  {
> @@ -1269,6 +1280,7 @@ static struct transport_vtable vtable = {
>  	.get_refs_list	= get_refs_list,
>  	.fetch_refs	= fetch_refs,
>  	.push_refs	= push_refs,
> +	.fetch_object_info = fetch_object_info,
>  	.connect	= connect_helper,
>  	.disconnect	= release_helper
>  };
> diff --git a/transport-internal.h b/transport-internal.h
> index c4ca0b733a..04fa015011 100644
> --- a/transport-internal.h
> +++ b/transport-internal.h
> @@ -59,6 +59,7 @@ struct transport_vtable {
>  	 * use. disconnect() releases these resources.
>  	 **/
>  	int (*disconnect)(struct transport *connection);
> +	int (*fetch_object_info)(struct transport *transport, struct oid_array *oids);
>  };
>
>  #endif
> diff --git a/transport.c b/transport.c
> index 2a3e324154..0e1bf3c4cd 100644
> --- a/transport.c
> +++ b/transport.c
> @@ -445,6 +445,62 @@ static int fetch_refs_via_pack(struct transport *transport,
>  	return ret;
>  }
>
> +static int fetch_object_info(struct transport *transport, struct oid_array *oids)
> +{
> +	int ret = 0;
> +	struct git_transport_data *data = transport->data;
> +	struct object_info_args args;
> +	struct packet_reader reader;
> +
> +	memset(&args, 0, sizeof(args));
> +	args.server_options = transport->server_options;
> +	args.oids = oids;
> +
> +	connect_setup(transport, 0);
> +	packet_reader_init(&reader, data->fd[0], NULL, 0,
> +			PACKET_READ_CHOMP_NEWLINE |
> +			PACKET_READ_GENTLE_ON_EOF |
> +			PACKET_READ_DIE_ON_ERR_PACKET);
> +	data->version = discover_version(&reader);
> +
> +	if (data->version == protocol_unknown_version)
> +		BUG("unknown protocol version");
> +	else if (data->version <= protocol_v1)
> +		die_if_server_options(transport);
> +
> +	switch (data->version) {
> +	case protocol_v2:
> +		send_object_info_request(data->fd[1], &args);
> +		break;
> +	case protocol_v1:
> +	case protocol_v0:
> +		die(_("wrong protocol version. expected v2"));
> +	case protocol_unknown_version:
> +		BUG("unknown protocol version");
> +	}
> +
> +	if (packet_reader_read(&reader) != PACKET_READ_NORMAL) {
> +		die(_("error reading object info header"));
> +	}
> +	if (strcmp(reader.line, "size")) {
> +		die(_("expected 'size', received '%s'"), reader.line);
> +	}
> +	while (packet_reader_read(&reader) == PACKET_READ_NORMAL) {
> +		printf("%s\n", reader.line);
> +	}
> +	
> +	close(data->fd[0]);
> +
> +	if (data->fd[1] >= 0) {
> +		close(data->fd[1]);
> +	}
> +	if (finish_connect(data->conn))
> +		ret = -1;
> +	data->conn = NULL;
> +
> +	return ret;
> +}
> +
>  static int push_had_errors(struct ref *ref)
>  {
>  	for (; ref; ref = ref->next) {
> @@ -890,6 +946,7 @@ static struct transport_vtable taken_over_vtable = {
>  	.get_refs_list	= get_refs_via_connect,
>  	.fetch_refs	= fetch_refs_via_pack,
>  	.push_refs	= git_transport_push,
> +	.fetch_object_info = fetch_object_info,
>  	.disconnect	= disconnect_git
>  };
>
> @@ -1043,6 +1100,7 @@ static struct transport_vtable builtin_smart_vtable = {
>  	.get_refs_list	= get_refs_via_connect,
>  	.fetch_refs	= fetch_refs_via_pack,
>  	.push_refs	= git_transport_push,
> +	.fetch_object_info = fetch_object_info,
>  	.connect	= connect_git,
>  	.disconnect	= disconnect_git
>  };
> @@ -1420,6 +1478,11 @@ const struct ref *transport_get_remote_refs(struct transport *transport,
>  	return transport->remote_refs;
>  }
>
> +int transport_fetch_object_info(struct transport *transport, struct oid_array *oids) {
> +	transport->vtable->fetch_object_info(transport, oids);
> +	return 1;
> +}
> +
>  int transport_fetch_refs(struct transport *transport, struct ref *refs)
>  {
>  	int rc;
> diff --git a/transport.h b/transport.h
> index 3f16e50c19..1c807591de 100644
> --- a/transport.h
> +++ b/transport.h
> @@ -278,6 +278,7 @@ const struct ref *transport_get_remote_refs(struct transport *transport,
>   * This can only be called after fetching the remote refs.
>   */
>  const struct git_hash_algo *transport_get_hash_algo(struct transport *transport);
> +int transport_fetch_object_info(struct transport *transport, struct oid_array *oids);
>  int transport_fetch_refs(struct transport *transport, struct ref *refs);
>
>  /*
>
> base-commit: b23dac905bde28da47543484320db16312c87551
> -- 
> 2.33.0.664.g0785eb7698

^ permalink raw reply	[flat|nested] 103+ messages in thread

* Re: [PATCH v2] fetch object-info-format: client option for object-info
  2022-02-08 23:56 ` [PATCH v2] " Calvin Wan
  2022-02-09 12:48   ` Philip Oakley
@ 2022-02-09 20:41   ` Jonathan Tan
  2022-02-10 22:58     ` Calvin Wan
  2022-03-28 19:11   ` [PATCH v3 0/3] object-info: add option for retrieving object info Calvin Wan
  2 siblings, 1 reply; 103+ messages in thread
From: Jonathan Tan @ 2022-02-09 20:41 UTC (permalink / raw)
  To: Calvin Wan; +Cc: Jonathan Tan, git

Calvin Wan <calvinwan@google.com> writes:
> Add ‘—object-info-format’ option to fetch. This option allows
> the client to make an object-info [1] command request to a server
> that supports protocol v2.

Avoid using characters above 127 in commit messages (unless, say, as part of
someone's name). They make it hard to search for, and in this case, whatever
dash is there is wrong (it should be "--").

> The transport implementation uses vtables [2], similar to how Git
> fetches refs,

You should be explicit that you're adding a new function to the vtable.
(Whether that is what we should do is another issue: let me look at the
patch to see.)

> to determine whether a process needs to be taken over
> before sending the object-info request. 

The vtable is not to determine whether a process needs to be taken over,
but so that we support multiple protocols (HTTP, SSH, etc.). In any
case, this detail is probably not relevant.

> Different protocols
> require different setups for making requests.

This is true, but I don't see the relevance.

> [1] https://lore.kernel.org/git/20210420233830.2181153-1-bga@google.com/
> [2] https://lore.kernel.org/git/26f276956001a120cc9105b0071762c2fd4a45c5.15=
> 13287544.git.jonathantanmy@google.com/

For merged code, quote the commit, not the email.

> @@ -220,6 +225,8 @@ static struct option builtin_fetch_options[] = {
>  		 N_("write the commit-graph after fetching")),
>  	OPT_BOOL(0, "stdin", &stdin_refspecs,
>  		 N_("accept refspecs from stdin")),
> +	OPT_STRING_LIST(0, "object-info-format", &object_info_format, N_("option"),
> +		 N_("command request arguments")),

I would have expected a parameter named "format" to take a format
string, but taking a string list of the fields we need might work too.
In any case, maybe rename it to "--object-info" or similar.

> @@ -2000,6 +2007,8 @@ int cmd_fetch(int argc, const char **argv, const char *prefix)
>  	struct remote *remote = NULL;
>  	int result = 0;
>  	int prune_tags_ok = 1;
> +	struct oid_array oids = OID_ARRAY_INIT;
> +	struct object_id oid;

The "oids" at function level needs a more descriptive name (e.g.
"oids_for_object_info"). The name of "oid" is fine, since it's just used
as a temporary variable, but since it is temporary, it should be
declared in the block where it's used. (Same for "oids", actually:
declare it in the block it's used, and in that case you can keep the
name since it's more tightly scoped.)

> @@ -2057,6 +2066,23 @@ int cmd_fetch(int argc, const char **argv, const char *prefix)
>  	if (dry_run)
>  		write_fetch_head = 0;
>  
> +	if (object_info_format.nr > 0) {
> +		if (argc == 0 || argc == 1) {
> +			die(_("must supply remote and object ids when using --object-info-format"));
> +		} else {
> +			remote = remote_get(argv[0]);
> +			for (i = 1; i < argc; i++) {
> +				if (get_oid(argv[i], &oid))
> +					return error(_("malformed object name '%s'"), argv[i]);
> +				oid_array_append(&oids, &oid);
> +			}
> +		}
> +		gtransport = prepare_transport(remote, 0);
> +		gtransport->server_options = &object_info_format;
> +		result = transport_fetch_object_info(gtransport, &oids);
> +		return result;
> +	}

I was thinking that this should reuse the refspec parsing mechanism
(which also supports stdin), but upon more thought, using the refspec
parser means that we would also need to check that all refspecs are
exact OIDs (because we wouldn't know what to do with them otherwise).
OK, parsing the objects by ourselves looks reasonable.

The assignment of object_info_format to server_options is probably not a
good idea, though, since readers of server_options would expect server
options, not what you're assigning. The best place to put this
information is in smart_options. (See the negotiate_only code.)

> +static void write_object_info_command_and_capabilities(struct strbuf *req_buf,
> +						 const struct string_list *server_options)
> +{

[snip contents]

This code is very similar to code in fetch-pack.c. If you stick to
crafting the request in builtin/fetch.c, you should refactor
fetch-pack.{c,h} to expose this functionality (in a preparatory commit)
and then use that function from here.

> +void send_object_info_request(int fd_out, struct object_info_args *args)
> +{
> +	struct strbuf req_buf = STRBUF_INIT;
> +	int i;
> +
> +	write_object_info_command_and_capabilities(&req_buf, args->server_options);
> +
> +	if (string_list_has_string(args->server_options, "size"))
> +		packet_buf_write(&req_buf, "size");

What happens if "size" is not in the list?

> +		printf "%s %d\n" "$object_id" "$length" >expect &&

You can just write "echo $object_id $length >expect". Also, test the
following:
 - more than one OID
 - an OID that's not on the remote
 - a malformed OID
 - a server that doesn't support protocol v2
 - a server that supports protocol v2 but not object-format

(You don't have to do this for all protocols; just pick one. I prefer
HTTP, since that's the most complex.)

Other than that, the tests look good. Thanks for testing the different
protocols.

> @@ -1269,6 +1280,7 @@ static struct transport_vtable vtable = {
>  	.get_refs_list	= get_refs_list,
>  	.fetch_refs	= fetch_refs,
>  	.push_refs	= push_refs,
> +	.fetch_object_info = fetch_object_info,
>  	.connect	= connect_helper,
>  	.disconnect	= release_helper
>  };

Adding a function to the transport vtable is not so disruptive since we
don't have many transport vtables, but better if we can avoid this
disruption. In this case, I think it's better to reuse fetch_refs.
Mainly, the plumbing from transport_fetch_refs() to all the helper
functions in fetch-pack.c already exists, so reusing fetch_refs would
allow us to reuse that plumbing.

This also means that we don't have to expose the protocol functionality
in fetch-pack.c that you copied over to builtin/fetch.c in this patch,
which is an added bonus.

> +static int fetch_object_info(struct transport *transport, struct oid_array *oids)
> +{

[snip contents]

And reusing the plumbing might mean that we don't need this function
too.

Taking a step back, there also needs to be a fallback mechanism for when
the server doesn't support object-info.

If you generally agree with my review comments, I would say that your
next steps are:
 - investigate if we can reuse the transport_fetch_pack -> fetch-pack.c
   machinery
 - make a fallback for when the server doesn't support object-info
   (might be easier when we use the machinery, so I would start with
   that first)

^ permalink raw reply	[flat|nested] 103+ messages in thread

* Re: [PATCH v2] fetch —object-info-format: client option for object-info
  2022-02-09 12:48   ` Philip Oakley
@ 2022-02-10 22:32     ` Calvin Wan
  0 siblings, 0 replies; 103+ messages in thread
From: Calvin Wan @ 2022-02-10 22:32 UTC (permalink / raw)
  To: Philip Oakley, git; +Cc: Jonathan Tan

Yes it does, thank you!


On Wed, Feb 9, 2022 at 4:48 AM Philip Oakley <philipoakley@iee.email> wrote:
>
> On 08/02/2022 23:56, Calvin Wan wrote:
> > Add ‘—object-info-format’ option to fetch. This option allows
> > the client to make an object-info [1] command request to a server
> > that supports protocol v2.
> >
> > The transport implementation uses vtables [2], similar to how Git
> > fetches refs, to determine whether a process needs to be taken over
> > before sending the object-info request. Different protocols
> > require different setups for making requests.
> >
> > [1] https://lore.kernel.org/git/20210420233830.2181153-1-bga@google.com/
> > [2] https://lore.kernel.org/git/26f276956001a120cc9105b0071762c2fd4a45c5.15=
> > 13287544.git.jonathantanmy@google.com/
> >
> > Helped-by: Jonathan Tan <jonathantanmy@google.com>
> > Signed-off-by: Calvin Wan <calvinwan@google.com>
> >
> > ---
> > Please ignore the patch above. It was sent with a stale patch message.
> >  builtin/fetch.c              | 26 ++++++++++++
> >  fetch-pack.c                 | 53 +++++++++++++++++++++++
> >  fetch-pack.h                 |  7 ++++
> >  t/t5583-fetch-object-info.sh | 81 ++++++++++++++++++++++++++++++++++++
> >  transport-helper.c           | 12 ++++++
> >  transport-internal.h         |  1 +
> >  transport.c                  | 59 ++++++++++++++++++++++++++
> >  transport.h                  |  1 +
> >  8 files changed, 240 insertions(+)
> >  create mode 100755 t/t5583-fetch-object-info.sh
> >
> > diff --git a/builtin/fetch.c b/builtin/fetch.c
> > index 5f06b21f8e..b48d9e93d0 100644
> > --- a/builtin/fetch.c
> > +++ b/builtin/fetch.c
> > @@ -29,6 +29,9 @@
> >  #include "commit-graph.h"
> >  #include "shallow.h"
> >  #include "worktree.h"
> > +#include "protocol.h"
> > +#include "pkt-line.h"
> > +#include "connect.h"
> >
> >  #define FORCED_UPDATES_DELAY_WARNING_IN_MS (10 * 1000)
> >
> > @@ -37,6 +40,7 @@ static const char * const builtin_fetch_usage[] = {
> >       N_("git fetch [<options>] <group>"),
> >       N_("git fetch --multiple [<options>] [(<repository> | <group>)...]"),
> >       N_("git fetch --all [<options>]"),
> > +     N_("git fetch --object-info-format=[<arguments>] <remote> [<object-ids>]"),
> >       NULL
> >  };
>
> Doesn't this also need a matching Documentarian update for the option?
> --
> Philip

^ permalink raw reply	[flat|nested] 103+ messages in thread

* Re: [PATCH] fetch —object-info-format: client option for object-info
  2022-02-09 19:09 ` [PATCH] fetch —object-info-format: client option for object-info John Cai
@ 2022-02-10 22:49   ` Calvin Wan
  0 siblings, 0 replies; 103+ messages in thread
From: Calvin Wan @ 2022-02-10 22:49 UTC (permalink / raw)
  To: John Cai, git; +Cc: Jonathan Tan

> Maybe just --object-info and then provide a different flag for the <arguments> perhaps?
I agree. --object-info is a better name for the option. And with
<arguments>, there is also probably a better name -- will look into
the codebase and see what other options do.

> should object_info_format be cleared at some point?
I don't see why it needs to be cleared. The command only uses this
variable once in object-info.

Thank you for your input!

On Wed, Feb 9, 2022 at 11:09 AM John Cai <johncai86@gmail.com> wrote:
>
> Hi Calvin,
>
> On 8 Feb 2022, at 18:19, Calvin Wan wrote:
>
> > Add ‘—object-info-format’ option to fetch. This option allows
> > the client to make an object-info [1] command request to a server
> > that supports protocol v2.
> >
> > The transport implementation uses vtables [2], similar to how Git
> > fetches refs, to determine whether a process needs to be taken over
> > before sending the object-info request. Different protocols
> > require different setups for making requests.
> >
> > [1] https://lore.kernel.org/git/20210420233830.2181153-1-bga@google.com/
> > [2] https://lore.kernel.org/git/26f276956001a120cc9105b0071762c2fd4a45c5.1513287544.git.jonathantanmy@google.com/
> >
> > Helped-by: Jonathan Tan <jonathantanmy@google.com>
> > Signed-off-by: Calvin Wan <calvinwan@google.com>
> >
> > ---
> >  builtin/fetch.c              | 80 ++++++++++++++++++++----------
> >  fetch-pack.c                 | 53 ++++++++++++++++++++
> >  fetch-pack.h                 |  7 +++
> >  t/t5583-fetch-object-info.sh | 95 ++++++++++++++++++++++++++++++++++++
> >  transport-helper.c           | 12 +++++
> >  transport-internal.h         |  1 +
> >  transport.c                  | 63 ++++++++++++++++++++++++
> >  transport.h                  |  1 +
> >  8 files changed, 286 insertions(+), 26 deletions(-)
> >  create mode 100755 t/t5583-fetch-object-info.sh
> >
> > diff --git a/builtin/fetch.c b/builtin/fetch.c
> > index 5f06b21f8e..991063072b 100644
> > --- a/builtin/fetch.c
> > +++ b/builtin/fetch.c
> > @@ -29,6 +29,9 @@
> >  #include "commit-graph.h"
> >  #include "shallow.h"
> >  #include "worktree.h"
> > +#include "protocol.h"
> > +#include "pkt-line.h"
> > +#include "connect.h"
> >
> >  #define FORCED_UPDATES_DELAY_WARNING_IN_MS (10 * 1000)
> >
> > @@ -37,6 +40,7 @@ static const char * const builtin_fetch_usage[] = {
> >       N_("git fetch [<options>] <group>"),
> >       N_("git fetch --multiple [<options>] [(<repository> | <group>)...]"),
> >       N_("git fetch --all [<options>]"),
> > +     N_("git fetch --object-info-format=[<arguments>] <remote> [<object-ids>]"),
>
> --object-info-format seems to be more like a command, rather than a formatting
> option. When a flag ends with "format", it feels like it is specifying how to
> transform the output kind of like git for-each-ref --format. So --object-info-format looks
> feels like it's meant to transform the object-info output. Maybe just --object-info
> and then provide a different flag for the <arguments> perhaps?
>
> Would be interested in hearing what others think from a UI perspective.
>
> >       NULL
> >  };
> >
> > @@ -85,6 +89,7 @@ static struct string_list negotiation_tip = STRING_LIST_INIT_NODUP;
> >  static int fetch_write_commit_graph = -1;
> >  static int stdin_refspecs = 0;
> >  static int negotiate_only;
> > +static struct string_list object_info_format = STRING_LIST_INIT_NODUP;
>
> should object_info_format be cleared at some point?
>
> >
> >  static int git_fetch_config(const char *k, const char *v, void *cb)
> >  {
> > @@ -220,6 +225,8 @@ static struct option builtin_fetch_options[] = {
> >                N_("write the commit-graph after fetching")),
> >       OPT_BOOL(0, "stdin", &stdin_refspecs,
> >                N_("accept refspecs from stdin")),
> > +     OPT_STRING_LIST(0, "object-info-format", &object_info_format, N_("option"),
> > +              N_("command request arguments")),
> >       OPT_END()
> >  };
> >
> > @@ -2000,6 +2007,8 @@ int cmd_fetch(int argc, const char **argv, const char *prefix)
> >       struct remote *remote = NULL;
> >       int result = 0;
> >       int prune_tags_ok = 1;
> > +     struct oid_array oids = OID_ARRAY_INIT;
> > +     struct object_id oid;
> >
> >       packet_trace_identity("fetch");
> >
> > @@ -2019,6 +2028,7 @@ int cmd_fetch(int argc, const char **argv, const char *prefix)
> >
> >       argc = parse_options(argc, argv, prefix,
> >                            builtin_fetch_options, builtin_fetch_usage, 0);
> > +
> >       if (recurse_submodules != RECURSE_SUBMODULES_OFF) {
> >               int *sfjc = submodule_fetch_jobs_config == -1
> >                           ? &submodule_fetch_jobs_config : NULL;
> > @@ -2057,34 +2067,52 @@ int cmd_fetch(int argc, const char **argv, const char *prefix)
> >       if (dry_run)
> >               write_fetch_head = 0;
> >
> > -     if (all) {
> > -             if (argc == 1)
> > -                     die(_("fetch --all does not take a repository argument"));
> > -             else if (argc > 1)
> > -                     die(_("fetch --all does not make sense with refspecs"));
> > -             (void) for_each_remote(get_one_remote_for_fetch, &list);
> > -     } else if (argc == 0) {
> > -             /* No arguments -- use default remote */
> > -             remote = remote_get(NULL);
> > -     } else if (multiple) {
> > -             /* All arguments are assumed to be remotes or groups */
> > -             for (i = 0; i < argc; i++)
> > -                     if (!add_remote_or_group(argv[i], &list))
> > -                             die(_("no such remote or remote group: %s"),
> > -                                 argv[i]);
> > -     } else {
> > -             /* Single remote or group */
> > -             (void) add_remote_or_group(argv[0], &list);
> > -             if (list.nr > 1) {
> > -                     /* More than one remote */
> > -                     if (argc > 1)
> > -                             die(_("fetching a group and specifying refspecs does not make sense"));
> > +     if (object_info_format.nr > 0) {
> > +             if (argc == 0 || argc == 1) {
> > +                     die(_("must supply remote and object ids when using --object-info-format"));
> >               } else {
> > -                     /* Zero or one remotes */
> >                       remote = remote_get(argv[0]);
> > -                     prune_tags_ok = (argc == 1);
> > -                     argc--;
> > -                     argv++;
> > +                     for (i = 1; i < argc; i++) {
> > +                             if (get_oid(argv[i], &oid))
> > +                                     return error(_("malformed object name '%s'"), argv[i]);
> > +                             oid_array_append(&oids, &oid);
> > +                     }
> > +             }
> > +             gtransport = prepare_transport(remote, 0);
> > +             gtransport->server_options = &object_info_format;
> > +             result = transport_fetch_object_info(gtransport, &oids);
> > +
> > +             return result;
> > +     } else {
> > +             if (all) {
> > +                     if (argc == 1)
> > +                             die(_("fetch --all does not take a repository argument"));
> > +                     else if (argc > 1)
> > +                             die(_("fetch --all does not make sense with refspecs"));
> > +                     (void) for_each_remote(get_one_remote_for_fetch, &list);
> > +             } else if (argc == 0) {
> > +                     /* No arguments -- use default remote */
> > +                     remote = remote_get(NULL);
> > +             } else if (multiple) {
> > +                     /* All arguments are assumed to be remotes or groups */
> > +                     for (i = 0; i < argc; i++)
> > +                             if (!add_remote_or_group(argv[i], &list))
> > +                                     die(_("no such remote or remote group: %s"),
> > +                                         argv[i]);
> > +             } else {
> > +                     /* Single remote or group */
> > +                     (void) add_remote_or_group(argv[0], &list);
> > +                     if (list.nr > 1) {
> > +                             /* More than one remote */
> > +                             if (argc > 1)
> > +                                     die(_("fetching a group and specifying refspecs does not make sense"));
> > +                     } else {
> > +                             /* Zero or one remotes */
> > +                             remote = remote_get(argv[0]);
> > +                             prune_tags_ok = (argc == 1);
> > +                             argc--;
> > +                             argv++;
> > +                     }
> >               }
> >       }
> >
> > diff --git a/fetch-pack.c b/fetch-pack.c
> > index dd6ec449f2..d1c5254aa8 100644
> > --- a/fetch-pack.c
> > +++ b/fetch-pack.c
> > @@ -1265,6 +1265,59 @@ static void write_fetch_command_and_capabilities(struct strbuf *req_buf,
> >       packet_buf_delim(req_buf);
> >  }
> >
> > +static void write_object_info_command_and_capabilities(struct strbuf *req_buf,
> > +                                              const struct string_list *server_options)
> > +{
> > +     const char *hash_name;
> > +
> > +     if (server_supports_v2("object-info", 1))
> > +             packet_buf_write(req_buf, "command=object-info");
> > +     if (server_supports_v2("agent", 0))
> > +             packet_buf_write(req_buf, "agent=%s", git_user_agent_sanitized());
> > +     if (advertise_sid && server_supports_v2("session-id", 0))
> > +             packet_buf_write(req_buf, "session-id=%s", trace2_session_id());
> > +     if (server_options && server_options->nr &&
> > +         server_supports_v2("server-option", 1)) {
> > +             int i;
> > +             for (i = 0; i < server_options->nr; i++)
> > +                     packet_buf_write(req_buf, "server-option=%s",
> > +                                      server_options->items[i].string);
> > +     }
> > +
> > +     if (server_feature_v2("object-format", &hash_name)) {
> > +             int hash_algo = hash_algo_by_name(hash_name);
> > +             if (hash_algo_by_ptr(the_hash_algo) != hash_algo)
> > +                     die(_("mismatched algorithms: client %s; server %s"),
> > +                         the_hash_algo->name, hash_name);
> > +             packet_buf_write(req_buf, "object-format=%s", the_hash_algo->name);
> > +     } else if (hash_algo_by_ptr(the_hash_algo) != GIT_HASH_SHA1) {
> > +             die(_("the server does not support algorithm '%s'"),
> > +                 the_hash_algo->name);
> > +     }
> > +     packet_buf_delim(req_buf);
> > +}
> > +
> > +void send_object_info_request(int fd_out, struct object_info_args *args)
> > +{
> > +     struct strbuf req_buf = STRBUF_INIT;
> > +     int i;
> > +
> > +     write_object_info_command_and_capabilities(&req_buf, args->server_options);
> > +
> > +     if (string_list_has_string(args->server_options, "size"))
> > +             packet_buf_write(&req_buf, "size");
> > +
> > +     for (i = 0; i < args->oids->nr; i++) {
> > +             packet_buf_write(&req_buf, "oid %s\n", oid_to_hex(&args->oids->oid[i]));
> > +     }
> > +
> > +     packet_buf_flush(&req_buf);
> > +     if (write_in_full(fd_out, req_buf.buf, req_buf.len) < 0)
> > +             die_errno(_("unable to write request to remote"));
> > +
> > +     strbuf_release(&req_buf);
> > +}
> > +
> >  static int send_fetch_request(struct fetch_negotiator *negotiator, int fd_out,
> >                             struct fetch_pack_args *args,
> >                             const struct ref *wants, struct oidset *common,
> > diff --git a/fetch-pack.h b/fetch-pack.h
> > index 7f94a2a583..2ad5ec5c64 100644
> > --- a/fetch-pack.h
> > +++ b/fetch-pack.h
> > @@ -68,6 +68,11 @@ struct fetch_pack_args {
> >       unsigned connectivity_checked:1;
> >  };
> >
> > +struct object_info_args {
> > +     const struct string_list *server_options;
> > +     const struct oid_array *oids;
> > +};
> > +
> >  /*
> >   * sought represents remote references that should be updated from.
> >   * On return, the names that were found on the remote will have been
> > @@ -101,4 +106,6 @@ void negotiate_using_fetch(const struct oid_array *negotiation_tips,
> >   */
> >  int report_unmatched_refs(struct ref **sought, int nr_sought);
> >
> > +void send_object_info_request(int fd_out, struct object_info_args *args);
> > +
> >  #endif
> > diff --git a/t/t5583-fetch-object-info.sh b/t/t5583-fetch-object-info.sh
> > new file mode 100755
> > index 0000000000..93b09da3b1
> > --- /dev/null
> > +++ b/t/t5583-fetch-object-info.sh
> > @@ -0,0 +1,95 @@
> > +#!/bin/sh
> > +
> > +test_description='test git fetch object-info version 2'
> > +
> > +TEST_NO_CREATE_REPO=1
> > +
> > +GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME=main
> > +export GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME
> > +
> > +. ./test-lib.sh
> > +
> > +initial_directory=$pwd
> > +
> > +# Test fetch object-info with 'git://' transport
> > +#
> > +. "$TEST_DIRECTORY"/lib-git-daemon.sh
> > +start_git_daemon --export-all --enable=receive-pack
> > +daemon_parent=$GIT_DAEMON_DOCUMENT_ROOT_PATH/parent
> > +
> > +
> > +test_expect_success 'create repo to be served by git-daemon' '
> > +     git init "$daemon_parent" &&
> > +     test_commit -C "$daemon_parent" message1 a.txt
> > +'
> > +
> > +test_expect_success 'fetch object-info with git:// using protocol v2' '
> > +     test_when_finished "cd '$initial_directory'" &&
> > +
> > +     cd "$daemon_parent" &&
> > +
> > +     cat >actual <<-EOF &&
> > +     $(GIT_TRACE_PACKET=1 git -c protocol.version=2 fetch --object-info-format=size "$GIT_DAEMON_URL/parent" $(git rev-parse message1:a.txt))
> > +     EOF
> > +
> > +     cat >expect <<-EOF &&
> > +     $(git rev-parse message1:a.txt) $(wc -c <a.txt | xargs)
> > +     EOF
> > +
> > +     test_cmp expect actual
> > +'
> > +stop_git_daemon
> > +
> > +# Test protocol v2 with 'http://' transport
> > +#
> > +. "$TEST_DIRECTORY"/lib-httpd.sh
> > +start_httpd
> > +
> > +test_expect_success 'create repo to be served by http:// transport' '
> > +     git init "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
> > +     git -C "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" config http.receivepack true &&
> > +     test_commit -C "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" message1 a.txt
> > +'
> > +
> > +test_expect_success 'fetch object-info with http:// using protocol v2' '
> > +     test_when_finished "cd '$initial_directory'" &&
> > +
> > +     cd "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
> > +
> > +     cat >actual <<-EOF &&
> > +     $(git -c protocol.version=2 fetch --object-info-format=size "$HTTPD_URL/smart/http_parent" $(git rev-parse message1:a.txt))
> > +     EOF
> > +
> > +     cat >expect <<-EOF &&
> > +     $(git rev-parse message1:a.txt) $(wc -c <a.txt | xargs)
> > +     EOF
> > +
> > +     test_cmp expect actual
> > +'
> > +
> > +# Test fetch object-info with 'file://' transport
> > +#
> > +
> > +test_expect_success 'create repo to be serbed by file:// transport' '
> > +     git init server &&
> > +     test_commit -C server message1 a.txt &&
> > +     git -C server config protocol.version 2
> > +'
> > +
> > +test_expect_success 'fetch object-info with file:// using protocol v2' '
> > +     test_when_finished "cd '$initial_directory'" &&
> > +
> > +     cd server &&
> > +
> > +     cat >actual <<-EOF &&
> > +     $(git fetch --object-info-format=size "file://$(pwd)" $(git rev-parse message1:a.txt))
> > +     EOF
> > +
> > +     cat >expect <<-EOF &&
> > +     $(git rev-parse message1:a.txt) $(wc -c <a.txt | xargs)
> > +     EOF
> > +
> > +     test_cmp expect actual
> > +'
> > +
> > +test_done
> > \ No newline at end of file
> > diff --git a/transport-helper.c b/transport-helper.c
> > index a0297b0986..9ecda03dde 100644
> > --- a/transport-helper.c
> > +++ b/transport-helper.c
> > @@ -671,6 +671,17 @@ static int connect_helper(struct transport *transport, const char *name,
> >  static struct ref *get_refs_list_using_list(struct transport *transport,
> >                                           int for_push);
> >
> > +static int fetch_object_info(struct transport *transport, struct oid_array *oids)
> > +{
> > +     get_helper(transport);
> > +
> > +     if (process_connect(transport, 0)) {
> > +             do_take_over(transport);
> > +             return transport->vtable->fetch_object_info(transport, oids);
> > +     }
> > +     return -1;
> > +}
> > +
> >  static int fetch_refs(struct transport *transport,
> >                     int nr_heads, struct ref **to_fetch)
> >  {
> > @@ -1269,6 +1280,7 @@ static struct transport_vtable vtable = {
> >       .get_refs_list  = get_refs_list,
> >       .fetch_refs     = fetch_refs,
> >       .push_refs      = push_refs,
> > +     .fetch_object_info = fetch_object_info,
> >       .connect        = connect_helper,
> >       .disconnect     = release_helper
> >  };
> > diff --git a/transport-internal.h b/transport-internal.h
> > index c4ca0b733a..04fa015011 100644
> > --- a/transport-internal.h
> > +++ b/transport-internal.h
> > @@ -59,6 +59,7 @@ struct transport_vtable {
> >        * use. disconnect() releases these resources.
> >        **/
> >       int (*disconnect)(struct transport *connection);
> > +     int (*fetch_object_info)(struct transport *transport, struct oid_array *oids);
> >  };
> >
> >  #endif
> > diff --git a/transport.c b/transport.c
> > index 2a3e324154..0e1bf3c4cd 100644
> > --- a/transport.c
> > +++ b/transport.c
> > @@ -445,6 +445,62 @@ static int fetch_refs_via_pack(struct transport *transport,
> >       return ret;
> >  }
> >
> > +static int fetch_object_info(struct transport *transport, struct oid_array *oids)
> > +{
> > +     int ret = 0;
> > +     struct git_transport_data *data = transport->data;
> > +     struct object_info_args args;
> > +     struct packet_reader reader;
> > +
> > +     memset(&args, 0, sizeof(args));
> > +     args.server_options = transport->server_options;
> > +     args.oids = oids;
> > +
> > +     connect_setup(transport, 0);
> > +     packet_reader_init(&reader, data->fd[0], NULL, 0,
> > +                     PACKET_READ_CHOMP_NEWLINE |
> > +                     PACKET_READ_GENTLE_ON_EOF |
> > +                     PACKET_READ_DIE_ON_ERR_PACKET);
> > +     data->version = discover_version(&reader);
> > +
> > +     if (data->version == protocol_unknown_version)
> > +             BUG("unknown protocol version");
> > +     else if (data->version <= protocol_v1)
> > +             die_if_server_options(transport);
> > +
> > +     switch (data->version) {
> > +     case protocol_v2:
> > +             send_object_info_request(data->fd[1], &args);
> > +             break;
> > +     case protocol_v1:
> > +     case protocol_v0:
> > +             die(_("wrong protocol version. expected v2"));
> > +     case protocol_unknown_version:
> > +             BUG("unknown protocol version");
> > +     }
> > +
> > +     if (packet_reader_read(&reader) != PACKET_READ_NORMAL) {
> > +             die(_("error reading object info header"));
> > +     }
> > +     if (strcmp(reader.line, "size")) {
> > +             die(_("expected 'size', received '%s'"), reader.line);
> > +     }
> > +     while (packet_reader_read(&reader) == PACKET_READ_NORMAL) {
> > +             printf("%s\n", reader.line);
> > +     }
> > +
> > +     close(data->fd[0]);
> > +
> > +     if (data->fd[1] >= 0) {
> > +             close(data->fd[1]);
> > +     }
> > +     if (finish_connect(data->conn))
> > +             ret = -1;
> > +     data->conn = NULL;
> > +
> > +     return ret;
> > +}
> > +
> >  static int push_had_errors(struct ref *ref)
> >  {
> >       for (; ref; ref = ref->next) {
> > @@ -890,6 +946,7 @@ static struct transport_vtable taken_over_vtable = {
> >       .get_refs_list  = get_refs_via_connect,
> >       .fetch_refs     = fetch_refs_via_pack,
> >       .push_refs      = git_transport_push,
> > +     .fetch_object_info = fetch_object_info,
> >       .disconnect     = disconnect_git
> >  };
> >
> > @@ -1043,6 +1100,7 @@ static struct transport_vtable builtin_smart_vtable = {
> >       .get_refs_list  = get_refs_via_connect,
> >       .fetch_refs     = fetch_refs_via_pack,
> >       .push_refs      = git_transport_push,
> > +     .fetch_object_info = fetch_object_info,
> >       .connect        = connect_git,
> >       .disconnect     = disconnect_git
> >  };
> > @@ -1420,6 +1478,11 @@ const struct ref *transport_get_remote_refs(struct transport *transport,
> >       return transport->remote_refs;
> >  }
> >
> > +int transport_fetch_object_info(struct transport *transport, struct oid_array *oids) {
> > +     transport->vtable->fetch_object_info(transport, oids);
> > +     return 1;
> > +}
> > +
> >  int transport_fetch_refs(struct transport *transport, struct ref *refs)
> >  {
> >       int rc;
> > diff --git a/transport.h b/transport.h
> > index 3f16e50c19..1c807591de 100644
> > --- a/transport.h
> > +++ b/transport.h
> > @@ -278,6 +278,7 @@ const struct ref *transport_get_remote_refs(struct transport *transport,
> >   * This can only be called after fetching the remote refs.
> >   */
> >  const struct git_hash_algo *transport_get_hash_algo(struct transport *transport);
> > +int transport_fetch_object_info(struct transport *transport, struct oid_array *oids);
> >  int transport_fetch_refs(struct transport *transport, struct ref *refs);
> >
> >  /*
> >
> > base-commit: b23dac905bde28da47543484320db16312c87551
> > --
> > 2.33.0.664.g0785eb7698

^ permalink raw reply	[flat|nested] 103+ messages in thread

* Re: [PATCH v2] fetch object-info-format: client option for object-info
  2022-02-09 20:41   ` [PATCH v2] fetch object-info-format: " Jonathan Tan
@ 2022-02-10 22:58     ` Calvin Wan
  0 siblings, 0 replies; 103+ messages in thread
From: Calvin Wan @ 2022-02-10 22:58 UTC (permalink / raw)
  To: Jonathan Tan, git

> If you generally agree with my review comments
Everything you've said makes sense. Thanks for taking an in depth look
at this! I'll make the changes you suggested and then start working on
the next steps.

On Wed, Feb 9, 2022 at 12:42 PM Jonathan Tan <jonathantanmy@google.com> wrote:
>
> Calvin Wan <calvinwan@google.com> writes:
> > Add ‘—object-info-format’ option to fetch. This option allows
> > the client to make an object-info [1] command request to a server
> > that supports protocol v2.
>
> Avoid using characters above 127 in commit messages (unless, say, as part of
> someone's name). They make it hard to search for, and in this case, whatever
> dash is there is wrong (it should be "--").
>
> > The transport implementation uses vtables [2], similar to how Git
> > fetches refs,
>
> You should be explicit that you're adding a new function to the vtable.
> (Whether that is what we should do is another issue: let me look at the
> patch to see.)
>
> > to determine whether a process needs to be taken over
> > before sending the object-info request.
>
> The vtable is not to determine whether a process needs to be taken over,
> but so that we support multiple protocols (HTTP, SSH, etc.). In any
> case, this detail is probably not relevant.
>
> > Different protocols
> > require different setups for making requests.
>
> This is true, but I don't see the relevance.
>
> > [1] https://lore.kernel.org/git/20210420233830.2181153-1-bga@google.com/
> > [2] https://lore.kernel.org/git/26f276956001a120cc9105b0071762c2fd4a45c5.15=
> > 13287544.git.jonathantanmy@google.com/
>
> For merged code, quote the commit, not the email.
>
> > @@ -220,6 +225,8 @@ static struct option builtin_fetch_options[] = {
> >                N_("write the commit-graph after fetching")),
> >       OPT_BOOL(0, "stdin", &stdin_refspecs,
> >                N_("accept refspecs from stdin")),
> > +     OPT_STRING_LIST(0, "object-info-format", &object_info_format, N_("option"),
> > +              N_("command request arguments")),
>
> I would have expected a parameter named "format" to take a format
> string, but taking a string list of the fields we need might work too.
> In any case, maybe rename it to "--object-info" or similar.
>
> > @@ -2000,6 +2007,8 @@ int cmd_fetch(int argc, const char **argv, const char *prefix)
> >       struct remote *remote = NULL;
> >       int result = 0;
> >       int prune_tags_ok = 1;
> > +     struct oid_array oids = OID_ARRAY_INIT;
> > +     struct object_id oid;
>
> The "oids" at function level needs a more descriptive name (e.g.
> "oids_for_object_info"). The name of "oid" is fine, since it's just used
> as a temporary variable, but since it is temporary, it should be
> declared in the block where it's used. (Same for "oids", actually:
> declare it in the block it's used, and in that case you can keep the
> name since it's more tightly scoped.)
>
> > @@ -2057,6 +2066,23 @@ int cmd_fetch(int argc, const char **argv, const char *prefix)
> >       if (dry_run)
> >               write_fetch_head = 0;
> >
> > +     if (object_info_format.nr > 0) {
> > +             if (argc == 0 || argc == 1) {
> > +                     die(_("must supply remote and object ids when using --object-info-format"));
> > +             } else {
> > +                     remote = remote_get(argv[0]);
> > +                     for (i = 1; i < argc; i++) {
> > +                             if (get_oid(argv[i], &oid))
> > +                                     return error(_("malformed object name '%s'"), argv[i]);
> > +                             oid_array_append(&oids, &oid);
> > +                     }
> > +             }
> > +             gtransport = prepare_transport(remote, 0);
> > +             gtransport->server_options = &object_info_format;
> > +             result = transport_fetch_object_info(gtransport, &oids);
> > +             return result;
> > +     }
>
> I was thinking that this should reuse the refspec parsing mechanism
> (which also supports stdin), but upon more thought, using the refspec
> parser means that we would also need to check that all refspecs are
> exact OIDs (because we wouldn't know what to do with them otherwise).
> OK, parsing the objects by ourselves looks reasonable.
>
> The assignment of object_info_format to server_options is probably not a
> good idea, though, since readers of server_options would expect server
> options, not what you're assigning. The best place to put this
> information is in smart_options. (See the negotiate_only code.)
>
> > +static void write_object_info_command_and_capabilities(struct strbuf *req_buf,
> > +                                              const struct string_list *server_options)
> > +{
>
> [snip contents]
>
> This code is very similar to code in fetch-pack.c. If you stick to
> crafting the request in builtin/fetch.c, you should refactor
> fetch-pack.{c,h} to expose this functionality (in a preparatory commit)
> and then use that function from here.
>
> > +void send_object_info_request(int fd_out, struct object_info_args *args)
> > +{
> > +     struct strbuf req_buf = STRBUF_INIT;
> > +     int i;
> > +
> > +     write_object_info_command_and_capabilities(&req_buf, args->server_options);
> > +
> > +     if (string_list_has_string(args->server_options, "size"))
> > +             packet_buf_write(&req_buf, "size");
>
> What happens if "size" is not in the list?
>
> > +             printf "%s %d\n" "$object_id" "$length" >expect &&
>
> You can just write "echo $object_id $length >expect". Also, test the
> following:
>  - more than one OID
>  - an OID that's not on the remote
>  - a malformed OID
>  - a server that doesn't support protocol v2
>  - a server that supports protocol v2 but not object-format
>
> (You don't have to do this for all protocols; just pick one. I prefer
> HTTP, since that's the most complex.)
>
> Other than that, the tests look good. Thanks for testing the different
> protocols.
>
> > @@ -1269,6 +1280,7 @@ static struct transport_vtable vtable = {
> >       .get_refs_list  = get_refs_list,
> >       .fetch_refs     = fetch_refs,
> >       .push_refs      = push_refs,
> > +     .fetch_object_info = fetch_object_info,
> >       .connect        = connect_helper,
> >       .disconnect     = release_helper
> >  };
>
> Adding a function to the transport vtable is not so disruptive since we
> don't have many transport vtables, but better if we can avoid this
> disruption. In this case, I think it's better to reuse fetch_refs.
> Mainly, the plumbing from transport_fetch_refs() to all the helper
> functions in fetch-pack.c already exists, so reusing fetch_refs would
> allow us to reuse that plumbing.
>
> This also means that we don't have to expose the protocol functionality
> in fetch-pack.c that you copied over to builtin/fetch.c in this patch,
> which is an added bonus.
>
> > +static int fetch_object_info(struct transport *transport, struct oid_array *oids)
> > +{
>
> [snip contents]
>
> And reusing the plumbing might mean that we don't need this function
> too.
>
> Taking a step back, there also needs to be a fallback mechanism for when
> the server doesn't support object-info.
>
> If you generally agree with my review comments, I would say that your
> next steps are:
>  - investigate if we can reuse the transport_fetch_pack -> fetch-pack.c
>    machinery
>  - make a fallback for when the server doesn't support object-info
>    (might be easier when we use the machinery, so I would start with
>    that first)

^ permalink raw reply	[flat|nested] 103+ messages in thread

* [PATCH v3 0/3] object-info: add option for retrieving object info
  2022-02-08 23:56 ` [PATCH v2] " Calvin Wan
  2022-02-09 12:48   ` Philip Oakley
  2022-02-09 20:41   ` [PATCH v2] fetch object-info-format: " Jonathan Tan
@ 2022-03-28 19:11   ` Calvin Wan
  2022-03-28 19:11     ` [PATCH v3 1/3] fetch-pack: refactor packet writing and fetch options Calvin Wan
                       ` (5 more replies)
  2 siblings, 6 replies; 103+ messages in thread
From: Calvin Wan @ 2022-03-28 19:11 UTC (permalink / raw)
  To: git; +Cc: jonathantanmy, philipoakley, johncai86, Calvin Wan

Sometimes it is useful to get information about an object without
having to download it completely. The server logic has already been
implemented as “a2ba162cda (object-info: support for retrieving object
info, 2021-04-20)”. This patch implements the client option for it.

Add ‘--object-info’ option to fetch. This option allows the client to
make an object-info command request to a server that supports protocol
v2. If the server is v2, but does not allow for the object-info
command request, the entire object is fetched instead and the relevant
information is returned.

Patch 1 is a small refactor. Patch 2 adds a config option for
advertisement of object-info.  Patch 3 is the main implementation of
fetch object-info.

Major changes since v2:
 * Added a fallback for servers that do not
   allow object-info
 * Refactored the fetch object-info code by moving it to fetch_refs in
   the vtable rather than having its own function in the vtable. This
   allows for the code to easily transition to the fallback if
   object-info fails.

Calvin Wan (3):
  fetch-pack: refactor how packet is written to be more generic
  objectinfo.advertise: add config option to advertise object-info
  object-info: add option for retrieving object info

 Documentation/config/transfer.txt |   4 +
 Documentation/fetch-options.txt   |   5 ++
 Documentation/git-fetch.txt       |   1 +
 builtin/fetch.c                   |  36 +++++++-
 fetch-pack.c                      |  66 ++++++++++----
 fetch-pack.h                      |   9 ++
 protocol-caps.c                   |  11 +++
 protocol-caps.h                   |   6 ++
 serve.c                           |   2 +-
 t/t5583-fetch-object-info.sh      | 138 ++++++++++++++++++++++++++++++
 transport-helper.c                |   8 +-
 transport-internal.h              |   1 +
 transport.c                       |  75 +++++++++++++++-
 transport.h                       |   9 ++
 14 files changed, 349 insertions(+), 22 deletions(-)
 create mode 100755 t/t5583-fetch-object-info.sh


base-commit: f01e51a7cfd75131b7266131b1f7540ce0a8e5c1
-- 
2.33.0.664.g0785eb7698


^ permalink raw reply	[flat|nested] 103+ messages in thread

* [PATCH v3 1/3] fetch-pack: refactor packet writing and fetch options
  2022-03-28 19:11   ` [PATCH v3 0/3] object-info: add option for retrieving object info Calvin Wan
@ 2022-03-28 19:11     ` Calvin Wan
  2022-03-29 22:54       ` Junio C Hamano
                         ` (2 more replies)
  2022-03-28 19:11     ` [PATCH v3 2/3] transfer.advertiseObjectInfo: add object-info config Calvin Wan
                       ` (4 subsequent siblings)
  5 siblings, 3 replies; 103+ messages in thread
From: Calvin Wan @ 2022-03-28 19:11 UTC (permalink / raw)
  To: git; +Cc: jonathantanmy, philipoakley, johncai86, Calvin Wan

A subsequent commit will need to write capabilities for another
command, so refactor write_fetch_command_and_capabilities() to be able
to used by both fetch and future command.

Move fetch options set in `FETCH_CHECK_LOCAL` from the fetch state
machine to above the state machine so it is set by default. The
initial state of the state machine is always `FETCH_CHECK_LOCAL` so
this does not affect any current functionality. This change prepares
for a subsequent commit that doesn't need to check the local state, but
still requires those options to be set before sending the fetch request.

---
 fetch-pack.c | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/fetch-pack.c b/fetch-pack.c
index 87657907e7..b709a61baf 100644
--- a/fetch-pack.c
+++ b/fetch-pack.c
@@ -1237,13 +1237,13 @@ static int add_haves(struct fetch_negotiator *negotiator,
 	return haves_added;
 }
 
-static void write_fetch_command_and_capabilities(struct strbuf *req_buf,
-						 const struct string_list *server_options)
+static void write_command_and_capabilities(struct strbuf *req_buf,
+						 const struct string_list *server_options, const char* command)
 {
 	const char *hash_name;
 
-	if (server_supports_v2("fetch", 1))
-		packet_buf_write(req_buf, "command=fetch");
+	if (server_supports_v2(command, 1))
+		packet_buf_write(req_buf, "command=%s", command);
 	if (server_supports_v2("agent", 0))
 		packet_buf_write(req_buf, "agent=%s", git_user_agent_sanitized());
 	if (advertise_sid && server_supports_v2("session-id", 0))
@@ -1279,7 +1279,7 @@ static int send_fetch_request(struct fetch_negotiator *negotiator, int fd_out,
 	int done_sent = 0;
 	struct strbuf req_buf = STRBUF_INIT;
 
-	write_fetch_command_and_capabilities(&req_buf, args->server_options);
+	write_command_and_capabilities(&req_buf, args->server_options, "fetch");
 
 	if (args->use_thin_pack)
 		packet_buf_write(&req_buf, "thin-pack");
@@ -1598,18 +1598,18 @@ static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args,
 		reader.me = "fetch-pack";
 	}
 
+	/* v2 supports these by default */
+	allow_unadvertised_object_request |= ALLOW_REACHABLE_SHA1;
+	use_sideband = 2;
+	if (args->depth > 0 || args->deepen_since || args->deepen_not)
+		args->deepen = 1;
+
 	while (state != FETCH_DONE) {
 		switch (state) {
 		case FETCH_CHECK_LOCAL:
 			sort_ref_list(&ref, ref_compare_name);
 			QSORT(sought, nr_sought, cmp_ref_by_name);
 
-			/* v2 supports these by default */
-			allow_unadvertised_object_request |= ALLOW_REACHABLE_SHA1;
-			use_sideband = 2;
-			if (args->depth > 0 || args->deepen_since || args->deepen_not)
-				args->deepen = 1;
-
 			/* Filter 'ref' by 'sought' and those that aren't local */
 			mark_complete_and_common_ref(negotiator, args, &ref);
 			filter_refs(args, &ref, sought, nr_sought);
@@ -2060,7 +2060,7 @@ void negotiate_using_fetch(const struct oid_array *negotiation_tips,
 		int received_ready = 0;
 
 		strbuf_reset(&req_buf);
-		write_fetch_command_and_capabilities(&req_buf, server_options);
+		write_command_and_capabilities(&req_buf, server_options, "fetch");
 
 		packet_buf_write(&req_buf, "wait-for-done");
 
-- 
2.33.0.664.g0785eb7698


^ permalink raw reply related	[flat|nested] 103+ messages in thread

* [PATCH v3 2/3] transfer.advertiseObjectInfo: add object-info config
  2022-03-28 19:11   ` [PATCH v3 0/3] object-info: add option for retrieving object info Calvin Wan
  2022-03-28 19:11     ` [PATCH v3 1/3] fetch-pack: refactor packet writing and fetch options Calvin Wan
@ 2022-03-28 19:11     ` Calvin Wan
  2022-03-29 22:34       ` Junio C Hamano
  2022-03-28 19:11     ` [PATCH v3 3/3] object-info: add option for retrieving object info Calvin Wan
                       ` (3 subsequent siblings)
  5 siblings, 1 reply; 103+ messages in thread
From: Calvin Wan @ 2022-03-28 19:11 UTC (permalink / raw)
  To: git; +Cc: jonathantanmy, philipoakley, johncai86, Calvin Wan

Currently, object-info is always advertised by the server. Add a
config option to optionally advertise it. A subsequent patch uses this
option for testing.

---
 Documentation/config/transfer.txt |  4 ++++
 protocol-caps.c                   | 11 +++++++++++
 protocol-caps.h                   |  6 ++++++
 serve.c                           |  2 +-
 4 files changed, 22 insertions(+), 1 deletion(-)

diff --git a/Documentation/config/transfer.txt b/Documentation/config/transfer.txt
index b49429eb4d..14892a3155 100644
--- a/Documentation/config/transfer.txt
+++ b/Documentation/config/transfer.txt
@@ -77,3 +77,7 @@ transfer.unpackLimit::
 transfer.advertiseSID::
 	Boolean. When true, client and server processes will advertise their
 	unique session IDs to their remote counterpart. Defaults to false.
+
+transfer.advertiseObjectInfo::
+	Boolean. When true or not set, server processes will advertise its
+	ability to accept `object-info` command requests
\ No newline at end of file
diff --git a/protocol-caps.c b/protocol-caps.c
index bbde91810a..f290e3cca2 100644
--- a/protocol-caps.c
+++ b/protocol-caps.c
@@ -8,6 +8,9 @@
 #include "object-store.h"
 #include "string-list.h"
 #include "strbuf.h"
+#include "config.h"
+
+static int advertise_object_info = -1;
 
 struct requested_info {
 	unsigned size : 1;
@@ -111,3 +114,11 @@ int cap_object_info(struct repository *r, struct packet_reader *request)
 
 	return 0;
 }
+
+int object_info_advertise(struct repository *r, struct strbuf *value)
+{
+	if (advertise_object_info == -1 &&
+		git_config_get_bool("transfer.advertiseObjectInfo", &advertise_object_info))
+		advertise_object_info = 0;
+	return advertise_object_info;
+}
diff --git a/protocol-caps.h b/protocol-caps.h
index 15c4550360..36f7a46b37 100644
--- a/protocol-caps.h
+++ b/protocol-caps.h
@@ -5,4 +5,10 @@ struct repository;
 struct packet_reader;
 int cap_object_info(struct repository *r, struct packet_reader *request);
 
+/*
+ * Advertises object-info capability if "objectinfo.advertise" is either
+ * set to true or not set
+ */
+int object_info_advertise(struct repository *r, struct strbuf *value);
+
 #endif /* PROTOCOL_CAPS_H */
diff --git a/serve.c b/serve.c
index b3fe9b5126..fb4ad367a7 100644
--- a/serve.c
+++ b/serve.c
@@ -133,7 +133,7 @@ static struct protocol_capability capabilities[] = {
 	},
 	{
 		.name = "object-info",
-		.advertise = always_advertise,
+		.advertise = object_info_advertise,
 		.command = cap_object_info,
 	},
 };
-- 
2.33.0.664.g0785eb7698


^ permalink raw reply related	[flat|nested] 103+ messages in thread

* [PATCH v3 3/3] object-info: add option for retrieving object info
  2022-03-28 19:11   ` [PATCH v3 0/3] object-info: add option for retrieving object info Calvin Wan
  2022-03-28 19:11     ` [PATCH v3 1/3] fetch-pack: refactor packet writing and fetch options Calvin Wan
  2022-03-28 19:11     ` [PATCH v3 2/3] transfer.advertiseObjectInfo: add object-info config Calvin Wan
@ 2022-03-28 19:11     ` Calvin Wan
  2022-03-29 19:57       ` Junio C Hamano
                         ` (5 more replies)
  2022-03-29 20:35     ` [PATCH v3 0/3] " Junio C Hamano
                       ` (2 subsequent siblings)
  5 siblings, 6 replies; 103+ messages in thread
From: Calvin Wan @ 2022-03-28 19:11 UTC (permalink / raw)
  To: git; +Cc: jonathantanmy, philipoakley, johncai86, Calvin Wan

Sometimes it is useful to get information about an object without
having to download it completely. The server logic has already been
implemented as “a2ba162cda (object-info: support for retrieving object
info, 2021-04-20)”. This patch implements the client option for
it. Currently, only 'size' is supported, however future patches can
implement additional options.

Add ‘--object-info’ option to fetch. This option allows the client to
make an object-info command request to a server that supports protocol
v2. If the server is v2, but does not allow for the object-info
command request, fetch the objects as if it were a standard fetch
(however without changing any refs). Therefore, hook `fetch
object-info` into transport_fetch_refs() to easily fallback if needed.

A previous patch added the `transfer.advertiseObjectInfo` config
option, of which this patch utilizes to test the fallback.

---
 Documentation/fetch-options.txt |   5 ++
 Documentation/git-fetch.txt     |   1 +
 builtin/fetch.c                 |  36 ++++++++-
 fetch-pack.c                    |  42 +++++++++-
 fetch-pack.h                    |   9 +++
 t/t5583-fetch-object-info.sh    | 138 ++++++++++++++++++++++++++++++++
 transport-helper.c              |   8 +-
 transport-internal.h            |   1 +
 transport.c                     |  75 ++++++++++++++++-
 transport.h                     |   9 +++
 10 files changed, 315 insertions(+), 9 deletions(-)
 create mode 100755 t/t5583-fetch-object-info.sh

diff --git a/Documentation/fetch-options.txt b/Documentation/fetch-options.txt
index f903683189..f1ca95c32d 100644
--- a/Documentation/fetch-options.txt
+++ b/Documentation/fetch-options.txt
@@ -299,3 +299,8 @@ endif::git-pull[]
 -6::
 --ipv6::
 	Use IPv6 addresses only, ignoring IPv4 addresses.
+
+--object-info=[<arguments>]::
+	Fetches only the relevant information passed in arguments from a
+	specific remote and set of object ids. Object 'size' is currently
+	the only supported argument.
diff --git a/Documentation/git-fetch.txt b/Documentation/git-fetch.txt
index 550c16ca61..a13d2ba7ad 100644
--- a/Documentation/git-fetch.txt
+++ b/Documentation/git-fetch.txt
@@ -13,6 +13,7 @@ SYNOPSIS
 'git fetch' [<options>] <group>
 'git fetch' --multiple [<options>] [(<repository> | <group>)...]
 'git fetch' --all [<options>]
+'git fetch' --object-info=[<arguments>] <remote> [<object-ids>]
 
 
 DESCRIPTION
diff --git a/builtin/fetch.c b/builtin/fetch.c
index 4d12c2fd4d..0b21bebfca 100644
--- a/builtin/fetch.c
+++ b/builtin/fetch.c
@@ -29,6 +29,9 @@
 #include "commit-graph.h"
 #include "shallow.h"
 #include "worktree.h"
+#include "protocol.h"
+#include "pkt-line.h"
+#include "connect.h"
 
 #define FORCED_UPDATES_DELAY_WARNING_IN_MS (10 * 1000)
 
@@ -37,6 +40,7 @@ static const char * const builtin_fetch_usage[] = {
 	N_("git fetch [<options>] <group>"),
 	N_("git fetch --multiple [<options>] [(<repository> | <group>)...]"),
 	N_("git fetch --all [<options>]"),
+	N_("git fetch --object-info=[<arguments>] <remote> [<object-ids>]"),
 	NULL
 };
 
@@ -86,6 +90,7 @@ static struct string_list negotiation_tip = STRING_LIST_INIT_NODUP;
 static int fetch_write_commit_graph = -1;
 static int stdin_refspecs = 0;
 static int negotiate_only;
+static struct string_list object_info_options = STRING_LIST_INIT_NODUP;
 
 static int git_fetch_config(const char *k, const char *v, void *cb)
 {
@@ -221,6 +226,8 @@ static struct option builtin_fetch_options[] = {
 		 N_("write the commit-graph after fetching")),
 	OPT_BOOL(0, "stdin", &stdin_refspecs,
 		 N_("accept refspecs from stdin")),
+	OPT_STRING_LIST(0, "object-info", &object_info_options, N_("option"),
+		 N_("command request arguments")),
 	OPT_END()
 };
 
@@ -2087,6 +2094,7 @@ int cmd_fetch(int argc, const char **argv, const char *prefix)
 	struct remote *remote = NULL;
 	int result = 0;
 	int prune_tags_ok = 1;
+	struct oid_array object_info_oids = OID_ARRAY_INIT;
 
 	packet_trace_identity("fetch");
 
@@ -2168,7 +2176,19 @@ int cmd_fetch(int argc, const char **argv, const char *prefix)
 	if (dry_run)
 		write_fetch_head = 0;
 
-	if (all) {
+	if (object_info_options.nr > 0) {
+		if (argc == 0 || argc == 1) {
+			die(_("must supply remote and object ids when using --object-info"));
+		} else {
+			struct object_id oid;
+			remote = remote_get(argv[0]);
+			for (i = 1; i < argc; i++) {
+				if (get_oid(argv[i], &oid))
+					return error(_("malformed object id '%s'"), argv[i]);
+				oid_array_append(&object_info_oids, &oid);
+			}
+		}
+	} else if (all) {
 		if (argc == 1)
 			die(_("fetch --all does not take a repository argument"));
 		else if (argc > 1)
@@ -2199,7 +2219,19 @@ int cmd_fetch(int argc, const char **argv, const char *prefix)
 		}
 	}
 
-	if (negotiate_only) {
+	if (object_info_options.nr > 0) {
+		if (!remote)
+			die(_("must supply remote when using --object-info"));
+		gtransport = prepare_transport(remote, 1);
+		if (gtransport->smart_options) {
+			gtransport->smart_options->object_info = 1;
+			gtransport->smart_options->object_info_oids = &object_info_oids;
+			gtransport->smart_options->object_info_options = &object_info_options;
+		}
+		if (server_options.nr)
+			gtransport->server_options = &server_options;
+		result = transport_fetch_refs(gtransport, NULL);
+	} else if (negotiate_only) {
 		struct oidset acked_commits = OIDSET_INIT;
 		struct oidset_iter iter;
 		const struct object_id *oid;
diff --git a/fetch-pack.c b/fetch-pack.c
index b709a61baf..36e3d1c5d0 100644
--- a/fetch-pack.c
+++ b/fetch-pack.c
@@ -1269,6 +1269,27 @@ static void write_command_and_capabilities(struct strbuf *req_buf,
 	packet_buf_delim(req_buf);
 }
 
+void send_object_info_request(int fd_out, struct object_info_args *args)
+{
+	struct strbuf req_buf = STRBUF_INIT;
+	int i;
+
+	write_command_and_capabilities(&req_buf, args->server_options, "object-info");
+
+	if (string_list_has_string(args->object_info_options, "size"))
+		packet_buf_write(&req_buf, "size");
+
+	for (i = 0; i < args->oids->nr; i++) {
+		packet_buf_write(&req_buf, "oid %s\n", oid_to_hex(&args->oids->oid[i]));
+	}
+
+	packet_buf_flush(&req_buf);
+	if (write_in_full(fd_out, req_buf.buf, req_buf.len) < 0)
+		die_errno(_("unable to write request to remote"));
+
+	strbuf_release(&req_buf);
+}
+
 static int send_fetch_request(struct fetch_negotiator *negotiator, int fd_out,
 			      struct fetch_pack_args *args,
 			      const struct ref *wants, struct oidset *common,
@@ -1604,6 +1625,10 @@ static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args,
 	if (args->depth > 0 || args->deepen_since || args->deepen_not)
 		args->deepen = 1;
 
+	if (args->object_info) {
+		state = FETCH_SEND_REQUEST;
+	}
+
 	while (state != FETCH_DONE) {
 		switch (state) {
 		case FETCH_CHECK_LOCAL:
@@ -1613,7 +1638,7 @@ static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args,
 			/* Filter 'ref' by 'sought' and those that aren't local */
 			mark_complete_and_common_ref(negotiator, args, &ref);
 			filter_refs(args, &ref, sought, nr_sought);
-			if (everything_local(args, &ref))
+			if (!args->object_info && everything_local(args, &ref))
 				state = FETCH_DONE;
 			else
 				state = FETCH_SEND_REQUEST;
@@ -1999,8 +2024,19 @@ struct ref *fetch_pack(struct fetch_pack_args *args,
 		}
 		args->connectivity_checked = 1;
 	}
-
-	update_shallow(args, sought, nr_sought, &si);
+	
+	if (args->object_info) {
+		struct ref *ref_cpy_reader = ref_cpy;
+		unsigned long size = 0;
+		while (ref_cpy_reader) {
+			oid_object_info(the_repository, &(ref_cpy_reader->old_oid), &size);
+			printf("%s %li\n", oid_to_hex(&(ref_cpy_reader->old_oid)), size);
+			ref_cpy_reader = ref_cpy_reader->next;
+		}
+	}
+	else {
+		update_shallow(args, sought, nr_sought, &si);
+	}
 cleanup:
 	clear_shallow_info(&si);
 	oid_array_clear(&shallows_scratch);
diff --git a/fetch-pack.h b/fetch-pack.h
index 7f94a2a583..23e69bb218 100644
--- a/fetch-pack.h
+++ b/fetch-pack.h
@@ -42,6 +42,7 @@ struct fetch_pack_args {
 	unsigned update_shallow:1;
 	unsigned reject_shallow_remote:1;
 	unsigned deepen:1;
+	unsigned object_info:1;
 
 	/*
 	 * Indicate that the remote of this request is a promisor remote. The
@@ -68,6 +69,12 @@ struct fetch_pack_args {
 	unsigned connectivity_checked:1;
 };
 
+struct object_info_args {
+	const struct string_list *object_info_options;
+	const struct string_list *server_options;
+	const struct oid_array *oids;
+};
+
 /*
  * sought represents remote references that should be updated from.
  * On return, the names that were found on the remote will have been
@@ -101,4 +108,6 @@ void negotiate_using_fetch(const struct oid_array *negotiation_tips,
  */
 int report_unmatched_refs(struct ref **sought, int nr_sought);
 
+void send_object_info_request(int fd_out, struct object_info_args *args);
+
 #endif
diff --git a/t/t5583-fetch-object-info.sh b/t/t5583-fetch-object-info.sh
new file mode 100755
index 0000000000..e5db6d953c
--- /dev/null
+++ b/t/t5583-fetch-object-info.sh
@@ -0,0 +1,138 @@
+#!/bin/sh
+
+test_description='test git fetch object-info version 2'
+
+TEST_NO_CREATE_REPO=1
+
+GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME=main
+export GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME
+
+. ./test-lib.sh
+
+# Test fetch object-info with 'git://' transport
+
+. "$TEST_DIRECTORY"/lib-git-daemon.sh
+start_git_daemon --export-all --enable=receive-pack
+daemon_parent=$GIT_DAEMON_DOCUMENT_ROOT_PATH/parent
+
+
+test_expect_success 'create repo to be served by git-daemon' '
+	git init "$daemon_parent" &&
+	test_commit -C "$daemon_parent" message1 a.txt
+'
+
+test_expect_success 'fetch object-info with git:// using protocol v2' '
+	(
+		cd "$daemon_parent" &&
+		object_id=$(git rev-parse message1:a.txt) &&
+		length=$(wc -c <a.txt) &&
+
+		printf "%s %d\n" "$object_id" "$length" >expect &&
+		git -c protocol.version=2 fetch --object-info=size "$GIT_DAEMON_URL/parent" "$object_id" >actual &&
+		test_cmp expect actual
+	)
+'
+stop_git_daemon
+
+# Test protocol v2 with 'http://' transport
+
+. "$TEST_DIRECTORY"/lib-httpd.sh
+start_httpd
+
+test_expect_success 'create repo to be served by http:// transport' '
+	git init "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
+	git -C "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" config http.receivepack true &&
+	test_commit -C "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" message1 a.txt &&
+	test_commit -C "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" message2 b.txt
+'
+
+test_expect_success 'fetch object-info with http:// using protocol v2' '
+	(
+		cd "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
+		object_id=$(git rev-parse message1:a.txt) &&
+		length=$(wc -c <a.txt) &&
+
+		printf "%s %d\n" "$object_id" "$length" >expect &&
+		git -c protocol.version=2 fetch --object-info=size "$HTTPD_URL/smart/http_parent" "$object_id" >actual &&
+		test_cmp expect actual
+	)
+'
+
+test_expect_success 'fetch object-info for multiple objects' '
+	(
+		cd "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
+		object_id1=$(git rev-parse message1:a.txt) &&
+		object_id2=$(git rev-parse message2:b.txt) &&
+		length1=$(wc -c <a.txt) &&
+		length2=$(wc -c <b.txt) &&
+
+		printf "%s %d\n" "$object_id1" "$length1" >expect &&
+		printf "%s %d\n" "$object_id2" "$length2" >>expect &&
+		git -c protocol.version=2 fetch --object-info=size "$HTTPD_URL/smart/http_parent" "$object_id1" "$object_id2" >actual &&
+		test_cmp expect actual
+	)
+'
+
+test_expect_success 'fetch object-info fallbacks to standard fetch if object-info is not supported by the server' '
+	(
+		cd "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
+		object_id=$(git rev-parse message1:a.txt) &&
+		length=$(wc -c <a.txt) &&
+
+		printf "%s %d\n" "$object_id" "$length" >expect &&
+		git config objectinfo.advertise false &&
+		git -c protocol.version=2 fetch --object-info=size "$HTTPD_URL/smart/http_parent" "$object_id" >actual &&
+		test_cmp expect actual
+	)
+'
+
+test_expect_success 'fetch object-info fails on server with legacy protocol' '
+	(
+		cd "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
+		object_id=$(git rev-parse message1:a.txt) &&
+		test_must_fail git -c protocol.version=0 fetch --object-info=size "$HTTPD_URL/smart/http_parent" "$object_id" 2>err &&
+		test_i18ngrep "object-info requires protocol v2" err
+	)
+'
+
+test_expect_success 'fetch object-info fails on malformed OID' '
+	(
+		cd "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
+		malformed_object_id="this_id_is_not_valid" &&
+		test_must_fail git -c protocol.version=2 fetch --object-info=size "$HTTPD_URL/smart/http_parent" "$malformed_object_id" 2>err &&
+		test_i18ngrep "malformed object id '$malformed_object_id'" err
+	)
+'
+
+test_expect_success 'fetch object-info fails on missing OID' '
+	git clone "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" missing_oid_repo &&
+	test_commit -C missing_oid_repo message3 c.txt &&
+	(
+		cd missing_oid_repo &&
+		object_id=$(git rev-parse message3:c.txt) &&
+		test_must_fail env GIT_TRACE_PACKET=1 git -c protocol.version=2 fetch --object-info=size "$HTTPD_URL/smart/http_parent" "$object_id" 2>err &&
+		test_i18ngrep "fatal: remote error: upload-pack: not our ref $object_id" err
+	)
+'
+
+# Test fetch object-info with 'file://' transport
+
+test_expect_success 'create repo to be served by file:// transport' '
+	git init server &&
+	test_commit -C server message1 a.txt &&
+	git -C server config protocol.version 2
+'
+
+test_expect_success 'fetch object-info with file:// using protocol v2' '
+	(
+		cd server &&
+		object_id=$(git rev-parse message1:a.txt) &&
+		length=$(wc -c <a.txt) &&
+
+		printf "%s %d\n" "$object_id" "$length" >expect &&
+		git -c protocol.version=2 fetch --object-info=size "file://$(pwd)" "$object_id" >actual &&
+		test_cmp expect actual
+	)
+'
+
+test_done
diff --git a/transport-helper.c b/transport-helper.c
index a0297b0986..64c175e904 100644
--- a/transport-helper.c
+++ b/transport-helper.c
@@ -686,13 +686,17 @@ static int fetch_refs(struct transport *transport,
 
 	/*
 	 * If we reach here, then the server, the client, and/or the transport
-	 * helper does not support protocol v2. --negotiate-only requires
-	 * protocol v2.
+	 * helper does not support protocol v2. --negotiate-only and --object-info
+	 * require protocol v2.
 	 */
 	if (data->transport_options.acked_commits) {
 		warning(_("--negotiate-only requires protocol v2"));
 		return -1;
 	}
+	if (transport->smart_options->object_info) {
+		warning(_("--object-info requires protocol v2"));
+		return -1;
+	}
 
 	if (!data->get_refs_list_called)
 		get_refs_list_using_list(transport, 0);
diff --git a/transport-internal.h b/transport-internal.h
index c4ca0b733a..04fa015011 100644
--- a/transport-internal.h
+++ b/transport-internal.h
@@ -59,6 +59,7 @@ struct transport_vtable {
 	 * use. disconnect() releases these resources.
 	 **/
 	int (*disconnect)(struct transport *connection);
+	int (*fetch_object_info)(struct transport *transport, struct oid_array *oids);
 };
 
 #endif
diff --git a/transport.c b/transport.c
index 70e9840a90..65a1b1fdb3 100644
--- a/transport.c
+++ b/transport.c
@@ -350,6 +350,67 @@ static struct ref *handshake(struct transport *transport, int for_push,
 	return refs;
 }
 
+/*
+ * Fetches object info if server supports object-info command
+ * Make sure to fallback to normal fetch if this fails
+ */
+static int fetch_object_info(struct transport *transport)
+{
+	struct git_transport_data *data = transport->data;
+	struct object_info_args args;
+	struct packet_reader reader;
+
+	memset(&args, 0, sizeof(args));
+	args.server_options = transport->server_options;
+	args.object_info_options = transport->smart_options->object_info_options;
+	args.oids = transport->smart_options->object_info_oids;
+
+	connect_setup(transport, 0);
+	packet_reader_init(&reader, data->fd[0], NULL, 0,
+			PACKET_READ_CHOMP_NEWLINE |
+			PACKET_READ_DIE_ON_ERR_PACKET);
+	data->version = discover_version(&reader);
+
+	transport->hash_algo = reader.hash_algo;
+
+	switch (data->version) {
+	case protocol_v2:
+		if (!server_supports_v2("object-info", 0))
+			return 0;
+		send_object_info_request(data->fd[1], &args);
+		break;
+	case protocol_v1:
+	case protocol_v0:
+		die(_("wrong protocol version. expected v2"));
+	case protocol_unknown_version:
+		BUG("unknown protocol version");
+	}
+
+	if (packet_reader_read(&reader) != PACKET_READ_NORMAL) {
+		die(_("error reading object info header"));
+	}
+	if (strcmp(reader.line, "size")) {
+		die(_("expected 'size', received '%s'"), reader.line);
+	}
+	while (packet_reader_read(&reader) == PACKET_READ_NORMAL) {
+		printf("%s\n", reader.line);
+	}
+	check_stateless_delimiter(transport->stateless_rpc, &reader, "stateless delimiter expected");
+
+	return 1;
+}
+
+static int append_oid_to_ref(const struct object_id *oid, void *data)
+{
+	struct ref *ref = data;
+	struct ref *temp_ref = xcalloc(1, sizeof (struct ref));
+	temp_ref->old_oid = *oid;
+	temp_ref->exact_oid = 1;
+	ref->next = temp_ref;
+	ref = ref->next;
+	return 0;
+}
+
 static struct ref *get_refs_via_connect(struct transport *transport, int for_push,
 					struct transport_ls_refs_options *options)
 {
@@ -364,6 +425,7 @@ static int fetch_refs_via_pack(struct transport *transport,
 	struct ref *refs = NULL;
 	struct fetch_pack_args args;
 	struct ref *refs_tmp = NULL;
+	struct ref *wanted_refs = xcalloc(1, sizeof (struct ref));
 
 	memset(&args, 0, sizeof(args));
 	args.uploadpack = data->options.uploadpack;
@@ -388,8 +450,17 @@ static int fetch_refs_via_pack(struct transport *transport,
 	args.server_options = transport->server_options;
 	args.negotiation_tips = data->options.negotiation_tips;
 	args.reject_shallow_remote = transport->smart_options->reject_shallow;
+	args.object_info = transport->smart_options->object_info;
 
-	if (!data->got_remote_heads) {
+	if (transport->smart_options && transport->smart_options->object_info) {
+		struct ref *temp_ref = wanted_refs;
+		if (fetch_object_info(transport)) {
+			goto cleanup;
+		}
+		oid_array_for_each(transport->smart_options->object_info_oids, append_oid_to_ref, temp_ref);
+		wanted_refs = wanted_refs->next;
+		transport->remote_refs = wanted_refs;
+	} else if (!data->got_remote_heads) {
 		int i;
 		int must_list_refs = 0;
 		for (i = 0; i < nr_heads; i++) {
@@ -406,7 +477,7 @@ static int fetch_refs_via_pack(struct transport *transport,
 	else if (data->version <= protocol_v1)
 		die_if_server_options(transport);
 
-	if (data->options.acked_commits) {
+	if (data->options.acked_commits && !transport->smart_options->object_info) {
 		if (data->version < protocol_v2) {
 			warning(_("--negotiate-only requires protocol v2"));
 			ret = -1;
diff --git a/transport.h b/transport.h
index a0bc6a1e9e..3bba16f86e 100644
--- a/transport.h
+++ b/transport.h
@@ -30,6 +30,12 @@ struct git_transport_options {
 	 */
 	unsigned connectivity_checked:1;
 
+	/*
+	 * Transport will attempt to pull only object-info. Fallbacks
+	 * to pulling entire object if object-info is not supported
+	 */
+	unsigned object_info : 1;
+
 	int depth;
 	const char *deepen_since;
 	const struct string_list *deepen_not;
@@ -53,6 +59,9 @@ struct git_transport_options {
 	 * common commits to this oidset instead of fetching any packfiles.
 	 */
 	struct oidset *acked_commits;
+
+	struct oid_array *object_info_oids;
+	struct string_list *object_info_options;
 };
 
 enum transport_family {
-- 
2.33.0.664.g0785eb7698


^ permalink raw reply related	[flat|nested] 103+ messages in thread

* Re: [PATCH v3 3/3] object-info: add option for retrieving object info
  2022-03-28 19:11     ` [PATCH v3 3/3] object-info: add option for retrieving object info Calvin Wan
@ 2022-03-29 19:57       ` Junio C Hamano
  2022-03-29 22:54       ` Junio C Hamano
                         ` (4 subsequent siblings)
  5 siblings, 0 replies; 103+ messages in thread
From: Junio C Hamano @ 2022-03-29 19:57 UTC (permalink / raw)
  To: Calvin Wan; +Cc: git, jonathantanmy, philipoakley, johncai86

Calvin Wan <calvinwan@google.com> writes:

> diff --git a/transport-internal.h b/transport-internal.h
> index c4ca0b733a..04fa015011 100644
> --- a/transport-internal.h
> +++ b/transport-internal.h
> @@ -59,6 +59,7 @@ struct transport_vtable {
>  	 * use. disconnect() releases these resources.
>  	 **/
>  	int (*disconnect)(struct transport *connection);
> +	int (*fetch_object_info)(struct transport *transport, struct oid_array *oids);
>  };
>  
>  #endif

We need something like this squashed in in order to pass "make hdr-check".

 transport-internal.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/transport-internal.h b/transport-internal.h
index 04fa015011..1416c9197b 100644
--- a/transport-internal.h
+++ b/transport-internal.h
@@ -5,6 +5,7 @@ struct ref;
 struct transport;
 struct strvec;
 struct transport_ls_refs_options;
+struct oid_array;
 
 struct transport_vtable {
 	/**
-- 
2.35.1-887-g427ebd35eb


^ permalink raw reply related	[flat|nested] 103+ messages in thread

* Re: [PATCH v3 0/3] object-info: add option for retrieving object info
  2022-03-28 19:11   ` [PATCH v3 0/3] object-info: add option for retrieving object info Calvin Wan
                       ` (2 preceding siblings ...)
  2022-03-28 19:11     ` [PATCH v3 3/3] object-info: add option for retrieving object info Calvin Wan
@ 2022-03-29 20:35     ` Junio C Hamano
  2022-03-29 22:40       ` Calvin Wan
  2022-03-31  1:50     ` Junio C Hamano
  2022-05-02 17:08     ` [PATCH v4 0/8] cat-file: add --batch-command remote-object-info command Calvin Wan
  5 siblings, 1 reply; 103+ messages in thread
From: Junio C Hamano @ 2022-03-29 20:35 UTC (permalink / raw)
  To: Calvin Wan; +Cc: git, jonathantanmy, philipoakley, johncai86

Calvin Wan <calvinwan@google.com> writes:

> Sometimes it is useful to get information about an object without
> having to download it completely. The server logic has already been
> implemented as “a2ba162cda (object-info: support for retrieving object
> info, 2021-04-20)”. This patch implements the client option for it.
>
> Add ‘--object-info’ option to fetch. This option allows the client to
> make an object-info command request to a server that supports protocol
> v2. If the server is v2, but does not allow for the object-info
> command request, the entire object is fetched instead and the relevant
> information is returned.
>
> Patch 1 is a small refactor. Patch 2 adds a config option for
> advertisement of object-info.  Patch 3 is the main implementation of
> fetch object-info.
>
> Major changes since v2:
>  * Added a fallback for servers that do not
>    allow object-info
>  * Refactored the fetch object-info code by moving it to fetch_refs in
>    the vtable rather than having its own function in the vtable. This
>    allows for the code to easily transition to the fallback if
>    object-info fails.
>
> Calvin Wan (3):
>   fetch-pack: refactor how packet is written to be more generic
>   objectinfo.advertise: add config option to advertise object-info
>   object-info: add option for retrieving object info
>
>  Documentation/config/transfer.txt |   4 +
>  Documentation/fetch-options.txt   |   5 ++
>  Documentation/git-fetch.txt       |   1 +
>  builtin/fetch.c                   |  36 +++++++-
>  fetch-pack.c                      |  66 ++++++++++----
>  fetch-pack.h                      |   9 ++
>  protocol-caps.c                   |  11 +++
>  protocol-caps.h                   |   6 ++
>  serve.c                           |   2 +-
>  t/t5583-fetch-object-info.sh      | 138 ++++++++++++++++++++++++++++++
>  transport-helper.c                |   8 +-
>  transport-internal.h              |   1 +
>  transport.c                       |  75 +++++++++++++++-
>  transport.h                       |   9 ++
>  14 files changed, 349 insertions(+), 22 deletions(-)
>  create mode 100755 t/t5583-fetch-object-info.sh
>
>
> base-commit: f01e51a7cfd75131b7266131b1f7540ce0a8e5c1

I am getting the following failures when they are queued on this
base, or on a more recent tip of 'master/main'.

Test Summary Report
-------------------
t5555-http-smart-common.sh (Wstat: 256 Tests: 10 Failed: 1)
  Failed test:  9
  Non-zero exit status: 1
t5583-fetch-object-info.sh (Wstat: 256 Tests: 11 Failed: 1)
  Failed test:  5
  Non-zero exit status: 1
t5701-git-serve.sh        (Wstat: 256 Tests: 20 Failed: 2)
  Failed tests:  1, 20
  Non-zero exit status: 1

^ permalink raw reply	[flat|nested] 103+ messages in thread

* Re: [PATCH v3 2/3] transfer.advertiseObjectInfo: add object-info config
  2022-03-28 19:11     ` [PATCH v3 2/3] transfer.advertiseObjectInfo: add object-info config Calvin Wan
@ 2022-03-29 22:34       ` Junio C Hamano
  2022-03-29 22:48         ` Calvin Wan
  0 siblings, 1 reply; 103+ messages in thread
From: Junio C Hamano @ 2022-03-29 22:34 UTC (permalink / raw)
  To: Calvin Wan; +Cc: git, jonathantanmy, philipoakley, johncai86

Calvin Wan <calvinwan@google.com> writes:

> Currently, object-info is always advertised by the server. Add a
> config option to optionally advertise it. A subsequent patch uses this
> option for testing.
>
> ---
>  Documentation/config/transfer.txt |  4 ++++
>  protocol-caps.c                   | 11 +++++++++++
>  protocol-caps.h                   |  6 ++++++
>  serve.c                           |  2 +-
>  4 files changed, 22 insertions(+), 1 deletion(-)
>
> diff --git a/Documentation/config/transfer.txt b/Documentation/config/transfer.txt
> index b49429eb4d..14892a3155 100644
> --- a/Documentation/config/transfer.txt
> +++ b/Documentation/config/transfer.txt
> @@ -77,3 +77,7 @@ transfer.unpackLimit::
>  transfer.advertiseSID::
>  	Boolean. When true, client and server processes will advertise their
>  	unique session IDs to their remote counterpart. Defaults to false.
> +
> +transfer.advertiseObjectInfo::
> +	Boolean. When true or not set, server processes will advertise its
> +	ability to accept `object-info` command requests
> \ No newline at end of file

Carefully proofread before sending your changes out ;-)

By mimicking the style of the previous item more closely, i.e.

	Boolean. When true, ... requests. Defaults to true.

it would make it easier to read, I suspect.

> diff --git a/protocol-caps.c b/protocol-caps.c
> index bbde91810a..f290e3cca2 100644
> --- a/protocol-caps.c
> +++ b/protocol-caps.c
> @@ -8,6 +8,9 @@
>  #include "object-store.h"
>  #include "string-list.h"
>  #include "strbuf.h"
> +#include "config.h"
> +
> +static int advertise_object_info = -1;

It strikes me somewhat odd that this is the _first_ such capability
that needs a variable here to control if it is advertised or not
depending on the configuration file settings.

In other words, "Why doesn't transfer.advertiseSID also have a
similar thing in this file?" should be the first question that comes
to any reviewers' mind.

Perhaps we just invented a better way to handle such conditional
capability, in which case we may want to port the existing ones that
do not use the same arrangement over to the new way.  Even though it
would be outside the scope of the series, it would be a good way to
make sure we won't accumulate too much technical debt to leave some
hint somewhere near here or in protoco-caps.h where this patch
touches.  I cannot quite tell what is going on.

> int object_info_advertise(struct repository *r, struct strbuf *value)
> {
> 	if (advertise_object_info == -1 &&
> 	    git_config_get_bool("transfer.advertiseObjectInfo", &advertise_object_info))
> 		advertise_object_info = 0;
> 	return advertise_object_info;
> }

"If unset(yet), try to get_bool() and if it fails, set it to 0"

So advertise_object_info becomes true only if transfer.advertiseObjectInfo
is set to true?

It is inconsistent with what we read in the documentation, isn't it?

> diff --git a/protocol-caps.h b/protocol-caps.h
> index 15c4550360..36f7a46b37 100644
> --- a/protocol-caps.h
> +++ b/protocol-caps.h
> @@ -5,4 +5,10 @@ struct repository;
>  struct packet_reader;
>  int cap_object_info(struct repository *r, struct packet_reader *request);
>  
> +/*
> + * Advertises object-info capability if "objectinfo.advertise" is either
> + * set to true or not set
> + */
> +int object_info_advertise(struct repository *r, struct strbuf *value);
> +
>  #endif /* PROTOCOL_CAPS_H */
> diff --git a/serve.c b/serve.c
> index b3fe9b5126..fb4ad367a7 100644
> --- a/serve.c
> +++ b/serve.c
> @@ -133,7 +133,7 @@ static struct protocol_capability capabilities[] = {
>  	},
>  	{
>  		.name = "object-info",
> -		.advertise = always_advertise,
> +		.advertise = object_info_advertise,
>  		.command = cap_object_info,
>  	},
>  };

This is a good step to add a test to see if the server does honor
the (1) lack of (2) true set to and (3) false set to the
configuration variable and sends (or does not send) the capability.

^ permalink raw reply	[flat|nested] 103+ messages in thread

* Re: [PATCH v3 0/3] object-info: add option for retrieving object info
  2022-03-29 20:35     ` [PATCH v3 0/3] " Junio C Hamano
@ 2022-03-29 22:40       ` Calvin Wan
  0 siblings, 0 replies; 103+ messages in thread
From: Calvin Wan @ 2022-03-29 22:40 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git, jonathantanmy, philipoakley, johncai86

I seem to have introduced a small bug with the advertisement while I
was making my final changes. Will re-roll later today

On Tue, Mar 29, 2022 at 1:35 PM Junio C Hamano <gitster@pobox.com> wrote:
>
> Calvin Wan <calvinwan@google.com> writes:
>
> > Sometimes it is useful to get information about an object without
> > having to download it completely. The server logic has already been
> > implemented as “a2ba162cda (object-info: support for retrieving object
> > info, 2021-04-20)”. This patch implements the client option for it.
> >
> > Add ‘--object-info’ option to fetch. This option allows the client to
> > make an object-info command request to a server that supports protocol
> > v2. If the server is v2, but does not allow for the object-info
> > command request, the entire object is fetched instead and the relevant
> > information is returned.
> >
> > Patch 1 is a small refactor. Patch 2 adds a config option for
> > advertisement of object-info.  Patch 3 is the main implementation of
> > fetch object-info.
> >
> > Major changes since v2:
> >  * Added a fallback for servers that do not
> >    allow object-info
> >  * Refactored the fetch object-info code by moving it to fetch_refs in
> >    the vtable rather than having its own function in the vtable. This
> >    allows for the code to easily transition to the fallback if
> >    object-info fails.
> >
> > Calvin Wan (3):
> >   fetch-pack: refactor how packet is written to be more generic
> >   objectinfo.advertise: add config option to advertise object-info
> >   object-info: add option for retrieving object info
> >
> >  Documentation/config/transfer.txt |   4 +
> >  Documentation/fetch-options.txt   |   5 ++
> >  Documentation/git-fetch.txt       |   1 +
> >  builtin/fetch.c                   |  36 +++++++-
> >  fetch-pack.c                      |  66 ++++++++++----
> >  fetch-pack.h                      |   9 ++
> >  protocol-caps.c                   |  11 +++
> >  protocol-caps.h                   |   6 ++
> >  serve.c                           |   2 +-
> >  t/t5583-fetch-object-info.sh      | 138 ++++++++++++++++++++++++++++++
> >  transport-helper.c                |   8 +-
> >  transport-internal.h              |   1 +
> >  transport.c                       |  75 +++++++++++++++-
> >  transport.h                       |   9 ++
> >  14 files changed, 349 insertions(+), 22 deletions(-)
> >  create mode 100755 t/t5583-fetch-object-info.sh
> >
> >
> > base-commit: f01e51a7cfd75131b7266131b1f7540ce0a8e5c1
>
> I am getting the following failures when they are queued on this
> base, or on a more recent tip of 'master/main'.
>
> Test Summary Report
> -------------------
> t5555-http-smart-common.sh (Wstat: 256 Tests: 10 Failed: 1)
>   Failed test:  9
>   Non-zero exit status: 1
> t5583-fetch-object-info.sh (Wstat: 256 Tests: 11 Failed: 1)
>   Failed test:  5
>   Non-zero exit status: 1
> t5701-git-serve.sh        (Wstat: 256 Tests: 20 Failed: 2)
>   Failed tests:  1, 20
>   Non-zero exit status: 1

^ permalink raw reply	[flat|nested] 103+ messages in thread

* Re: [PATCH v3 2/3] transfer.advertiseObjectInfo: add object-info config
  2022-03-29 22:34       ` Junio C Hamano
@ 2022-03-29 22:48         ` Calvin Wan
  2022-03-29 23:09           ` Taylor Blau
  0 siblings, 1 reply; 103+ messages in thread
From: Calvin Wan @ 2022-03-29 22:48 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git, jonathantanmy, philipoakley, johncai86

> It strikes me somewhat odd that this is the _first_ such capability
> that needs a variable here to control if it is advertised or not
> depending on the configuration file settings.

> In other words, "Why doesn't transfer.advertiseSID also have a
> similar thing in this file?" should be the first question that comes
> to any reviewers' mind.
It doesn't exist in protocol-caps.c, but instead in serve.c. Would be
a good discussion for whether the advertise function should be serve.c
or protocol-caps.c

> "If unset(yet), try to get_bool() and if it fails, set it to 0"

> So advertise_object_info becomes true only if transfer.advertiseObjectInfo
> is set to true?
This was the bug I introduced and will reroll

On Tue, Mar 29, 2022 at 3:34 PM Junio C Hamano <gitster@pobox.com> wrote:
>
> Calvin Wan <calvinwan@google.com> writes:
>
> > Currently, object-info is always advertised by the server. Add a
> > config option to optionally advertise it. A subsequent patch uses this
> > option for testing.
> >
> > ---
> >  Documentation/config/transfer.txt |  4 ++++
> >  protocol-caps.c                   | 11 +++++++++++
> >  protocol-caps.h                   |  6 ++++++
> >  serve.c                           |  2 +-
> >  4 files changed, 22 insertions(+), 1 deletion(-)
> >
> > diff --git a/Documentation/config/transfer.txt b/Documentation/config/transfer.txt
> > index b49429eb4d..14892a3155 100644
> > --- a/Documentation/config/transfer.txt
> > +++ b/Documentation/config/transfer.txt
> > @@ -77,3 +77,7 @@ transfer.unpackLimit::
> >  transfer.advertiseSID::
> >       Boolean. When true, client and server processes will advertise their
> >       unique session IDs to their remote counterpart. Defaults to false.
> > +
> > +transfer.advertiseObjectInfo::
> > +     Boolean. When true or not set, server processes will advertise its
> > +     ability to accept `object-info` command requests
> > \ No newline at end of file
>
> Carefully proofread before sending your changes out ;-)
>
> By mimicking the style of the previous item more closely, i.e.
>
>         Boolean. When true, ... requests. Defaults to true.
>
> it would make it easier to read, I suspect.
>
> > diff --git a/protocol-caps.c b/protocol-caps.c
> > index bbde91810a..f290e3cca2 100644
> > --- a/protocol-caps.c
> > +++ b/protocol-caps.c
> > @@ -8,6 +8,9 @@
> >  #include "object-store.h"
> >  #include "string-list.h"
> >  #include "strbuf.h"
> > +#include "config.h"
> > +
> > +static int advertise_object_info = -1;
>
> It strikes me somewhat odd that this is the _first_ such capability
> that needs a variable here to control if it is advertised or not
> depending on the configuration file settings.
>
> In other words, "Why doesn't transfer.advertiseSID also have a
> similar thing in this file?" should be the first question that comes
> to any reviewers' mind.
>
> Perhaps we just invented a better way to handle such conditional
> capability, in which case we may want to port the existing ones that
> do not use the same arrangement over to the new way.  Even though it
> would be outside the scope of the series, it would be a good way to
> make sure we won't accumulate too much technical debt to leave some
> hint somewhere near here or in protoco-caps.h where this patch
> touches.  I cannot quite tell what is going on.
>
> > int object_info_advertise(struct repository *r, struct strbuf *value)
> > {
> >       if (advertise_object_info == -1 &&
> >           git_config_get_bool("transfer.advertiseObjectInfo", &advertise_object_info))
> >               advertise_object_info = 0;
> >       return advertise_object_info;
> > }
>
> "If unset(yet), try to get_bool() and if it fails, set it to 0"
>
> So advertise_object_info becomes true only if transfer.advertiseObjectInfo
> is set to true?
>
> It is inconsistent with what we read in the documentation, isn't it?
>
> > diff --git a/protocol-caps.h b/protocol-caps.h
> > index 15c4550360..36f7a46b37 100644
> > --- a/protocol-caps.h
> > +++ b/protocol-caps.h
> > @@ -5,4 +5,10 @@ struct repository;
> >  struct packet_reader;
> >  int cap_object_info(struct repository *r, struct packet_reader *request);
> >
> > +/*
> > + * Advertises object-info capability if "objectinfo.advertise" is either
> > + * set to true or not set
> > + */
> > +int object_info_advertise(struct repository *r, struct strbuf *value);
> > +
> >  #endif /* PROTOCOL_CAPS_H */
> > diff --git a/serve.c b/serve.c
> > index b3fe9b5126..fb4ad367a7 100644
> > --- a/serve.c
> > +++ b/serve.c
> > @@ -133,7 +133,7 @@ static struct protocol_capability capabilities[] = {
> >       },
> >       {
> >               .name = "object-info",
> > -             .advertise = always_advertise,
> > +             .advertise = object_info_advertise,
> >               .command = cap_object_info,
> >       },
> >  };
>
> This is a good step to add a test to see if the server does honor
> the (1) lack of (2) true set to and (3) false set to the
> configuration variable and sends (or does not send) the capability.

^ permalink raw reply	[flat|nested] 103+ messages in thread

* Re: [PATCH v3 3/3] object-info: add option for retrieving object info
  2022-03-28 19:11     ` [PATCH v3 3/3] object-info: add option for retrieving object info Calvin Wan
  2022-03-29 19:57       ` Junio C Hamano
@ 2022-03-29 22:54       ` Junio C Hamano
  2022-03-30  0:49         ` Junio C Hamano
  2022-03-30 22:43         ` Jonathan Tan
  2022-03-29 23:19       ` Taylor Blau
                         ` (3 subsequent siblings)
  5 siblings, 2 replies; 103+ messages in thread
From: Junio C Hamano @ 2022-03-29 22:54 UTC (permalink / raw)
  To: Calvin Wan; +Cc: git, jonathantanmy, philipoakley, johncai86

Calvin Wan <calvinwan@google.com> writes:

> Add ‘--object-info’ option to fetch. This option allows the client to
> make an object-info command request to a server that supports protocol
> v2. If the server is v2, but does not allow for the object-info
> command request, fetch the objects as if it were a standard fetch
> (however without changing any refs). Therefore, hook `fetch
> object-info` into transport_fetch_refs() to easily fallback if needed.

Sorry, but I do not see anything common between "git fetch" that
fetches history and associated objects and "retrieving only object
info".  Other than "the way I happened to have chosen to implement,
what is used to implement fetch has the most commonality".

If we were to add more "server offloading", say "run blame on the
server side", are we going to piggy back over fetch-pack, too?

It is not necessarily bad way to experiment, to reuse the code to
establish connection, check if the other side is capable to serve
us, throw args at them and then retrieve the result, but exposing
that implemention detail makes a HORRIBLE UX to force users to say
"git fetch --blame='master Makefile' origin".

Shouldn't we be making the part that we truly need to reuse into a
separate API out of fetch-pack and either (1) allow new commands be
easily written reusing the code to create "git remote-object-info"
and "git remote-blame", or (2) come up with a single "do things on
remote" command with various subcommands, i.e. "git over-there
object-info" and "git over-there blame"?

> A previous patch added the `transfer.advertiseObjectInfo` config
> option, of which this patch utilizes to test the fallback.
>
> ---

Missing sign-off.

> diff --git a/Documentation/git-fetch.txt b/Documentation/git-fetch.txt
> index 550c16ca61..a13d2ba7ad 100644
> --- a/Documentation/git-fetch.txt
> +++ b/Documentation/git-fetch.txt
> @@ -13,6 +13,7 @@ SYNOPSIS
>  'git fetch' [<options>] <group>
>  'git fetch' --multiple [<options>] [(<repository> | <group>)...]
>  'git fetch' --all [<options>]
> +'git fetch' --object-info=[<arguments>] <remote> [<object-ids>]

This is a start of slippery slope of making "fetch" a kitchen sink
that does not have much to do with "git fetch".  Let's not go this
route.

> @@ -2087,6 +2094,7 @@ int cmd_fetch(int argc, const char **argv, const char *prefix)
>  	struct remote *remote = NULL;
>  	int result = 0;
>  	int prune_tags_ok = 1;
> +	struct oid_array object_info_oids = OID_ARRAY_INIT;
>  
>  	packet_trace_identity("fetch");
>  
> @@ -2168,7 +2176,19 @@ int cmd_fetch(int argc, const char **argv, const char *prefix)
>  	if (dry_run)
>  		write_fetch_head = 0;
>  
> -	if (all) {
> +	if (object_info_options.nr > 0) {
> +		if (argc == 0 || argc == 1) {
> +			die(_("must supply remote and object ids when using --object-info"));
> +		} else {
> +			struct object_id oid;
> +			remote = remote_get(argv[0]);
> +			for (i = 1; i < argc; i++) {
> +				if (get_oid(argv[i], &oid))
> +					return error(_("malformed object id '%s'"), argv[i]);
> +				oid_array_append(&object_info_oids, &oid);
> +			}
> +		}
> +	} else if (all) {
>  		if (argc == 1)
>  			die(_("fetch --all does not take a repository argument"));
>  		else if (argc > 1)

Yuck.  Let's read this again, realize how little commonality we have
in processing args and barf.  That's all because we tried to
piggy-back on "fetch" because the patch wanted to reuse the parts
that are not shown in the patch text.  The right way to do so would
be to factor that "part that do not appear in the patch, because
they are common" out into callable from new code.  That way, we do
not have to contaminate this if/else conditional, both arms of which
is still about running "fetch" to retrieve history and connected
objects, and has nothing to do with "fetch object info".  The way
this patch changes this part of the code is an unnecessary risk to
break the normal "git fetch", and it does not have to be.

I like the idea to give client side a way to ask server to do random
things.  I do not think I want to see unrelated things piggy-backing
on existing client programs only because that is an expedient way to
implement it.

^ permalink raw reply	[flat|nested] 103+ messages in thread

* Re: [PATCH v3 1/3] fetch-pack: refactor packet writing and fetch options
  2022-03-28 19:11     ` [PATCH v3 1/3] fetch-pack: refactor packet writing and fetch options Calvin Wan
@ 2022-03-29 22:54       ` Junio C Hamano
  2022-03-29 23:01       ` Taylor Blau
  2022-03-30 21:55       ` Jonathan Tan
  2 siblings, 0 replies; 103+ messages in thread
From: Junio C Hamano @ 2022-03-29 22:54 UTC (permalink / raw)
  To: Calvin Wan; +Cc: git, jonathantanmy, philipoakley, johncai86

Calvin Wan <calvinwan@google.com> writes:

> -static void write_fetch_command_and_capabilities(struct strbuf *req_buf,
> -						 const struct string_list *server_options)
> +static void write_command_and_capabilities(struct strbuf *req_buf,
> +						 const struct string_list *server_options, const char* command)

Reindent the second line to take advantage of the fact that the
function name is now slightly shorter?

It is named "command" and "capabilities", but the parameters it
takes has the "command" at the end.  We probably want to swap them,
i.e.

static void write_command_and_capabilities(struct strbuf *req_buf,
					   const char *command,
					   const struct string_list *server_options)
	
Note that asterisk sticks to identifier, not to type, in this code base.

>  {
>  	const char *hash_name;
>  
> -	if (server_supports_v2("fetch", 1))
> -		packet_buf_write(req_buf, "command=fetch");
> +	if (server_supports_v2(command, 1))
> +		packet_buf_write(req_buf, "command=%s", command);
>  	if (server_supports_v2("agent", 0))
>  		packet_buf_write(req_buf, "agent=%s", git_user_agent_sanitized());
>  	if (advertise_sid && server_supports_v2("session-id", 0))
> @@ -1279,7 +1279,7 @@ static int send_fetch_request(struct fetch_negotiator *negotiator, int fd_out,
>  	int done_sent = 0;
>  	struct strbuf req_buf = STRBUF_INIT;
>  
> -	write_fetch_command_and_capabilities(&req_buf, args->server_options);
> +	write_command_and_capabilities(&req_buf, args->server_options, "fetch");
>  
>  	if (args->use_thin_pack)
>  		packet_buf_write(&req_buf, "thin-pack");

The above two hunks (and there is another one at the end) are
trivial no-op refactoring, and the first paragraph of the proposed
log message clearly explains it.

> @@ -1598,18 +1598,18 @@ static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args,
>  		reader.me = "fetch-pack";
>  	}
>  
> +	/* v2 supports these by default */
> +	allow_unadvertised_object_request |= ALLOW_REACHABLE_SHA1;
> +	use_sideband = 2;
> +	if (args->depth > 0 || args->deepen_since || args->deepen_not)
> +		args->deepen = 1;
> +
>  	while (state != FETCH_DONE) {
>  		switch (state) {
>  		case FETCH_CHECK_LOCAL:
>  			sort_ref_list(&ref, ref_compare_name);
>  			QSORT(sought, nr_sought, cmp_ref_by_name);
>  
> -			/* v2 supports these by default */
> -			allow_unadvertised_object_request |= ALLOW_REACHABLE_SHA1;
> -			use_sideband = 2;
> -			if (args->depth > 0 || args->deepen_since || args->deepen_not)
> -				args->deepen = 1;
> -
>  			/* Filter 'ref' by 'sought' and those that aren't local */
>  			mark_complete_and_common_ref(negotiator, args, &ref);
>  			filter_refs(args, &ref, sought, nr_sought);

The second paragraph of the proposed log message touched briefly
about the existence of this change.  It is a no-op for the current
code if two conditions are met: (1) the initial state is CHECK_LOCAL,
(2) we only enter CHECK_LOCAL once and never return to it.  And they
both hold true, so this is a no-op change.

But I am not sure if it is a good idea to have this in this patch.
Possibilities are (1) this patch as-is is OK, (2) move it in a
separate patch as it has nothing to do with the other refactoring,
or (3) do it as part of the change that wants this change.

I may form an opinion once I see the step that wants this change,
but not yet.

> @@ -2060,7 +2060,7 @@ void negotiate_using_fetch(const struct oid_array *negotiation_tips,
>  		int received_ready = 0;
>  
>  		strbuf_reset(&req_buf);
> -		write_fetch_command_and_capabilities(&req_buf, server_options);
> +		write_command_and_capabilities(&req_buf, server_options, "fetch");
>  
>  		packet_buf_write(&req_buf, "wait-for-done");

^ permalink raw reply	[flat|nested] 103+ messages in thread

* Re: [PATCH v3 1/3] fetch-pack: refactor packet writing and fetch options
  2022-03-28 19:11     ` [PATCH v3 1/3] fetch-pack: refactor packet writing and fetch options Calvin Wan
  2022-03-29 22:54       ` Junio C Hamano
@ 2022-03-29 23:01       ` Taylor Blau
  2022-03-30 21:55       ` Jonathan Tan
  2 siblings, 0 replies; 103+ messages in thread
From: Taylor Blau @ 2022-03-29 23:01 UTC (permalink / raw)
  To: Calvin Wan; +Cc: git, jonathantanmy, philipoakley, johncai86

On Mon, Mar 28, 2022 at 07:11:10PM +0000, Calvin Wan wrote:
> A subsequent commit will need to write capabilities for another
> command, so refactor write_fetch_command_and_capabilities() to be able
> to used by both fetch and future command.

Makes obvious sense, and this was something that jumped out to me when I
looked at the first and second versions of this patch. I'm glad that
this is getting factored out.

> Move fetch options set in `FETCH_CHECK_LOCAL` from the fetch state
> machine to above the state machine so it is set by default. The
> initial state of the state machine is always `FETCH_CHECK_LOCAL` so
> this does not affect any current functionality. This change prepares
> for a subsequent commit that doesn't need to check the local state, but
> still requires those options to be set before sending the fetch request.

This took a little more thinking, but I agree that this is behaviorally
equivalent. The key points are (and here I'm basically restating what
you already wrote):

  - when the state machine in do_fetch_pack_v2() is in
    FETCH_CHECK_LOCAL, we set a few v2-specific defaults

  - it will be helpful for a future patch to have these defaults set
    _before_ we enter the main loop

  - and we can safely make that change, since the initial state is
    always FETCH_CHECK_LOCAL anyway, so we're guaranteed to hit that
    code whether it's in the loop or not.

So this seems safe to me, too. I'll have to see why this is necessary,
but I suspect we'll find out in subsequent patches, so let's read on...

Thanks,
Taylor

^ permalink raw reply	[flat|nested] 103+ messages in thread

* Re: [PATCH v3 2/3] transfer.advertiseObjectInfo: add object-info config
  2022-03-29 22:48         ` Calvin Wan
@ 2022-03-29 23:09           ` Taylor Blau
  0 siblings, 0 replies; 103+ messages in thread
From: Taylor Blau @ 2022-03-29 23:09 UTC (permalink / raw)
  To: Calvin Wan; +Cc: Junio C Hamano, git, jonathantanmy, philipoakley, johncai86

On Tue, Mar 29, 2022 at 03:48:28PM -0700, Calvin Wan wrote:
> > It strikes me somewhat odd that this is the _first_ such capability
> > that needs a variable here to control if it is advertised or not
> > depending on the configuration file settings.
>
> > In other words, "Why doesn't transfer.advertiseSID also have a
> > similar thing in this file?" should be the first question that comes
> > to any reviewers' mind.
>
> It doesn't exist in protocol-caps.c, but instead in serve.c. Would be
> a good discussion for whether the advertise function should be serve.c
> or protocol-caps.c

Most of the others are defined statically within serve.c, but there are
a couple of exceptions:

  - `ls_refs_advertise()` is defined in ls-refs.c, and
  - `upload_pack_advertise()` is defined in upload-pack.c.

Even though cap_object_info() is defined in protocol-caps.c, I think we
should probably keep the definition of `object_info_advertise()` local
to serve.c.

If there were a object-info.c, it would make sense to put it there, but
since there isn't, it makes more sense to keep it local to serve.c,
which avoids us having to declare the function in a header file.

Thanks,
Taylor

^ permalink raw reply	[flat|nested] 103+ messages in thread

* Re: [PATCH v3 3/3] object-info: add option for retrieving object info
  2022-03-28 19:11     ` [PATCH v3 3/3] object-info: add option for retrieving object info Calvin Wan
  2022-03-29 19:57       ` Junio C Hamano
  2022-03-29 22:54       ` Junio C Hamano
@ 2022-03-29 23:19       ` Taylor Blau
  2022-03-30 22:47         ` Calvin Wan
  2022-03-30 22:06       ` John Cai
                         ` (2 subsequent siblings)
  5 siblings, 1 reply; 103+ messages in thread
From: Taylor Blau @ 2022-03-29 23:19 UTC (permalink / raw)
  To: Calvin Wan; +Cc: git, jonathantanmy, philipoakley, johncai86

Hi Calvin,

Junio took a deeper look below, but here are a few small code-hygiene
issues that I noticed while taking a quicker look through this patch.

On Mon, Mar 28, 2022 at 07:11:12PM +0000, Calvin Wan wrote:
> Sometimes it is useful to get information about an object without
> having to download it completely. The server logic has already been
> implemented as “a2ba162cda (object-info: support for retrieving object
> info, 2021-04-20)”. This patch implements the client option for
> it. Currently, only 'size' is supported, however future patches can
> implement additional options.
>
> Add ‘--object-info’ option to fetch. This option allows the client to

The single-quotes here look like smart quotes. I don't think we forbid
these explicitly within commit messages, but using the standard '
(ASCII 0x27) character is more common.

> make an object-info command request to a server that supports protocol
> v2. If the server is v2, but does not allow for the object-info
> command request, fetch the objects as if it were a standard fetch
> (however without changing any refs). Therefore, hook `fetch
> object-info` into transport_fetch_refs() to easily fallback if needed.
>
> A previous patch added the `transfer.advertiseObjectInfo` config
> option, of which this patch utilizes to test the fallback.
>
> ---
>  Documentation/fetch-options.txt |   5 ++
>  Documentation/git-fetch.txt     |   1 +
>  builtin/fetch.c                 |  36 ++++++++-
>  fetch-pack.c                    |  42 +++++++++-
>  fetch-pack.h                    |   9 +++
>  t/t5583-fetch-object-info.sh    | 138 ++++++++++++++++++++++++++++++++
>  transport-helper.c              |   8 +-
>  transport-internal.h            |   1 +
>  transport.c                     |  75 ++++++++++++++++-
>  transport.h                     |   9 +++
>  10 files changed, 315 insertions(+), 9 deletions(-)
>  create mode 100755 t/t5583-fetch-object-info.sh
>
> diff --git a/Documentation/fetch-options.txt b/Documentation/fetch-options.txt
> index f903683189..f1ca95c32d 100644
> --- a/Documentation/fetch-options.txt
> +++ b/Documentation/fetch-options.txt
> @@ -299,3 +299,8 @@ endif::git-pull[]
>  -6::
>  --ipv6::
>  	Use IPv6 addresses only, ignoring IPv4 addresses.
> +
> +--object-info=[<arguments>]::
> +	Fetches only the relevant information passed in arguments from a
> +	specific remote and set of object ids. Object 'size' is currently
> +	the only supported argument.
> diff --git a/Documentation/git-fetch.txt b/Documentation/git-fetch.txt
> index 550c16ca61..a13d2ba7ad 100644
> --- a/Documentation/git-fetch.txt
> +++ b/Documentation/git-fetch.txt
> @@ -13,6 +13,7 @@ SYNOPSIS
>  'git fetch' [<options>] <group>
>  'git fetch' --multiple [<options>] [(<repository> | <group>)...]
>  'git fetch' --all [<options>]
> +'git fetch' --object-info=[<arguments>] <remote> [<object-ids>]
>
>
>  DESCRIPTION
> diff --git a/builtin/fetch.c b/builtin/fetch.c
> index 4d12c2fd4d..0b21bebfca 100644
> --- a/builtin/fetch.c
> +++ b/builtin/fetch.c
> @@ -29,6 +29,9 @@
>  #include "commit-graph.h"
>  #include "shallow.h"
>  #include "worktree.h"
> +#include "protocol.h"
> +#include "pkt-line.h"
> +#include "connect.h"
>
>  #define FORCED_UPDATES_DELAY_WARNING_IN_MS (10 * 1000)
>
> @@ -37,6 +40,7 @@ static const char * const builtin_fetch_usage[] = {
>  	N_("git fetch [<options>] <group>"),
>  	N_("git fetch --multiple [<options>] [(<repository> | <group>)...]"),
>  	N_("git fetch --all [<options>]"),
> +	N_("git fetch --object-info=[<arguments>] <remote> [<object-ids>]"),
>  	NULL
>  };
>
> @@ -86,6 +90,7 @@ static struct string_list negotiation_tip = STRING_LIST_INIT_NODUP;
>  static int fetch_write_commit_graph = -1;
>  static int stdin_refspecs = 0;
>  static int negotiate_only;
> +static struct string_list object_info_options = STRING_LIST_INIT_NODUP;
>
>  static int git_fetch_config(const char *k, const char *v, void *cb)
>  {
> @@ -221,6 +226,8 @@ static struct option builtin_fetch_options[] = {
>  		 N_("write the commit-graph after fetching")),
>  	OPT_BOOL(0, "stdin", &stdin_refspecs,
>  		 N_("accept refspecs from stdin")),
> +	OPT_STRING_LIST(0, "object-info", &object_info_options, N_("option"),
> +		 N_("command request arguments")),
>  	OPT_END()
>  };
>
> @@ -2087,6 +2094,7 @@ int cmd_fetch(int argc, const char **argv, const char *prefix)
>  	struct remote *remote = NULL;
>  	int result = 0;
>  	int prune_tags_ok = 1;
> +	struct oid_array object_info_oids = OID_ARRAY_INIT;
>
>  	packet_trace_identity("fetch");
>
> @@ -2168,7 +2176,19 @@ int cmd_fetch(int argc, const char **argv, const char *prefix)
>  	if (dry_run)
>  		write_fetch_head = 0;
>
> -	if (all) {
> +	if (object_info_options.nr > 0) {

Small nit; since object_info_options.nr should never be negative, we
should instead write "if (object_info_options)".

> +		if (argc == 0 || argc == 1) {
> +			die(_("must supply remote and object ids when using --object-info"));
> +		} else {
> +			struct object_id oid;
> +			remote = remote_get(argv[0]);
> +			for (i = 1; i < argc; i++) {
> +				if (get_oid(argv[i], &oid))
> +					return error(_("malformed object id '%s'"), argv[i]);
> +				oid_array_append(&object_info_oids, &oid);
> +			}
> +		}
> +	} else if (all) {
>  		if (argc == 1)
>  			die(_("fetch --all does not take a repository argument"));
>  		else if (argc > 1)
> @@ -2199,7 +2219,19 @@ int cmd_fetch(int argc, const char **argv, const char *prefix)
>  		}
>  	}
>
> -	if (negotiate_only) {
> +	if (object_info_options.nr > 0) {
> +		if (!remote)
> +			die(_("must supply remote when using --object-info"));
> +		gtransport = prepare_transport(remote, 1);
> +		if (gtransport->smart_options) {
> +			gtransport->smart_options->object_info = 1;
> +			gtransport->smart_options->object_info_oids = &object_info_oids;
> +			gtransport->smart_options->object_info_options = &object_info_options;
> +		}
> +		if (server_options.nr)
> +			gtransport->server_options = &server_options;
> +		result = transport_fetch_refs(gtransport, NULL);
> +	} else if (negotiate_only) {
>  		struct oidset acked_commits = OIDSET_INIT;
>  		struct oidset_iter iter;
>  		const struct object_id *oid;
> diff --git a/fetch-pack.c b/fetch-pack.c
> index b709a61baf..36e3d1c5d0 100644
> --- a/fetch-pack.c
> +++ b/fetch-pack.c
> @@ -1269,6 +1269,27 @@ static void write_command_and_capabilities(struct strbuf *req_buf,
>  	packet_buf_delim(req_buf);
>  }
>
> +void send_object_info_request(int fd_out, struct object_info_args *args)
> +{
> +	struct strbuf req_buf = STRBUF_INIT;
> +	int i;

I don't think this would ever be practically exploit-able, since we'd
run past the argument limit long before we hit INT_MAX, but this should
be a size_t instead of an int to match the type of args->oid->nr.

> +
> +	write_command_and_capabilities(&req_buf, args->server_options, "object-info");
> +
> +	if (string_list_has_string(args->object_info_options, "size"))
> +		packet_buf_write(&req_buf, "size");
> +
> +	for (i = 0; i < args->oids->nr; i++) {
> +		packet_buf_write(&req_buf, "oid %s\n", oid_to_hex(&args->oids->oid[i]));
> +	}

Small nit, the braces around this for-loop are not required.

>  static int send_fetch_request(struct fetch_negotiator *negotiator, int fd_out,
>  			      struct fetch_pack_args *args,
>  			      const struct ref *wants, struct oidset *common,
> @@ -1604,6 +1625,10 @@ static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args,
>  	if (args->depth > 0 || args->deepen_since || args->deepen_not)
>  		args->deepen = 1;
>
> +	if (args->object_info) {
> +		state = FETCH_SEND_REQUEST;
> +	}
> +

Ditto here.

>  	while (state != FETCH_DONE) {
>  		switch (state) {
>  		case FETCH_CHECK_LOCAL:
> @@ -1613,7 +1638,7 @@ static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args,
>  			/* Filter 'ref' by 'sought' and those that aren't local */
>  			mark_complete_and_common_ref(negotiator, args, &ref);
>  			filter_refs(args, &ref, sought, nr_sought);
> -			if (everything_local(args, &ref))
> +			if (!args->object_info && everything_local(args, &ref))
>  				state = FETCH_DONE;
>  			else
>  				state = FETCH_SEND_REQUEST;
> @@ -1999,8 +2024,19 @@ struct ref *fetch_pack(struct fetch_pack_args *args,
>  		}
>  		args->connectivity_checked = 1;
>  	}
> -
> -	update_shallow(args, sought, nr_sought, &si);
> +
> +	if (args->object_info) {
> +		struct ref *ref_cpy_reader = ref_cpy;
> +		unsigned long size = 0;
> +		while (ref_cpy_reader) {
> +			oid_object_info(the_repository, &(ref_cpy_reader->old_oid), &size);
> +			printf("%s %li\n", oid_to_hex(&(ref_cpy_reader->old_oid)), size);

Both expressions like "&(x)" can be written instead as "&x" without the
outer parenthesis.

Likewise, we do not use %li, that line should instead read:

    printf("%s %"PRIuMAX"\n", oid_to_hex(&ref_cpy_reader->old_oid), (uintmax_t)size);

Thanks,
Taylor

^ permalink raw reply	[flat|nested] 103+ messages in thread

* Re: [PATCH v3 3/3] object-info: add option for retrieving object info
  2022-03-29 22:54       ` Junio C Hamano
@ 2022-03-30  0:49         ` Junio C Hamano
  2022-03-30 22:31           ` Calvin Wan
  2022-03-30 22:43         ` Jonathan Tan
  1 sibling, 1 reply; 103+ messages in thread
From: Junio C Hamano @ 2022-03-30  0:49 UTC (permalink / raw)
  To: Calvin Wan; +Cc: git, jonathantanmy, philipoakley, johncai86

Junio C Hamano <gitster@pobox.com> writes:

> Shouldn't we be making the part that we truly need to reuse into a
> separate API out of fetch-pack and either (1) allow new commands be
> easily written reusing the code to create "git remote-object-info"
> and "git remote-blame", or (2) come up with a single "do things on
> remote" command with various subcommands, i.e. "git over-there
> object-info" and "git over-there blame"?

For completeness, we could also make the "git archive" the gold
standard.  If we want "git blame" that is mostly executed on the
remote site, we could teach "git blame --remote <there>", which may
be where a user, who does not know nor care about how things are
implemented, would expect to find the feature.  Is "object-info" in
principle like executing "cat-file --batch-check" remotely?

I think that "Object 'size' is currently the only one the server
knows to emit", combined with the lack of any client side support so
far, means that it is not too late to rework the object-info thing
and possibly reuse more from "cat-file --batch-check".

If we can create a system where running things locally and remotely
are comparable in feature, that would be great, but I do not think
that is a realistic goal to aim for.

Thoughts?




^ permalink raw reply	[flat|nested] 103+ messages in thread

* Re: [PATCH v3 1/3] fetch-pack: refactor packet writing and fetch options
  2022-03-28 19:11     ` [PATCH v3 1/3] fetch-pack: refactor packet writing and fetch options Calvin Wan
  2022-03-29 22:54       ` Junio C Hamano
  2022-03-29 23:01       ` Taylor Blau
@ 2022-03-30 21:55       ` Jonathan Tan
  2 siblings, 0 replies; 103+ messages in thread
From: Jonathan Tan @ 2022-03-30 21:55 UTC (permalink / raw)
  To: Calvin Wan; +Cc: Jonathan Tan, git, philipoakley, johncai86

Calvin Wan <calvinwan@google.com> writes:
> A subsequent commit will need to write capabilities for another
> command, so refactor write_fetch_command_and_capabilities() to be able
> to used by both fetch and future command.
> 
> Move fetch options set in `FETCH_CHECK_LOCAL` from the fetch state
> machine to above the state machine so it is set by default. The
> initial state of the state machine is always `FETCH_CHECK_LOCAL` so
> this does not affect any current functionality. This change prepares
> for a subsequent commit that doesn't need to check the local state, but
> still requires those options to be set before sending the fetch request.

I think it would be clearer to rephrase "that doesn't need to check the
local state" to "that will skip FETCH_CHECK_LOCAL". It does literally
mean the same thing, but I had a mental block with "initial state...is
always FETCH_CHECK_LOCAL" in the preceding sentence.

I agree with some of the others who said that this change should go into
its own patch.

Otherwise, this patch makes sense.

^ permalink raw reply	[flat|nested] 103+ messages in thread

* Re: [PATCH v3 3/3] object-info: add option for retrieving object info
  2022-03-28 19:11     ` [PATCH v3 3/3] object-info: add option for retrieving object info Calvin Wan
                         ` (2 preceding siblings ...)
  2022-03-29 23:19       ` Taylor Blau
@ 2022-03-30 22:06       ` John Cai
  2022-03-31 19:56         ` Calvin Wan
  2022-03-30 22:07       ` Jonathan Tan
  2022-03-30 22:12       ` Josh Steadmon
  5 siblings, 1 reply; 103+ messages in thread
From: John Cai @ 2022-03-30 22:06 UTC (permalink / raw)
  To: Calvin Wan; +Cc: git, jonathantanmy, philipoakley



On 28 Mar 2022, at 15:11, Calvin Wan wrote:

> Sometimes it is useful to get information about an object without
> having to download it completely. The server logic has already been
> implemented as “a2ba162cda (object-info: support for retrieving object
> info, 2021-04-20)”. This patch implements the client option for
> it. Currently, only 'size' is supported, however future patches can
> implement additional options.
>
> Add ‘--object-info’ option to fetch. This option allows the client to
> make an object-info command request to a server that supports protocol
> v2. If the server is v2, but does not allow for the object-info
> command request, fetch the objects as if it were a standard fetch
> (however without changing any refs). Therefore, hook `fetch
> object-info` into transport_fetch_refs() to easily fallback if needed.
>
> A previous patch added the `transfer.advertiseObjectInfo` config
> option, of which this patch utilizes to test the fallback.
>
> ---
>  Documentation/fetch-options.txt |   5 ++
>  Documentation/git-fetch.txt     |   1 +
>  builtin/fetch.c                 |  36 ++++++++-
>  fetch-pack.c                    |  42 +++++++++-
>  fetch-pack.h                    |   9 +++
>  t/t5583-fetch-object-info.sh    | 138 ++++++++++++++++++++++++++++++++
>  transport-helper.c              |   8 +-
>  transport-internal.h            |   1 +
>  transport.c                     |  75 ++++++++++++++++-
>  transport.h                     |   9 +++
>  10 files changed, 315 insertions(+), 9 deletions(-)
>  create mode 100755 t/t5583-fetch-object-info.sh
>
> diff --git a/Documentation/fetch-options.txt b/Documentation/fetch-options.txt
> index f903683189..f1ca95c32d 100644
> --- a/Documentation/fetch-options.txt
> +++ b/Documentation/fetch-options.txt
> @@ -299,3 +299,8 @@ endif::git-pull[]
>  -6::
>  --ipv6::
>  	Use IPv6 addresses only, ignoring IPv4 addresses.
> +
> +--object-info=[<arguments>]::
> +	Fetches only the relevant information passed in arguments from a
> +	specific remote and set of object ids. Object 'size' is currently
> +	the only supported argument.
> diff --git a/Documentation/git-fetch.txt b/Documentation/git-fetch.txt
> index 550c16ca61..a13d2ba7ad 100644
> --- a/Documentation/git-fetch.txt
> +++ b/Documentation/git-fetch.txt
> @@ -13,6 +13,7 @@ SYNOPSIS
>  'git fetch' [<options>] <group>
>  'git fetch' --multiple [<options>] [(<repository> | <group>)...]
>  'git fetch' --all [<options>]
> +'git fetch' --object-info=[<arguments>] <remote> [<object-ids>]
>
>
>  DESCRIPTION
> diff --git a/builtin/fetch.c b/builtin/fetch.c
> index 4d12c2fd4d..0b21bebfca 100644
> --- a/builtin/fetch.c
> +++ b/builtin/fetch.c
> @@ -29,6 +29,9 @@
>  #include "commit-graph.h"
>  #include "shallow.h"
>  #include "worktree.h"
> +#include "protocol.h"
> +#include "pkt-line.h"
> +#include "connect.h"
>
>  #define FORCED_UPDATES_DELAY_WARNING_IN_MS (10 * 1000)
>
> @@ -37,6 +40,7 @@ static const char * const builtin_fetch_usage[] = {
>  	N_("git fetch [<options>] <group>"),
>  	N_("git fetch --multiple [<options>] [(<repository> | <group>)...]"),
>  	N_("git fetch --all [<options>]"),
> +	N_("git fetch --object-info=[<arguments>] <remote> [<object-ids>]"),
>  	NULL
>  };
>
> @@ -86,6 +90,7 @@ static struct string_list negotiation_tip = STRING_LIST_INIT_NODUP;
>  static int fetch_write_commit_graph = -1;
>  static int stdin_refspecs = 0;
>  static int negotiate_only;
> +static struct string_list object_info_options = STRING_LIST_INIT_NODUP;
>
>  static int git_fetch_config(const char *k, const char *v, void *cb)
>  {
> @@ -221,6 +226,8 @@ static struct option builtin_fetch_options[] = {
>  		 N_("write the commit-graph after fetching")),
>  	OPT_BOOL(0, "stdin", &stdin_refspecs,
>  		 N_("accept refspecs from stdin")),
> +	OPT_STRING_LIST(0, "object-info", &object_info_options, N_("option"),
> +		 N_("command request arguments")),
>  	OPT_END()
>  };
>
> @@ -2087,6 +2094,7 @@ int cmd_fetch(int argc, const char **argv, const char *prefix)
>  	struct remote *remote = NULL;
>  	int result = 0;
>  	int prune_tags_ok = 1;
> +	struct oid_array object_info_oids = OID_ARRAY_INIT;
>
>  	packet_trace_identity("fetch");
>
> @@ -2168,7 +2176,19 @@ int cmd_fetch(int argc, const char **argv, const char *prefix)
>  	if (dry_run)
>  		write_fetch_head = 0;
>
> -	if (all) {
> +	if (object_info_options.nr > 0) {
> +		if (argc == 0 || argc == 1) {
> +			die(_("must supply remote and object ids when using --object-info"));
> +		} else {
> +			struct object_id oid;
> +			remote = remote_get(argv[0]);
> +			for (i = 1; i < argc; i++) {
> +				if (get_oid(argv[i], &oid))
> +					return error(_("malformed object id '%s'"), argv[i]);
> +				oid_array_append(&object_info_oids, &oid);
> +			}
> +		}
> +	} else if (all) {
>  		if (argc == 1)
>  			die(_("fetch --all does not take a repository argument"));
>  		else if (argc > 1)
> @@ -2199,7 +2219,19 @@ int cmd_fetch(int argc, const char **argv, const char *prefix)
>  		}
>  	}
>
> -	if (negotiate_only) {
> +	if (object_info_options.nr > 0) {
> +		if (!remote)
> +			die(_("must supply remote when using --object-info"));
> +		gtransport = prepare_transport(remote, 1);
> +		if (gtransport->smart_options) {
> +			gtransport->smart_options->object_info = 1;
> +			gtransport->smart_options->object_info_oids = &object_info_oids;
> +			gtransport->smart_options->object_info_options = &object_info_options;
> +		}
> +		if (server_options.nr)
> +			gtransport->server_options = &server_options;
> +		result = transport_fetch_refs(gtransport, NULL);
> +	} else if (negotiate_only) {
>  		struct oidset acked_commits = OIDSET_INIT;
>  		struct oidset_iter iter;
>  		const struct object_id *oid;
> diff --git a/fetch-pack.c b/fetch-pack.c
> index b709a61baf..36e3d1c5d0 100644
> --- a/fetch-pack.c
> +++ b/fetch-pack.c
> @@ -1269,6 +1269,27 @@ static void write_command_and_capabilities(struct strbuf *req_buf,
>  	packet_buf_delim(req_buf);
>  }
>
> +void send_object_info_request(int fd_out, struct object_info_args *args)
> +{
> +	struct strbuf req_buf = STRBUF_INIT;
> +	int i;
> +
> +	write_command_and_capabilities(&req_buf, args->server_options, "object-info");
> +
> +	if (string_list_has_string(args->object_info_options, "size"))
> +		packet_buf_write(&req_buf, "size");
> +
> +	for (i = 0; i < args->oids->nr; i++) {
> +		packet_buf_write(&req_buf, "oid %s\n", oid_to_hex(&args->oids->oid[i]));
> +	}
> +
> +	packet_buf_flush(&req_buf);
> +	if (write_in_full(fd_out, req_buf.buf, req_buf.len) < 0)
> +		die_errno(_("unable to write request to remote"));
> +
> +	strbuf_release(&req_buf);
> +}
> +
>  static int send_fetch_request(struct fetch_negotiator *negotiator, int fd_out,
>  			      struct fetch_pack_args *args,
>  			      const struct ref *wants, struct oidset *common,
> @@ -1604,6 +1625,10 @@ static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args,
>  	if (args->depth > 0 || args->deepen_since || args->deepen_not)
>  		args->deepen = 1;
>
> +	if (args->object_info) {
> +		state = FETCH_SEND_REQUEST;
> +	}
> +
>  	while (state != FETCH_DONE) {
>  		switch (state) {
>  		case FETCH_CHECK_LOCAL:
> @@ -1613,7 +1638,7 @@ static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args,
>  			/* Filter 'ref' by 'sought' and those that aren't local */
>  			mark_complete_and_common_ref(negotiator, args, &ref);
>  			filter_refs(args, &ref, sought, nr_sought);
> -			if (everything_local(args, &ref))
> +			if (!args->object_info && everything_local(args, &ref))
>  				state = FETCH_DONE;
>  			else
>  				state = FETCH_SEND_REQUEST;
> @@ -1999,8 +2024,19 @@ struct ref *fetch_pack(struct fetch_pack_args *args,
>  		}
>  		args->connectivity_checked = 1;
>  	}
> -
> -	update_shallow(args, sought, nr_sought, &si);
> +	
> +	if (args->object_info) {
> +		struct ref *ref_cpy_reader = ref_cpy;
> +		unsigned long size = 0;
> +		while (ref_cpy_reader) {
> +			oid_object_info(the_repository, &(ref_cpy_reader->old_oid), &size);
> +			printf("%s %li\n", oid_to_hex(&(ref_cpy_reader->old_oid)), size);
> +			ref_cpy_reader = ref_cpy_reader->next;
> +		}
> +	}
> +	else {
> +		update_shallow(args, sought, nr_sought, &si);
> +	}
>  cleanup:
>  	clear_shallow_info(&si);
>  	oid_array_clear(&shallows_scratch);
> diff --git a/fetch-pack.h b/fetch-pack.h
> index 7f94a2a583..23e69bb218 100644
> --- a/fetch-pack.h
> +++ b/fetch-pack.h
> @@ -42,6 +42,7 @@ struct fetch_pack_args {
>  	unsigned update_shallow:1;
>  	unsigned reject_shallow_remote:1;
>  	unsigned deepen:1;
> +	unsigned object_info:1;
>
>  	/*
>  	 * Indicate that the remote of this request is a promisor remote. The
> @@ -68,6 +69,12 @@ struct fetch_pack_args {
>  	unsigned connectivity_checked:1;
>  };
>
> +struct object_info_args {
> +	const struct string_list *object_info_options;
> +	const struct string_list *server_options;
> +	const struct oid_array *oids;
> +};
> +
>  /*
>   * sought represents remote references that should be updated from.
>   * On return, the names that were found on the remote will have been
> @@ -101,4 +108,6 @@ void negotiate_using_fetch(const struct oid_array *negotiation_tips,
>   */
>  int report_unmatched_refs(struct ref **sought, int nr_sought);
>
> +void send_object_info_request(int fd_out, struct object_info_args *args);
> +
>  #endif
> diff --git a/t/t5583-fetch-object-info.sh b/t/t5583-fetch-object-info.sh
> new file mode 100755
> index 0000000000..e5db6d953c
> --- /dev/null
> +++ b/t/t5583-fetch-object-info.sh
> @@ -0,0 +1,138 @@
> +#!/bin/sh
> +
> +test_description='test git fetch object-info version 2'
> +
> +TEST_NO_CREATE_REPO=1
> +
> +GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME=main
> +export GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME
> +
> +. ./test-lib.sh
> +
> +# Test fetch object-info with 'git://' transport
> +
> +. "$TEST_DIRECTORY"/lib-git-daemon.sh
> +start_git_daemon --export-all --enable=receive-pack
> +daemon_parent=$GIT_DAEMON_DOCUMENT_ROOT_PATH/parent
> +
> +
> +test_expect_success 'create repo to be served by git-daemon' '
> +	git init "$daemon_parent" &&
> +	test_commit -C "$daemon_parent" message1 a.txt
> +'
> +
> +test_expect_success 'fetch object-info with git:// using protocol v2' '
> +	(
> +		cd "$daemon_parent" &&
> +		object_id=$(git rev-parse message1:a.txt) &&
> +		length=$(wc -c <a.txt) &&
> +
> +		printf "%s %d\n" "$object_id" "$length" >expect &&
> +		git -c protocol.version=2 fetch --object-info=size "$GIT_DAEMON_URL/parent" "$object_id" >actual &&
> +		test_cmp expect actual
> +	)
> +'
> +stop_git_daemon
> +
> +# Test protocol v2 with 'http://' transport
> +
> +. "$TEST_DIRECTORY"/lib-httpd.sh
> +start_httpd
> +
> +test_expect_success 'create repo to be served by http:// transport' '
> +	git init "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
> +	git -C "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" config http.receivepack true &&
> +	test_commit -C "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" message1 a.txt &&
> +	test_commit -C "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" message2 b.txt
> +'
> +
> +test_expect_success 'fetch object-info with http:// using protocol v2' '
> +	(
> +		cd "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
> +		object_id=$(git rev-parse message1:a.txt) &&
> +		length=$(wc -c <a.txt) &&
> +
> +		printf "%s %d\n" "$object_id" "$length" >expect &&
> +		git -c protocol.version=2 fetch --object-info=size "$HTTPD_URL/smart/http_parent" "$object_id" >actual &&
> +		test_cmp expect actual
> +	)
> +'
> +
> +test_expect_success 'fetch object-info for multiple objects' '
> +	(
> +		cd "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
> +		object_id1=$(git rev-parse message1:a.txt) &&
> +		object_id2=$(git rev-parse message2:b.txt) &&
> +		length1=$(wc -c <a.txt) &&
> +		length2=$(wc -c <b.txt) &&
> +
> +		printf "%s %d\n" "$object_id1" "$length1" >expect &&
> +		printf "%s %d\n" "$object_id2" "$length2" >>expect &&
> +		git -c protocol.version=2 fetch --object-info=size "$HTTPD_URL/smart/http_parent" "$object_id1" "$object_id2" >actual &&
> +		test_cmp expect actual
> +	)
> +'
> +
> +test_expect_success 'fetch object-info fallbacks to standard fetch if object-info is not supported by the server' '
> +	(
> +		cd "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
> +		object_id=$(git rev-parse message1:a.txt) &&
> +		length=$(wc -c <a.txt) &&
> +
> +		printf "%s %d\n" "$object_id" "$length" >expect &&
> +		git config objectinfo.advertise false &&
> +		git -c protocol.version=2 fetch --object-info=size "$HTTPD_URL/smart/http_parent" "$object_id" >actual &&
> +		test_cmp expect actual
> +	)

I noticed this test doesn't confirm that standard fetch was actually used. Would
it be useful to check for the existence of the object to prove it got fetched?

Also, is seems the way this test is set up, it's fetching from itself.
This is logically fine in most cases that client == server, but it was a little
confusing at first because it looked like the invocation of git config was on
the client rather than the server, until I realized they are the same.

Since the server is being used as the client, we wouldn't be able to confirm that
the object got fetched. I realize for most of these tests, it doesn't really matter
to conflate client with server. I think this is the one exception.

But, as a larger question for the list, I'm wondering if it's best practice to separate client from
server in tests, or if it's an accepted style to just use one repository in cases
when it doesn't really matter?

Thanks
John

> +'
> +
> +test_expect_success 'fetch object-info fails on server with legacy protocol' '
> +	(
> +		cd "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
> +		object_id=$(git rev-parse message1:a.txt) &&
> +		test_must_fail git -c protocol.version=0 fetch --object-info=size "$HTTPD_URL/smart/http_parent" "$object_id" 2>err &&
> +		test_i18ngrep "object-info requires protocol v2" err
> +	)
> +'
> +
> +test_expect_success 'fetch object-info fails on malformed OID' '
> +	(
> +		cd "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
> +		malformed_object_id="this_id_is_not_valid" &&
> +		test_must_fail git -c protocol.version=2 fetch --object-info=size "$HTTPD_URL/smart/http_parent" "$malformed_object_id" 2>err &&
> +		test_i18ngrep "malformed object id '$malformed_object_id'" err
> +	)
> +'
> +
> +test_expect_success 'fetch object-info fails on missing OID' '
> +	git clone "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" missing_oid_repo &&
> +	test_commit -C missing_oid_repo message3 c.txt &&
> +	(
> +		cd missing_oid_repo &&
> +		object_id=$(git rev-parse message3:c.txt) &&
> +		test_must_fail env GIT_TRACE_PACKET=1 git -c protocol.version=2 fetch --object-info=size "$HTTPD_URL/smart/http_parent" "$object_id" 2>err &&
> +		test_i18ngrep "fatal: remote error: upload-pack: not our ref $object_id" err
> +	)
> +'
> +
> +# Test fetch object-info with 'file://' transport
> +
> +test_expect_success 'create repo to be served by file:// transport' '
> +	git init server &&
> +	test_commit -C server message1 a.txt &&
> +	git -C server config protocol.version 2
> +'
> +
> +test_expect_success 'fetch object-info with file:// using protocol v2' '
> +	(
> +		cd server &&
> +		object_id=$(git rev-parse message1:a.txt) &&
> +		length=$(wc -c <a.txt) &&
> +
> +		printf "%s %d\n" "$object_id" "$length" >expect &&
> +		git -c protocol.version=2 fetch --object-info=size "file://$(pwd)" "$object_id" >actual &&
> +		test_cmp expect actual
> +	)
> +'
> +
> +test_done
> diff --git a/transport-helper.c b/transport-helper.c
> index a0297b0986..64c175e904 100644
> --- a/transport-helper.c
> +++ b/transport-helper.c
> @@ -686,13 +686,17 @@ static int fetch_refs(struct transport *transport,
>
>  	/*
>  	 * If we reach here, then the server, the client, and/or the transport
> -	 * helper does not support protocol v2. --negotiate-only requires
> -	 * protocol v2.
> +	 * helper does not support protocol v2. --negotiate-only and --object-info
> +	 * require protocol v2.
>  	 */
>  	if (data->transport_options.acked_commits) {
>  		warning(_("--negotiate-only requires protocol v2"));
>  		return -1;
>  	}
> +	if (transport->smart_options->object_info) {
> +		warning(_("--object-info requires protocol v2"));
> +		return -1;
> +	}
>
>  	if (!data->get_refs_list_called)
>  		get_refs_list_using_list(transport, 0);
> diff --git a/transport-internal.h b/transport-internal.h
> index c4ca0b733a..04fa015011 100644
> --- a/transport-internal.h
> +++ b/transport-internal.h
> @@ -59,6 +59,7 @@ struct transport_vtable {
>  	 * use. disconnect() releases these resources.
>  	 **/
>  	int (*disconnect)(struct transport *connection);
> +	int (*fetch_object_info)(struct transport *transport, struct oid_array *oids);
>  };
>
>  #endif
> diff --git a/transport.c b/transport.c
> index 70e9840a90..65a1b1fdb3 100644
> --- a/transport.c
> +++ b/transport.c
> @@ -350,6 +350,67 @@ static struct ref *handshake(struct transport *transport, int for_push,
>  	return refs;
>  }
>
> +/*
> + * Fetches object info if server supports object-info command
> + * Make sure to fallback to normal fetch if this fails
> + */
> +static int fetch_object_info(struct transport *transport)
> +{
> +	struct git_transport_data *data = transport->data;
> +	struct object_info_args args;
> +	struct packet_reader reader;
> +
> +	memset(&args, 0, sizeof(args));
> +	args.server_options = transport->server_options;
> +	args.object_info_options = transport->smart_options->object_info_options;
> +	args.oids = transport->smart_options->object_info_oids;
> +
> +	connect_setup(transport, 0);
> +	packet_reader_init(&reader, data->fd[0], NULL, 0,
> +			PACKET_READ_CHOMP_NEWLINE |
> +			PACKET_READ_DIE_ON_ERR_PACKET);
> +	data->version = discover_version(&reader);
> +
> +	transport->hash_algo = reader.hash_algo;
> +
> +	switch (data->version) {
> +	case protocol_v2:
> +		if (!server_supports_v2("object-info", 0))
> +			return 0;
> +		send_object_info_request(data->fd[1], &args);
> +		break;
> +	case protocol_v1:
> +	case protocol_v0:
> +		die(_("wrong protocol version. expected v2"));
> +	case protocol_unknown_version:
> +		BUG("unknown protocol version");
> +	}
> +
> +	if (packet_reader_read(&reader) != PACKET_READ_NORMAL) {
> +		die(_("error reading object info header"));
> +	}
> +	if (strcmp(reader.line, "size")) {
> +		die(_("expected 'size', received '%s'"), reader.line);
> +	}
> +	while (packet_reader_read(&reader) == PACKET_READ_NORMAL) {
> +		printf("%s\n", reader.line);
> +	}
> +	check_stateless_delimiter(transport->stateless_rpc, &reader, "stateless delimiter expected");
> +
> +	return 1;
> +}
> +
> +static int append_oid_to_ref(const struct object_id *oid, void *data)
> +{
> +	struct ref *ref = data;
> +	struct ref *temp_ref = xcalloc(1, sizeof (struct ref));
> +	temp_ref->old_oid = *oid;
> +	temp_ref->exact_oid = 1;
> +	ref->next = temp_ref;
> +	ref = ref->next;
> +	return 0;
> +}
> +
>  static struct ref *get_refs_via_connect(struct transport *transport, int for_push,
>  					struct transport_ls_refs_options *options)
>  {
> @@ -364,6 +425,7 @@ static int fetch_refs_via_pack(struct transport *transport,
>  	struct ref *refs = NULL;
>  	struct fetch_pack_args args;
>  	struct ref *refs_tmp = NULL;
> +	struct ref *wanted_refs = xcalloc(1, sizeof (struct ref));
>
>  	memset(&args, 0, sizeof(args));
>  	args.uploadpack = data->options.uploadpack;
> @@ -388,8 +450,17 @@ static int fetch_refs_via_pack(struct transport *transport,
>  	args.server_options = transport->server_options;
>  	args.negotiation_tips = data->options.negotiation_tips;
>  	args.reject_shallow_remote = transport->smart_options->reject_shallow;
> +	args.object_info = transport->smart_options->object_info;
>
> -	if (!data->got_remote_heads) {
> +	if (transport->smart_options && transport->smart_options->object_info) {
> +		struct ref *temp_ref = wanted_refs;
> +		if (fetch_object_info(transport)) {
> +			goto cleanup;
> +		}
> +		oid_array_for_each(transport->smart_options->object_info_oids, append_oid_to_ref, temp_ref);
> +		wanted_refs = wanted_refs->next;
> +		transport->remote_refs = wanted_refs;
> +	} else if (!data->got_remote_heads) {
>  		int i;
>  		int must_list_refs = 0;
>  		for (i = 0; i < nr_heads; i++) {
> @@ -406,7 +477,7 @@ static int fetch_refs_via_pack(struct transport *transport,
>  	else if (data->version <= protocol_v1)
>  		die_if_server_options(transport);
>
> -	if (data->options.acked_commits) {
> +	if (data->options.acked_commits && !transport->smart_options->object_info) {
>  		if (data->version < protocol_v2) {
>  			warning(_("--negotiate-only requires protocol v2"));
>  			ret = -1;
> diff --git a/transport.h b/transport.h
> index a0bc6a1e9e..3bba16f86e 100644
> --- a/transport.h
> +++ b/transport.h
> @@ -30,6 +30,12 @@ struct git_transport_options {
>  	 */
>  	unsigned connectivity_checked:1;
>
> +	/*
> +	 * Transport will attempt to pull only object-info. Fallbacks
> +	 * to pulling entire object if object-info is not supported
> +	 */
> +	unsigned object_info : 1;
> +
>  	int depth;
>  	const char *deepen_since;
>  	const struct string_list *deepen_not;
> @@ -53,6 +59,9 @@ struct git_transport_options {
>  	 * common commits to this oidset instead of fetching any packfiles.
>  	 */
>  	struct oidset *acked_commits;
> +
> +	struct oid_array *object_info_oids;
> +	struct string_list *object_info_options;
>  };
>
>  enum transport_family {
> -- 
> 2.33.0.664.g0785eb7698

^ permalink raw reply	[flat|nested] 103+ messages in thread

* Re: [PATCH v3 3/3] object-info: add option for retrieving object info
  2022-03-28 19:11     ` [PATCH v3 3/3] object-info: add option for retrieving object info Calvin Wan
                         ` (3 preceding siblings ...)
  2022-03-30 22:06       ` John Cai
@ 2022-03-30 22:07       ` Jonathan Tan
  2022-03-30 22:12       ` Josh Steadmon
  5 siblings, 0 replies; 103+ messages in thread
From: Jonathan Tan @ 2022-03-30 22:07 UTC (permalink / raw)
  To: Calvin Wan; +Cc: Jonathan Tan, git, philipoakley, johncai86

Calvin Wan <calvinwan@google.com> writes:
> @@ -1604,6 +1625,10 @@ static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args,
>  	if (args->depth > 0 || args->deepen_since || args->deepen_not)
>  		args->deepen = 1;
>  
> +	if (args->object_info) {
> +		state = FETCH_SEND_REQUEST;
> +	}
> +
>  	while (state != FETCH_DONE) {
>  		switch (state) {
>  		case FETCH_CHECK_LOCAL:
> @@ -1613,7 +1638,7 @@ static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args,
>  			/* Filter 'ref' by 'sought' and those that aren't local */
>  			mark_complete_and_common_ref(negotiator, args, &ref);
>  			filter_refs(args, &ref, sought, nr_sought);
> -			if (everything_local(args, &ref))
> +			if (!args->object_info && everything_local(args, &ref))
>  				state = FETCH_DONE;
>  			else
>  				state = FETCH_SEND_REQUEST;

I haven't looked into this deeply, but one thing to consider is to do
this in a separate function (instead of reusing do_fetch_pack_v2())
because the object-info request and response has almost nothing in
common with fetch.

> diff --git a/t/t5583-fetch-object-info.sh b/t/t5583-fetch-object-info.sh

[skip tests]

Also test when: the user gives an invalid --object-info (e.g.
"--object-info=foo") and when the user gives two parameters, one valid
and one invalid. (Both should fail.)

> diff --git a/transport-internal.h b/transport-internal.h
> index c4ca0b733a..04fa015011 100644
> --- a/transport-internal.h
> +++ b/transport-internal.h
> @@ -59,6 +59,7 @@ struct transport_vtable {
>  	 * use. disconnect() releases these resources.
>  	 **/
>  	int (*disconnect)(struct transport *connection);
> +	int (*fetch_object_info)(struct transport *transport, struct oid_array *oids);
>  };

Do we need another entry in the vtable, or can we do without? (As it is,
I don't think we need it, as evidenced by the fact that this patch did
not add any entries to the transport_vtable instances.)

> diff --git a/transport.c b/transport.c
> index 70e9840a90..65a1b1fdb3 100644
> --- a/transport.c
> +++ b/transport.c
> @@ -350,6 +350,67 @@ static struct ref *handshake(struct transport *transport, int for_push,
>  	return refs;
>  }
>  
> +/*
> + * Fetches object info if server supports object-info command
> + * Make sure to fallback to normal fetch if this fails
> + */
> +static int fetch_object_info(struct transport *transport)
> +{
> +	struct git_transport_data *data = transport->data;
> +	struct object_info_args args;
> +	struct packet_reader reader;
> +
> +	memset(&args, 0, sizeof(args));
> +	args.server_options = transport->server_options;
> +	args.object_info_options = transport->smart_options->object_info_options;
> +	args.oids = transport->smart_options->object_info_oids;

I think we can avoid code duplication in this function too. So from
here (the connect_setup line below)...

> +	connect_setup(transport, 0);
> +	packet_reader_init(&reader, data->fd[0], NULL, 0,
> +			PACKET_READ_CHOMP_NEWLINE |
> +			PACKET_READ_DIE_ON_ERR_PACKET);
> +	data->version = discover_version(&reader);
> +
> +	transport->hash_algo = reader.hash_algo;
> +
> +	switch (data->version) {
> +	case protocol_v2:
> +		if (!server_supports_v2("object-info", 0))
> +			return 0;
> +		send_object_info_request(data->fd[1], &args);
> +		break;
> +	case protocol_v1:
> +	case protocol_v0:
> +		die(_("wrong protocol version. expected v2"));
> +	case protocol_unknown_version:
> +		BUG("unknown protocol version");
> +	}

...up to here can be a function (except that we need to take out
send_object_info_request() and maybe parameterize the server_supports_v2
call).

From here...

> +	if (packet_reader_read(&reader) != PACKET_READ_NORMAL) {
> +		die(_("error reading object info header"));
> +	}
> +	if (strcmp(reader.line, "size")) {
> +		die(_("expected 'size', received '%s'"), reader.line);
> +	}
> +	while (packet_reader_read(&reader) == PACKET_READ_NORMAL) {
> +		printf("%s\n", reader.line);
> +	}

...until here is object-info-specific, yes.

And this...

> +	check_stateless_delimiter(transport->stateless_rpc, &reader, "stateless delimiter expected");
> +
> +	return 1;
> +}

is common code once again.

Also instead of returning 0 from this function if the server doesn't
support object-info, it's probably clearer to have one function that
checks what the server supports, and then have an "if" block which
either does the object-info part or the fetch part depending on what the
server says.

^ permalink raw reply	[flat|nested] 103+ messages in thread

* Re: [PATCH v3 3/3] object-info: add option for retrieving object info
  2022-03-28 19:11     ` [PATCH v3 3/3] object-info: add option for retrieving object info Calvin Wan
                         ` (4 preceding siblings ...)
  2022-03-30 22:07       ` Jonathan Tan
@ 2022-03-30 22:12       ` Josh Steadmon
  2022-03-30 22:46         ` Calvin Wan
  5 siblings, 1 reply; 103+ messages in thread
From: Josh Steadmon @ 2022-03-30 22:12 UTC (permalink / raw)
  To: Calvin Wan; +Cc: git, jonathantanmy, philipoakley, johncai86

On 2022.03.28 19:11, Calvin Wan wrote:
> Sometimes it is useful to get information about an object without
> having to download it completely. The server logic has already been
> implemented as “a2ba162cda (object-info: support for retrieving object
> info, 2021-04-20)”. This patch implements the client option for
> it. Currently, only 'size' is supported, however future patches can
> implement additional options.
> 
> Add ‘--object-info’ option to fetch. This option allows the client to
> make an object-info command request to a server that supports protocol
> v2. If the server is v2, but does not allow for the object-info
> command request, fetch the objects as if it were a standard fetch
> (however without changing any refs). Therefore, hook `fetch
> object-info` into transport_fetch_refs() to easily fallback if needed.
> 
> A previous patch added the `transfer.advertiseObjectInfo` config
> option, of which this patch utilizes to test the fallback.
> 
> ---
>  Documentation/fetch-options.txt |   5 ++
>  Documentation/git-fetch.txt     |   1 +
>  builtin/fetch.c                 |  36 ++++++++-
>  fetch-pack.c                    |  42 +++++++++-
>  fetch-pack.h                    |   9 +++
>  t/t5583-fetch-object-info.sh    | 138 ++++++++++++++++++++++++++++++++
>  transport-helper.c              |   8 +-
>  transport-internal.h            |   1 +
>  transport.c                     |  75 ++++++++++++++++-
>  transport.h                     |   9 +++
>  10 files changed, 315 insertions(+), 9 deletions(-)
>  create mode 100755 t/t5583-fetch-object-info.sh

> diff --git a/fetch-pack.c b/fetch-pack.c
> index b709a61baf..36e3d1c5d0 100644
> --- a/fetch-pack.c
> +++ b/fetch-pack.c
> @@ -1269,6 +1269,27 @@ static void write_command_and_capabilities(struct strbuf *req_buf,
>  	packet_buf_delim(req_buf);
>  }
>  
> +void send_object_info_request(int fd_out, struct object_info_args *args)
> +{
> +	struct strbuf req_buf = STRBUF_INIT;
> +	int i;
> +
> +	write_command_and_capabilities(&req_buf, args->server_options, "object-info");
> +
> +	if (string_list_has_string(args->object_info_options, "size"))
> +		packet_buf_write(&req_buf, "size");

Do we need a newline after "size" here? It's not clear to me that it's
necessary; Documentation/technical/protocol-v2.txt just says
"command-specific-args are packet line framed arguments defined by each
individual command", but the oid arguments below have a newline, so we
should be consistent one way or the other.

It looks like the server-side implementation wants just a bare "size"
string (no newline), but I suspect that either is OK because the
packet_reader is probably using PACKET_READ_CHOMP_NEWLINE.


> +	for (i = 0; i < args->oids->nr; i++) {
> +		packet_buf_write(&req_buf, "oid %s\n", oid_to_hex(&args->oids->oid[i]));
> +	}
> +
> +	packet_buf_flush(&req_buf);
> +	if (write_in_full(fd_out, req_buf.buf, req_buf.len) < 0)
> +		die_errno(_("unable to write request to remote"));
> +
> +	strbuf_release(&req_buf);
> +}
> +

> diff --git a/transport.c b/transport.c
> index 70e9840a90..65a1b1fdb3 100644
> --- a/transport.c
> +++ b/transport.c
> @@ -350,6 +350,67 @@ static struct ref *handshake(struct transport *transport, int for_push,
>  	return refs;
>  }
>  
> +/*
> + * Fetches object info if server supports object-info command
> + * Make sure to fallback to normal fetch if this fails
> + */
> +static int fetch_object_info(struct transport *transport)
> +{
> +	struct git_transport_data *data = transport->data;
> +	struct object_info_args args;
> +	struct packet_reader reader;
> +
> +	memset(&args, 0, sizeof(args));
> +	args.server_options = transport->server_options;
> +	args.object_info_options = transport->smart_options->object_info_options;
> +	args.oids = transport->smart_options->object_info_oids;
> +
> +	connect_setup(transport, 0);
> +	packet_reader_init(&reader, data->fd[0], NULL, 0,
> +			PACKET_READ_CHOMP_NEWLINE |
> +			PACKET_READ_DIE_ON_ERR_PACKET);
> +	data->version = discover_version(&reader);
> +
> +	transport->hash_algo = reader.hash_algo;
> +
> +	switch (data->version) {
> +	case protocol_v2:
> +		if (!server_supports_v2("object-info", 0))
> +			return 0;
> +		send_object_info_request(data->fd[1], &args);
> +		break;
> +	case protocol_v1:
> +	case protocol_v0:
> +		die(_("wrong protocol version. expected v2"));

The comment at the top of this function says that callers should be
prepared to fallback to normal fetch if this function fails. The only
way it can currently fail is if we are using protocol V2 but the server
doesn't support object-info. Rather than die() if we're on protocol V1
or V0, can we also return a failure here and let callers fallback to
fetch?

> +	case protocol_unknown_version:
> +		BUG("unknown protocol version");
> +	}
> +
> +	if (packet_reader_read(&reader) != PACKET_READ_NORMAL) {
> +		die(_("error reading object info header"));
> +	}
> +	if (strcmp(reader.line, "size")) {
> +		die(_("expected 'size', received '%s'"), reader.line);
> +	}
> +	while (packet_reader_read(&reader) == PACKET_READ_NORMAL) {
> +		printf("%s\n", reader.line);
> +	}
> +	check_stateless_delimiter(transport->stateless_rpc, &reader, "stateless delimiter expected");
> +
> +	return 1;
> +}
> +

^ permalink raw reply	[flat|nested] 103+ messages in thread

* Re: [PATCH v3 3/3] object-info: add option for retrieving object info
  2022-03-30  0:49         ` Junio C Hamano
@ 2022-03-30 22:31           ` Calvin Wan
  0 siblings, 0 replies; 103+ messages in thread
From: Calvin Wan @ 2022-03-30 22:31 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git, jonathantanmy, philipoakley, johncai86

Overall, I agree that this option does not belong in fetch. I wouldn't
say I chose to put it there for the sake of implementation
convenience, but because "it does what fetch is supposed to do minus
the object itself". This logic no longer makes any sense to me, and I
regret not seeing the problems with it as I was coming up with
workarounds.

> I think that "Object 'size' is currently the only one the server
> knows to emit", combined with the lack of any client side support so
> far, means that it is not too late to rework the object-info thing
> and possibly reuse more from "cat-file --batch-check".

I talked this over with Jonathan Tan and we came to the conclusion
that this is the next option I should look into, rather than
separating it out into its own command.

Thank you for the review Junio! I appreciate how straightforward all
of your comments are.

On Tue, Mar 29, 2022 at 5:49 PM Junio C Hamano <gitster@pobox.com> wrote:
>
> Junio C Hamano <gitster@pobox.com> writes:
>
> > Shouldn't we be making the part that we truly need to reuse into a
> > separate API out of fetch-pack and either (1) allow new commands be
> > easily written reusing the code to create "git remote-object-info"
> > and "git remote-blame", or (2) come up with a single "do things on
> > remote" command with various subcommands, i.e. "git over-there
> > object-info" and "git over-there blame"?
>
> For completeness, we could also make the "git archive" the gold
> standard.  If we want "git blame" that is mostly executed on the
> remote site, we could teach "git blame --remote <there>", which may
> be where a user, who does not know nor care about how things are
> implemented, would expect to find the feature.  Is "object-info" in
> principle like executing "cat-file --batch-check" remotely?
>
> I think that "Object 'size' is currently the only one the server
> knows to emit", combined with the lack of any client side support so
> far, means that it is not too late to rework the object-info thing
> and possibly reuse more from "cat-file --batch-check".
>
> If we can create a system where running things locally and remotely
> are comparable in feature, that would be great, but I do not think
> that is a realistic goal to aim for.
>
> Thoughts?
>
>
>

^ permalink raw reply	[flat|nested] 103+ messages in thread

* Re: [PATCH v3 3/3] object-info: add option for retrieving object info
  2022-03-29 22:54       ` Junio C Hamano
  2022-03-30  0:49         ` Junio C Hamano
@ 2022-03-30 22:43         ` Jonathan Tan
  2022-03-30 23:42           ` Junio C Hamano
  1 sibling, 1 reply; 103+ messages in thread
From: Jonathan Tan @ 2022-03-30 22:43 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Jonathan Tan, Calvin Wan, git, philipoakley, johncai86

Junio C Hamano <gitster@pobox.com> writes:
> Calvin Wan <calvinwan@google.com> writes:
> 
> > Add ‘--object-info’ option to fetch. This option allows the client to
> > make an object-info command request to a server that supports protocol
> > v2. If the server is v2, but does not allow for the object-info
> > command request, fetch the objects as if it were a standard fetch
> > (however without changing any refs). Therefore, hook `fetch
> > object-info` into transport_fetch_refs() to easily fallback if needed.
> 
> Sorry, but I do not see anything common between "git fetch" that
> fetches history and associated objects and "retrieving only object
> info".  Other than "the way I happened to have chosen to implement,
> what is used to implement fetch has the most commonality".

Calvin and I discussed details of implementation before he wrote these
patches, and my thinking at that time was that to get the relevant
information, we're prepared to fetch the objects themselves from the
remote if need be, but prefer to fetch only metadata if possible, so
putting it in "fetch" seemed reasonable to me. But I agree that it's
probably better to put it somewhere else.

^ permalink raw reply	[flat|nested] 103+ messages in thread

* Re: [PATCH v3 3/3] object-info: add option for retrieving object info
  2022-03-30 22:12       ` Josh Steadmon
@ 2022-03-30 22:46         ` Calvin Wan
  0 siblings, 0 replies; 103+ messages in thread
From: Calvin Wan @ 2022-03-30 22:46 UTC (permalink / raw)
  To: Josh Steadmon, Calvin Wan, git, jonathantanmy, philipoakley, johncai86

> but the oid arguments below have a newline, so we
> should be consistent one way or the other.

To match what the other packet functions do, I will get rid of the new
line for the oid loop.

> Rather than die() if we're on protocol V1
> or V0, can we also return a failure here and let callers fallback to
> fetch?

Sounds reasonable. Will look into it.

On Wed, Mar 30, 2022 at 3:12 PM Josh Steadmon <steadmon@google.com> wrote:
>
> On 2022.03.28 19:11, Calvin Wan wrote:
> > Sometimes it is useful to get information about an object without
> > having to download it completely. The server logic has already been
> > implemented as “a2ba162cda (object-info: support for retrieving object
> > info, 2021-04-20)”. This patch implements the client option for
> > it. Currently, only 'size' is supported, however future patches can
> > implement additional options.
> >
> > Add ‘--object-info’ option to fetch. This option allows the client to
> > make an object-info command request to a server that supports protocol
> > v2. If the server is v2, but does not allow for the object-info
> > command request, fetch the objects as if it were a standard fetch
> > (however without changing any refs). Therefore, hook `fetch
> > object-info` into transport_fetch_refs() to easily fallback if needed.
> >
> > A previous patch added the `transfer.advertiseObjectInfo` config
> > option, of which this patch utilizes to test the fallback.
> >
> > ---
> >  Documentation/fetch-options.txt |   5 ++
> >  Documentation/git-fetch.txt     |   1 +
> >  builtin/fetch.c                 |  36 ++++++++-
> >  fetch-pack.c                    |  42 +++++++++-
> >  fetch-pack.h                    |   9 +++
> >  t/t5583-fetch-object-info.sh    | 138 ++++++++++++++++++++++++++++++++
> >  transport-helper.c              |   8 +-
> >  transport-internal.h            |   1 +
> >  transport.c                     |  75 ++++++++++++++++-
> >  transport.h                     |   9 +++
> >  10 files changed, 315 insertions(+), 9 deletions(-)
> >  create mode 100755 t/t5583-fetch-object-info.sh
>
> > diff --git a/fetch-pack.c b/fetch-pack.c
> > index b709a61baf..36e3d1c5d0 100644
> > --- a/fetch-pack.c
> > +++ b/fetch-pack.c
> > @@ -1269,6 +1269,27 @@ static void write_command_and_capabilities(struct strbuf *req_buf,
> >       packet_buf_delim(req_buf);
> >  }
> >
> > +void send_object_info_request(int fd_out, struct object_info_args *args)
> > +{
> > +     struct strbuf req_buf = STRBUF_INIT;
> > +     int i;
> > +
> > +     write_command_and_capabilities(&req_buf, args->server_options, "object-info");
> > +
> > +     if (string_list_has_string(args->object_info_options, "size"))
> > +             packet_buf_write(&req_buf, "size");
>
> Do we need a newline after "size" here? It's not clear to me that it's
> necessary; Documentation/technical/protocol-v2.txt just says
> "command-specific-args are packet line framed arguments defined by each
> individual command", but the oid arguments below have a newline, so we
> should be consistent one way or the other.
>
> It looks like the server-side implementation wants just a bare "size"
> string (no newline), but I suspect that either is OK because the
> packet_reader is probably using PACKET_READ_CHOMP_NEWLINE.
>
>
> > +     for (i = 0; i < args->oids->nr; i++) {
> > +             packet_buf_write(&req_buf, "oid %s\n", oid_to_hex(&args->oids->oid[i]));
> > +     }
> > +
> > +     packet_buf_flush(&req_buf);
> > +     if (write_in_full(fd_out, req_buf.buf, req_buf.len) < 0)
> > +             die_errno(_("unable to write request to remote"));
> > +
> > +     strbuf_release(&req_buf);
> > +}
> > +
>
> > diff --git a/transport.c b/transport.c
> > index 70e9840a90..65a1b1fdb3 100644
> > --- a/transport.c
> > +++ b/transport.c
> > @@ -350,6 +350,67 @@ static struct ref *handshake(struct transport *transport, int for_push,
> >       return refs;
> >  }
> >
> > +/*
> > + * Fetches object info if server supports object-info command
> > + * Make sure to fallback to normal fetch if this fails
> > + */
> > +static int fetch_object_info(struct transport *transport)
> > +{
> > +     struct git_transport_data *data = transport->data;
> > +     struct object_info_args args;
> > +     struct packet_reader reader;
> > +
> > +     memset(&args, 0, sizeof(args));
> > +     args.server_options = transport->server_options;
> > +     args.object_info_options = transport->smart_options->object_info_options;
> > +     args.oids = transport->smart_options->object_info_oids;
> > +
> > +     connect_setup(transport, 0);
> > +     packet_reader_init(&reader, data->fd[0], NULL, 0,
> > +                     PACKET_READ_CHOMP_NEWLINE |
> > +                     PACKET_READ_DIE_ON_ERR_PACKET);
> > +     data->version = discover_version(&reader);
> > +
> > +     transport->hash_algo = reader.hash_algo;
> > +
> > +     switch (data->version) {
> > +     case protocol_v2:
> > +             if (!server_supports_v2("object-info", 0))
> > +                     return 0;
> > +             send_object_info_request(data->fd[1], &args);
> > +             break;
> > +     case protocol_v1:
> > +     case protocol_v0:
> > +             die(_("wrong protocol version. expected v2"));
>
> The comment at the top of this function says that callers should be
> prepared to fallback to normal fetch if this function fails. The only
> way it can currently fail is if we are using protocol V2 but the server
> doesn't support object-info. Rather than die() if we're on protocol V1
> or V0, can we also return a failure here and let callers fallback to
> fetch?
>
> > +     case protocol_unknown_version:
> > +             BUG("unknown protocol version");
> > +     }
> > +
> > +     if (packet_reader_read(&reader) != PACKET_READ_NORMAL) {
> > +             die(_("error reading object info header"));
> > +     }
> > +     if (strcmp(reader.line, "size")) {
> > +             die(_("expected 'size', received '%s'"), reader.line);
> > +     }
> > +     while (packet_reader_read(&reader) == PACKET_READ_NORMAL) {
> > +             printf("%s\n", reader.line);
> > +     }
> > +     check_stateless_delimiter(transport->stateless_rpc, &reader, "stateless delimiter expected");
> > +
> > +     return 1;
> > +}
> > +

^ permalink raw reply	[flat|nested] 103+ messages in thread

* Re: [PATCH v3 3/3] object-info: add option for retrieving object info
  2022-03-29 23:19       ` Taylor Blau
@ 2022-03-30 22:47         ` Calvin Wan
  0 siblings, 0 replies; 103+ messages in thread
From: Calvin Wan @ 2022-03-30 22:47 UTC (permalink / raw)
  To: Taylor Blau; +Cc: git, jonathantanmy, philipoakley, johncai86

Thank you for the code hygiene tips, Taylor! All of these sounds like
good ideas and will follow through

On Tue, Mar 29, 2022 at 4:19 PM Taylor Blau <me@ttaylorr.com> wrote:
>
> Hi Calvin,
>
> Junio took a deeper look below, but here are a few small code-hygiene
> issues that I noticed while taking a quicker look through this patch.
>
> On Mon, Mar 28, 2022 at 07:11:12PM +0000, Calvin Wan wrote:
> > Sometimes it is useful to get information about an object without
> > having to download it completely. The server logic has already been
> > implemented as “a2ba162cda (object-info: support for retrieving object
> > info, 2021-04-20)”. This patch implements the client option for
> > it. Currently, only 'size' is supported, however future patches can
> > implement additional options.
> >
> > Add ‘--object-info’ option to fetch. This option allows the client to
>
> The single-quotes here look like smart quotes. I don't think we forbid
> these explicitly within commit messages, but using the standard '
> (ASCII 0x27) character is more common.
>
> > make an object-info command request to a server that supports protocol
> > v2. If the server is v2, but does not allow for the object-info
> > command request, fetch the objects as if it were a standard fetch
> > (however without changing any refs). Therefore, hook `fetch
> > object-info` into transport_fetch_refs() to easily fallback if needed.
> >
> > A previous patch added the `transfer.advertiseObjectInfo` config
> > option, of which this patch utilizes to test the fallback.
> >
> > ---
> >  Documentation/fetch-options.txt |   5 ++
> >  Documentation/git-fetch.txt     |   1 +
> >  builtin/fetch.c                 |  36 ++++++++-
> >  fetch-pack.c                    |  42 +++++++++-
> >  fetch-pack.h                    |   9 +++
> >  t/t5583-fetch-object-info.sh    | 138 ++++++++++++++++++++++++++++++++
> >  transport-helper.c              |   8 +-
> >  transport-internal.h            |   1 +
> >  transport.c                     |  75 ++++++++++++++++-
> >  transport.h                     |   9 +++
> >  10 files changed, 315 insertions(+), 9 deletions(-)
> >  create mode 100755 t/t5583-fetch-object-info.sh
> >
> > diff --git a/Documentation/fetch-options.txt b/Documentation/fetch-options.txt
> > index f903683189..f1ca95c32d 100644
> > --- a/Documentation/fetch-options.txt
> > +++ b/Documentation/fetch-options.txt
> > @@ -299,3 +299,8 @@ endif::git-pull[]
> >  -6::
> >  --ipv6::
> >       Use IPv6 addresses only, ignoring IPv4 addresses.
> > +
> > +--object-info=[<arguments>]::
> > +     Fetches only the relevant information passed in arguments from a
> > +     specific remote and set of object ids. Object 'size' is currently
> > +     the only supported argument.
> > diff --git a/Documentation/git-fetch.txt b/Documentation/git-fetch.txt
> > index 550c16ca61..a13d2ba7ad 100644
> > --- a/Documentation/git-fetch.txt
> > +++ b/Documentation/git-fetch.txt
> > @@ -13,6 +13,7 @@ SYNOPSIS
> >  'git fetch' [<options>] <group>
> >  'git fetch' --multiple [<options>] [(<repository> | <group>)...]
> >  'git fetch' --all [<options>]
> > +'git fetch' --object-info=[<arguments>] <remote> [<object-ids>]
> >
> >
> >  DESCRIPTION
> > diff --git a/builtin/fetch.c b/builtin/fetch.c
> > index 4d12c2fd4d..0b21bebfca 100644
> > --- a/builtin/fetch.c
> > +++ b/builtin/fetch.c
> > @@ -29,6 +29,9 @@
> >  #include "commit-graph.h"
> >  #include "shallow.h"
> >  #include "worktree.h"
> > +#include "protocol.h"
> > +#include "pkt-line.h"
> > +#include "connect.h"
> >
> >  #define FORCED_UPDATES_DELAY_WARNING_IN_MS (10 * 1000)
> >
> > @@ -37,6 +40,7 @@ static const char * const builtin_fetch_usage[] = {
> >       N_("git fetch [<options>] <group>"),
> >       N_("git fetch --multiple [<options>] [(<repository> | <group>)...]"),
> >       N_("git fetch --all [<options>]"),
> > +     N_("git fetch --object-info=[<arguments>] <remote> [<object-ids>]"),
> >       NULL
> >  };
> >
> > @@ -86,6 +90,7 @@ static struct string_list negotiation_tip = STRING_LIST_INIT_NODUP;
> >  static int fetch_write_commit_graph = -1;
> >  static int stdin_refspecs = 0;
> >  static int negotiate_only;
> > +static struct string_list object_info_options = STRING_LIST_INIT_NODUP;
> >
> >  static int git_fetch_config(const char *k, const char *v, void *cb)
> >  {
> > @@ -221,6 +226,8 @@ static struct option builtin_fetch_options[] = {
> >                N_("write the commit-graph after fetching")),
> >       OPT_BOOL(0, "stdin", &stdin_refspecs,
> >                N_("accept refspecs from stdin")),
> > +     OPT_STRING_LIST(0, "object-info", &object_info_options, N_("option"),
> > +              N_("command request arguments")),
> >       OPT_END()
> >  };
> >
> > @@ -2087,6 +2094,7 @@ int cmd_fetch(int argc, const char **argv, const char *prefix)
> >       struct remote *remote = NULL;
> >       int result = 0;
> >       int prune_tags_ok = 1;
> > +     struct oid_array object_info_oids = OID_ARRAY_INIT;
> >
> >       packet_trace_identity("fetch");
> >
> > @@ -2168,7 +2176,19 @@ int cmd_fetch(int argc, const char **argv, const char *prefix)
> >       if (dry_run)
> >               write_fetch_head = 0;
> >
> > -     if (all) {
> > +     if (object_info_options.nr > 0) {
>
> Small nit; since object_info_options.nr should never be negative, we
> should instead write "if (object_info_options)".
>
> > +             if (argc == 0 || argc == 1) {
> > +                     die(_("must supply remote and object ids when using --object-info"));
> > +             } else {
> > +                     struct object_id oid;
> > +                     remote = remote_get(argv[0]);
> > +                     for (i = 1; i < argc; i++) {
> > +                             if (get_oid(argv[i], &oid))
> > +                                     return error(_("malformed object id '%s'"), argv[i]);
> > +                             oid_array_append(&object_info_oids, &oid);
> > +                     }
> > +             }
> > +     } else if (all) {
> >               if (argc == 1)
> >                       die(_("fetch --all does not take a repository argument"));
> >               else if (argc > 1)
> > @@ -2199,7 +2219,19 @@ int cmd_fetch(int argc, const char **argv, const char *prefix)
> >               }
> >       }
> >
> > -     if (negotiate_only) {
> > +     if (object_info_options.nr > 0) {
> > +             if (!remote)
> > +                     die(_("must supply remote when using --object-info"));
> > +             gtransport = prepare_transport(remote, 1);
> > +             if (gtransport->smart_options) {
> > +                     gtransport->smart_options->object_info = 1;
> > +                     gtransport->smart_options->object_info_oids = &object_info_oids;
> > +                     gtransport->smart_options->object_info_options = &object_info_options;
> > +             }
> > +             if (server_options.nr)
> > +                     gtransport->server_options = &server_options;
> > +             result = transport_fetch_refs(gtransport, NULL);
> > +     } else if (negotiate_only) {
> >               struct oidset acked_commits = OIDSET_INIT;
> >               struct oidset_iter iter;
> >               const struct object_id *oid;
> > diff --git a/fetch-pack.c b/fetch-pack.c
> > index b709a61baf..36e3d1c5d0 100644
> > --- a/fetch-pack.c
> > +++ b/fetch-pack.c
> > @@ -1269,6 +1269,27 @@ static void write_command_and_capabilities(struct strbuf *req_buf,
> >       packet_buf_delim(req_buf);
> >  }
> >
> > +void send_object_info_request(int fd_out, struct object_info_args *args)
> > +{
> > +     struct strbuf req_buf = STRBUF_INIT;
> > +     int i;
>
> I don't think this would ever be practically exploit-able, since we'd
> run past the argument limit long before we hit INT_MAX, but this should
> be a size_t instead of an int to match the type of args->oid->nr.
>
> > +
> > +     write_command_and_capabilities(&req_buf, args->server_options, "object-info");
> > +
> > +     if (string_list_has_string(args->object_info_options, "size"))
> > +             packet_buf_write(&req_buf, "size");
> > +
> > +     for (i = 0; i < args->oids->nr; i++) {
> > +             packet_buf_write(&req_buf, "oid %s\n", oid_to_hex(&args->oids->oid[i]));
> > +     }
>
> Small nit, the braces around this for-loop are not required.
>
> >  static int send_fetch_request(struct fetch_negotiator *negotiator, int fd_out,
> >                             struct fetch_pack_args *args,
> >                             const struct ref *wants, struct oidset *common,
> > @@ -1604,6 +1625,10 @@ static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args,
> >       if (args->depth > 0 || args->deepen_since || args->deepen_not)
> >               args->deepen = 1;
> >
> > +     if (args->object_info) {
> > +             state = FETCH_SEND_REQUEST;
> > +     }
> > +
>
> Ditto here.
>
> >       while (state != FETCH_DONE) {
> >               switch (state) {
> >               case FETCH_CHECK_LOCAL:
> > @@ -1613,7 +1638,7 @@ static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args,
> >                       /* Filter 'ref' by 'sought' and those that aren't local */
> >                       mark_complete_and_common_ref(negotiator, args, &ref);
> >                       filter_refs(args, &ref, sought, nr_sought);
> > -                     if (everything_local(args, &ref))
> > +                     if (!args->object_info && everything_local(args, &ref))
> >                               state = FETCH_DONE;
> >                       else
> >                               state = FETCH_SEND_REQUEST;
> > @@ -1999,8 +2024,19 @@ struct ref *fetch_pack(struct fetch_pack_args *args,
> >               }
> >               args->connectivity_checked = 1;
> >       }
> > -
> > -     update_shallow(args, sought, nr_sought, &si);
> > +
> > +     if (args->object_info) {
> > +             struct ref *ref_cpy_reader = ref_cpy;
> > +             unsigned long size = 0;
> > +             while (ref_cpy_reader) {
> > +                     oid_object_info(the_repository, &(ref_cpy_reader->old_oid), &size);
> > +                     printf("%s %li\n", oid_to_hex(&(ref_cpy_reader->old_oid)), size);
>
> Both expressions like "&(x)" can be written instead as "&x" without the
> outer parenthesis.
>
> Likewise, we do not use %li, that line should instead read:
>
>     printf("%s %"PRIuMAX"\n", oid_to_hex(&ref_cpy_reader->old_oid), (uintmax_t)size);
>
> Thanks,
> Taylor

^ permalink raw reply	[flat|nested] 103+ messages in thread

* Re: [PATCH v3 3/3] object-info: add option for retrieving object info
  2022-03-30 22:43         ` Jonathan Tan
@ 2022-03-30 23:42           ` Junio C Hamano
  0 siblings, 0 replies; 103+ messages in thread
From: Junio C Hamano @ 2022-03-30 23:42 UTC (permalink / raw)
  To: Jonathan Tan; +Cc: Calvin Wan, git, philipoakley, johncai86

Jonathan Tan <jonathantanmy@google.com> writes:

> Calvin and I discussed details of implementation before he wrote these
> patches, and my thinking at that time was that to get the relevant
> information, we're prepared to fetch the objects themselves from the
> remote if need be, but prefer to fetch only metadata if possible, so
> putting it in "fetch" seemed reasonable to me. But I agree that it's
> probably better to put it somewhere else.

Yeah, I am sympathetic that every "learning something from remote"
looks like a "fetch".  After all, a remote blame may be "fetching
the result of running blame".  But it may not be a good way to
decide what the feature should be called.

Thanks.


^ permalink raw reply	[flat|nested] 103+ messages in thread

* Re: [PATCH v3 0/3] object-info: add option for retrieving object info
  2022-03-28 19:11   ` [PATCH v3 0/3] object-info: add option for retrieving object info Calvin Wan
                       ` (3 preceding siblings ...)
  2022-03-29 20:35     ` [PATCH v3 0/3] " Junio C Hamano
@ 2022-03-31  1:50     ` Junio C Hamano
  2022-05-02 17:08     ` [PATCH v4 0/8] cat-file: add --batch-command remote-object-info command Calvin Wan
  5 siblings, 0 replies; 103+ messages in thread
From: Junio C Hamano @ 2022-03-31  1:50 UTC (permalink / raw)
  To: Calvin Wan; +Cc: git, jonathantanmy, philipoakley, johncai86

Calvin Wan <calvinwan@google.com> writes:

> Sometimes it is useful to get information about an object without
> having to download it completely. The server logic has already been
> implemented as “a2ba162cda (object-info: support for retrieving object
> info, 2021-04-20)”. This patch implements the client option for it.

Merged to 'seen', this seems to break a test in t5583.
e.g. https://github.com/git/git/runs/5763421702?check_suite_focus=true

Thanks.

^ permalink raw reply	[flat|nested] 103+ messages in thread

* Re: [PATCH v3 3/3] object-info: add option for retrieving object info
  2022-03-30 22:06       ` John Cai
@ 2022-03-31 19:56         ` Calvin Wan
  2022-04-01 16:16           ` Junio C Hamano
  0 siblings, 1 reply; 103+ messages in thread
From: Calvin Wan @ 2022-03-31 19:56 UTC (permalink / raw)
  To: John Cai; +Cc: git, jonathantanmy, philipoakley

(Just a small nit before I respond. If you could reply with only the
code you're referring to, that would be much appreciated).

> Would it be useful to check for the existence of the object to prove it got fetched?
The object is not saved into the filesystem after fetching for only
object-info so checking for its existence wouldn't prove anything.

> Since the server is being used as the client, we wouldn't be able to confirm that
> the object got fetched
I agree that this test case can be written better to confirm that the
object was fetched through the standard channel. I think I could use
GIT_TRACE_PACKET=1 with the output piped to a log file and then
grepping that log file for packet information resembling a standard
fetch to confirm. (See t5702-protocol-v2.sh as a reference)

> But, as a larger question for the list, I'm wondering if it's best practice to separate client from
> server in tests, or if it's an accepted style to just use one repository in cases
> when it doesn't really matter?
I am as well!

On Wed, Mar 30, 2022 at 3:06 PM John Cai <johncai86@gmail.com> wrote:
>
>
>
> On 28 Mar 2022, at 15:11, Calvin Wan wrote:
>
> > Sometimes it is useful to get information about an object without
> > having to download it completely. The server logic has already been
> > implemented as “a2ba162cda (object-info: support for retrieving object
> > info, 2021-04-20)”. This patch implements the client option for
> > it. Currently, only 'size' is supported, however future patches can
> > implement additional options.
> >
> > Add ‘--object-info’ option to fetch. This option allows the client to
> > make an object-info command request to a server that supports protocol
> > v2. If the server is v2, but does not allow for the object-info
> > command request, fetch the objects as if it were a standard fetch
> > (however without changing any refs). Therefore, hook `fetch
> > object-info` into transport_fetch_refs() to easily fallback if needed.
> >
> > A previous patch added the `transfer.advertiseObjectInfo` config
> > option, of which this patch utilizes to test the fallback.
> >
> > ---
> >  Documentation/fetch-options.txt |   5 ++
> >  Documentation/git-fetch.txt     |   1 +
> >  builtin/fetch.c                 |  36 ++++++++-
> >  fetch-pack.c                    |  42 +++++++++-
> >  fetch-pack.h                    |   9 +++
> >  t/t5583-fetch-object-info.sh    | 138 ++++++++++++++++++++++++++++++++
> >  transport-helper.c              |   8 +-
> >  transport-internal.h            |   1 +
> >  transport.c                     |  75 ++++++++++++++++-
> >  transport.h                     |   9 +++
> >  10 files changed, 315 insertions(+), 9 deletions(-)
> >  create mode 100755 t/t5583-fetch-object-info.sh
> >
> > diff --git a/Documentation/fetch-options.txt b/Documentation/fetch-options.txt
> > index f903683189..f1ca95c32d 100644
> > --- a/Documentation/fetch-options.txt
> > +++ b/Documentation/fetch-options.txt
> > @@ -299,3 +299,8 @@ endif::git-pull[]
> >  -6::
> >  --ipv6::
> >       Use IPv6 addresses only, ignoring IPv4 addresses.
> > +
> > +--object-info=[<arguments>]::
> > +     Fetches only the relevant information passed in arguments from a
> > +     specific remote and set of object ids. Object 'size' is currently
> > +     the only supported argument.
> > diff --git a/Documentation/git-fetch.txt b/Documentation/git-fetch.txt
> > index 550c16ca61..a13d2ba7ad 100644
> > --- a/Documentation/git-fetch.txt
> > +++ b/Documentation/git-fetch.txt
> > @@ -13,6 +13,7 @@ SYNOPSIS
> >  'git fetch' [<options>] <group>
> >  'git fetch' --multiple [<options>] [(<repository> | <group>)...]
> >  'git fetch' --all [<options>]
> > +'git fetch' --object-info=[<arguments>] <remote> [<object-ids>]
> >
> >
> >  DESCRIPTION
> > diff --git a/builtin/fetch.c b/builtin/fetch.c
> > index 4d12c2fd4d..0b21bebfca 100644
> > --- a/builtin/fetch.c
> > +++ b/builtin/fetch.c
> > @@ -29,6 +29,9 @@
> >  #include "commit-graph.h"
> >  #include "shallow.h"
> >  #include "worktree.h"
> > +#include "protocol.h"
> > +#include "pkt-line.h"
> > +#include "connect.h"
> >
> >  #define FORCED_UPDATES_DELAY_WARNING_IN_MS (10 * 1000)
> >
> > @@ -37,6 +40,7 @@ static const char * const builtin_fetch_usage[] = {
> >       N_("git fetch [<options>] <group>"),
> >       N_("git fetch --multiple [<options>] [(<repository> | <group>)...]"),
> >       N_("git fetch --all [<options>]"),
> > +     N_("git fetch --object-info=[<arguments>] <remote> [<object-ids>]"),
> >       NULL
> >  };
> >
> > @@ -86,6 +90,7 @@ static struct string_list negotiation_tip = STRING_LIST_INIT_NODUP;
> >  static int fetch_write_commit_graph = -1;
> >  static int stdin_refspecs = 0;
> >  static int negotiate_only;
> > +static struct string_list object_info_options = STRING_LIST_INIT_NODUP;
> >
> >  static int git_fetch_config(const char *k, const char *v, void *cb)
> >  {
> > @@ -221,6 +226,8 @@ static struct option builtin_fetch_options[] = {
> >                N_("write the commit-graph after fetching")),
> >       OPT_BOOL(0, "stdin", &stdin_refspecs,
> >                N_("accept refspecs from stdin")),
> > +     OPT_STRING_LIST(0, "object-info", &object_info_options, N_("option"),
> > +              N_("command request arguments")),
> >       OPT_END()
> >  };
> >
> > @@ -2087,6 +2094,7 @@ int cmd_fetch(int argc, const char **argv, const char *prefix)
> >       struct remote *remote = NULL;
> >       int result = 0;
> >       int prune_tags_ok = 1;
> > +     struct oid_array object_info_oids = OID_ARRAY_INIT;
> >
> >       packet_trace_identity("fetch");
> >
> > @@ -2168,7 +2176,19 @@ int cmd_fetch(int argc, const char **argv, const char *prefix)
> >       if (dry_run)
> >               write_fetch_head = 0;
> >
> > -     if (all) {
> > +     if (object_info_options.nr > 0) {
> > +             if (argc == 0 || argc == 1) {
> > +                     die(_("must supply remote and object ids when using --object-info"));
> > +             } else {
> > +                     struct object_id oid;
> > +                     remote = remote_get(argv[0]);
> > +                     for (i = 1; i < argc; i++) {
> > +                             if (get_oid(argv[i], &oid))
> > +                                     return error(_("malformed object id '%s'"), argv[i]);
> > +                             oid_array_append(&object_info_oids, &oid);
> > +                     }
> > +             }
> > +     } else if (all) {
> >               if (argc == 1)
> >                       die(_("fetch --all does not take a repository argument"));
> >               else if (argc > 1)
> > @@ -2199,7 +2219,19 @@ int cmd_fetch(int argc, const char **argv, const char *prefix)
> >               }
> >       }
> >
> > -     if (negotiate_only) {
> > +     if (object_info_options.nr > 0) {
> > +             if (!remote)
> > +                     die(_("must supply remote when using --object-info"));
> > +             gtransport = prepare_transport(remote, 1);
> > +             if (gtransport->smart_options) {
> > +                     gtransport->smart_options->object_info = 1;
> > +                     gtransport->smart_options->object_info_oids = &object_info_oids;
> > +                     gtransport->smart_options->object_info_options = &object_info_options;
> > +             }
> > +             if (server_options.nr)
> > +                     gtransport->server_options = &server_options;
> > +             result = transport_fetch_refs(gtransport, NULL);
> > +     } else if (negotiate_only) {
> >               struct oidset acked_commits = OIDSET_INIT;
> >               struct oidset_iter iter;
> >               const struct object_id *oid;
> > diff --git a/fetch-pack.c b/fetch-pack.c
> > index b709a61baf..36e3d1c5d0 100644
> > --- a/fetch-pack.c
> > +++ b/fetch-pack.c
> > @@ -1269,6 +1269,27 @@ static void write_command_and_capabilities(struct strbuf *req_buf,
> >       packet_buf_delim(req_buf);
> >  }
> >
> > +void send_object_info_request(int fd_out, struct object_info_args *args)
> > +{
> > +     struct strbuf req_buf = STRBUF_INIT;
> > +     int i;
> > +
> > +     write_command_and_capabilities(&req_buf, args->server_options, "object-info");
> > +
> > +     if (string_list_has_string(args->object_info_options, "size"))
> > +             packet_buf_write(&req_buf, "size");
> > +
> > +     for (i = 0; i < args->oids->nr; i++) {
> > +             packet_buf_write(&req_buf, "oid %s\n", oid_to_hex(&args->oids->oid[i]));
> > +     }
> > +
> > +     packet_buf_flush(&req_buf);
> > +     if (write_in_full(fd_out, req_buf.buf, req_buf.len) < 0)
> > +             die_errno(_("unable to write request to remote"));
> > +
> > +     strbuf_release(&req_buf);
> > +}
> > +
> >  static int send_fetch_request(struct fetch_negotiator *negotiator, int fd_out,
> >                             struct fetch_pack_args *args,
> >                             const struct ref *wants, struct oidset *common,
> > @@ -1604,6 +1625,10 @@ static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args,
> >       if (args->depth > 0 || args->deepen_since || args->deepen_not)
> >               args->deepen = 1;
> >
> > +     if (args->object_info) {
> > +             state = FETCH_SEND_REQUEST;
> > +     }
> > +
> >       while (state != FETCH_DONE) {
> >               switch (state) {
> >               case FETCH_CHECK_LOCAL:
> > @@ -1613,7 +1638,7 @@ static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args,
> >                       /* Filter 'ref' by 'sought' and those that aren't local */
> >                       mark_complete_and_common_ref(negotiator, args, &ref);
> >                       filter_refs(args, &ref, sought, nr_sought);
> > -                     if (everything_local(args, &ref))
> > +                     if (!args->object_info && everything_local(args, &ref))
> >                               state = FETCH_DONE;
> >                       else
> >                               state = FETCH_SEND_REQUEST;
> > @@ -1999,8 +2024,19 @@ struct ref *fetch_pack(struct fetch_pack_args *args,
> >               }
> >               args->connectivity_checked = 1;
> >       }
> > -
> > -     update_shallow(args, sought, nr_sought, &si);
> > +
> > +     if (args->object_info) {
> > +             struct ref *ref_cpy_reader = ref_cpy;
> > +             unsigned long size = 0;
> > +             while (ref_cpy_reader) {
> > +                     oid_object_info(the_repository, &(ref_cpy_reader->old_oid), &size);
> > +                     printf("%s %li\n", oid_to_hex(&(ref_cpy_reader->old_oid)), size);
> > +                     ref_cpy_reader = ref_cpy_reader->next;
> > +             }
> > +     }
> > +     else {
> > +             update_shallow(args, sought, nr_sought, &si);
> > +     }
> >  cleanup:
> >       clear_shallow_info(&si);
> >       oid_array_clear(&shallows_scratch);
> > diff --git a/fetch-pack.h b/fetch-pack.h
> > index 7f94a2a583..23e69bb218 100644
> > --- a/fetch-pack.h
> > +++ b/fetch-pack.h
> > @@ -42,6 +42,7 @@ struct fetch_pack_args {
> >       unsigned update_shallow:1;
> >       unsigned reject_shallow_remote:1;
> >       unsigned deepen:1;
> > +     unsigned object_info:1;
> >
> >       /*
> >        * Indicate that the remote of this request is a promisor remote. The
> > @@ -68,6 +69,12 @@ struct fetch_pack_args {
> >       unsigned connectivity_checked:1;
> >  };
> >
> > +struct object_info_args {
> > +     const struct string_list *object_info_options;
> > +     const struct string_list *server_options;
> > +     const struct oid_array *oids;
> > +};
> > +
> >  /*
> >   * sought represents remote references that should be updated from.
> >   * On return, the names that were found on the remote will have been
> > @@ -101,4 +108,6 @@ void negotiate_using_fetch(const struct oid_array *negotiation_tips,
> >   */
> >  int report_unmatched_refs(struct ref **sought, int nr_sought);
> >
> > +void send_object_info_request(int fd_out, struct object_info_args *args);
> > +
> >  #endif
> > diff --git a/t/t5583-fetch-object-info.sh b/t/t5583-fetch-object-info.sh
> > new file mode 100755
> > index 0000000000..e5db6d953c
> > --- /dev/null
> > +++ b/t/t5583-fetch-object-info.sh
> > @@ -0,0 +1,138 @@
> > +#!/bin/sh
> > +
> > +test_description='test git fetch object-info version 2'
> > +
> > +TEST_NO_CREATE_REPO=1
> > +
> > +GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME=main
> > +export GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME
> > +
> > +. ./test-lib.sh
> > +
> > +# Test fetch object-info with 'git://' transport
> > +
> > +. "$TEST_DIRECTORY"/lib-git-daemon.sh
> > +start_git_daemon --export-all --enable=receive-pack
> > +daemon_parent=$GIT_DAEMON_DOCUMENT_ROOT_PATH/parent
> > +
> > +
> > +test_expect_success 'create repo to be served by git-daemon' '
> > +     git init "$daemon_parent" &&
> > +     test_commit -C "$daemon_parent" message1 a.txt
> > +'
> > +
> > +test_expect_success 'fetch object-info with git:// using protocol v2' '
> > +     (
> > +             cd "$daemon_parent" &&
> > +             object_id=$(git rev-parse message1:a.txt) &&
> > +             length=$(wc -c <a.txt) &&
> > +
> > +             printf "%s %d\n" "$object_id" "$length" >expect &&
> > +             git -c protocol.version=2 fetch --object-info=size "$GIT_DAEMON_URL/parent" "$object_id" >actual &&
> > +             test_cmp expect actual
> > +     )
> > +'
> > +stop_git_daemon
> > +
> > +# Test protocol v2 with 'http://' transport
> > +
> > +. "$TEST_DIRECTORY"/lib-httpd.sh
> > +start_httpd
> > +
> > +test_expect_success 'create repo to be served by http:// transport' '
> > +     git init "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
> > +     git -C "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" config http.receivepack true &&
> > +     test_commit -C "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" message1 a.txt &&
> > +     test_commit -C "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" message2 b.txt
> > +'
> > +
> > +test_expect_success 'fetch object-info with http:// using protocol v2' '
> > +     (
> > +             cd "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
> > +             object_id=$(git rev-parse message1:a.txt) &&
> > +             length=$(wc -c <a.txt) &&
> > +
> > +             printf "%s %d\n" "$object_id" "$length" >expect &&
> > +             git -c protocol.version=2 fetch --object-info=size "$HTTPD_URL/smart/http_parent" "$object_id" >actual &&
> > +             test_cmp expect actual
> > +     )
> > +'
> > +
> > +test_expect_success 'fetch object-info for multiple objects' '
> > +     (
> > +             cd "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
> > +             object_id1=$(git rev-parse message1:a.txt) &&
> > +             object_id2=$(git rev-parse message2:b.txt) &&
> > +             length1=$(wc -c <a.txt) &&
> > +             length2=$(wc -c <b.txt) &&
> > +
> > +             printf "%s %d\n" "$object_id1" "$length1" >expect &&
> > +             printf "%s %d\n" "$object_id2" "$length2" >>expect &&
> > +             git -c protocol.version=2 fetch --object-info=size "$HTTPD_URL/smart/http_parent" "$object_id1" "$object_id2" >actual &&
> > +             test_cmp expect actual
> > +     )
> > +'
> > +
> > +test_expect_success 'fetch object-info fallbacks to standard fetch if object-info is not supported by the server' '
> > +     (
> > +             cd "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
> > +             object_id=$(git rev-parse message1:a.txt) &&
> > +             length=$(wc -c <a.txt) &&
> > +
> > +             printf "%s %d\n" "$object_id" "$length" >expect &&
> > +             git config objectinfo.advertise false &&
> > +             git -c protocol.version=2 fetch --object-info=size "$HTTPD_URL/smart/http_parent" "$object_id" >actual &&
> > +             test_cmp expect actual
> > +     )
>
> I noticed this test doesn't confirm that standard fetch was actually used. Would
> it be useful to check for the existence of the object to prove it got fetched?
>
> Also, is seems the way this test is set up, it's fetching from itself.
> This is logically fine in most cases that client == server, but it was a little
> confusing at first because it looked like the invocation of git config was on
> the client rather than the server, until I realized they are the same.
>
> Since the server is being used as the client, we wouldn't be able to confirm that
> the object got fetched. I realize for most of these tests, it doesn't really matter
> to conflate client with server. I think this is the one exception.
>
> But, as a larger question for the list, I'm wondering if it's best practice to separate client from
> server in tests, or if it's an accepted style to just use one repository in cases
> when it doesn't really matter?
>
> Thanks
> John
>
> > +'
> > +
> > +test_expect_success 'fetch object-info fails on server with legacy protocol' '
> > +     (
> > +             cd "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
> > +             object_id=$(git rev-parse message1:a.txt) &&
> > +             test_must_fail git -c protocol.version=0 fetch --object-info=size "$HTTPD_URL/smart/http_parent" "$object_id" 2>err &&
> > +             test_i18ngrep "object-info requires protocol v2" err
> > +     )
> > +'
> > +
> > +test_expect_success 'fetch object-info fails on malformed OID' '
> > +     (
> > +             cd "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
> > +             malformed_object_id="this_id_is_not_valid" &&
> > +             test_must_fail git -c protocol.version=2 fetch --object-info=size "$HTTPD_URL/smart/http_parent" "$malformed_object_id" 2>err &&
> > +             test_i18ngrep "malformed object id '$malformed_object_id'" err
> > +     )
> > +'
> > +
> > +test_expect_success 'fetch object-info fails on missing OID' '
> > +     git clone "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" missing_oid_repo &&
> > +     test_commit -C missing_oid_repo message3 c.txt &&
> > +     (
> > +             cd missing_oid_repo &&
> > +             object_id=$(git rev-parse message3:c.txt) &&
> > +             test_must_fail env GIT_TRACE_PACKET=1 git -c protocol.version=2 fetch --object-info=size "$HTTPD_URL/smart/http_parent" "$object_id" 2>err &&
> > +             test_i18ngrep "fatal: remote error: upload-pack: not our ref $object_id" err
> > +     )
> > +'
> > +
> > +# Test fetch object-info with 'file://' transport
> > +
> > +test_expect_success 'create repo to be served by file:// transport' '
> > +     git init server &&
> > +     test_commit -C server message1 a.txt &&
> > +     git -C server config protocol.version 2
> > +'
> > +
> > +test_expect_success 'fetch object-info with file:// using protocol v2' '
> > +     (
> > +             cd server &&
> > +             object_id=$(git rev-parse message1:a.txt) &&
> > +             length=$(wc -c <a.txt) &&
> > +
> > +             printf "%s %d\n" "$object_id" "$length" >expect &&
> > +             git -c protocol.version=2 fetch --object-info=size "file://$(pwd)" "$object_id" >actual &&
> > +             test_cmp expect actual
> > +     )
> > +'
> > +
> > +test_done
> > diff --git a/transport-helper.c b/transport-helper.c
> > index a0297b0986..64c175e904 100644
> > --- a/transport-helper.c
> > +++ b/transport-helper.c
> > @@ -686,13 +686,17 @@ static int fetch_refs(struct transport *transport,
> >
> >       /*
> >        * If we reach here, then the server, the client, and/or the transport
> > -      * helper does not support protocol v2. --negotiate-only requires
> > -      * protocol v2.
> > +      * helper does not support protocol v2. --negotiate-only and --object-info
> > +      * require protocol v2.
> >        */
> >       if (data->transport_options.acked_commits) {
> >               warning(_("--negotiate-only requires protocol v2"));
> >               return -1;
> >       }
> > +     if (transport->smart_options->object_info) {
> > +             warning(_("--object-info requires protocol v2"));
> > +             return -1;
> > +     }
> >
> >       if (!data->get_refs_list_called)
> >               get_refs_list_using_list(transport, 0);
> > diff --git a/transport-internal.h b/transport-internal.h
> > index c4ca0b733a..04fa015011 100644
> > --- a/transport-internal.h
> > +++ b/transport-internal.h
> > @@ -59,6 +59,7 @@ struct transport_vtable {
> >        * use. disconnect() releases these resources.
> >        **/
> >       int (*disconnect)(struct transport *connection);
> > +     int (*fetch_object_info)(struct transport *transport, struct oid_array *oids);
> >  };
> >
> >  #endif
> > diff --git a/transport.c b/transport.c
> > index 70e9840a90..65a1b1fdb3 100644
> > --- a/transport.c
> > +++ b/transport.c
> > @@ -350,6 +350,67 @@ static struct ref *handshake(struct transport *transport, int for_push,
> >       return refs;
> >  }
> >
> > +/*
> > + * Fetches object info if server supports object-info command
> > + * Make sure to fallback to normal fetch if this fails
> > + */
> > +static int fetch_object_info(struct transport *transport)
> > +{
> > +     struct git_transport_data *data = transport->data;
> > +     struct object_info_args args;
> > +     struct packet_reader reader;
> > +
> > +     memset(&args, 0, sizeof(args));
> > +     args.server_options = transport->server_options;
> > +     args.object_info_options = transport->smart_options->object_info_options;
> > +     args.oids = transport->smart_options->object_info_oids;
> > +
> > +     connect_setup(transport, 0);
> > +     packet_reader_init(&reader, data->fd[0], NULL, 0,
> > +                     PACKET_READ_CHOMP_NEWLINE |
> > +                     PACKET_READ_DIE_ON_ERR_PACKET);
> > +     data->version = discover_version(&reader);
> > +
> > +     transport->hash_algo = reader.hash_algo;
> > +
> > +     switch (data->version) {
> > +     case protocol_v2:
> > +             if (!server_supports_v2("object-info", 0))
> > +                     return 0;
> > +             send_object_info_request(data->fd[1], &args);
> > +             break;
> > +     case protocol_v1:
> > +     case protocol_v0:
> > +             die(_("wrong protocol version. expected v2"));
> > +     case protocol_unknown_version:
> > +             BUG("unknown protocol version");
> > +     }
> > +
> > +     if (packet_reader_read(&reader) != PACKET_READ_NORMAL) {
> > +             die(_("error reading object info header"));
> > +     }
> > +     if (strcmp(reader.line, "size")) {
> > +             die(_("expected 'size', received '%s'"), reader.line);
> > +     }
> > +     while (packet_reader_read(&reader) == PACKET_READ_NORMAL) {
> > +             printf("%s\n", reader.line);
> > +     }
> > +     check_stateless_delimiter(transport->stateless_rpc, &reader, "stateless delimiter expected");
> > +
> > +     return 1;
> > +}
> > +
> > +static int append_oid_to_ref(const struct object_id *oid, void *data)
> > +{
> > +     struct ref *ref = data;
> > +     struct ref *temp_ref = xcalloc(1, sizeof (struct ref));
> > +     temp_ref->old_oid = *oid;
> > +     temp_ref->exact_oid = 1;
> > +     ref->next = temp_ref;
> > +     ref = ref->next;
> > +     return 0;
> > +}
> > +
> >  static struct ref *get_refs_via_connect(struct transport *transport, int for_push,
> >                                       struct transport_ls_refs_options *options)
> >  {
> > @@ -364,6 +425,7 @@ static int fetch_refs_via_pack(struct transport *transport,
> >       struct ref *refs = NULL;
> >       struct fetch_pack_args args;
> >       struct ref *refs_tmp = NULL;
> > +     struct ref *wanted_refs = xcalloc(1, sizeof (struct ref));
> >
> >       memset(&args, 0, sizeof(args));
> >       args.uploadpack = data->options.uploadpack;
> > @@ -388,8 +450,17 @@ static int fetch_refs_via_pack(struct transport *transport,
> >       args.server_options = transport->server_options;
> >       args.negotiation_tips = data->options.negotiation_tips;
> >       args.reject_shallow_remote = transport->smart_options->reject_shallow;
> > +     args.object_info = transport->smart_options->object_info;
> >
> > -     if (!data->got_remote_heads) {
> > +     if (transport->smart_options && transport->smart_options->object_info) {
> > +             struct ref *temp_ref = wanted_refs;
> > +             if (fetch_object_info(transport)) {
> > +                     goto cleanup;
> > +             }
> > +             oid_array_for_each(transport->smart_options->object_info_oids, append_oid_to_ref, temp_ref);
> > +             wanted_refs = wanted_refs->next;
> > +             transport->remote_refs = wanted_refs;
> > +     } else if (!data->got_remote_heads) {
> >               int i;
> >               int must_list_refs = 0;
> >               for (i = 0; i < nr_heads; i++) {
> > @@ -406,7 +477,7 @@ static int fetch_refs_via_pack(struct transport *transport,
> >       else if (data->version <= protocol_v1)
> >               die_if_server_options(transport);
> >
> > -     if (data->options.acked_commits) {
> > +     if (data->options.acked_commits && !transport->smart_options->object_info) {
> >               if (data->version < protocol_v2) {
> >                       warning(_("--negotiate-only requires protocol v2"));
> >                       ret = -1;
> > diff --git a/transport.h b/transport.h
> > index a0bc6a1e9e..3bba16f86e 100644
> > --- a/transport.h
> > +++ b/transport.h
> > @@ -30,6 +30,12 @@ struct git_transport_options {
> >        */
> >       unsigned connectivity_checked:1;
> >
> > +     /*
> > +      * Transport will attempt to pull only object-info. Fallbacks
> > +      * to pulling entire object if object-info is not supported
> > +      */
> > +     unsigned object_info : 1;
> > +
> >       int depth;
> >       const char *deepen_since;
> >       const struct string_list *deepen_not;
> > @@ -53,6 +59,9 @@ struct git_transport_options {
> >        * common commits to this oidset instead of fetching any packfiles.
> >        */
> >       struct oidset *acked_commits;
> > +
> > +     struct oid_array *object_info_oids;
> > +     struct string_list *object_info_options;
> >  };
> >
> >  enum transport_family {
> > --
> > 2.33.0.664.g0785eb7698

^ permalink raw reply	[flat|nested] 103+ messages in thread

* Re: [PATCH v3 3/3] object-info: add option for retrieving object info
  2022-03-31 19:56         ` Calvin Wan
@ 2022-04-01 16:16           ` Junio C Hamano
  0 siblings, 0 replies; 103+ messages in thread
From: Junio C Hamano @ 2022-04-01 16:16 UTC (permalink / raw)
  To: Calvin Wan; +Cc: John Cai, git, jonathantanmy, philipoakley

Calvin Wan <calvinwan@google.com> writes:

>> But, as a larger question for the list, I'm wondering if it's
>> best practice to separate client from server in tests, or if it's
>> an accepted style to just use one repository in cases when it
>> doesn't really matter?
> I am as well!

I do not speak for "the list", but many fetch-push tests were
written to use the same repository, and many early submodule tests
imitated them to use the initial test repository and add it as its
submodule, out of pure laziness (and I am probably the most to be
blamed)---these scenarios are not very useful but when the transfer
itself is not the focus of the tests it was a handy way.  Of course
it is not encouraged, and those tests that do care about seeing
objects getting transferred do make sure that they are as
independent as the real world settings that we want the feature
being tested to be working in.

^ permalink raw reply	[flat|nested] 103+ messages in thread

* [PATCH v4 0/8] cat-file: add --batch-command remote-object-info command
  2022-03-28 19:11   ` [PATCH v3 0/3] object-info: add option for retrieving object info Calvin Wan
                       ` (4 preceding siblings ...)
  2022-03-31  1:50     ` Junio C Hamano
@ 2022-05-02 17:08     ` Calvin Wan
  2022-05-02 17:08       ` [PATCH v4 1/8] fetch-pack: refactor packet writing Calvin Wan
                         ` (16 more replies)
  5 siblings, 17 replies; 103+ messages in thread
From: Calvin Wan @ 2022-05-02 17:08 UTC (permalink / raw)
  To: git; +Cc: gitster, jonathantanmy, philipoakley, johncai86, calvinwan, me

Sometimes it is useful to get information about an object without having
to download it completely. The server logic has already been implemented
as “a2ba162cda (object-info: support for retrieving object info,
2021-04-20)”. This patch implements the client option for it.

Add `--object-info` option to `cat-file --batch-command`. This option
allows the client to make an object-info command request to a server
that supports protocol v2. If the server is v2, but does not allow for
the object-info command request, the entire object is fetched and the
relevant object info is returned.

Summary of changes ==================

Patches 1, 2, 3, and 7 are small changes that setup the main
implementation. Patch 4 sets up object-info to be backwards compatible
for future patch series that adds additional attributes.  Patch 5 adds
internal trasnport functions to send and receive object-info command
request packets. Patch 6 adds the fallback if object-info is not
supported or fails.  Patch 8 adds the cat-file implementation.

Changes since V3 ================

 * Object-info is now implemented in cat-file --batch-command rather
   than fetch (new base commit)
 * Removed config option to advertise object-info
 * Added forwards and backwards compability for object-info
 * Split up some patches to better describe and visualize changes

Signed-off-by: Calvin Wan <calvinwan@google.com>
Helped-by: Jonathan Tan <jonathantanmy@google.com>

Calvin Wan (8):
  fetch-pack: refactor packet writing
  fetch-pack: move fetch default settings
  object-store: add function to free object_info contents
  object-info: send attribute packet regardless of object ids
  transport: add client side capability to request object-info
  transport: add object-info fallback to fetch
  cat-file: move parse_cmd and DEFAULT_FORMAT up
  cat-file: add --batch-command remote-object-info command

 Documentation/git-cat-file.txt |  16 +-
 builtin/cat-file.c             | 225 ++++++++++++++++-----
 fetch-pack.c                   |  61 ++++--
 fetch-pack.h                   |  10 +
 object-file.c                  |  16 ++
 object-store.h                 |   3 +
 protocol-caps.c                |  14 +-
 t/t1006-cat-file.sh            | 347 +++++++++++++++++++++++++++++++++
 transport-helper.c             |   7 +-
 transport.c                    |  97 ++++++++-
 transport.h                    |  11 ++
 11 files changed, 728 insertions(+), 79 deletions(-)

-- 
2.36.0.rc2.10170.gb555eefa6f


^ permalink raw reply	[flat|nested] 103+ messages in thread

* [PATCH v4 1/8] fetch-pack: refactor packet writing
  2022-05-02 17:08     ` [PATCH v4 0/8] cat-file: add --batch-command remote-object-info command Calvin Wan
@ 2022-05-02 17:08       ` Calvin Wan
  2022-05-02 17:08       ` [PATCH v4 2/8] fetch-pack: move fetch default settings Calvin Wan
                         ` (15 subsequent siblings)
  16 siblings, 0 replies; 103+ messages in thread
From: Calvin Wan @ 2022-05-02 17:08 UTC (permalink / raw)
  To: git; +Cc: gitster, jonathantanmy, philipoakley, johncai86, calvinwan, me

A subsequent patch will need to write capabilities for another command,
so refactor write_fetch_command_and_capabilities() to be used by both
fetch and future command.

---
 fetch-pack.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/fetch-pack.c b/fetch-pack.c
index 4e1e88eea0..e06125c90a 100644
--- a/fetch-pack.c
+++ b/fetch-pack.c
@@ -1246,13 +1246,13 @@ static int add_haves(struct fetch_negotiator *negotiator,
 	return haves_added;
 }
 
-static void write_fetch_command_and_capabilities(struct strbuf *req_buf,
-						 const struct string_list *server_options)
+static void write_command_and_capabilities(struct strbuf *req_buf,
+						 const struct string_list *server_options, const char* command)
 {
 	const char *hash_name;
 
-	if (server_supports_v2("fetch", 1))
-		packet_buf_write(req_buf, "command=fetch");
+	if (server_supports_v2(command, 1))
+		packet_buf_write(req_buf, "command=%s", command);
 	if (server_supports_v2("agent", 0))
 		packet_buf_write(req_buf, "agent=%s", git_user_agent_sanitized());
 	if (advertise_sid && server_supports_v2("session-id", 0))
@@ -1288,7 +1288,7 @@ static int send_fetch_request(struct fetch_negotiator *negotiator, int fd_out,
 	int done_sent = 0;
 	struct strbuf req_buf = STRBUF_INIT;
 
-	write_fetch_command_and_capabilities(&req_buf, args->server_options);
+	write_command_and_capabilities(&req_buf, args->server_options, "fetch");
 
 	if (args->use_thin_pack)
 		packet_buf_write(&req_buf, "thin-pack");
@@ -2072,7 +2072,7 @@ void negotiate_using_fetch(const struct oid_array *negotiation_tips,
 		int received_ready = 0;
 
 		strbuf_reset(&req_buf);
-		write_fetch_command_and_capabilities(&req_buf, server_options);
+		write_command_and_capabilities(&req_buf, server_options, "fetch");
 
 		packet_buf_write(&req_buf, "wait-for-done");
 
-- 
2.36.0.rc2.10170.gb555eefa6f


^ permalink raw reply related	[flat|nested] 103+ messages in thread

* [PATCH v4 2/8] fetch-pack: move fetch default settings
  2022-05-02 17:08     ` [PATCH v4 0/8] cat-file: add --batch-command remote-object-info command Calvin Wan
  2022-05-02 17:08       ` [PATCH v4 1/8] fetch-pack: refactor packet writing Calvin Wan
@ 2022-05-02 17:08       ` Calvin Wan
  2022-05-02 22:58         ` Junio C Hamano
  2022-05-02 17:08       ` [PATCH v4 3/8] object-store: add function to free object_info contents Calvin Wan
                         ` (14 subsequent siblings)
  16 siblings, 1 reply; 103+ messages in thread
From: Calvin Wan @ 2022-05-02 17:08 UTC (permalink / raw)
  To: git; +Cc: gitster, jonathantanmy, philipoakley, johncai86, calvinwan, me

When the state machine in do_fetch_pack_v2() is in FETCH_CHECK_LOCAL, we
set a few v2-specific defaults. It will be helpful for a future patch to
have these defaults set if the initial state is not FETCH_CHECK_LOCAL.
This is a safe change since the initial state is currently always
FETCH_CHECK_LOCAL, so we're guaranteed to hit that code whether it's in
the state machine or not.

---
 fetch-pack.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/fetch-pack.c b/fetch-pack.c
index e06125c90a..45473b4602 100644
--- a/fetch-pack.c
+++ b/fetch-pack.c
@@ -1610,18 +1610,18 @@ static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args,
 		reader.me = "fetch-pack";
 	}
 
+	/* v2 supports these by default */
+	allow_unadvertised_object_request |= ALLOW_REACHABLE_SHA1;
+	use_sideband = 2;
+	if (args->depth > 0 || args->deepen_since || args->deepen_not)
+		args->deepen = 1;
+
 	while (state != FETCH_DONE) {
 		switch (state) {
 		case FETCH_CHECK_LOCAL:
 			sort_ref_list(&ref, ref_compare_name);
 			QSORT(sought, nr_sought, cmp_ref_by_name);
 
-			/* v2 supports these by default */
-			allow_unadvertised_object_request |= ALLOW_REACHABLE_SHA1;
-			use_sideband = 2;
-			if (args->depth > 0 || args->deepen_since || args->deepen_not)
-				args->deepen = 1;
-
 			/* Filter 'ref' by 'sought' and those that aren't local */
 			mark_complete_and_common_ref(negotiator, args, &ref);
 			filter_refs(args, &ref, sought, nr_sought);
-- 
2.36.0.rc2.10170.gb555eefa6f


^ permalink raw reply related	[flat|nested] 103+ messages in thread

* [PATCH v4 3/8] object-store: add function to free object_info contents
  2022-05-02 17:08     ` [PATCH v4 0/8] cat-file: add --batch-command remote-object-info command Calvin Wan
  2022-05-02 17:08       ` [PATCH v4 1/8] fetch-pack: refactor packet writing Calvin Wan
  2022-05-02 17:08       ` [PATCH v4 2/8] fetch-pack: move fetch default settings Calvin Wan
@ 2022-05-02 17:08       ` Calvin Wan
  2022-05-02 23:23         ` Junio C Hamano
  2022-05-02 17:09       ` [PATCH v4 4/8] object-info: send attribute packet regardless of object ids Calvin Wan
                         ` (13 subsequent siblings)
  16 siblings, 1 reply; 103+ messages in thread
From: Calvin Wan @ 2022-05-02 17:08 UTC (permalink / raw)
  To: git; +Cc: gitster, jonathantanmy, philipoakley, johncai86, calvinwan, me

Introduce free_object_info_contents.

---
 object-file.c  | 16 ++++++++++++++++
 object-store.h |  3 +++
 2 files changed, 19 insertions(+)

diff --git a/object-file.c b/object-file.c
index 5ffbf3d4fd..34a6e34adf 100644
--- a/object-file.c
+++ b/object-file.c
@@ -2659,3 +2659,19 @@ int read_loose_object(const char *path,
 		munmap(map, mapsize);
 	return ret;
 }
+
+void free_object_info_contents(struct object_info *object_info)
+{
+	if (!object_info)
+		return;
+	if (object_info->typep)
+		free(object_info->typep);
+	if (object_info->sizep)
+		free(object_info->sizep);
+	if (object_info->disk_sizep)
+		free(object_info->disk_sizep);
+	if (object_info->delta_base_oid)
+		free(object_info->delta_base_oid);
+	if (object_info->type_name)
+		free(object_info->type_name);
+}
\ No newline at end of file
diff --git a/object-store.h b/object-store.h
index bd2322ed8c..840e04b56f 100644
--- a/object-store.h
+++ b/object-store.h
@@ -533,4 +533,7 @@ int for_each_object_in_pack(struct packed_git *p,
 int for_each_packed_object(each_packed_object_fn, void *,
 			   enum for_each_object_flags flags);
 
+/* Free pointers inside of object_info, but not object_info itself */
+void free_object_info_contents(struct object_info *object_info);
+
 #endif /* OBJECT_STORE_H */
-- 
2.36.0.rc2.10170.gb555eefa6f


^ permalink raw reply related	[flat|nested] 103+ messages in thread

* [PATCH v4 4/8] object-info: send attribute packet regardless of object ids
  2022-05-02 17:08     ` [PATCH v4 0/8] cat-file: add --batch-command remote-object-info command Calvin Wan
                         ` (2 preceding siblings ...)
  2022-05-02 17:08       ` [PATCH v4 3/8] object-store: add function to free object_info contents Calvin Wan
@ 2022-05-02 17:09       ` Calvin Wan
  2022-05-03  0:05         ` Junio C Hamano
  2022-05-03 23:11         ` Jonathan Tan
  2022-05-02 17:09       ` [PATCH v4 5/8] transport: add client side capability to request object-info Calvin Wan
                         ` (12 subsequent siblings)
  16 siblings, 2 replies; 103+ messages in thread
From: Calvin Wan @ 2022-05-02 17:09 UTC (permalink / raw)
  To: git; +Cc: gitster, jonathantanmy, philipoakley, johncai86, calvinwan, me

Currently on the server side of object-info, if the client does not send
any object ids in the request or if the client requests an attribute the
server does not support, the server will either not send any packets
back or error out. Consider the scenario where the client git version is
ahead of the server git version, and the client can request additional
object-info besides 'size'. There needs to be a way to tell whether the
server can honor all of the client attribute requests before the client
sends a request with all of the object ids.  In a future patch, if the
client were to make an initial command request with only attributes, the
server would be able to confirm which attributes it could return.

---
 protocol-caps.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/protocol-caps.c b/protocol-caps.c
index bbde91810a..bc7def0727 100644
--- a/protocol-caps.c
+++ b/protocol-caps.c
@@ -11,6 +11,7 @@
 
 struct requested_info {
 	unsigned size : 1;
+	unsigned unknown : 1;
 };
 
 /*
@@ -40,12 +41,12 @@ static void send_info(struct repository *r, struct packet_writer *writer,
 	struct string_list_item *item;
 	struct strbuf send_buffer = STRBUF_INIT;
 
-	if (!oid_str_list->nr)
-		return;
-
 	if (info->size)
 		packet_writer_write(writer, "size");
 
+	if (info->unknown || !oid_str_list->nr)
+		goto release;
+
 	for_each_string_list_item (item, oid_str_list) {
 		const char *oid_str = item->string;
 		struct object_id oid;
@@ -72,12 +73,13 @@ static void send_info(struct repository *r, struct packet_writer *writer,
 		packet_writer_write(writer, "%s", send_buffer.buf);
 		strbuf_reset(&send_buffer);
 	}
+release:
 	strbuf_release(&send_buffer);
 }
 
 int cap_object_info(struct repository *r, struct packet_reader *request)
 {
-	struct requested_info info;
+	struct requested_info info = { 0 };
 	struct packet_writer writer;
 	struct string_list oid_str_list = STRING_LIST_INIT_DUP;
 
@@ -92,9 +94,7 @@ int cap_object_info(struct repository *r, struct packet_reader *request)
 		if (parse_oid(request->line, &oid_str_list))
 			continue;
 
-		packet_writer_error(&writer,
-				    "object-info: unexpected line: '%s'",
-				    request->line);
+		info.unknown = 1;
 	}
 
 	if (request->status != PACKET_READ_FLUSH) {
-- 
2.36.0.rc2.10170.gb555eefa6f


^ permalink raw reply related	[flat|nested] 103+ messages in thread

* [PATCH v4 5/8] transport: add client side capability to request object-info
  2022-05-02 17:08     ` [PATCH v4 0/8] cat-file: add --batch-command remote-object-info command Calvin Wan
                         ` (3 preceding siblings ...)
  2022-05-02 17:09       ` [PATCH v4 4/8] object-info: send attribute packet regardless of object ids Calvin Wan
@ 2022-05-02 17:09       ` Calvin Wan
  2022-05-03  0:54         ` Junio C Hamano
  2022-05-03 23:15         ` Jonathan Tan
  2022-05-02 17:09       ` [PATCH v4 6/8] transport: add object-info fallback to fetch Calvin Wan
                         ` (11 subsequent siblings)
  16 siblings, 2 replies; 103+ messages in thread
From: Calvin Wan @ 2022-05-02 17:09 UTC (permalink / raw)
  To: git; +Cc: gitster, jonathantanmy, philipoakley, johncai86, calvinwan, me

Sometimes it is useful to get information about an object without having
to download it completely. The server logic has already been implemented
as “a2ba162cda (object-info: support for retrieving object info,
2021-04-20)”. This patch implements the client option for it. Currently,
only 'size' is supported and the server must be v2, however future
patches can implement additional options.

The question of version mismatch often comes up with client/server
relationships. There are two cases to consider here (assuming either
client or server functionality for object-info changes between the
different versions):

 1. client version > server version
 	- client can request additional attributes from server
 2. server version > client version
 	- server can return additional info to the client

The second case is a non-issue since the client would never be able to
request additional info from the server.  In order to solve the first
case, recall an earlier patch where the server sends back the attributes
even if no object ids are sent by the client. This allows the client to
first check whether the server can accept the requested attributes
before sending the entire request.

---
 fetch-pack.c       | 23 +++++++++++++
 fetch-pack.h       |  8 +++++
 transport-helper.c |  7 ++--
 transport.c        | 81 +++++++++++++++++++++++++++++++++++++++++++++-
 transport.h        | 11 +++++++
 5 files changed, 127 insertions(+), 3 deletions(-)

diff --git a/fetch-pack.c b/fetch-pack.c
index 45473b4602..506ca28e05 100644
--- a/fetch-pack.c
+++ b/fetch-pack.c
@@ -1278,6 +1278,29 @@ static void write_command_and_capabilities(struct strbuf *req_buf,
 	packet_buf_delim(req_buf);
 }
 
+void send_object_info_request(int fd_out, struct object_info_args *args)
+{
+	struct strbuf req_buf = STRBUF_INIT;
+	struct string_list unsorted_object_info_options = *args->object_info_options;
+	size_t i;
+
+	write_command_and_capabilities(&req_buf, args->server_options, "object-info");
+
+	if (unsorted_string_list_has_string(&unsorted_object_info_options, "size"))
+		packet_buf_write(&req_buf, "size");
+
+	if (args->oids) {
+		for (i = 0; i < args->oids->nr; i++)
+			packet_buf_write(&req_buf, "oid %s", oid_to_hex(&args->oids->oid[i]));
+	}
+
+	packet_buf_flush(&req_buf);
+	if (write_in_full(fd_out, req_buf.buf, req_buf.len) < 0)
+		die_errno(_("unable to write request to remote"));
+
+	strbuf_release(&req_buf);
+}
+
 static int send_fetch_request(struct fetch_negotiator *negotiator, int fd_out,
 			      struct fetch_pack_args *args,
 			      const struct ref *wants, struct oidset *common,
diff --git a/fetch-pack.h b/fetch-pack.h
index 8c7752fc82..c27018d48c 100644
--- a/fetch-pack.h
+++ b/fetch-pack.h
@@ -69,6 +69,12 @@ struct fetch_pack_args {
 	unsigned connectivity_checked:1;
 };
 
+struct object_info_args {
+	const struct string_list *object_info_options;
+	const struct string_list *server_options;
+	const struct oid_array *oids;
+};
+
 /*
  * sought represents remote references that should be updated from.
  * On return, the names that were found on the remote will have been
@@ -102,4 +108,6 @@ void negotiate_using_fetch(const struct oid_array *negotiation_tips,
  */
 int report_unmatched_refs(struct ref **sought, int nr_sought);
 
+void send_object_info_request(int fd_out, struct object_info_args *args);
+
 #endif
diff --git a/transport-helper.c b/transport-helper.c
index b4dbbabb0c..093946f7fd 100644
--- a/transport-helper.c
+++ b/transport-helper.c
@@ -686,13 +686,16 @@ static int fetch_refs(struct transport *transport,
 
 	/*
 	 * If we reach here, then the server, the client, and/or the transport
-	 * helper does not support protocol v2. --negotiate-only requires
-	 * protocol v2.
+	 * helper does not support protocol v2. --negotiate-only and --object-info
+	 * require protocol v2.
 	 */
 	if (data->transport_options.acked_commits) {
 		warning(_("--negotiate-only requires protocol v2"));
 		return -1;
 	}
+	if (transport->smart_options->object_info) {
+		die(_("--object-info requires protocol v2"));
+	}
 
 	if (!data->get_refs_list_called)
 		get_refs_list_using_list(transport, 0);
diff --git a/transport.c b/transport.c
index 3d64a43ab3..08c505e1d0 100644
--- a/transport.c
+++ b/transport.c
@@ -353,6 +353,79 @@ static struct ref *handshake(struct transport *transport, int for_push,
 	return refs;
 }
 
+static int fetch_object_info(struct transport *transport, struct object_info **object_info_data)
+{
+	size_t i;
+	int size_index = -1;
+	struct git_transport_data *data = transport->data;
+	struct object_info_args args;
+	struct packet_reader reader;
+	struct string_list server_attributes = STRING_LIST_INIT_DUP;
+
+	memset(&args, 0, sizeof(args));
+	args.server_options = transport->server_options;
+	args.object_info_options = transport->smart_options->object_info_options;
+
+	connect_setup(transport, 0);
+	packet_reader_init(&reader, data->fd[0], NULL, 0,
+			PACKET_READ_CHOMP_NEWLINE |
+			PACKET_READ_DIE_ON_ERR_PACKET);
+	data->version = discover_version(&reader);
+
+	transport->hash_algo = reader.hash_algo;
+
+	switch (data->version) {
+	case protocol_v2:
+		if (!server_supports_v2("object-info", 0))
+			return -1;
+		/**
+		 * Send a request with only attributes first. If server can return all
+		 * of the requested attributes, then send request with object ids
+		 */
+		send_object_info_request(data->fd[1], &args);
+		if (packet_reader_read(&reader) != PACKET_READ_NORMAL) {
+			check_stateless_delimiter(transport->stateless_rpc, &reader, "stateless delimiter expected");
+			return -1;
+		}
+		string_list_split(&server_attributes, reader.line, ' ', -1);
+		packet_reader_read(&reader);
+		check_stateless_delimiter(transport->stateless_rpc, &reader, "stateless delimiter expected");
+		if (server_attributes.nr != args.object_info_options->nr)
+			return -1;
+		for (i = 0; i < server_attributes.nr; i++) {
+			if (!strcmp(server_attributes.items[i].string, "size"))
+				size_index = i + 1;
+		}
+		args.oids = transport->smart_options->object_info_oids;
+		send_object_info_request(data->fd[1], &args);
+		break;
+	case protocol_v1:
+	case protocol_v0:
+		die(_("wrong protocol version. expected v2"));
+	case protocol_unknown_version:
+		BUG("unknown protocol version");
+	}
+
+	if (packet_reader_read(&reader) != PACKET_READ_NORMAL)
+		die(_("error reading object info header"));
+	i = 0;
+	while (packet_reader_read(&reader) == PACKET_READ_NORMAL) {
+		struct string_list object_info_values = STRING_LIST_INIT_DUP;
+
+		string_list_split(&object_info_values, reader.line, ' ', -1);
+		if (size_index > 0) {
+			if (!strcmp(object_info_values.items[size_index].string, ""))
+				die("object-info: not our ref %s",
+					object_info_values.items[0].string);
+			*(*object_info_data)[i].sizep = strtoul(object_info_values.items[size_index].string, NULL, 10);
+		}
+		i++;
+	}
+	check_stateless_delimiter(transport->stateless_rpc, &reader, "stateless delimiter expected");
+
+	return 0;
+}
+
 static struct ref *get_refs_via_connect(struct transport *transport, int for_push,
 					struct transport_ls_refs_options *options)
 {
@@ -392,8 +465,14 @@ static int fetch_refs_via_pack(struct transport *transport,
 	args.server_options = transport->server_options;
 	args.negotiation_tips = data->options.negotiation_tips;
 	args.reject_shallow_remote = transport->smart_options->reject_shallow;
+	args.object_info = transport->smart_options->object_info;
 
-	if (!data->got_remote_heads) {
+	if (transport->smart_options && transport->smart_options->object_info) {
+		if (!fetch_object_info(transport, data->options.object_info_data))
+			goto cleanup;
+		ret = -1;
+		goto cleanup;
+	} else if (!data->got_remote_heads) {
 		int i;
 		int must_list_refs = 0;
 		for (i = 0; i < nr_heads; i++) {
diff --git a/transport.h b/transport.h
index 12bc08fc33..dbf60bb710 100644
--- a/transport.h
+++ b/transport.h
@@ -6,6 +6,7 @@
 #include "remote.h"
 #include "list-objects-filter-options.h"
 #include "string-list.h"
+#include "object-store.h"
 
 struct git_transport_options {
 	unsigned thin : 1;
@@ -31,6 +32,12 @@ struct git_transport_options {
 	 */
 	unsigned connectivity_checked:1;
 
+	/*
+	 * Transport will attempt to pull only object-info. Fallbacks
+	 * to pulling entire object if object-info is not supported
+	 */
+	unsigned object_info : 1;
+
 	int depth;
 	const char *deepen_since;
 	const struct string_list *deepen_not;
@@ -54,6 +61,10 @@ struct git_transport_options {
 	 * common commits to this oidset instead of fetching any packfiles.
 	 */
 	struct oidset *acked_commits;
+
+	struct oid_array *object_info_oids;
+	struct object_info **object_info_data;
+	struct string_list *object_info_options;
 };
 
 enum transport_family {
-- 
2.36.0.rc2.10170.gb555eefa6f


^ permalink raw reply related	[flat|nested] 103+ messages in thread

* [PATCH v4 6/8] transport: add object-info fallback to fetch
  2022-05-02 17:08     ` [PATCH v4 0/8] cat-file: add --batch-command remote-object-info command Calvin Wan
                         ` (4 preceding siblings ...)
  2022-05-02 17:09       ` [PATCH v4 5/8] transport: add client side capability to request object-info Calvin Wan
@ 2022-05-02 17:09       ` Calvin Wan
  2022-05-03 23:27         ` Jonathan Tan
  2022-05-02 17:09       ` [PATCH v4 7/8] cat-file: move parse_cmd and DEFAULT_FORMAT up Calvin Wan
                         ` (10 subsequent siblings)
  16 siblings, 1 reply; 103+ messages in thread
From: Calvin Wan @ 2022-05-02 17:09 UTC (permalink / raw)
  To: git; +Cc: gitster, jonathantanmy, philipoakley, johncai86, calvinwan, me

If a v2 server does not support object-info or if the client requests
information that isn't supported by object-info, fetch the objects as if
it were a standard v2 fetch (however without changing any refs).

---
 fetch-pack.c | 14 +++++++++++++-
 fetch-pack.h |  2 ++
 transport.c  | 18 +++++++++++++++---
 3 files changed, 30 insertions(+), 4 deletions(-)

diff --git a/fetch-pack.c b/fetch-pack.c
index 506ca28e05..938d082b80 100644
--- a/fetch-pack.c
+++ b/fetch-pack.c
@@ -1639,6 +1639,9 @@ static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args,
 	if (args->depth > 0 || args->deepen_since || args->deepen_not)
 		args->deepen = 1;
 
+	if (args->object_info)
+		state = FETCH_SEND_REQUEST;
+
 	while (state != FETCH_DONE) {
 		switch (state) {
 		case FETCH_CHECK_LOCAL:
@@ -1648,7 +1651,7 @@ static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args,
 			/* Filter 'ref' by 'sought' and those that aren't local */
 			mark_complete_and_common_ref(negotiator, args, &ref);
 			filter_refs(args, &ref, sought, nr_sought);
-			if (!args->refetch && everything_local(args, &ref))
+			if (!args->refetch && !args->object_info && everything_local(args, &ref))
 				state = FETCH_DONE;
 			else
 				state = FETCH_SEND_REQUEST;
@@ -2035,6 +2038,15 @@ struct ref *fetch_pack(struct fetch_pack_args *args,
 		args->connectivity_checked = 1;
 	}
 
+	if (args->object_info) {
+		struct ref *ref_cpy_reader = ref_cpy;
+		int i = 0;
+		while (ref_cpy_reader) {
+			oid_object_info_extended(the_repository, &ref_cpy_reader->old_oid, &(*args->object_info_data)[i], OBJECT_INFO_LOOKUP_REPLACE);
+			ref_cpy_reader = ref_cpy_reader->next;
+			i++;
+		}
+	}
 	update_shallow(args, sought, nr_sought, &si);
 cleanup:
 	clear_shallow_info(&si);
diff --git a/fetch-pack.h b/fetch-pack.h
index c27018d48c..552fd7bde0 100644
--- a/fetch-pack.h
+++ b/fetch-pack.h
@@ -17,6 +17,7 @@ struct fetch_pack_args {
 	const struct string_list *deepen_not;
 	struct list_objects_filter_options filter_options;
 	const struct string_list *server_options;
+	struct object_info **object_info_data;
 
 	/*
 	 * If not NULL, during packfile negotiation, fetch-pack will send "have"
@@ -43,6 +44,7 @@ struct fetch_pack_args {
 	unsigned reject_shallow_remote:1;
 	unsigned deepen:1;
 	unsigned refetch:1;
+	unsigned object_info:1;
 
 	/*
 	 * Indicate that the remote of this request is a promisor remote. The
diff --git a/transport.c b/transport.c
index 08c505e1d0..b85e52d9a8 100644
--- a/transport.c
+++ b/transport.c
@@ -436,10 +436,12 @@ static int fetch_refs_via_pack(struct transport *transport,
 			       int nr_heads, struct ref **to_fetch)
 {
 	int ret = 0;
+	size_t i;
 	struct git_transport_data *data = transport->data;
 	struct ref *refs = NULL;
 	struct fetch_pack_args args;
 	struct ref *refs_tmp = NULL;
+	struct ref *wanted_refs = xcalloc(1, sizeof (struct ref));
 
 	memset(&args, 0, sizeof(args));
 	args.uploadpack = data->options.uploadpack;
@@ -468,10 +470,19 @@ static int fetch_refs_via_pack(struct transport *transport,
 	args.object_info = transport->smart_options->object_info;
 
 	if (transport->smart_options && transport->smart_options->object_info) {
+		struct ref *ref = wanted_refs;
+
 		if (!fetch_object_info(transport, data->options.object_info_data))
 			goto cleanup;
-		ret = -1;
-		goto cleanup;
+		args.object_info_data = data->options.object_info_data;
+		for (i = 0; i < transport->smart_options->object_info_oids->nr; i++) {
+			struct ref *temp_ref = xcalloc(1, sizeof (struct ref));
+			temp_ref->old_oid = *(transport->smart_options->object_info_oids->oid + i);
+			temp_ref->exact_oid = 1;
+			ref->next = temp_ref;
+			ref = ref->next;
+		}
+		transport->remote_refs = wanted_refs->next;
 	} else if (!data->got_remote_heads) {
 		int i;
 		int must_list_refs = 0;
@@ -489,7 +500,7 @@ static int fetch_refs_via_pack(struct transport *transport,
 	else if (data->version <= protocol_v1)
 		die_if_server_options(transport);
 
-	if (data->options.acked_commits) {
+	if (data->options.acked_commits && !transport->smart_options->object_info) {
 		if (data->version < protocol_v2) {
 			warning(_("--negotiate-only requires protocol v2"));
 			ret = -1;
@@ -532,6 +543,7 @@ static int fetch_refs_via_pack(struct transport *transport,
 
 	free_refs(refs_tmp);
 	free_refs(refs);
+	free_refs(wanted_refs);
 	return ret;
 }
 
-- 
2.36.0.rc2.10170.gb555eefa6f


^ permalink raw reply related	[flat|nested] 103+ messages in thread

* [PATCH v4 7/8] cat-file: move parse_cmd and DEFAULT_FORMAT up
  2022-05-02 17:08     ` [PATCH v4 0/8] cat-file: add --batch-command remote-object-info command Calvin Wan
                         ` (5 preceding siblings ...)
  2022-05-02 17:09       ` [PATCH v4 6/8] transport: add object-info fallback to fetch Calvin Wan
@ 2022-05-02 17:09       ` Calvin Wan
  2022-05-02 17:09       ` [PATCH v4 8/8] cat-file: add --batch-command remote-object-info command Calvin Wan
                         ` (9 subsequent siblings)
  16 siblings, 0 replies; 103+ messages in thread
From: Calvin Wan @ 2022-05-02 17:09 UTC (permalink / raw)
  To: git; +Cc: gitster, jonathantanmy, philipoakley, johncai86, calvinwan, me

It would be useful for a future patch to use these definitions so moving
them higher.

---
 builtin/cat-file.c | 25 ++++++++++++-------------
 1 file changed, 12 insertions(+), 13 deletions(-)

diff --git a/builtin/cat-file.c b/builtin/cat-file.c
index 50cf38999d..cfe74c36eb 100644
--- a/builtin/cat-file.c
+++ b/builtin/cat-file.c
@@ -34,6 +34,8 @@ struct batch_options {
 	const char *format;
 };
 
+#define DEFAULT_FORMAT "%(objectname) %(objecttype) %(objectsize)"
+
 static const char *force_path;
 
 static int filter_object(const char *path, unsigned mode,
@@ -556,6 +558,16 @@ static void parse_cmd_info(struct batch_options *opt,
 	batch_one_object(line, output, opt, data);
 }
 
+static const struct parse_cmd {
+	const char *name;
+	parse_cmd_fn_t fn;
+	unsigned takes_args;
+} commands[] = {
+	{ "contents", parse_cmd_contents, 1},
+	{ "info", parse_cmd_info, 1},
+	{ "flush", NULL, 0},
+};
+
 static void dispatch_calls(struct batch_options *opt,
 		struct strbuf *output,
 		struct expand_data *data,
@@ -583,17 +595,6 @@ static void free_cmds(struct queued_cmd *cmd, size_t *nr)
 	*nr = 0;
 }
 
-
-static const struct parse_cmd {
-	const char *name;
-	parse_cmd_fn_t fn;
-	unsigned takes_args;
-} commands[] = {
-	{ "contents", parse_cmd_contents, 1},
-	{ "info", parse_cmd_info, 1},
-	{ "flush", NULL, 0},
-};
-
 static void batch_objects_command(struct batch_options *opt,
 				    struct strbuf *output,
 				    struct expand_data *data)
@@ -659,8 +660,6 @@ static void batch_objects_command(struct batch_options *opt,
 	strbuf_release(&input);
 }
 
-#define DEFAULT_FORMAT "%(objectname) %(objecttype) %(objectsize)"
-
 static int batch_objects(struct batch_options *opt)
 {
 	struct strbuf input = STRBUF_INIT;
-- 
2.36.0.rc2.10170.gb555eefa6f


^ permalink raw reply related	[flat|nested] 103+ messages in thread

* [PATCH v4 8/8] cat-file: add --batch-command remote-object-info command
  2022-05-02 17:08     ` [PATCH v4 0/8] cat-file: add --batch-command remote-object-info command Calvin Wan
                         ` (6 preceding siblings ...)
  2022-05-02 17:09       ` [PATCH v4 7/8] cat-file: move parse_cmd and DEFAULT_FORMAT up Calvin Wan
@ 2022-05-02 17:09       ` Calvin Wan
  2022-05-04 21:27         ` Jonathan Tan
  2022-07-31 15:24         ` ZheNing Hu
  2022-07-28 23:02       ` [PATCH v5 0/6] " Calvin Wan
                         ` (8 subsequent siblings)
  16 siblings, 2 replies; 103+ messages in thread
From: Calvin Wan @ 2022-05-02 17:09 UTC (permalink / raw)
  To: git; +Cc: gitster, jonathantanmy, philipoakley, johncai86, calvinwan, me

Since the `info` command in cat-file --batch-command prints object info
for a given object, it is natural to add another command in cat-file
--batch-command to print object info for a given object from a remote.
Add `remote-object-info` to cat-file --batch-command.

While `info` takes object ids one at a time, this creates overhead when
making requests to a server so `remote-object-info` instead can take
multiple object ids at once.

cat-file --batch-command is generally implemented in the following
manner:

 - Receive and parse input from user
 - Call respective function attached to command
 - Set batch mode state, get object info, print object info

In --buffer mode, this changes to:

 - Receive and parse input from user
 - Store respective function attached to command in a queue
 - After flush, loop through commands in queue
    - Call respective function attached to command
    - Set batch mode state, get object info, print object info

Notice how the getting and printing of object info is accomplished one
at a time. As described above, this creates a problem for making
requests to a server. Therefore, `remote-object-info` is implemented in
the following manner:

 - Receive and parse input from user
 If command is `remote-object-info`:
    - Get object info from remote
    - Loop through object info
        - Call respective function attached to `info`
        - Set batch mode state, use passed in object info, print object
          info
 Else:
    - Call respective function attached to command
    - Parse input, get object info, print object info

And finally for --buffer mode `remote-object-info`:
 - Receive and parse input from user
 - Store respective function attached to command in a queue
 - After flush, loop through commands in queue:
    If command is `remote-object-info`:
        - Get object info from remote
        - Loop through object info
            - Call respective function attached to `info`
            - Set batch mode state, use passed in object info, print
              object info
    Else:
        - Call respective function attached to command
        - Set batch mode state, get object info, print object info

To summarize, `remote-object-info` gets object info from the remote and
then generates multiples `info` commands with the object info passed in.
It cannot be implemented similarly to `info` and `content` because of
the overhead with remote commenication.

---
 Documentation/git-cat-file.txt |  16 +-
 builtin/cat-file.c             | 200 +++++++++++++++----
 t/t1006-cat-file.sh            | 347 +++++++++++++++++++++++++++++++++
 3 files changed, 522 insertions(+), 41 deletions(-)

diff --git a/Documentation/git-cat-file.txt b/Documentation/git-cat-file.txt
index 24a811f0ef..0d9e8e6214 100644
--- a/Documentation/git-cat-file.txt
+++ b/Documentation/git-cat-file.txt
@@ -115,6 +115,10 @@ info <object>::
 	Print object info for object reference `<object>`. This corresponds to the
 	output of `--batch-check`.
 
+remote-object-info <remote> [<object>]::
+	Print object info for object references `<object>` at specified <remote>.
+	This command may only be combined with `--buffer`.
+
 flush::
 	Used with `--buffer` to execute all preceding commands that were issued
 	since the beginning or since the last flush was issued. When `--buffer`
@@ -245,7 +249,8 @@ newline. The available atoms are:
 	The full hex representation of the object name.
 
 `objecttype`::
-	The type of the object (the same as `cat-file -t` reports).
+	The type of the object (the same as `cat-file -t` reports). See
+	`CAVEATS` below.
 
 `objectsize`::
 	The size, in bytes, of the object (the same as `cat-file -s`
@@ -253,13 +258,14 @@ newline. The available atoms are:
 
 `objectsize:disk`::
 	The size, in bytes, that the object takes up on disk. See the
-	note about on-disk sizes in the `CAVEATS` section below.
+	note about on-disk sizes in the `CAVEATS` section below. Not
+	supported by `remote-object-info`.
 
 `deltabase`::
 	If the object is stored as a delta on-disk, this expands to the
 	full hex representation of the delta base object name.
 	Otherwise, expands to the null OID (all zeroes). See `CAVEATS`
-	below.
+	below. Not supported by `remote-object-info`.
 
 `rest`::
 	If this atom is used in the output string, input lines are split
@@ -346,6 +352,10 @@ directory name.
 CAVEATS
 -------
 
+Note that since object type is currently not supported by the
+object-info command request, git fetches the entire object instead to
+get the object info.
+
 Note that the sizes of objects on disk are reported accurately, but care
 should be taken in drawing conclusions about which refs or objects are
 responsible for disk usage. The size of a packed non-delta object may be
diff --git a/builtin/cat-file.c b/builtin/cat-file.c
index cfe74c36eb..7baa9bed61 100644
--- a/builtin/cat-file.c
+++ b/builtin/cat-file.c
@@ -16,6 +16,9 @@
 #include "packfile.h"
 #include "object-store.h"
 #include "promisor-remote.h"
+#include "alias.h"
+#include "remote.h"
+#include "transport.h"
 
 enum batch_mode {
 	BATCH_MODE_CONTENTS,
@@ -32,11 +35,14 @@ struct batch_options {
 	int unordered;
 	int transform_mode; /* may be 'w' or 'c' for --filters or --textconv */
 	const char *format;
+	int use_remote_info;
 };
 
 #define DEFAULT_FORMAT "%(objectname) %(objecttype) %(objectsize)"
 
 static const char *force_path;
+static struct object_info *remote_object_info;
+static struct oid_array object_info_oids = OID_ARRAY_INIT;
 
 static int filter_object(const char *path, unsigned mode,
 			 const struct object_id *oid,
@@ -425,48 +431,115 @@ static void batch_one_object(const char *obj_name,
 	int flags = opt->follow_symlinks ? GET_OID_FOLLOW_SYMLINKS : 0;
 	enum get_oid_result result;
 
-	result = get_oid_with_context(the_repository, obj_name,
-				      flags, &data->oid, &ctx);
-	if (result != FOUND) {
-		switch (result) {
-		case MISSING_OBJECT:
-			printf("%s missing\n", obj_name);
-			break;
-		case SHORT_NAME_AMBIGUOUS:
-			printf("%s ambiguous\n", obj_name);
-			break;
-		case DANGLING_SYMLINK:
-			printf("dangling %"PRIuMAX"\n%s\n",
-			       (uintmax_t)strlen(obj_name), obj_name);
-			break;
-		case SYMLINK_LOOP:
-			printf("loop %"PRIuMAX"\n%s\n",
-			       (uintmax_t)strlen(obj_name), obj_name);
-			break;
-		case NOT_DIR:
-			printf("notdir %"PRIuMAX"\n%s\n",
-			       (uintmax_t)strlen(obj_name), obj_name);
-			break;
-		default:
-			BUG("unknown get_sha1_with_context result %d\n",
-			       result);
-			break;
+	if (!opt->use_remote_info) {
+		result = get_oid_with_context(the_repository, obj_name,
+						flags, &data->oid, &ctx);
+		if (result != FOUND) {
+			switch (result) {
+			case MISSING_OBJECT:
+				printf("%s missing\n", obj_name);
+				break;
+			case SHORT_NAME_AMBIGUOUS:
+				printf("%s ambiguous\n", obj_name);
+				break;
+			case DANGLING_SYMLINK:
+				printf("dangling %"PRIuMAX"\n%s\n",
+					(uintmax_t)strlen(obj_name), obj_name);
+				break;
+			case SYMLINK_LOOP:
+				printf("loop %"PRIuMAX"\n%s\n",
+					(uintmax_t)strlen(obj_name), obj_name);
+				break;
+			case NOT_DIR:
+				printf("notdir %"PRIuMAX"\n%s\n",
+					(uintmax_t)strlen(obj_name), obj_name);
+				break;
+			default:
+				BUG("unknown get_sha1_with_context result %d\n",
+					result);
+				break;
+			}
+			fflush(stdout);
+			return;
 		}
-		fflush(stdout);
-		return;
-	}
 
-	if (ctx.mode == 0) {
-		printf("symlink %"PRIuMAX"\n%s\n",
-		       (uintmax_t)ctx.symlink_path.len,
-		       ctx.symlink_path.buf);
-		fflush(stdout);
-		return;
+		if (ctx.mode == 0) {
+			printf("symlink %"PRIuMAX"\n%s\n",
+				(uintmax_t)ctx.symlink_path.len,
+				ctx.symlink_path.buf);
+			fflush(stdout);
+			return;
+		}
 	}
 
 	batch_object_write(obj_name, scratch, opt, data, NULL, 0);
 }
 
+static int get_remote_info(struct batch_options *opt, int argc, const char **argv)
+{
+	int retval = 0;
+	size_t i;
+	struct remote *remote = NULL;
+	struct object_id oid;
+	struct string_list object_info_options = STRING_LIST_INIT_NODUP;
+	static struct transport *gtransport;
+	char *format = DEFAULT_FORMAT;
+
+	if (opt->format)
+		format = xstrdup(opt->format);
+
+	remote = remote_get(argv[0]);
+	if (!remote)
+		die(_("must supply valid remote when using --object-info"));
+	oid_array_clear(&object_info_oids);
+	for (i = 1; i < argc; i++) {
+		if (get_oid(argv[i], &oid))
+			die(_("malformed object id '%s'"), argv[i]);
+		oid_array_append(&object_info_oids, &oid);
+	}
+
+	gtransport = transport_get(remote, NULL);
+	if (gtransport->smart_options) {
+		size_t j;
+		int include_size = 0, include_type = 0;
+
+		remote_object_info = xcalloc(object_info_oids.nr, sizeof(struct object_info));
+		gtransport->smart_options->object_info = 1;
+		gtransport->smart_options->object_info_oids = &object_info_oids;
+		/**
+		 * 'size' is the only option currently supported.
+		 * Other options that are passed in the format will default to a
+		 * standard fetch request rather than object-info.
+		 */
+		if (strstr(format, "%(objectsize)")) {
+			string_list_append(&object_info_options, "size");
+			include_size = 1;
+		}
+		if (strstr(format, "%(objecttype)")) {
+			string_list_append(&object_info_options, "type");
+			include_type = 1;
+		}
+		if (strstr(format, "%(objectsize:disk)"))
+			die(_("objectsize:disk is currently not supported with remote-object-info"));
+		if (strstr(format, "%(deltabase)"))
+			die(_("deltabase is currently not supported with remote-object-info"));
+		if (object_info_options.nr > 0) {
+			gtransport->smart_options->object_info_options = &object_info_options;
+			for (j = 0; j < object_info_oids.nr; j++) {
+				if (include_size)
+					remote_object_info[j].sizep = xcalloc(1, sizeof(long));
+				if (include_type)
+					remote_object_info[j].typep = xcalloc(1, sizeof(enum object_type));
+			}
+			gtransport->smart_options->object_info_data = &remote_object_info;
+			retval = transport_fetch_refs(gtransport, NULL);
+		}
+	} else {
+		retval = -1;
+	}
+	return retval;
+}
+
 struct object_cb_data {
 	struct batch_options *opt;
 	struct expand_data *expand;
@@ -538,6 +611,7 @@ typedef void (*parse_cmd_fn_t)(struct batch_options *, const char *,
 struct queued_cmd {
 	parse_cmd_fn_t fn;
 	char *line;
+	const char *name;
 };
 
 static void parse_cmd_contents(struct batch_options *opt,
@@ -565,9 +639,49 @@ static const struct parse_cmd {
 } commands[] = {
 	{ "contents", parse_cmd_contents, 1},
 	{ "info", parse_cmd_info, 1},
+	{ "remote-object-info", parse_cmd_info, 1},
 	{ "flush", NULL, 0},
 };
 
+static void parse_remote_info(struct batch_options *opt,
+			   char *line,
+			   struct strbuf *output,
+			   struct expand_data *data,
+			   const struct parse_cmd *p_cmd,
+			   struct queued_cmd *q_cmd)
+{
+	int count;
+	size_t i;
+	const char **argv;
+
+	count = split_cmdline(line, &argv);
+	if (get_remote_info(opt, count, argv))
+		goto cleanup;
+	opt->use_remote_info = 1;
+	data->skip_object_info = 1;
+	data->mark_query = 0;
+	for (i = 0; i < object_info_oids.nr; i++) {
+		if (remote_object_info[i].sizep)
+			data->size = *remote_object_info[i].sizep;
+		if (remote_object_info[i].typep)
+			data->type = *remote_object_info[i].typep;
+
+		data->oid = object_info_oids.oid[i];
+		if (p_cmd)
+			p_cmd->fn(opt, argv[i+1], output, data);
+		else
+			q_cmd->fn(opt, argv[i+1], output, data);
+	}
+	opt->use_remote_info = 0;
+	data->skip_object_info = 0;
+	data->mark_query = 1;
+
+cleanup:
+	for (i = 0; i < object_info_oids.nr; i++)
+		free_object_info_contents(&remote_object_info[i]);
+	free(remote_object_info);
+}
+
 static void dispatch_calls(struct batch_options *opt,
 		struct strbuf *output,
 		struct expand_data *data,
@@ -579,8 +693,12 @@ static void dispatch_calls(struct batch_options *opt,
 	if (!opt->buffer_output)
 		die(_("flush is only for --buffer mode"));
 
-	for (i = 0; i < nr; i++)
-		cmd[i].fn(opt, cmd[i].line, output, data);
+	for (i = 0; i < nr; i++) {
+		if (!strcmp(cmd[i].name, "remote-object-info"))
+			parse_remote_info(opt, cmd[i].line, output, data, NULL, &cmd[i]);
+		else
+			cmd[i].fn(opt, cmd[i].line, output, data);
+	}
 
 	fflush(stdout);
 }
@@ -640,11 +758,17 @@ static void batch_objects_command(struct batch_options *opt,
 			dispatch_calls(opt, output, data, queued_cmd, nr);
 			free_cmds(queued_cmd, &nr);
 		} else if (!opt->buffer_output) {
-			cmd->fn(opt, p, output, data);
+			if (!strcmp(cmd->name, "remote-object-info")) {
+				char *line = xstrdup_or_null(p);
+				parse_remote_info(opt, line, output, data, cmd, NULL);
+			} else {
+				cmd->fn(opt, p, output, data);
+			}
 		} else {
 			ALLOC_GROW(queued_cmd, nr + 1, alloc);
 			call.fn = cmd->fn;
 			call.line = xstrdup_or_null(p);
+			call.name = cmd->name;
 			queued_cmd[nr++] = call;
 		}
 	}
diff --git a/t/t1006-cat-file.sh b/t/t1006-cat-file.sh
index 1b85207694..69ee3f8330 100755
--- a/t/t1006-cat-file.sh
+++ b/t/t1006-cat-file.sh
@@ -2,6 +2,9 @@
 
 test_description='git cat-file'
 
+GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME=main
+export GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME
+
 . ./test-lib.sh
 
 test_cmdmode_usage () {
@@ -1087,4 +1090,348 @@ test_expect_success 'batch-command flush without --buffer' '
 	grep "^fatal:.*flush is only for --buffer mode.*" err
 '
 
+# This section tests --batch-command with remote-object-info command
+# If a filter is not set, the filter defaults to "%(objectname) %(objectsize) %(objecttype)"
+# Since "%(objecttype)" is currently not supported by the command request, object-info,
+# the filters are set to "%(objectname) %(objectsize)".
+
+set_transport_variables () {
+    tree_sha1=$(git -C "$1" write-tree)
+	commit_sha1=$(echo_without_newline "$commit_message" | git -C "$1" commit-tree $tree_sha1)
+	tag_sha1=$(echo_without_newline "$tag_content" | git -C "$1" hash-object -t tag --stdin -w)
+	tag_size=$(strlen "$tag_content")
+}
+
+# Test --batch-command remote-object-info with 'git://' transport
+
+. "$TEST_DIRECTORY"/lib-git-daemon.sh
+start_git_daemon --export-all --enable=receive-pack
+daemon_parent=$GIT_DAEMON_DOCUMENT_ROOT_PATH/parent
+
+test_expect_success 'create repo to be served by git-daemon' '
+	git init "$daemon_parent" &&
+	echo_without_newline "$hello_content" > $daemon_parent/hello &&
+	git -C "$daemon_parent" update-index --add hello
+'
+
+set_transport_variables "$daemon_parent"
+
+test_expect_success 'batch-command remote-object-info git://' '
+	(
+		cd "$daemon_parent" &&
+
+		echo "$hello_sha1 $hello_size" >expect &&
+		echo "$tree_sha1 $tree_size" >>expect &&
+		echo "$commit_sha1 $commit_size" >>expect &&
+		echo "$tag_sha1 $tag_size" >>expect &&
+		git cat-file --batch-command="%(objectname) %(objectsize)" >actual <<-EOF &&
+		remote-object-info "$GIT_DAEMON_URL/parent" $hello_sha1
+		remote-object-info "$GIT_DAEMON_URL/parent" $tree_sha1
+		remote-object-info "$GIT_DAEMON_URL/parent" $commit_sha1
+		remote-object-info "$GIT_DAEMON_URL/parent" $tag_sha1
+		EOF
+		test_cmp expect actual
+	)
+'
+
+test_expect_success 'batch-command remote-object-info git:// multiple sha1 per line' '
+	(
+		cd "$daemon_parent" &&
+
+		echo "$hello_sha1 $hello_size" >expect &&
+		echo "$tree_sha1 $tree_size" >>expect &&
+		echo "$commit_sha1 $commit_size" >>expect &&
+		echo "$tag_sha1 $tag_size" >>expect &&
+		git cat-file --batch-command="%(objectname) %(objectsize)" >actual <<-EOF &&
+		remote-object-info "$GIT_DAEMON_URL/parent" $hello_sha1 $tree_sha1 $commit_sha1 $tag_sha1
+		EOF
+		test_cmp expect actual
+	)
+'
+
+test_expect_success 'batch-command remote-object-info http:// default filter' '
+	(
+		cd "$daemon_parent" &&
+
+		echo "$hello_sha1 blob $hello_size" >expect &&
+		echo "$tree_sha1 tree $tree_size" >>expect &&
+		echo "$commit_sha1 commit $commit_size" >>expect &&
+		echo "$tag_sha1 tag $tag_size" >>expect &&
+		git cat-file --batch-command >actual <<-EOF &&
+		remote-object-info "$GIT_DAEMON_URL/parent" $hello_sha1 $tree_sha1
+		remote-object-info "$GIT_DAEMON_URL/parent" $commit_sha1 $tag_sha1
+		EOF
+		test_cmp expect actual
+	)
+'
+
+test_expect_success 'batch-command --buffer remote-object-info git://' '
+	(
+		cd "$daemon_parent" &&
+
+		echo "$hello_sha1 $hello_size" >expect &&
+		echo "$tree_sha1 $tree_size" >>expect &&
+		echo "$commit_sha1 $commit_size" >>expect &&
+		echo "$tag_sha1 $tag_size" >>expect &&
+		git cat-file --batch-command="%(objectname) %(objectsize)" --buffer >actual <<-EOF &&
+		remote-object-info "$GIT_DAEMON_URL/parent" $hello_sha1 $tree_sha1
+		remote-object-info "$GIT_DAEMON_URL/parent" $commit_sha1 $tag_sha1
+		flush
+		EOF
+		test_cmp expect actual
+	)
+'
+
+stop_git_daemon
+
+# Test --batch-command remote-object-info with 'http://' transport
+
+. "$TEST_DIRECTORY"/lib-httpd.sh
+start_httpd
+
+test_expect_success 'create repo to be served by http:// transport' '
+	git init "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
+	git -C "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" config http.receivepack true &&
+	echo_without_newline "$hello_content" > $HTTPD_DOCUMENT_ROOT_PATH/http_parent/hello &&
+	git -C "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" update-index --add hello
+'
+set_transport_variables "$HTTPD_DOCUMENT_ROOT_PATH/http_parent"
+
+test_expect_success 'batch-command remote-object-info http://' '
+	(
+		cd "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
+
+		echo "$hello_sha1 $hello_size" >expect &&
+		echo "$tree_sha1 $tree_size" >>expect &&
+		echo "$commit_sha1 $commit_size" >>expect &&
+		echo "$tag_sha1 $tag_size" >>expect &&
+		git cat-file --batch-command="%(objectname) %(objectsize)" >actual <<-EOF &&
+		remote-object-info "$HTTPD_URL/smart/http_parent" $hello_sha1
+		remote-object-info "$HTTPD_URL/smart/http_parent" $tree_sha1
+		remote-object-info "$HTTPD_URL/smart/http_parent" $commit_sha1
+		remote-object-info "$HTTPD_URL/smart/http_parent" $tag_sha1
+		EOF
+		test_cmp expect actual
+	)
+'
+
+test_expect_success 'batch-command remote-object-info http:// one line' '
+	(
+		cd "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
+
+		echo "$hello_sha1 $hello_size" >expect &&
+		echo "$tree_sha1 $tree_size" >>expect &&
+		echo "$commit_sha1 $commit_size" >>expect &&
+		echo "$tag_sha1 $tag_size" >>expect &&
+		git cat-file --batch-command="%(objectname) %(objectsize)" >actual <<-EOF &&
+		remote-object-info "$HTTPD_URL/smart/http_parent" $hello_sha1 $tree_sha1 $commit_sha1 $tag_sha1
+		EOF
+		test_cmp expect actual
+	)
+'
+
+test_expect_success 'batch-command --buffer remote-object-info http://' '
+	(
+		cd "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
+
+		echo "$hello_sha1 $hello_size" >expect &&
+		echo "$tree_sha1 $tree_size" >>expect &&
+		echo "$commit_sha1 $commit_size" >>expect &&
+		echo "$tag_sha1 $tag_size" >>expect &&
+
+		git cat-file --batch-command="%(objectname) %(objectsize)" --buffer >actual <<-EOF &&
+		remote-object-info "$HTTPD_URL/smart/http_parent" $hello_sha1 $tree_sha1
+		remote-object-info "$HTTPD_URL/smart/http_parent" $commit_sha1 $tag_sha1
+		flush
+		EOF
+		test_cmp expect actual
+	)
+'
+
+test_expect_success 'batch-command remote-object-info http:// default filter' '
+	(
+		cd "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
+
+		echo "$hello_sha1 blob $hello_size" >expect &&
+		echo "$tree_sha1 tree $tree_size" >>expect &&
+		echo "$commit_sha1 commit $commit_size" >>expect &&
+		echo "$tag_sha1 tag $tag_size" >>expect &&
+
+		git cat-file --batch-command >actual <<-EOF &&
+		remote-object-info "$HTTPD_URL/smart/http_parent" $hello_sha1 $tree_sha1
+		remote-object-info "$HTTPD_URL/smart/http_parent" $commit_sha1 $tag_sha1
+		EOF
+		test_cmp expect actual
+	)
+'
+
+test_expect_success 'remote-object-info fails on unspported filter option (objectsize:disk)' '
+	(
+		cd "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
+
+		test_must_fail git cat-file --batch-command="%(objectsize:disk)" 2>err <<-EOF &&
+		remote-object-info "$HTTPD_URL/smart/http_parent" $hello_sha1
+		EOF
+		test_i18ngrep "objectsize:disk is currently not supported with remote-object-info" err
+	)
+'
+
+test_expect_success 'remote-object-info fails on unspported filter option (deltabase)' '
+	(
+		cd "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
+
+		test_must_fail git cat-file --batch-command="%(deltabase)" 2>err <<-EOF &&
+		remote-object-info "$HTTPD_URL/smart/http_parent" $hello_sha1
+		EOF
+		test_i18ngrep "deltabase is currently not supported with remote-object-info" err
+	)
+'
+
+test_expect_success 'remote-object-info fails on server with legacy protocol' '
+	(
+		cd "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
+
+		test_must_fail git -c protocol.version=0 cat-file --batch-command="%(objectname) %(objectsize)" 2>err <<-EOF &&
+		remote-object-info "$HTTPD_URL/smart/http_parent" $hello_sha1
+		EOF
+		test_i18ngrep "object-info requires protocol v2" err
+	)
+'
+
+test_expect_success 'remote-object-info fails on server with legacy protocol fallback' '
+	(
+		cd "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
+
+		test_must_fail git -c protocol.version=0 cat-file --batch-command 2>err <<-EOF &&
+		remote-object-info "$HTTPD_URL/smart/http_parent" $hello_sha1
+		EOF
+		test_i18ngrep "object-info requires protocol v2" err
+	)
+'
+
+test_expect_success 'remote-object-info fails on malformed OID' '
+	(
+		cd "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
+		malformed_object_id="this_id_is_not_valid" &&
+
+		test_must_fail git cat-file --batch-command="%(objectname) %(objectsize)" 2>err <<-EOF &&
+		remote-object-info "$HTTPD_URL/smart/http_parent" $malformed_object_id
+		EOF
+		test_i18ngrep "malformed object id '$malformed_object_id'" err
+	)
+'
+
+test_expect_success 'remote-object-info fails on malformed OID fallback' '
+	(
+		cd "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
+		malformed_object_id="this_id_is_not_valid" &&
+
+		test_must_fail git cat-file --batch-command 2>err <<-EOF &&
+		remote-object-info "$HTTPD_URL/smart/http_parent" $malformed_object_id
+		EOF
+		test_i18ngrep "malformed object id '$malformed_object_id'" err
+	)
+'
+
+test_expect_success 'remote-object-info fails on missing OID' '
+	git clone "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" missing_oid_repo &&
+	test_commit -C missing_oid_repo message1 c.txt &&
+	(
+		cd missing_oid_repo &&
+		object_id=$(git rev-parse message1:c.txt) &&
+		test_must_fail git cat-file --batch-command="%(objectname) %(objectsize)" 2>err <<-EOF &&
+		remote-object-info "$HTTPD_URL/smart/http_parent" $object_id
+		EOF
+		test_i18ngrep "object-info: not our ref $object_id" err
+	)
+'
+
+test_expect_success 'remote-object-info fails on missing OID fallback' '
+	(
+		cd missing_oid_repo &&
+		object_id=$(git rev-parse message1:c.txt) &&
+		test_must_fail git cat-file --batch-command 2>err <<-EOF &&
+		remote-object-info "$HTTPD_URL/smart/http_parent" $object_id
+		EOF
+		test_i18ngrep "fatal: remote error: upload-pack: not our ref $object_id" err
+	)
+'
+
+# Test --batch-command remote-object-info with 'file://' transport
+
+test_expect_success 'create repo to be served by file:// transport' '
+	git init server &&
+	git -C server config protocol.version 2 &&
+	echo_without_newline "$hello_content" > server/hello &&
+	git -C server update-index --add hello
+'
+
+set_transport_variables "server"
+
+test_expect_success 'batch-command remote-object-info file://' '
+	(
+		cd server &&
+
+		echo "$hello_sha1 $hello_size" >expect &&
+		echo "$tree_sha1 $tree_size" >>expect &&
+		echo "$commit_sha1 $commit_size" >>expect &&
+		echo "$tag_sha1 $tag_size" >>expect &&
+		git cat-file --batch-command="%(objectname) %(objectsize)" >actual <<-EOF &&
+		remote-object-info "file://$(pwd)" $hello_sha1
+		remote-object-info "file://$(pwd)" $tree_sha1
+		remote-object-info "file://$(pwd)" $commit_sha1
+		remote-object-info "file://$(pwd)" $tag_sha1
+		EOF
+		test_cmp expect actual
+	)
+'
+
+test_expect_success 'batch-command remote-object-info file:// multiple sha1 per line' '
+	(
+		cd server &&
+
+		echo "$hello_sha1 $hello_size" >expect &&
+		echo "$tree_sha1 $tree_size" >>expect &&
+		echo "$commit_sha1 $commit_size" >>expect &&
+		echo "$tag_sha1 $tag_size" >>expect &&
+		git cat-file --batch-command="%(objectname) %(objectsize)" >actual <<-EOF &&
+		remote-object-info "file://$(pwd)" $hello_sha1 $tree_sha1 $commit_sha1 $tag_sha1
+		EOF
+		test_cmp expect actual
+	)
+'
+
+test_expect_success 'batch-command --buffer remote-object-info file://' '
+	(
+		cd server &&
+
+		echo "$hello_sha1 $hello_size" >expect &&
+		echo "$tree_sha1 $tree_size" >>expect &&
+		echo "$commit_sha1 $commit_size" >>expect &&
+		echo "$tag_sha1 $tag_size" >>expect &&
+		git cat-file --batch-command="%(objectname) %(objectsize)" --buffer >actual <<-EOF &&
+		remote-object-info "file://$(pwd)" $hello_sha1 $tree_sha1
+		remote-object-info "file://$(pwd)" $commit_sha1 $tag_sha1
+		flush
+		EOF
+		test_cmp expect actual
+	)
+'
+
+test_expect_success 'batch-command remote-object-info file:// default filter' '
+	(
+		cd server &&
+
+		echo "$hello_sha1 blob $hello_size" >expect &&
+		echo "$tree_sha1 tree $tree_size" >>expect &&
+		echo "$commit_sha1 commit $commit_size" >>expect &&
+		echo "$tag_sha1 tag $tag_size" >>expect &&
+		git cat-file --batch-command >actual <<-EOF &&
+		remote-object-info "file://$(pwd)" $hello_sha1 $tree_sha1
+		remote-object-info "file://$(pwd)" $commit_sha1 $tag_sha1
+		EOF
+		test_cmp expect actual
+	)
+'
+
 test_done
-- 
2.36.0.rc2.10170.gb555eefa6f


^ permalink raw reply related	[flat|nested] 103+ messages in thread

* Re: [PATCH v4 2/8] fetch-pack: move fetch default settings
  2022-05-02 17:08       ` [PATCH v4 2/8] fetch-pack: move fetch default settings Calvin Wan
@ 2022-05-02 22:58         ` Junio C Hamano
  2022-05-03 23:06           ` Jonathan Tan
  0 siblings, 1 reply; 103+ messages in thread
From: Junio C Hamano @ 2022-05-02 22:58 UTC (permalink / raw)
  To: Calvin Wan; +Cc: git, jonathantanmy, philipoakley, johncai86, me

Calvin Wan <calvinwan@google.com> writes:

> When the state machine in do_fetch_pack_v2() is in FETCH_CHECK_LOCAL, we
> set a few v2-specific defaults. It will be helpful for a future patch to
> have these defaults set if the initial state is not FETCH_CHECK_LOCAL.


> This is a safe change since the initial state is currently always
> FETCH_CHECK_LOCAL, so we're guaranteed to hit that code whether it's in
> the state machine or not.

What does "it" (which supposed to be able to be in the state machine
and also not to be in the state matchine) in the last sentence refer
to?

>
> ---

Missing sign-off.

The patch looks correct and I agree that this is a benign no-op
because the initial value of state is FETCH_CHECK_LOCAL (i.e. the
block of code moved here will execute pretty much as the first
thing, and the relative order between that block and sorting of ref
list should not matter).  I just didn't understand the explanation
given by the patch why it is safe.

Thanks.

>  fetch-pack.c | 12 ++++++------
>  1 file changed, 6 insertions(+), 6 deletions(-)
>
> diff --git a/fetch-pack.c b/fetch-pack.c
> index e06125c90a..45473b4602 100644
> --- a/fetch-pack.c
> +++ b/fetch-pack.c
> @@ -1610,18 +1610,18 @@ static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args,
>  		reader.me = "fetch-pack";
>  	}
>  
> +	/* v2 supports these by default */
> +	allow_unadvertised_object_request |= ALLOW_REACHABLE_SHA1;
> +	use_sideband = 2;
> +	if (args->depth > 0 || args->deepen_since || args->deepen_not)
> +		args->deepen = 1;
> +
>  	while (state != FETCH_DONE) {
>  		switch (state) {
>  		case FETCH_CHECK_LOCAL:
>  			sort_ref_list(&ref, ref_compare_name);
>  			QSORT(sought, nr_sought, cmp_ref_by_name);
>  
> -			/* v2 supports these by default */
> -			allow_unadvertised_object_request |= ALLOW_REACHABLE_SHA1;
> -			use_sideband = 2;
> -			if (args->depth > 0 || args->deepen_since || args->deepen_not)
> -				args->deepen = 1;
> -
>  			/* Filter 'ref' by 'sought' and those that aren't local */
>  			mark_complete_and_common_ref(negotiator, args, &ref);
>  			filter_refs(args, &ref, sought, nr_sought);

^ permalink raw reply	[flat|nested] 103+ messages in thread

* Re: [PATCH v4 3/8] object-store: add function to free object_info contents
  2022-05-02 17:08       ` [PATCH v4 3/8] object-store: add function to free object_info contents Calvin Wan
@ 2022-05-02 23:23         ` Junio C Hamano
  2022-05-04 19:09           ` Junio C Hamano
  2022-05-05  0:15           ` Junio C Hamano
  0 siblings, 2 replies; 103+ messages in thread
From: Junio C Hamano @ 2022-05-02 23:23 UTC (permalink / raw)
  To: Calvin Wan; +Cc: git, jonathantanmy, philipoakley, johncai86, me

Calvin Wan <calvinwan@google.com> writes:

> Introduce free_object_info_contents.
>
> ---
>  object-file.c  | 16 ++++++++++++++++
>  object-store.h |  3 +++
>  2 files changed, 19 insertions(+)
>
> diff --git a/object-file.c b/object-file.c
> index 5ffbf3d4fd..34a6e34adf 100644
> --- a/object-file.c
> +++ b/object-file.c
> @@ -2659,3 +2659,19 @@ int read_loose_object(const char *path,
>  		munmap(map, mapsize);
>  	return ret;
>  }
> +
> +void free_object_info_contents(struct object_info *object_info)
> +{
> +	if (!object_info)
> +		return;

This is OK, but ...


> +	if (object_info->typep)
> +		free(object_info->typep);
> +	if (object_info->sizep)
> +		free(object_info->sizep);
> +	if (object_info->disk_sizep)
> +		free(object_info->disk_sizep);
> +	if (object_info->delta_base_oid)
> +		free(object_info->delta_base_oid);
> +	if (object_info->type_name)
> +		free(object_info->type_name);

	if (PTR)
		free(PTR);

can and should be written as

	free(PTR);

If we are reusing object_info after calling this function, we
_might_ want to use FREE_AND_NULL() instead of free().

> +}
> \ No newline at end of file

Let's avoid such incomplete lines.

> diff --git a/object-store.h b/object-store.h
> index bd2322ed8c..840e04b56f 100644
> --- a/object-store.h
> +++ b/object-store.h
> @@ -533,4 +533,7 @@ int for_each_object_in_pack(struct packed_git *p,
>  int for_each_packed_object(each_packed_object_fn, void *,
>  			   enum for_each_object_flags flags);
>  
> +/* Free pointers inside of object_info, but not object_info itself */
> +void free_object_info_contents(struct object_info *object_info);
> +
>  #endif /* OBJECT_STORE_H */

^ permalink raw reply	[flat|nested] 103+ messages in thread

* Re: [PATCH v4 4/8] object-info: send attribute packet regardless of object ids
  2022-05-02 17:09       ` [PATCH v4 4/8] object-info: send attribute packet regardless of object ids Calvin Wan
@ 2022-05-03  0:05         ` Junio C Hamano
  2022-05-03 19:21           ` Calvin Wan
  2022-05-03 23:11         ` Jonathan Tan
  1 sibling, 1 reply; 103+ messages in thread
From: Junio C Hamano @ 2022-05-03  0:05 UTC (permalink / raw)
  To: Calvin Wan; +Cc: git, jonathantanmy, philipoakley, johncai86, me

Calvin Wan <calvinwan@google.com> writes:

> Currently on the server side of object-info, if the client does not send
> any object ids in the request or if the client requests an attribute the
> server does not support, the server will either not send any packets
> back or error out.

There is an early return when oid_str_list->nr (i.e. number of
objects we are queried) is zero, and we return without showing the
"size" token in the output (called "attribute").  The change in this
patch changes the behaviour and makes such a request responded with
"size" but without any size information for no objects.

It is unclear why that is a good change, though.

The loop that accepts requests in today's code sends an error when
it sees a line that it does not understand.  The patch stops doing
so.  Also, after ignoring such an unknown line, the requests that
were understood are responded to by send_info() function.  The patch
instead refuses to give any output in such a case.

It is unclear why either of these two makes it a good change,
though.

> Consider the scenario where the client git version is
> ahead of the server git version, and the client can request additional
> object-info besides 'size'. There needs to be a way to tell whether the
> server can honor all of the client attribute requests before the client
> sends a request with all of the object ids.

Yes.  If we want to learn about "size", "frotz", and "nitfol"
attributes about these objects, we can send "size", "frotz",
"nitfol", and then start feeding object names.  Then we can observe
that "frotz" were not liked to learn that this old server does not
understand it, and the same for "nitfol".  At least we would have
learned about size of these objects, without this patch.

Now, an updated server side with this patch would respond with
"size" and no other response.  Does it mean it does not understand
"frotz", it does not understand "nitfol", or neither?  Did we get no
response because we didn't feed objects, or because we asked for
something they don't know about?

How well does the current client work with today's server side and
the server side updated with this patch?  I _think_ this change is
essentially a no-op when there is no version skew (i.e. we do not
ask for anything today's server does not know about and we do not
necessarily ask today's server about zero objects).

Am I correct to assume that those who are talking with today's
server side are all out-of-tree custom clients?

I am wondering how much damage we are causing them with this change
in behaviour, especially with the lack of "You asked for X, but I
don't understand X", which is replaced by no output, which would be
totally unexpected by them.

It totally is possible that this "object-info" request is treated as
an experimental curiosity that is not ready for production and has
no existing users.  If this were in 2006, I would just _declare_
such is a case and move on, breaking the protocol for existing users
whose number is zero.  But I am afraid that we no longer live in
such a simpler world, so...

> In a future patch, if the
> client were to make an initial command request with only attributes, the
> server would be able to confirm which attributes it could return.
>
> ---
>  protocol-caps.c | 14 +++++++-------
>  1 file changed, 7 insertions(+), 7 deletions(-)
>
> diff --git a/protocol-caps.c b/protocol-caps.c
> index bbde91810a..bc7def0727 100644
> --- a/protocol-caps.c
> +++ b/protocol-caps.c
> @@ -11,6 +11,7 @@
>  
>  struct requested_info {
>  	unsigned size : 1;
> +	unsigned unknown : 1;
>  };

OK.

>  /*
> @@ -40,12 +41,12 @@ static void send_info(struct repository *r, struct packet_writer *writer,
>  	struct string_list_item *item;
>  	struct strbuf send_buffer = STRBUF_INIT;
>  
> -	if (!oid_str_list->nr)
> -		return;
> -
>  	if (info->size)
>  		packet_writer_write(writer, "size");
>  
> +	if (info->unknown || !oid_str_list->nr)
> +		goto release;
> +
>  	for_each_string_list_item (item, oid_str_list) {
>  		const char *oid_str = item->string;
>  		struct object_id oid;
> @@ -72,12 +73,13 @@ static void send_info(struct repository *r, struct packet_writer *writer,
>  		packet_writer_write(writer, "%s", send_buffer.buf);
>  		strbuf_reset(&send_buffer);
>  	}
> +release:
>  	strbuf_release(&send_buffer);
>  }

OK, except for the bypass for info->unknown case, which I am not
sure about.

>  int cap_object_info(struct repository *r, struct packet_reader *request)
>  {
> -	struct requested_info info;
> +	struct requested_info info = { 0 };

Wow.  We have been using info uninitialized?  Is this a silent
bugfix?

If info.size bit was on due to on-stack garbage, we would have given
our response with "size" attribute prefixed, even when the client
side never requested it.

> @@ -92,9 +94,7 @@ int cap_object_info(struct repository *r, struct packet_reader *request)
>  		if (parse_oid(request->line, &oid_str_list))
>  			continue;
>  
> -		packet_writer_error(&writer,
> -				    "object-info: unexpected line: '%s'",
> -				    request->line);
> +		info.unknown = 1;


^ permalink raw reply	[flat|nested] 103+ messages in thread

* Re: [PATCH v4 5/8] transport: add client side capability to request object-info
  2022-05-02 17:09       ` [PATCH v4 5/8] transport: add client side capability to request object-info Calvin Wan
@ 2022-05-03  0:54         ` Junio C Hamano
  2022-05-03 18:58           ` Calvin Wan
  2022-05-03 23:15         ` Jonathan Tan
  1 sibling, 1 reply; 103+ messages in thread
From: Junio C Hamano @ 2022-05-03  0:54 UTC (permalink / raw)
  To: Calvin Wan; +Cc: git, jonathantanmy, philipoakley, johncai86, me

Calvin Wan <calvinwan@google.com> writes:

> The question of version mismatch often comes up with client/server
> relationships. There are two cases to consider here (assuming either
> client or server functionality for object-info changes between the
> different versions):
>
>  1. client version > server version
>  	- client can request additional attributes from server
>  2. server version > client version
>  	- server can return additional info to the client
>
> The second case is a non-issue since the client would never be able to
> request additional info from the server.

That assumes that we can never remove attributes once we start
supporting them, so it is not a non-issue exactly.  The way we make
it a non-issue is to make sure that the server returns no more than
what the client has asked, i.e. the same way we deal with the first
case.

> In order to solve the first
> case, recall an earlier patch where the server sends back the attributes
> even if no object ids are sent by the client. This allows the client to
> first check whether the server can accept the requested attributes
> before sending the entire request.

It requires two round-trips when the server does not know something
the client has asked, one without any object names, and then a real
request with the list of object names?

I think in practice, because the way the previous step is
structured, you can send a full request and in the non-error
codepath the server side will understand everything and give full
response back without an extra round-trip.  Only in the error case,
the client will see the set of acceptable attributes and no object
data (and implicitly treat that as an indication of an error), but
then what would the client do after that happens?

 - The client will just give up; the attributes it wanted but the
   server couldn't supply are so fundamentally necessary to perform
   whatever the client wanted to do.

 - The client will retry the request by asking a known subset of
   attributes for the same set of objects.  This requires an extra
   round-trip.  If an earlier step didn't make .unknown case bypass
   the response loop, this extra round-trip would have been totally
   unnecessary.

> index 8c7752fc82..c27018d48c 100644
> --- a/fetch-pack.h
> +++ b/fetch-pack.h
> @@ -69,6 +69,12 @@ struct fetch_pack_args {
>  	unsigned connectivity_checked:1;
>  };
>  
> +struct object_info_args {
> +	const struct string_list *object_info_options;
> +	const struct string_list *server_options;
> +	const struct oid_array *oids;
> +};

These are const pointers because these lists are not owned by this
structure and are borrowed from somebody else?

> diff --git a/transport-helper.c b/transport-helper.c
> index b4dbbabb0c..093946f7fd 100644
> --- a/transport-helper.c
> +++ b/transport-helper.c
> @@ -686,13 +686,16 @@ static int fetch_refs(struct transport *transport,
>  
>  	/*
>  	 * If we reach here, then the server, the client, and/or the transport
> -	 * helper does not support protocol v2. --negotiate-only requires
> -	 * protocol v2.
> +	 * helper does not support protocol v2. --negotiate-only and --object-info
> +	 * require protocol v2.
>  	 */

OK.

>  	if (data->transport_options.acked_commits) {
>  		warning(_("--negotiate-only requires protocol v2"));
>  		return -1;
>  	}
> +	if (transport->smart_options->object_info) {
> +		die(_("--object-info requires protocol v2"));
> +	}

OK.

> diff --git a/transport.c b/transport.c
> index 3d64a43ab3..08c505e1d0 100644
> --- a/transport.c
> +++ b/transport.c
> @@ -353,6 +353,79 @@ static struct ref *handshake(struct transport *transport, int for_push,
>  	return refs;
>  }
>  
> +static int fetch_object_info(struct transport *transport, struct object_info **object_info_data)
> +{
> +	size_t i;
> +	int size_index = -1;
> +	struct git_transport_data *data = transport->data;
> +	struct object_info_args args;
> +	struct packet_reader reader;
> +	struct string_list server_attributes = STRING_LIST_INIT_DUP;
> +
> +	memset(&args, 0, sizeof(args));
> +	args.server_options = transport->server_options;
> +	args.object_info_options = transport->smart_options->object_info_options;

It is unclear who fills smart_options->object_info_options and from
what.  Are we referring to something that is not yet used, or worse
yet, code that is not yet written?

It seems that the result of applying 1-5/8 does not compile.

> +	connect_setup(transport, 0);
> +	packet_reader_init(&reader, data->fd[0], NULL, 0,
> +			PACKET_READ_CHOMP_NEWLINE |
> +			PACKET_READ_DIE_ON_ERR_PACKET);
> +	data->version = discover_version(&reader);
> +
> +	transport->hash_algo = reader.hash_algo;
> +
> +	switch (data->version) {
> +	case protocol_v2:
> +		if (!server_supports_v2("object-info", 0))
> +			return -1;
> +		/**
> +		 * Send a request with only attributes first. If server can return all
> +		 * of the requested attributes, then send request with object ids
> +		 */

Doesn't it force the most pessimistic behaviour, i.e. always do a
likely-to-be-useless (when the feature becomes widely avaiable)
probe round-trip before making a real request?

> +		send_object_info_request(data->fd[1], &args);
> +		if (packet_reader_read(&reader) != PACKET_READ_NORMAL) {
> +			check_stateless_delimiter(transport->stateless_rpc, &reader, "stateless delimiter expected");
> +			return -1;
> +		}

All these check_stateless_delimiter() calls in this patch are on
overly long lines.

> +		string_list_split(&server_attributes, reader.line, ' ', -1);
> +		packet_reader_read(&reader);
> +		check_stateless_delimiter(transport->stateless_rpc, &reader, "stateless delimiter expected");
> +		if (server_attributes.nr != args.object_info_options->nr)
> +			return -1;

So, I am guessing (without having the code that prepares the right
hand side) that we are comparing the number of attributes we asked
(on the right hand side) and the number of attributes the other side
said it supports in response to our request (on the left hand
side).  

This does not protect against a server that responds with attribute
names duplicated or any somewhat odd third-party servers that are
not identical to what you wrote here.  I would rather see us to loop
through object_info_options array and see if server_attributes list
says all of them are supported more explicitly.  That can be done in
the loop below, where you'd figure out the "foo_index" for each
attribute "foo" you are supporting in the code.

> +		for (i = 0; i < server_attributes.nr; i++) {
> +			if (!strcmp(server_attributes.items[i].string, "size"))
> +				size_index = i + 1;
> +		}

size_index was initialized to -1 and I was expecting we can tell the
attribute is not used by checking (size_index < 0), but this seems
to make size_index 1 based, not 0 based.  Intended?

I think this is to skip the object name that comes as the first item
in the response, but it would be more "pure" to keep foo_index
0-based and add the offset by whatever constant number of things
that come in front (currently, 1 for object name) near a lot closer
to where we parse and read the data, i.e. in the while() loop below.

> +		args.oids = transport->smart_options->object_info_oids;
> +		send_object_info_request(data->fd[1], &args);

And this is the second round-trip.

> +		break;
> +	case protocol_v1:
> +	case protocol_v0:
> +		die(_("wrong protocol version. expected v2"));
> +	case protocol_unknown_version:
> +		BUG("unknown protocol version");
> +	}
> +
> +	if (packet_reader_read(&reader) != PACKET_READ_NORMAL)
> +		die(_("error reading object info header"));
> +	i = 0;
> +	while (packet_reader_read(&reader) == PACKET_READ_NORMAL) {
> +		struct string_list object_info_values = STRING_LIST_INIT_DUP;
> +
> +		string_list_split(&object_info_values, reader.line, ' ', -1);
> +		if (size_index > 0) {
> +			if (!strcmp(object_info_values.items[size_index].string, ""))
> +				die("object-info: not our ref %s",
> +					object_info_values.items[0].string);
> +			*(*object_info_data)[i].sizep = strtoul(object_info_values.items[size_index].string, NULL, 10);
> +		}
> +		i++;
> +	}
> +	check_stateless_delimiter(transport->stateless_rpc, &reader, "stateless delimiter expected");
> +
> +	return 0;
> +}

All of the above look written for the best case scenario (i.e. it
assumes there won't be any unexpected type of response), which makes
a good fuzz target.  A malicious server side can throw non-number in
the "size" slot and strtoul() does not check for a malformatted
number, or it can return more response records than it was
requested, and overflow object_info_data[] array the caller of this
function prepared.

smart_options->object_info_oids may know how many response records
we should expect, so at least this while() loop should be able to
protect against an object_info_data[] overflow attack using it as
the upper limit of i.

>  static struct ref *get_refs_via_connect(struct transport *transport, int for_push,
>  					struct transport_ls_refs_options *options)
>  {
> @@ -392,8 +465,14 @@ static int fetch_refs_via_pack(struct transport *transport,
>  	args.server_options = transport->server_options;
>  	args.negotiation_tips = data->options.negotiation_tips;
>  	args.reject_shallow_remote = transport->smart_options->reject_shallow;
> +	args.object_info = transport->smart_options->object_info;
>  
> -	if (!data->got_remote_heads) {
> +	if (transport->smart_options && transport->smart_options->object_info) {
> +		if (!fetch_object_info(transport, data->options.object_info_data))
> +			goto cleanup;
> +		ret = -1;
> +		goto cleanup;
> +	} else if (!data->got_remote_heads) {
>  		int i;
>  		int must_list_refs = 0;
>  		for (i = 0; i < nr_heads; i++) {
> diff --git a/transport.h b/transport.h
> index 12bc08fc33..dbf60bb710 100644
> --- a/transport.h
> +++ b/transport.h
> @@ -6,6 +6,7 @@
>  #include "remote.h"
>  #include "list-objects-filter-options.h"
>  #include "string-list.h"
> +#include "object-store.h"
>  
>  struct git_transport_options {
>  	unsigned thin : 1;
> @@ -31,6 +32,12 @@ struct git_transport_options {
>  	 */
>  	unsigned connectivity_checked:1;
>  
> +	/*
> +	 * Transport will attempt to pull only object-info. Fallbacks
> +	 * to pulling entire object if object-info is not supported
> +	 */
> +	unsigned object_info : 1;
> +
>  	int depth;
>  	const char *deepen_since;
>  	const struct string_list *deepen_not;
> @@ -54,6 +61,10 @@ struct git_transport_options {
>  	 * common commits to this oidset instead of fetching any packfiles.
>  	 */
>  	struct oidset *acked_commits;
> +
> +	struct oid_array *object_info_oids;
> +	struct object_info **object_info_data;
> +	struct string_list *object_info_options;
>  };
>  
>  enum transport_family {

^ permalink raw reply	[flat|nested] 103+ messages in thread

* Re: [PATCH v4 5/8] transport: add client side capability to request object-info
  2022-05-03  0:54         ` Junio C Hamano
@ 2022-05-03 18:58           ` Calvin Wan
  2022-05-04 15:42             ` Junio C Hamano
  0 siblings, 1 reply; 103+ messages in thread
From: Calvin Wan @ 2022-05-03 18:58 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git, jonathantanmy, philipoakley, johncai86, me

> It requires two round-trips when the server does not know something
> the client has asked, one without any object names, and then a real
> request with the list of object names?

> I think in practice, because the way the previous step is
> structured, you can send a full request and in the non-error
> codepath the server side will understand everything and give full
> response back without an extra round-trip.  Only in the error case,
> the client will see the set of acceptable attributes and no object
> data (and implicitly treat that as an indication of an error), but
> then what would the client do after that happens?

> - The client will just give up; the attributes it wanted but the
>   server couldn't supply are so fundamentally necessary to perform
>   whatever the client wanted to do.

> - The client will retry the request by asking a known subset of
>   attributes for the same set of objects.  This requires an extra
>   round-trip.  If an earlier step didn't make .unknown case bypass
>   the response loop, this extra round-trip would have been totally
>   unnecessary.

I do agree that the two round trips are unnecessary and I can send the
full request to begin with. The client (after patch 6) will fallback to
fetch after this initial request fails.

> These are const pointers because these lists are not owned by this
> structure and are borrowed from somebody else?

Yes git_transport_options holds them.

> It is unclear who fills smart_options->object_info_options and from
> what.  Are we referring to something that is not yet used, or worse
> yet, code that is not yet written?

The code that calls this function is in patch 8. I decided to separate
out the transport side and the new command in cat-file --batch-command
to make it easier to explain to a reviewer how the individual parts
work. Looking holistically at your comments, should I have gone about
splitting the patches differently? It seems like I was a little too
aggressive this time, creating confusion by requiring the reviewer to
look at the next patch to answer some questions.

> It seems that the result of applying 1-5/8 does not compile.

I'll check why this is the case. I should get in the habit of compiling
all of my patches individually.

> size_index was initialized to -1 and I was expecting we can tell the
> attribute is not used by checking (size_index < 0), but this seems
> to make size_index 1 based, not 0 based.  Intended?

Yes the server returns the packet as "object_id SP size" so the first
value is always the object_id.

> All of the above look written for the best case scenario (i.e. it
> assumes there won't be any unexpected type of response), which makes
> a good fuzz target.

I didn't take into account the possibility of a malicious server in my
implementation. Will go back through and protect against it.


On Mon, May 2, 2022 at 5:54 PM Junio C Hamano <gitster@pobox.com> wrote:
>
> Calvin Wan <calvinwan@google.com> writes:
>
> > The question of version mismatch often comes up with client/server
> > relationships. There are two cases to consider here (assuming either
> > client or server functionality for object-info changes between the
> > different versions):
> >
> >  1. client version > server version
> >       - client can request additional attributes from server
> >  2. server version > client version
> >       - server can return additional info to the client
> >
> > The second case is a non-issue since the client would never be able to
> > request additional info from the server.
>
> That assumes that we can never remove attributes once we start
> supporting them, so it is not a non-issue exactly.  The way we make
> it a non-issue is to make sure that the server returns no more than
> what the client has asked, i.e. the same way we deal with the first
> case.
>
> > In order to solve the first
> > case, recall an earlier patch where the server sends back the attributes
> > even if no object ids are sent by the client. This allows the client to
> > first check whether the server can accept the requested attributes
> > before sending the entire request.
>
> It requires two round-trips when the server does not know something
> the client has asked, one without any object names, and then a real
> request with the list of object names?
>
> I think in practice, because the way the previous step is
> structured, you can send a full request and in the non-error
> codepath the server side will understand everything and give full
> response back without an extra round-trip.  Only in the error case,
> the client will see the set of acceptable attributes and no object
> data (and implicitly treat that as an indication of an error), but
> then what would the client do after that happens?
>
>  - The client will just give up; the attributes it wanted but the
>    server couldn't supply are so fundamentally necessary to perform
>    whatever the client wanted to do.
>
>  - The client will retry the request by asking a known subset of
>    attributes for the same set of objects.  This requires an extra
>    round-trip.  If an earlier step didn't make .unknown case bypass
>    the response loop, this extra round-trip would have been totally
>    unnecessary.
>
> > index 8c7752fc82..c27018d48c 100644
> > --- a/fetch-pack.h
> > +++ b/fetch-pack.h
> > @@ -69,6 +69,12 @@ struct fetch_pack_args {
> >       unsigned connectivity_checked:1;
> >  };
> >
> > +struct object_info_args {
> > +     const struct string_list *object_info_options;
> > +     const struct string_list *server_options;
> > +     const struct oid_array *oids;
> > +};
>
> These are const pointers because these lists are not owned by this
> structure and are borrowed from somebody else?
>
> > diff --git a/transport-helper.c b/transport-helper.c
> > index b4dbbabb0c..093946f7fd 100644
> > --- a/transport-helper.c
> > +++ b/transport-helper.c
> > @@ -686,13 +686,16 @@ static int fetch_refs(struct transport *transport,
> >
> >       /*
> >        * If we reach here, then the server, the client, and/or the transport
> > -      * helper does not support protocol v2. --negotiate-only requires
> > -      * protocol v2.
> > +      * helper does not support protocol v2. --negotiate-only and --object-info
> > +      * require protocol v2.
> >        */
>
> OK.
>
> >       if (data->transport_options.acked_commits) {
> >               warning(_("--negotiate-only requires protocol v2"));
> >               return -1;
> >       }
> > +     if (transport->smart_options->object_info) {
> > +             die(_("--object-info requires protocol v2"));
> > +     }
>
> OK.
>
> > diff --git a/transport.c b/transport.c
> > index 3d64a43ab3..08c505e1d0 100644
> > --- a/transport.c
> > +++ b/transport.c
> > @@ -353,6 +353,79 @@ static struct ref *handshake(struct transport *transport, int for_push,
> >       return refs;
> >  }
> >
> > +static int fetch_object_info(struct transport *transport, struct object_info **object_info_data)
> > +{
> > +     size_t i;
> > +     int size_index = -1;
> > +     struct git_transport_data *data = transport->data;
> > +     struct object_info_args args;
> > +     struct packet_reader reader;
> > +     struct string_list server_attributes = STRING_LIST_INIT_DUP;
> > +
> > +     memset(&args, 0, sizeof(args));
> > +     args.server_options = transport->server_options;
> > +     args.object_info_options = transport->smart_options->object_info_options;
>
> It is unclear who fills smart_options->object_info_options and from
> what.  Are we referring to something that is not yet used, or worse
> yet, code that is not yet written?
>
> It seems that the result of applying 1-5/8 does not compile.
>
> > +     connect_setup(transport, 0);
> > +     packet_reader_init(&reader, data->fd[0], NULL, 0,
> > +                     PACKET_READ_CHOMP_NEWLINE |
> > +                     PACKET_READ_DIE_ON_ERR_PACKET);
> > +     data->version = discover_version(&reader);
> > +
> > +     transport->hash_algo = reader.hash_algo;
> > +
> > +     switch (data->version) {
> > +     case protocol_v2:
> > +             if (!server_supports_v2("object-info", 0))
> > +                     return -1;
> > +             /**
> > +              * Send a request with only attributes first. If server can return all
> > +              * of the requested attributes, then send request with object ids
> > +              */
>
> Doesn't it force the most pessimistic behaviour, i.e. always do a
> likely-to-be-useless (when the feature becomes widely avaiable)
> probe round-trip before making a real request?
>
> > +             send_object_info_request(data->fd[1], &args);
> > +             if (packet_reader_read(&reader) != PACKET_READ_NORMAL) {
> > +                     check_stateless_delimiter(transport->stateless_rpc, &reader, "stateless delimiter expected");
> > +                     return -1;
> > +             }
>
> All these check_stateless_delimiter() calls in this patch are on
> overly long lines.
>
> > +             string_list_split(&server_attributes, reader.line, ' ', -1);
> > +             packet_reader_read(&reader);
> > +             check_stateless_delimiter(transport->stateless_rpc, &reader, "stateless delimiter expected");
> > +             if (server_attributes.nr != args.object_info_options->nr)
> > +                     return -1;
>
> So, I am guessing (without having the code that prepares the right
> hand side) that we are comparing the number of attributes we asked
> (on the right hand side) and the number of attributes the other side
> said it supports in response to our request (on the left hand
> side).
>
> This does not protect against a server that responds with attribute
> names duplicated or any somewhat odd third-party servers that are
> not identical to what you wrote here.  I would rather see us to loop
> through object_info_options array and see if server_attributes list
> says all of them are supported more explicitly.  That can be done in
> the loop below, where you'd figure out the "foo_index" for each
> attribute "foo" you are supporting in the code.
>
> > +             for (i = 0; i < server_attributes.nr; i++) {
> > +                     if (!strcmp(server_attributes.items[i].string, "size"))
> > +                             size_index = i + 1;
> > +             }
>
> size_index was initialized to -1 and I was expecting we can tell the
> attribute is not used by checking (size_index < 0), but this seems
> to make size_index 1 based, not 0 based.  Intended?
>
> I think this is to skip the object name that comes as the first item
> in the response, but it would be more "pure" to keep foo_index
> 0-based and add the offset by whatever constant number of things
> that come in front (currently, 1 for object name) near a lot closer
> to where we parse and read the data, i.e. in the while() loop below.
>
> > +             args.oids = transport->smart_options->object_info_oids;
> > +             send_object_info_request(data->fd[1], &args);
>
> And this is the second round-trip.
>
> > +             break;
> > +     case protocol_v1:
> > +     case protocol_v0:
> > +             die(_("wrong protocol version. expected v2"));
> > +     case protocol_unknown_version:
> > +             BUG("unknown protocol version");
> > +     }
> > +
> > +     if (packet_reader_read(&reader) != PACKET_READ_NORMAL)
> > +             die(_("error reading object info header"));
> > +     i = 0;
> > +     while (packet_reader_read(&reader) == PACKET_READ_NORMAL) {
> > +             struct string_list object_info_values = STRING_LIST_INIT_DUP;
> > +
> > +             string_list_split(&object_info_values, reader.line, ' ', -1);
> > +             if (size_index > 0) {
> > +                     if (!strcmp(object_info_values.items[size_index].string, ""))
> > +                             die("object-info: not our ref %s",
> > +                                     object_info_values.items[0].string);
> > +                     *(*object_info_data)[i].sizep = strtoul(object_info_values.items[size_index].string, NULL, 10);
> > +             }
> > +             i++;
> > +     }
> > +     check_stateless_delimiter(transport->stateless_rpc, &reader, "stateless delimiter expected");
> > +
> > +     return 0;
> > +}
>
> All of the above look written for the best case scenario (i.e. it
> assumes there won't be any unexpected type of response), which makes
> a good fuzz target.  A malicious server side can throw non-number in
> the "size" slot and strtoul() does not check for a malformatted
> number, or it can return more response records than it was
> requested, and overflow object_info_data[] array the caller of this
> function prepared.
>
> smart_options->object_info_oids may know how many response records
> we should expect, so at least this while() loop should be able to
> protect against an object_info_data[] overflow attack using it as
> the upper limit of i.
>
> >  static struct ref *get_refs_via_connect(struct transport *transport, int for_push,
> >                                       struct transport_ls_refs_options *options)
> >  {
> > @@ -392,8 +465,14 @@ static int fetch_refs_via_pack(struct transport *transport,
> >       args.server_options = transport->server_options;
> >       args.negotiation_tips = data->options.negotiation_tips;
> >       args.reject_shallow_remote = transport->smart_options->reject_shallow;
> > +     args.object_info = transport->smart_options->object_info;
> >
> > -     if (!data->got_remote_heads) {
> > +     if (transport->smart_options && transport->smart_options->object_info) {
> > +             if (!fetch_object_info(transport, data->options.object_info_data))
> > +                     goto cleanup;
> > +             ret = -1;
> > +             goto cleanup;
> > +     } else if (!data->got_remote_heads) {
> >               int i;
> >               int must_list_refs = 0;
> >               for (i = 0; i < nr_heads; i++) {
> > diff --git a/transport.h b/transport.h
> > index 12bc08fc33..dbf60bb710 100644
> > --- a/transport.h
> > +++ b/transport.h
> > @@ -6,6 +6,7 @@
> >  #include "remote.h"
> >  #include "list-objects-filter-options.h"
> >  #include "string-list.h"
> > +#include "object-store.h"
> >
> >  struct git_transport_options {
> >       unsigned thin : 1;
> > @@ -31,6 +32,12 @@ struct git_transport_options {
> >        */
> >       unsigned connectivity_checked:1;
> >
> > +     /*
> > +      * Transport will attempt to pull only object-info. Fallbacks
> > +      * to pulling entire object if object-info is not supported
> > +      */
> > +     unsigned object_info : 1;
> > +
> >       int depth;
> >       const char *deepen_since;
> >       const struct string_list *deepen_not;
> > @@ -54,6 +61,10 @@ struct git_transport_options {
> >        * common commits to this oidset instead of fetching any packfiles.
> >        */
> >       struct oidset *acked_commits;
> > +
> > +     struct oid_array *object_info_oids;
> > +     struct object_info **object_info_data;
> > +     struct string_list *object_info_options;
> >  };
> >
> >  enum transport_family {

^ permalink raw reply	[flat|nested] 103+ messages in thread

* Re: [PATCH v4 4/8] object-info: send attribute packet regardless of object ids
  2022-05-03  0:05         ` Junio C Hamano
@ 2022-05-03 19:21           ` Calvin Wan
  0 siblings, 0 replies; 103+ messages in thread
From: Calvin Wan @ 2022-05-03 19:21 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git, jonathantanmy, philipoakley, johncai86, me

> Now, an updated server side with this patch would respond with
> "size" and no other response.  Does it mean it does not understand
> "frotz", it does not understand "nitfol", or neither?  Did we get no
> response because we didn't feed objects, or because we asked for
> something they don't know about?

Patches 4-6 describe the following design idea I had:
 - Send initial request with only attributes
 - If server can fulfill object-info attributes, then go ahead
 - Else fallback to fetch

I responded to your comments on patch 5 regarding not needing
that first request, and sending the entire request from the beginning
so I imagine v5 would look something like this instead:
 - Send full request
 - Server either responds with a fulfilled request or only the attributes
   it can return.
 - Fallback to fetch if request is unfulfilled

> How well does the current client work with today's server side and
> the server side updated with this patch?  I _think_ this change is
> essentially a no-op when there is no version skew

You are correct that this change is essentially a no-op when there is
no version skew, which is intended. There is no current client; it's
implemented in patch 5.

> Am I correct to assume that those who are talking with today's
> server side are all out-of-tree custom clients?

> I am wondering how much damage we are causing them with this change
> in behaviour, especially with the lack of "You asked for X, but I
> don't understand X", which is replaced by no output, which would be
> totally unexpected by them.

Internally we don't have anyone using this yet. If we want to be
totally safe, I am also OK with having object-info return everything
it understands about the request rather than nothing. This was
intended to be an optimization since we would be defaulting back
to fetch to get object-info rather than only returning what object-info
knows.

> Wow.  We have been using info uninitialized?  Is this a silent
> bugfix?

Yes unfortunately.
On Mon, May 2, 2022 at 5:06 PM Junio C Hamano <gitster@pobox.com> wrote:
>
> Calvin Wan <calvinwan@google.com> writes:
>
> > Currently on the server side of object-info, if the client does not send
> > any object ids in the request or if the client requests an attribute the
> > server does not support, the server will either not send any packets
> > back or error out.
>
> There is an early return when oid_str_list->nr (i.e. number of
> objects we are queried) is zero, and we return without showing the
> "size" token in the output (called "attribute").  The change in this
> patch changes the behaviour and makes such a request responded with
> "size" but without any size information for no objects.
>
> It is unclear why that is a good change, though.
>
> The loop that accepts requests in today's code sends an error when
> it sees a line that it does not understand.  The patch stops doing
> so.  Also, after ignoring such an unknown line, the requests that
> were understood are responded to by send_info() function.  The patch
> instead refuses to give any output in such a case.
>
> It is unclear why either of these two makes it a good change,
> though.
>
> > Consider the scenario where the client git version is
> > ahead of the server git version, and the client can request additional
> > object-info besides 'size'. There needs to be a way to tell whether the
> > server can honor all of the client attribute requests before the client
> > sends a request with all of the object ids.
>
> Yes.  If we want to learn about "size", "frotz", and "nitfol"
> attributes about these objects, we can send "size", "frotz",
> "nitfol", and then start feeding object names.  Then we can observe
> that "frotz" were not liked to learn that this old server does not
> understand it, and the same for "nitfol".  At least we would have
> learned about size of these objects, without this patch.
>
> Now, an updated server side with this patch would respond with
> "size" and no other response.  Does it mean it does not understand
> "frotz", it does not understand "nitfol", or neither?  Did we get no
> response because we didn't feed objects, or because we asked for
> something they don't know about?
>
> How well does the current client work with today's server side and
> the server side updated with this patch?  I _think_ this change is
> essentially a no-op when there is no version skew (i.e. we do not
> ask for anything today's server does not know about and we do not
> necessarily ask today's server about zero objects).
>
> Am I correct to assume that those who are talking with today's
> server side are all out-of-tree custom clients?
>
> I am wondering how much damage we are causing them with this change
> in behaviour, especially with the lack of "You asked for X, but I
> don't understand X", which is replaced by no output, which would be
> totally unexpected by them.
>
> It totally is possible that this "object-info" request is treated as
> an experimental curiosity that is not ready for production and has
> no existing users.  If this were in 2006, I would just _declare_
> such is a case and move on, breaking the protocol for existing users
> whose number is zero.  But I am afraid that we no longer live in
> such a simpler world, so...
>
> > In a future patch, if the
> > client were to make an initial command request with only attributes, the
> > server would be able to confirm which attributes it could return.
> >
> > ---
> >  protocol-caps.c | 14 +++++++-------
> >  1 file changed, 7 insertions(+), 7 deletions(-)
> >
> > diff --git a/protocol-caps.c b/protocol-caps.c
> > index bbde91810a..bc7def0727 100644
> > --- a/protocol-caps.c
> > +++ b/protocol-caps.c
> > @@ -11,6 +11,7 @@
> >
> >  struct requested_info {
> >       unsigned size : 1;
> > +     unsigned unknown : 1;
> >  };
>
> OK.
>
> >  /*
> > @@ -40,12 +41,12 @@ static void send_info(struct repository *r, struct packet_writer *writer,
> >       struct string_list_item *item;
> >       struct strbuf send_buffer = STRBUF_INIT;
> >
> > -     if (!oid_str_list->nr)
> > -             return;
> > -
> >       if (info->size)
> >               packet_writer_write(writer, "size");
> >
> > +     if (info->unknown || !oid_str_list->nr)
> > +             goto release;
> > +
> >       for_each_string_list_item (item, oid_str_list) {
> >               const char *oid_str = item->string;
> >               struct object_id oid;
> > @@ -72,12 +73,13 @@ static void send_info(struct repository *r, struct packet_writer *writer,
> >               packet_writer_write(writer, "%s", send_buffer.buf);
> >               strbuf_reset(&send_buffer);
> >       }
> > +release:
> >       strbuf_release(&send_buffer);
> >  }
>
> OK, except for the bypass for info->unknown case, which I am not
> sure about.
>
> >  int cap_object_info(struct repository *r, struct packet_reader *request)
> >  {
> > -     struct requested_info info;
> > +     struct requested_info info = { 0 };
>
> Wow.  We have been using info uninitialized?  Is this a silent
> bugfix?
>
> If info.size bit was on due to on-stack garbage, we would have given
> our response with "size" attribute prefixed, even when the client
> side never requested it.
>
> > @@ -92,9 +94,7 @@ int cap_object_info(struct repository *r, struct packet_reader *request)
> >               if (parse_oid(request->line, &oid_str_list))
> >                       continue;
> >
> > -             packet_writer_error(&writer,
> > -                                 "object-info: unexpected line: '%s'",
> > -                                 request->line);
> > +             info.unknown = 1;
>

^ permalink raw reply	[flat|nested] 103+ messages in thread

* Re: [PATCH v4 2/8] fetch-pack: move fetch default settings
  2022-05-02 22:58         ` Junio C Hamano
@ 2022-05-03 23:06           ` Jonathan Tan
  2022-05-05 18:13             ` Calvin Wan
  0 siblings, 1 reply; 103+ messages in thread
From: Jonathan Tan @ 2022-05-03 23:06 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Jonathan Tan, Calvin Wan, git, philipoakley, johncai86, me

Junio C Hamano <gitster@pobox.com> writes:
> Calvin Wan <calvinwan@google.com> writes:
> 
> > When the state machine in do_fetch_pack_v2() is in FETCH_CHECK_LOCAL, we
> > set a few v2-specific defaults. It will be helpful for a future patch to
> > have these defaults set if the initial state is not FETCH_CHECK_LOCAL.
> 
> 
> > This is a safe change since the initial state is currently always
> > FETCH_CHECK_LOCAL, so we're guaranteed to hit that code whether it's in
> > the state machine or not.
> 
> What does "it" (which supposed to be able to be in the state machine
> and also not to be in the state matchine) in the last sentence refer
> to?

I think the "it" refers to the initialization code that was moved.

> The patch looks correct and I agree that this is a benign no-op
> because the initial value of state is FETCH_CHECK_LOCAL (i.e. the
> block of code moved here will execute pretty much as the first
> thing, and the relative order between that block and sorting of ref
> list should not matter).  I just didn't understand the explanation
> given by the patch why it is safe.
> 
> Thanks.

Agreed - I think the commit message would be better off if it was worded
something like:

  There are some initialization steps that need to be done at the start
  of the execution of the do_fetch_pack_v2() state machine. Currently,
  these are done in the FETCH_CHECK_LOCAL state, which is the initial
  state.

  However, a subsequent patch will allow for another initial state,
  while still requiring these initialization steps. Therefore, move
  these initialization steps to before the state machine, so that they
  are run regardless of the initial state.

I think this description suffices for a reviewer to see that it is safe,
but if you want, you can add:

  Note that there is no change in behavior, because we're moving code
  from the beginning of the first state to just before the execution of
  the state machine.

^ permalink raw reply	[flat|nested] 103+ messages in thread

* Re: [PATCH v4 4/8] object-info: send attribute packet regardless of object ids
  2022-05-02 17:09       ` [PATCH v4 4/8] object-info: send attribute packet regardless of object ids Calvin Wan
  2022-05-03  0:05         ` Junio C Hamano
@ 2022-05-03 23:11         ` Jonathan Tan
  1 sibling, 0 replies; 103+ messages in thread
From: Jonathan Tan @ 2022-05-03 23:11 UTC (permalink / raw)
  To: Calvin Wan; +Cc: Jonathan Tan, git, gitster, philipoakley, johncai86, me

Calvin Wan <calvinwan@google.com> writes:
> Currently on the server side of object-info, if the client does not send
> any object ids in the request or if the client requests an attribute the
> server does not support, the server will either not send any packets
> back or error out. Consider the scenario where the client git version is
> ahead of the server git version, and the client can request additional
> object-info besides 'size'. There needs to be a way to tell whether the
> server can honor all of the client attribute requests before the client
> sends a request with all of the object ids.  

This part explains the problem...

> In a future patch, if the
> client were to make an initial command request with only attributes, the
> server would be able to confirm which attributes it could return.

...and this part explains what we want to do in the future, but this
commit message doesn't explain what this commit does. (The commit
subject has something related, but does not have enough detail. For
example, is it an empty attribute packet?)

Having said that, the way we usually communicate client-server version
differences is through capabilities - is there a reason why we can't use
that here?

^ permalink raw reply	[flat|nested] 103+ messages in thread

* Re: [PATCH v4 5/8] transport: add client side capability to request object-info
  2022-05-02 17:09       ` [PATCH v4 5/8] transport: add client side capability to request object-info Calvin Wan
  2022-05-03  0:54         ` Junio C Hamano
@ 2022-05-03 23:15         ` Jonathan Tan
  2022-05-04 15:50           ` Junio C Hamano
  1 sibling, 1 reply; 103+ messages in thread
From: Jonathan Tan @ 2022-05-03 23:15 UTC (permalink / raw)
  To: Calvin Wan; +Cc: Jonathan Tan, git, gitster, philipoakley, johncai86, me

Calvin Wan <calvinwan@google.com> writes:
> Sometimes it is useful to get information about an object without having
> to download it completely. The server logic has already been implemented
> as “a2ba162cda (object-info: support for retrieving object info,
> 2021-04-20)”. This patch implements the client option for it. Currently,
> only 'size' is supported and the server must be v2, however future
> patches can implement additional options.
> 
> The question of version mismatch often comes up with client/server
> relationships. There are two cases to consider here (assuming either
> client or server functionality for object-info changes between the
> different versions):
> 
>  1. client version > server version
>  	- client can request additional attributes from server
>  2. server version > client version
>  	- server can return additional info to the client
> 
> The second case is a non-issue since the client would never be able to
> request additional info from the server.  In order to solve the first
> case, recall an earlier patch where the server sends back the attributes
> even if no object ids are sent by the client. This allows the client to
> first check whether the server can accept the requested attributes
> before sending the entire request.

From this description, it seems like the intention is to send an
object-info request, and then if the server responds in a certain way
(here, sending back the attributes - presumably without sending any
actual information), then we know that the server doesn't support our
request and we need to fall back. As Junio says [1], this requires one
RTT more than necessary. We could just determine if the server supports
the attributes we want by using capabilities (without needing to send
the request to check).

[1] https://lore.kernel.org/git/xmqqilqnsaep.fsf@gitster.g/

^ permalink raw reply	[flat|nested] 103+ messages in thread

* Re: [PATCH v4 6/8] transport: add object-info fallback to fetch
  2022-05-02 17:09       ` [PATCH v4 6/8] transport: add object-info fallback to fetch Calvin Wan
@ 2022-05-03 23:27         ` Jonathan Tan
  0 siblings, 0 replies; 103+ messages in thread
From: Jonathan Tan @ 2022-05-03 23:27 UTC (permalink / raw)
  To: Calvin Wan; +Cc: Jonathan Tan, git, gitster, philipoakley, johncai86, me

Calvin Wan <calvinwan@google.com> writes:
> If a v2 server does not support object-info or if the client requests
> information that isn't supported by object-info, fetch the objects as if
> it were a standard v2 fetch (however without changing any refs).

What do you mean by "however without changing any refs"? (I would have
expected that no refs would be changed, so this, to me, implies that we
would expect some refs to be changed.)

> diff --git a/fetch-pack.c b/fetch-pack.c
> index 506ca28e05..938d082b80 100644
> --- a/fetch-pack.c
> +++ b/fetch-pack.c
> @@ -1639,6 +1639,9 @@ static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args,
>  	if (args->depth > 0 || args->deepen_since || args->deepen_not)
>  		args->deepen = 1;
>  
> +	if (args->object_info)
> +		state = FETCH_SEND_REQUEST;
> +
>  	while (state != FETCH_DONE) {
>  		switch (state) {
>  		case FETCH_CHECK_LOCAL:
> @@ -1648,7 +1651,7 @@ static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args,
>  			/* Filter 'ref' by 'sought' and those that aren't local */
>  			mark_complete_and_common_ref(negotiator, args, &ref);
>  			filter_refs(args, &ref, sought, nr_sought);
> -			if (!args->refetch && everything_local(args, &ref))
> +			if (!args->refetch && !args->object_info && everything_local(args, &ref))
>  				state = FETCH_DONE;
>  			else
>  				state = FETCH_SEND_REQUEST;

I think that the purpose of all these changes is to skip certain steps
when we know that we're fetching as a fallback for object-info, but I
don't think they're necessary - it's fine to not fetch certain objects
if we have them present.

> @@ -2035,6 +2038,15 @@ struct ref *fetch_pack(struct fetch_pack_args *args,
>  		args->connectivity_checked = 1;
>  	}
>  
> +	if (args->object_info) {
> +		struct ref *ref_cpy_reader = ref_cpy;
> +		int i = 0;
> +		while (ref_cpy_reader) {
> +			oid_object_info_extended(the_repository, &ref_cpy_reader->old_oid, &(*args->object_info_data)[i], OBJECT_INFO_LOOKUP_REPLACE);
> +			ref_cpy_reader = ref_cpy_reader->next;
> +			i++;
> +		}
> +	}

Could this part be done in the same function that falls back
(fetch_refs_via_pack(), I believe)? That would make the code easier to
understand - here, we don't know why we would need to copy the OIDs, but
in the function that falls back, we can look up and see that this is
done because we couldn't do something else.

> diff --git a/transport.c b/transport.c
> index 08c505e1d0..b85e52d9a8 100644
> --- a/transport.c
> +++ b/transport.c
> @@ -436,10 +436,12 @@ static int fetch_refs_via_pack(struct transport *transport,
>  			       int nr_heads, struct ref **to_fetch)
>  {
>  	int ret = 0;
> +	size_t i;
>  	struct git_transport_data *data = transport->data;
>  	struct ref *refs = NULL;
>  	struct fetch_pack_args args;
>  	struct ref *refs_tmp = NULL;
> +	struct ref *wanted_refs = xcalloc(1, sizeof (struct ref));

If you only need these new variables in a block, declare them there (and
free them at the end of that block).

> @@ -489,7 +500,7 @@ static int fetch_refs_via_pack(struct transport *transport,
>  	else if (data->version <= protocol_v1)
>  		die_if_server_options(transport);
>  
> -	if (data->options.acked_commits) {
> +	if (data->options.acked_commits && !transport->smart_options->object_info) {
>  		if (data->version < protocol_v2) {
>  			warning(_("--negotiate-only requires protocol v2"));
>  			ret = -1;

Is this addition necessary? --negotiate-only and object-info do not work
together.

^ permalink raw reply	[flat|nested] 103+ messages in thread

* Re: [PATCH v4 5/8] transport: add client side capability to request object-info
  2022-05-03 18:58           ` Calvin Wan
@ 2022-05-04 15:42             ` Junio C Hamano
  0 siblings, 0 replies; 103+ messages in thread
From: Junio C Hamano @ 2022-05-04 15:42 UTC (permalink / raw)
  To: Calvin Wan; +Cc: git, jonathantanmy, philipoakley, johncai86, me

Calvin Wan <calvinwan@google.com> writes:

>> It seems that the result of applying 1-5/8 does not compile.
>
> I'll check why this is the case. I should get in the habit of compiling
> all of my patches individually.

For a 8-patch series, we should make sure that all 8 states resuting
from applying patches 1-thru-N for each N (1 <= N <= 8) builds and
tests OK for bisectability.

I often do not check that due to lack of time on the receiving end,
but for this series, I didn't understand the sudden appearance of
the object_info_options member in the code that didn't even seem to
be populated anywhere, and I noticed that the series was organized
incorrectly.  Perhaps a simple rebase misordering?

>> size_index was initialized to -1 and I was expecting we can tell the
>> attribute is not used by checking (size_index < 0), but this seems
>> to make size_index 1 based, not 0 based.  Intended?
>
> Yes the server returns the packet as "object_id SP size" so the first
> value is always the object_id.

I think this is to skip the object name that comes as the first item
in the response, but it would be more "pure" to keep foo_index
0-based and add the offset by whatever constant number of things
that come in front (currently, 1 for object name) near a lot closer
to where we parse and read the data, i.e. in the while() loop below.

IOW, the code that sets size_index based on the attribute query
response should not have to know how many fixed elements will come
before these attributes on the response lines.  That knowledge
belongs to the code below:

> +     i = 0;
> +     while (packet_reader_read(&reader) == PACKET_READ_NORMAL) {
> +             struct string_list object_info_values = STRING_LIST_INIT_DUP;
> +
> +             string_list_split(&object_info_values, reader.line, ' ', -1);
> +             if (size_index > 0) {
> +                     if (!strcmp(object_info_values.items[size_index].string, ""))

And here the code that uses size_index should be like

		if (0 <= size_index &&
		    !strcmp(object_info_values.items[1 + size_index].string, ""))
			...

if we wanted the logic to be more "pure" and keep foo_index 0-based.

^ permalink raw reply	[flat|nested] 103+ messages in thread

* Re: [PATCH v4 5/8] transport: add client side capability to request object-info
  2022-05-03 23:15         ` Jonathan Tan
@ 2022-05-04 15:50           ` Junio C Hamano
  0 siblings, 0 replies; 103+ messages in thread
From: Junio C Hamano @ 2022-05-04 15:50 UTC (permalink / raw)
  To: Jonathan Tan; +Cc: Calvin Wan, git, philipoakley, johncai86, me

Jonathan Tan <jonathantanmy@google.com> writes:

> RTT more than necessary. We could just determine if the server supports
> the attributes we want by using capabilities (without needing to send
> the request to check).

Hmph.  capabilities may cut in both ways, though.

Those clients that are not interested in object-info at all (which
are the majority of case where people clone, fetch or ls-remote),
they do not even need to know what kind of object attributes the
object-info command is prepared to report, and they would appreciate
not having to spend any extra byte in the capability-advertisement.

Of course, for object-info clients, having it upfront does make
things simpler.

So...



^ permalink raw reply	[flat|nested] 103+ messages in thread

* Re: [PATCH v4 3/8] object-store: add function to free object_info contents
  2022-05-02 23:23         ` Junio C Hamano
@ 2022-05-04 19:09           ` Junio C Hamano
  2022-05-05  0:15           ` Junio C Hamano
  1 sibling, 0 replies; 103+ messages in thread
From: Junio C Hamano @ 2022-05-04 19:09 UTC (permalink / raw)
  To: Calvin Wan; +Cc: git, jonathantanmy, philipoakley, johncai86, me

Junio C Hamano <gitster@pobox.com> writes:

>> +}
>> \ No newline at end of file
>
> Let's avoid such incomplete lines.

As this breaks my build cycle ("make sparse" is part of my
post-integration check), I have added this workaround on the tip of
the topic, but please make sure I do not have to redo that when you
reroll.

Thanks.

Subject: [PATCH] SQUASH??? compilation fix

"make sparse" rightfully complains that this file ends in an
incomplete line.
---
 object-file.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/object-file.c b/object-file.c
index 34a6e34adf..c6addb6969 100644
--- a/object-file.c
+++ b/object-file.c
@@ -2674,4 +2674,4 @@ void free_object_info_contents(struct object_info *object_info)
 		free(object_info->delta_base_oid);
 	if (object_info->type_name)
 		free(object_info->type_name);
-}
\ No newline at end of file
+}
-- 
2.36.0-244-ge81c54a17b


^ permalink raw reply related	[flat|nested] 103+ messages in thread

* Re: [PATCH v4 8/8] cat-file: add --batch-command remote-object-info command
  2022-05-02 17:09       ` [PATCH v4 8/8] cat-file: add --batch-command remote-object-info command Calvin Wan
@ 2022-05-04 21:27         ` Jonathan Tan
  2022-05-05 18:13           ` Calvin Wan
  2022-07-31 15:24         ` ZheNing Hu
  1 sibling, 1 reply; 103+ messages in thread
From: Jonathan Tan @ 2022-05-04 21:27 UTC (permalink / raw)
  To: Calvin Wan; +Cc: Jonathan Tan, git, gitster, philipoakley, johncai86, me

Thanks for the patch. I'll limit myself to high-level comments for now,
since the design may need to change if we want to support mixing
remote-object-info with other commands in a --batch-command --buffer
invocation.

Calvin Wan <calvinwan@google.com> writes:
> Since the `info` command in cat-file --batch-command prints object info
> for a given object, it is natural to add another command in cat-file
> --batch-command to print object info for a given object from a remote.
> Add `remote-object-info` to cat-file --batch-command.
> 
> While `info` takes object ids one at a time, this creates overhead when
> making requests to a server so `remote-object-info` instead can take
> multiple object ids at once.
> 
> cat-file --batch-command is generally implemented in the following
> manner:
> 
>  - Receive and parse input from user
>  - Call respective function attached to command
>  - Set batch mode state, get object info, print object info

You mention below that this only works in --buffer mode, so I don't
think that this is relevant.

> In --buffer mode, this changes to:
> 
>  - Receive and parse input from user
>  - Store respective function attached to command in a queue
>  - After flush, loop through commands in queue
>     - Call respective function attached to command
>     - Set batch mode state, get object info, print object info
> 
> Notice how the getting and printing of object info is accomplished one
> at a time. As described above, this creates a problem for making
> requests to a server. Therefore, `remote-object-info` is implemented in
> the following manner:

If you do want this command to work with --buffer and without, probably
clearer to replace "Notice" with "Notice that when cat-file is invoked
both without --buffer and with it". (Having said that, it makes sense to
me to only make it work with --buffer - making one request per OID would
probably not be something that the user would want.)

>  - Receive and parse input from user
>  If command is `remote-object-info`:
>     - Get object info from remote
>     - Loop through object info
>         - Call respective function attached to `info`
>         - Set batch mode state, use passed in object info, print object
>           info
>  Else:
>     - Call respective function attached to command
>     - Parse input, get object info, print object info
> 
> And finally for --buffer mode `remote-object-info`:
>  - Receive and parse input from user
>  - Store respective function attached to command in a queue
>  - After flush, loop through commands in queue:
>     If command is `remote-object-info`:
>         - Get object info from remote
>         - Loop through object info
>             - Call respective function attached to `info`
>             - Set batch mode state, use passed in object info, print
>               object info
>     Else:
>         - Call respective function attached to command
>         - Set batch mode state, get object info, print object info
> 
> To summarize, `remote-object-info` gets object info from the remote and
> then generates multiples `info` commands with the object info passed in.
> It cannot be implemented similarly to `info` and `content` because of
> the overhead with remote commenication.

What happens if remote-object-info is mixed with other commands?

> diff --git a/Documentation/git-cat-file.txt b/Documentation/git-cat-file.txt
> index 24a811f0ef..0d9e8e6214 100644
> --- a/Documentation/git-cat-file.txt
> +++ b/Documentation/git-cat-file.txt
> @@ -115,6 +115,10 @@ info <object>::
>  	Print object info for object reference `<object>`. This corresponds to the
>  	output of `--batch-check`.
>  
> +remote-object-info <remote> [<object>]::
> +	Print object info for object references `<object>` at specified <remote>.
> +	This command may only be combined with `--buffer`.

Here you mention that we need --buffer.

> @@ -253,13 +258,14 @@ newline. The available atoms are:
>  
>  `objectsize:disk`::
>  	The size, in bytes, that the object takes up on disk. See the
> -	note about on-disk sizes in the `CAVEATS` section below.
> +	note about on-disk sizes in the `CAVEATS` section below. Not
> +	supported by `remote-object-info`.
>  
>  `deltabase`::
>  	If the object is stored as a delta on-disk, this expands to the
>  	full hex representation of the delta base object name.
>  	Otherwise, expands to the null OID (all zeroes). See `CAVEATS`
> -	below.
> +	below. Not supported by `remote-object-info`.

OK - makes sense that these are not supported by remote-object-info,
because the remote may not even store objects in delta format.

> @@ -32,11 +35,14 @@ struct batch_options {
>  	int unordered;
>  	int transform_mode; /* may be 'w' or 'c' for --filters or --textconv */
>  	const char *format;
> +	int use_remote_info;
>  };
>  
>  #define DEFAULT_FORMAT "%(objectname) %(objecttype) %(objectsize)"
>  
>  static const char *force_path;
> +static struct object_info *remote_object_info;
> +static struct oid_array object_info_oids = OID_ARRAY_INIT;

Hmm...I would have expected the information to appear in the same data
structure used to store other command invocations like "contents",
"info", and "flush", not separately like this. This design doesn't seem
to support mixing commands.

> @@ -538,6 +611,7 @@ typedef void (*parse_cmd_fn_t)(struct batch_options *, const char *,
>  struct queued_cmd {
>  	parse_cmd_fn_t fn;
>  	char *line;
> +	const char *name;
>  };

Rather than store a pointer like this (and thus needing to keep track of
the lifetime of the memory pointed to by this pointer), if we only need
to know whether this queued_cmd is a remote-object-info call, we can
just use "unsigned is_remote_object_info : 1".

> @@ -565,9 +639,49 @@ static const struct parse_cmd {
>  } commands[] = {
>  	{ "contents", parse_cmd_contents, 1},
>  	{ "info", parse_cmd_info, 1},
> +	{ "remote-object-info", parse_cmd_info, 1},
>  	{ "flush", NULL, 0},
>  };

Double parse_cmd_info?

^ permalink raw reply	[flat|nested] 103+ messages in thread

* Re: [PATCH v4 3/8] object-store: add function to free object_info contents
  2022-05-02 23:23         ` Junio C Hamano
  2022-05-04 19:09           ` Junio C Hamano
@ 2022-05-05  0:15           ` Junio C Hamano
  2022-05-05 16:47             ` Calvin Wan
  1 sibling, 1 reply; 103+ messages in thread
From: Junio C Hamano @ 2022-05-05  0:15 UTC (permalink / raw)
  To: Calvin Wan; +Cc: git, jonathantanmy, philipoakley, johncai86, me

Junio C Hamano <gitster@pobox.com> writes:

>> +	if (object_info->typep)
>> +		free(object_info->typep);
>> +	if (object_info->sizep)
>> +		free(object_info->sizep);
>> +	if (object_info->disk_sizep)
>> +		free(object_info->disk_sizep);
>> +	if (object_info->delta_base_oid)
>> +		free(object_info->delta_base_oid);
>> +	if (object_info->type_name)
>> +		free(object_info->type_name);
>
> 	if (PTR)
> 		free(PTR);
>
> can and should be written as
>
> 	free(PTR);
>
> If we are reusing object_info after calling this function, we
> _might_ want to use FREE_AND_NULL() instead of free().

> As this breaks my build cycle ("make sparse" is part of my
> post-integration check), I have added this workaround on the tip of
> the topic, but please make sure I do not have to redo that when you
> reroll.
>
> Thanks.

Again, this breaks the build at GitHub CI (static-analysis), I added
this workaround on the tip of the topic, merged it to 'seen' and
pushed the result out, but please make sure I do not have to redo
that when you reroll.

Thanks.

^ permalink raw reply	[flat|nested] 103+ messages in thread

* Re: [PATCH v4 3/8] object-store: add function to free object_info contents
  2022-05-05  0:15           ` Junio C Hamano
@ 2022-05-05 16:47             ` Calvin Wan
  2022-05-05 17:01               ` Junio C Hamano
  0 siblings, 1 reply; 103+ messages in thread
From: Calvin Wan @ 2022-05-05 16:47 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git, jonathantanmy, philipoakley, johncai86, me

Apologies for the mistakes. Getting a script from Josh Steadmon to
make sure these things never happen again :)

On Wed, May 4, 2022 at 5:15 PM Junio C Hamano <gitster@pobox.com> wrote:
>
> Junio C Hamano <gitster@pobox.com> writes:
>
> >> +    if (object_info->typep)
> >> +            free(object_info->typep);
> >> +    if (object_info->sizep)
> >> +            free(object_info->sizep);
> >> +    if (object_info->disk_sizep)
> >> +            free(object_info->disk_sizep);
> >> +    if (object_info->delta_base_oid)
> >> +            free(object_info->delta_base_oid);
> >> +    if (object_info->type_name)
> >> +            free(object_info->type_name);
> >
> >       if (PTR)
> >               free(PTR);
> >
> > can and should be written as
> >
> >       free(PTR);
> >
> > If we are reusing object_info after calling this function, we
> > _might_ want to use FREE_AND_NULL() instead of free().
>
> > As this breaks my build cycle ("make sparse" is part of my
> > post-integration check), I have added this workaround on the tip of
> > the topic, but please make sure I do not have to redo that when you
> > reroll.
> >
> > Thanks.
>
> Again, this breaks the build at GitHub CI (static-analysis), I added
> this workaround on the tip of the topic, merged it to 'seen' and
> pushed the result out, but please make sure I do not have to redo
> that when you reroll.
>
> Thanks.

^ permalink raw reply	[flat|nested] 103+ messages in thread

* Re: [PATCH v4 3/8] object-store: add function to free object_info contents
  2022-05-05 16:47             ` Calvin Wan
@ 2022-05-05 17:01               ` Junio C Hamano
  0 siblings, 0 replies; 103+ messages in thread
From: Junio C Hamano @ 2022-05-05 17:01 UTC (permalink / raw)
  To: Calvin Wan; +Cc: git, jonathantanmy, philipoakley, johncai86, me

Calvin Wan <calvinwan@google.com> writes:

> Apologies for the mistakes. Getting a script from Josh Steadmon to
> make sure these things never happen again :)

Don't fret, mistakes happen.  The static-analysis job is quite picky
and catch these for us.

Sample runs are found here:

https://github.com/git/git/runs/6299058086?check_suite_focus=true
https://github.com/git/git/runs/6295167966?check_suite_focus=true


^ permalink raw reply	[flat|nested] 103+ messages in thread

* Re: [PATCH v4 8/8] cat-file: add --batch-command remote-object-info command
  2022-05-04 21:27         ` Jonathan Tan
@ 2022-05-05 18:13           ` Calvin Wan
  2022-05-05 18:44             ` Junio C Hamano
  0 siblings, 1 reply; 103+ messages in thread
From: Calvin Wan @ 2022-05-05 18:13 UTC (permalink / raw)
  To: Jonathan Tan; +Cc: git, gitster, philipoakley, johncai86, me

> You mention below that this only works in --buffer mode, so I don't
> think that this is relevant.

It works in non-buffer mode as well. I meant that if you wanted to use
`remote-object-info` with another option, then --filter is the only
one that it works with. I tried to match the documentation for
--batch-command that says something similar:

--batch-command=<format>
Enter a command mode that reads commands and arguments from stdin. May
only be combined with --buffer, --textconv or --filters

> What happens if remote-object-info is mixed with other commands?
It works with other commands. The idea is that you would enter a line like:

remote-object-info <remote> <oid> <oid> ... <oid>

rather than

remote-object-info <remote> <oid>
remote-object-info <remote> <oid>
remote-object-info <remote> <oid>

since each invocation of remote-object-info would cause another round
trip to the server.

> Hmm...I would have expected the information to appear in the same data
> structure used to store other command invocations like "contents",
> "info", and "flush", not separately like this. This design doesn't seem
> to support mixing commands.

Here's an example of how it works with other commands. Let's say I
call git cat-file --batch-command --buffer and input the following:

remote-object-info remote1 oid1 oid2 oid3
contents oid4
info oid5
remote-object-info remote2 oid6 oid7 oid8
flush

This would result in the following:

(call transport and store info in separate structure)
info oid1 (using passed in object-info)
info oid2
info oid3
contents oid4
info oid5
(call transport and store info in separate structure)
info oid6
info oid7
info oid8

> Rather than store a pointer like this (and thus needing to keep track of
> the lifetime of the memory pointed to by this pointer), if we only need
> to know whether this queued_cmd is a remote-object-info call, we can
> just use "unsigned is_remote_object_info : 1".

OK

>> @@ -565,9 +639,49 @@ static const struct parse_cmd {
>>  } commands[] = {
>>       { "contents", parse_cmd_contents, 1},
>>       { "info", parse_cmd_info, 1},
>> +     { "remote-object-info", parse_cmd_info, 1},
>>       { "flush", NULL, 0},
>>  };

> Double parse_cmd_info?

See above for why remote-object-info becomes info



On Wed, May 4, 2022 at 2:27 PM Jonathan Tan <jonathantanmy@google.com> wrote:
>
> Thanks for the patch. I'll limit myself to high-level comments for now,
> since the design may need to change if we want to support mixing
> remote-object-info with other commands in a --batch-command --buffer
> invocation.
>
> Calvin Wan <calvinwan@google.com> writes:
> > Since the `info` command in cat-file --batch-command prints object info
> > for a given object, it is natural to add another command in cat-file
> > --batch-command to print object info for a given object from a remote.
> > Add `remote-object-info` to cat-file --batch-command.
> >
> > While `info` takes object ids one at a time, this creates overhead when
> > making requests to a server so `remote-object-info` instead can take
> > multiple object ids at once.
> >
> > cat-file --batch-command is generally implemented in the following
> > manner:
> >
> >  - Receive and parse input from user
> >  - Call respective function attached to command
> >  - Set batch mode state, get object info, print object info
>
> You mention below that this only works in --buffer mode, so I don't
> think that this is relevant.
>
> > In --buffer mode, this changes to:
> >
> >  - Receive and parse input from user
> >  - Store respective function attached to command in a queue
> >  - After flush, loop through commands in queue
> >     - Call respective function attached to command
> >     - Set batch mode state, get object info, print object info
> >
> > Notice how the getting and printing of object info is accomplished one
> > at a time. As described above, this creates a problem for making
> > requests to a server. Therefore, `remote-object-info` is implemented in
> > the following manner:
>
> If you do want this command to work with --buffer and without, probably
> clearer to replace "Notice" with "Notice that when cat-file is invoked
> both without --buffer and with it". (Having said that, it makes sense to
> me to only make it work with --buffer - making one request per OID would
> probably not be something that the user would want.)
>
> >  - Receive and parse input from user
> >  If command is `remote-object-info`:
> >     - Get object info from remote
> >     - Loop through object info
> >         - Call respective function attached to `info`
> >         - Set batch mode state, use passed in object info, print object
> >           info
> >  Else:
> >     - Call respective function attached to command
> >     - Parse input, get object info, print object info
> >
> > And finally for --buffer mode `remote-object-info`:
> >  - Receive and parse input from user
> >  - Store respective function attached to command in a queue
> >  - After flush, loop through commands in queue:
> >     If command is `remote-object-info`:
> >         - Get object info from remote
> >         - Loop through object info
> >             - Call respective function attached to `info`
> >             - Set batch mode state, use passed in object info, print
> >               object info
> >     Else:
> >         - Call respective function attached to command
> >         - Set batch mode state, get object info, print object info
> >
> > To summarize, `remote-object-info` gets object info from the remote and
> > then generates multiples `info` commands with the object info passed in.
> > It cannot be implemented similarly to `info` and `content` because of
> > the overhead with remote commenication.
>
> What happens if remote-object-info is mixed with other commands?
>
> > diff --git a/Documentation/git-cat-file.txt b/Documentation/git-cat-file.txt
> > index 24a811f0ef..0d9e8e6214 100644
> > --- a/Documentation/git-cat-file.txt
> > +++ b/Documentation/git-cat-file.txt
> > @@ -115,6 +115,10 @@ info <object>::
> >       Print object info for object reference `<object>`. This corresponds to the
> >       output of `--batch-check`.
> >
> > +remote-object-info <remote> [<object>]::
> > +     Print object info for object references `<object>` at specified <remote>.
> > +     This command may only be combined with `--buffer`.
>
> Here you mention that we need --buffer.
>
> > @@ -253,13 +258,14 @@ newline. The available atoms are:
> >
> >  `objectsize:disk`::
> >       The size, in bytes, that the object takes up on disk. See the
> > -     note about on-disk sizes in the `CAVEATS` section below.
> > +     note about on-disk sizes in the `CAVEATS` section below. Not
> > +     supported by `remote-object-info`.
> >
> >  `deltabase`::
> >       If the object is stored as a delta on-disk, this expands to the
> >       full hex representation of the delta base object name.
> >       Otherwise, expands to the null OID (all zeroes). See `CAVEATS`
> > -     below.
> > +     below. Not supported by `remote-object-info`.
>
> OK - makes sense that these are not supported by remote-object-info,
> because the remote may not even store objects in delta format.
>
> > @@ -32,11 +35,14 @@ struct batch_options {
> >       int unordered;
> >       int transform_mode; /* may be 'w' or 'c' for --filters or --textconv */
> >       const char *format;
> > +     int use_remote_info;
> >  };
> >
> >  #define DEFAULT_FORMAT "%(objectname) %(objecttype) %(objectsize)"
> >
> >  static const char *force_path;
> > +static struct object_info *remote_object_info;
> > +static struct oid_array object_info_oids = OID_ARRAY_INIT;
>
> Hmm...I would have expected the information to appear in the same data
> structure used to store other command invocations like "contents",
> "info", and "flush", not separately like this. This design doesn't seem
> to support mixing commands.
>
> > @@ -538,6 +611,7 @@ typedef void (*parse_cmd_fn_t)(struct batch_options *, const char *,
> >  struct queued_cmd {
> >       parse_cmd_fn_t fn;
> >       char *line;
> > +     const char *name;
> >  };
>
> Rather than store a pointer like this (and thus needing to keep track of
> the lifetime of the memory pointed to by this pointer), if we only need
> to know whether this queued_cmd is a remote-object-info call, we can
> just use "unsigned is_remote_object_info : 1".
>
> > @@ -565,9 +639,49 @@ static const struct parse_cmd {
> >  } commands[] = {
> >       { "contents", parse_cmd_contents, 1},
> >       { "info", parse_cmd_info, 1},
> > +     { "remote-object-info", parse_cmd_info, 1},
> >       { "flush", NULL, 0},
> >  };
>
> Double parse_cmd_info?

^ permalink raw reply	[flat|nested] 103+ messages in thread

* Re: [PATCH v4 2/8] fetch-pack: move fetch default settings
  2022-05-03 23:06           ` Jonathan Tan
@ 2022-05-05 18:13             ` Calvin Wan
  0 siblings, 0 replies; 103+ messages in thread
From: Calvin Wan @ 2022-05-05 18:13 UTC (permalink / raw)
  To: Jonathan Tan; +Cc: Junio C Hamano, git, philipoakley, johncai86, me

Done thanks for the suggestions

On Tue, May 3, 2022 at 4:07 PM Jonathan Tan <jonathantanmy@google.com> wrote:
>
> Junio C Hamano <gitster@pobox.com> writes:
> > Calvin Wan <calvinwan@google.com> writes:
> >
> > > When the state machine in do_fetch_pack_v2() is in FETCH_CHECK_LOCAL, we
> > > set a few v2-specific defaults. It will be helpful for a future patch to
> > > have these defaults set if the initial state is not FETCH_CHECK_LOCAL.
> >
> >
> > > This is a safe change since the initial state is currently always
> > > FETCH_CHECK_LOCAL, so we're guaranteed to hit that code whether it's in
> > > the state machine or not.
> >
> > What does "it" (which supposed to be able to be in the state machine
> > and also not to be in the state matchine) in the last sentence refer
> > to?
>
> I think the "it" refers to the initialization code that was moved.
>
> > The patch looks correct and I agree that this is a benign no-op
> > because the initial value of state is FETCH_CHECK_LOCAL (i.e. the
> > block of code moved here will execute pretty much as the first
> > thing, and the relative order between that block and sorting of ref
> > list should not matter).  I just didn't understand the explanation
> > given by the patch why it is safe.
> >
> > Thanks.
>
> Agreed - I think the commit message would be better off if it was worded
> something like:
>
>   There are some initialization steps that need to be done at the start
>   of the execution of the do_fetch_pack_v2() state machine. Currently,
>   these are done in the FETCH_CHECK_LOCAL state, which is the initial
>   state.
>
>   However, a subsequent patch will allow for another initial state,
>   while still requiring these initialization steps. Therefore, move
>   these initialization steps to before the state machine, so that they
>   are run regardless of the initial state.
>
> I think this description suffices for a reviewer to see that it is safe,
> but if you want, you can add:
>
>   Note that there is no change in behavior, because we're moving code
>   from the beginning of the first state to just before the execution of
>   the state machine.

^ permalink raw reply	[flat|nested] 103+ messages in thread

* Re: [PATCH v4 8/8] cat-file: add --batch-command remote-object-info command
  2022-05-05 18:13           ` Calvin Wan
@ 2022-05-05 18:44             ` Junio C Hamano
  2022-05-05 19:09               ` Junio C Hamano
  0 siblings, 1 reply; 103+ messages in thread
From: Junio C Hamano @ 2022-05-05 18:44 UTC (permalink / raw)
  To: Calvin Wan; +Cc: Jonathan Tan, git, philipoakley, johncai86, me

Calvin Wan <calvinwan@google.com> writes:

> It works with other commands. The idea is that you would enter a line like:
>
> remote-object-info <remote> <oid> <oid> ... <oid>
>
> rather than
>
> remote-object-info <remote> <oid>
> remote-object-info <remote> <oid>
> remote-object-info <remote> <oid>
>
> since each invocation of remote-object-info would cause another round
> trip to the server.

Hmm.  It looks unfortunate, but if we do not care about hitting the
pkt-line length limit, that might be OK.  I dunno.

I would have expected to see something like

	start batch
	request 1
	request 2
	...
	request 2000
	flush batch

during which the other side patiently listens to our requests.  They
(meaning the local process that reads the above and talks to a
remote as needed) can coalesce the requests of the same kind
(e.g. going to the same remote) while buffering and optimize their
operation without having the caller of them to worry about it that
way, no?

^ permalink raw reply	[flat|nested] 103+ messages in thread

* Re: [PATCH v4 8/8] cat-file: add --batch-command remote-object-info command
  2022-05-05 18:44             ` Junio C Hamano
@ 2022-05-05 19:09               ` Junio C Hamano
  2022-05-05 19:19                 ` Calvin Wan
  0 siblings, 1 reply; 103+ messages in thread
From: Junio C Hamano @ 2022-05-05 19:09 UTC (permalink / raw)
  To: Calvin Wan; +Cc: Jonathan Tan, git, philipoakley, johncai86, me

Junio C Hamano <gitster@pobox.com> writes:

> I would have expected to see something like
>
> 	start batch
> 	request 1
> 	request 2
> 	...
> 	request 2000
> 	flush batch
>
> during which the other side patiently listens to our requests.  They
> (meaning the local process that reads the above and talks to a
> remote as needed) can coalesce the requests of the same kind
> (e.g. going to the same remote) while buffering and optimize their
> operation without having the caller of them to worry about it that
> way, no?

Ah, that is in the batch mode, and you were syaing that "one request
with multiple objects" would allow multiple object-info requests to
be hanled efficiently even in non-batch mode.  If that was what you
were talking about, I think that does make sense.  Thanks.


^ permalink raw reply	[flat|nested] 103+ messages in thread

* Re: [PATCH v4 8/8] cat-file: add --batch-command remote-object-info command
  2022-05-05 19:09               ` Junio C Hamano
@ 2022-05-05 19:19                 ` Calvin Wan
  0 siblings, 0 replies; 103+ messages in thread
From: Calvin Wan @ 2022-05-05 19:19 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Jonathan Tan, git, philipoakley, johncai86, me

> Ah, that is in the batch mode, and you were syaing that "one request
> with multiple objects" would allow multiple object-info requests to
> be hanled efficiently even in non-batch mode.  If that was what you
> were talking about, I think that does make sense.  Thanks.

Correct, also I think you mean "buffer" mode rather than "batch" mode.

On Thu, May 5, 2022 at 12:09 PM Junio C Hamano <gitster@pobox.com> wrote:
>
> Junio C Hamano <gitster@pobox.com> writes:
>
> > I would have expected to see something like
> >
> >       start batch
> >       request 1
> >       request 2
> >       ...
> >       request 2000
> >       flush batch
> >
> > during which the other side patiently listens to our requests.  They
> > (meaning the local process that reads the above and talks to a
> > remote as needed) can coalesce the requests of the same kind
> > (e.g. going to the same remote) while buffering and optimize their
> > operation without having the caller of them to worry about it that
> > way, no?
>
> Ah, that is in the batch mode, and you were syaing that "one request
> with multiple objects" would allow multiple object-info requests to
> be hanled efficiently even in non-batch mode.  If that was what you
> were talking about, I think that does make sense.  Thanks.
>

^ permalink raw reply	[flat|nested] 103+ messages in thread

* [PATCH v5 0/6] cat-file: add --batch-command remote-object-info command
  2022-05-02 17:08     ` [PATCH v4 0/8] cat-file: add --batch-command remote-object-info command Calvin Wan
                         ` (7 preceding siblings ...)
  2022-05-02 17:09       ` [PATCH v4 8/8] cat-file: add --batch-command remote-object-info command Calvin Wan
@ 2022-07-28 23:02       ` Calvin Wan
  2022-07-28 23:56         ` Junio C Hamano
                           ` (2 more replies)
  2022-07-28 23:02       ` [PATCH v5 1/6] fetch-pack: refactor packet writing Calvin Wan
                         ` (7 subsequent siblings)
  16 siblings, 3 replies; 103+ messages in thread
From: Calvin Wan @ 2022-07-28 23:02 UTC (permalink / raw)
  To: git; +Cc: Calvin Wan, gitster, jonathantanmy, philipoakley, johncai86

Sometimes it is useful to get information about an object without having
to download it completely. The server logic has already been implemented
as “a2ba162cda (object-info: support for retrieving object info,
2021-04-20)”. This patch implements the client option for it.

Add `--object-info` option to `cat-file --batch-command`. This option
allows the client to make an object-info command request to a server
that supports protocol v2. If the server is v2, but does not allow for
the object-info command request, the entire object is fetched and the
relevant object info is returned.

=== Changes since v4 ===
 - Instead of making 2 rounds trips to determine whether the server can
   handle the client object-info request, the server now advertises the
   features it can send back so the client can immediately fallback to
   fetch if any feature is not supported
 - Fixed nits and clarified some documentation/commit messages

Signed-off-by: Calvin Wan <calvinwan@google.com>
Helped-by: Jonathan Tan <jonathantanmy@google.com>

Calvin Wan (6):
  fetch-pack: refactor packet writing
  fetch-pack: move fetch initialization
  protocol-caps: initialization bug fix
  serve: advertise object-info feature
  transport: add client support for object-info
  cat-file: add remote-object-info to batch-command

 Documentation/git-cat-file.txt |  16 +-
 builtin/cat-file.c             | 225 ++++++++++++++++-----
 fetch-pack.c                   |  52 +++--
 fetch-pack.h                   |  10 +
 object-file.c                  |  11 ++
 object-store.h                 |   3 +
 protocol-caps.c                |   2 +-
 serve.c                        |  10 +-
 t/t1006-cat-file.sh            | 348 +++++++++++++++++++++++++++++++++
 t/t5555-http-smart-common.sh   |   2 +-
 t/t5701-git-serve.sh           |   2 +-
 transport-helper.c             |   7 +-
 transport.c                    | 110 ++++++++++-
 transport.h                    |  11 ++
 14 files changed, 734 insertions(+), 75 deletions(-)


base-commit: 9dd64cb4d310986dd7b8ca7fff92f9b61e0bd21a
-- 
2.37.1.455.g008518b4e5-goog


^ permalink raw reply	[flat|nested] 103+ messages in thread

* [PATCH v5 1/6] fetch-pack: refactor packet writing
  2022-05-02 17:08     ` [PATCH v4 0/8] cat-file: add --batch-command remote-object-info command Calvin Wan
                         ` (8 preceding siblings ...)
  2022-07-28 23:02       ` [PATCH v5 0/6] " Calvin Wan
@ 2022-07-28 23:02       ` Calvin Wan
  2022-07-28 23:02       ` [PATCH v5 2/6] fetch-pack: move fetch initialization Calvin Wan
                         ` (6 subsequent siblings)
  16 siblings, 0 replies; 103+ messages in thread
From: Calvin Wan @ 2022-07-28 23:02 UTC (permalink / raw)
  To: git; +Cc: Calvin Wan, gitster, jonathantanmy, philipoakley, johncai86

A subsequent patch will need to write capabilities for another command,
so refactor write_fetch_command_and_capabilities() to be used by both
fetch and future command.

Signed-off-by: Calvin Wan <calvinwan@google.com>
Helped-by: Jonathan Tan <jonathantanmy@google.com>
---
 fetch-pack.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/fetch-pack.c b/fetch-pack.c
index cb6647d657..5176420b62 100644
--- a/fetch-pack.c
+++ b/fetch-pack.c
@@ -1261,13 +1261,13 @@ static int add_haves(struct fetch_negotiator *negotiator,
 	return haves_added;
 }
 
-static void write_fetch_command_and_capabilities(struct strbuf *req_buf,
-						 const struct string_list *server_options)
+static void write_command_and_capabilities(struct strbuf *req_buf,
+						 const struct string_list *server_options, const char* command)
 {
 	const char *hash_name;
 
-	if (server_supports_v2("fetch", 1))
-		packet_buf_write(req_buf, "command=fetch");
+	if (server_supports_v2(command, 1))
+		packet_buf_write(req_buf, "command=%s", command);
 	if (server_supports_v2("agent", 0))
 		packet_buf_write(req_buf, "agent=%s", git_user_agent_sanitized());
 	if (advertise_sid && server_supports_v2("session-id", 0))
@@ -1303,7 +1303,7 @@ static int send_fetch_request(struct fetch_negotiator *negotiator, int fd_out,
 	int done_sent = 0;
 	struct strbuf req_buf = STRBUF_INIT;
 
-	write_fetch_command_and_capabilities(&req_buf, args->server_options);
+	write_command_and_capabilities(&req_buf, args->server_options, "fetch");
 
 	if (args->use_thin_pack)
 		packet_buf_write(&req_buf, "thin-pack");
@@ -2090,7 +2090,7 @@ void negotiate_using_fetch(const struct oid_array *negotiation_tips,
 		int received_ready = 0;
 
 		strbuf_reset(&req_buf);
-		write_fetch_command_and_capabilities(&req_buf, server_options);
+		write_command_and_capabilities(&req_buf, server_options, "fetch");
 
 		packet_buf_write(&req_buf, "wait-for-done");
 
-- 
2.37.1.455.g008518b4e5-goog


^ permalink raw reply related	[flat|nested] 103+ messages in thread

* [PATCH v5 2/6] fetch-pack: move fetch initialization
  2022-05-02 17:08     ` [PATCH v4 0/8] cat-file: add --batch-command remote-object-info command Calvin Wan
                         ` (9 preceding siblings ...)
  2022-07-28 23:02       ` [PATCH v5 1/6] fetch-pack: refactor packet writing Calvin Wan
@ 2022-07-28 23:02       ` Calvin Wan
  2022-07-28 23:02       ` [PATCH v5 3/6] protocol-caps: initialization bug fix Calvin Wan
                         ` (5 subsequent siblings)
  16 siblings, 0 replies; 103+ messages in thread
From: Calvin Wan @ 2022-07-28 23:02 UTC (permalink / raw)
  To: git; +Cc: Calvin Wan, gitster, jonathantanmy, philipoakley, johncai86

There are some variables that are initialized at the start of the
do_fetch_pack_v2() state machine. Currently, these are initialized
in FETCH_CHECK_LOCAL, which is the initial state set at the beginning
of the function.

However, a subsequent patch will allow for another initial state,
while still requiring these initialized variables. Therefore, move
the initialization to before the state machine, so that they are set
regardless of the initial state.

Note that there is no change in behavior, because we're moving code
from the beginning of the first state to just before the execution of
the state machine.

Signed-off-by: Calvin Wan <calvinwan@google.com>
Helped-by: Jonathan Tan <jonathantanmy@google.com>
---
 fetch-pack.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/fetch-pack.c b/fetch-pack.c
index 5176420b62..8c862b017e 100644
--- a/fetch-pack.c
+++ b/fetch-pack.c
@@ -1628,18 +1628,18 @@ static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args,
 		reader.me = "fetch-pack";
 	}
 
+	/* v2 supports these by default */
+	allow_unadvertised_object_request |= ALLOW_REACHABLE_SHA1;
+	use_sideband = 2;
+	if (args->depth > 0 || args->deepen_since || args->deepen_not)
+		args->deepen = 1;
+
 	while (state != FETCH_DONE) {
 		switch (state) {
 		case FETCH_CHECK_LOCAL:
 			sort_ref_list(&ref, ref_compare_name);
 			QSORT(sought, nr_sought, cmp_ref_by_name);
 
-			/* v2 supports these by default */
-			allow_unadvertised_object_request |= ALLOW_REACHABLE_SHA1;
-			use_sideband = 2;
-			if (args->depth > 0 || args->deepen_since || args->deepen_not)
-				args->deepen = 1;
-
 			/* Filter 'ref' by 'sought' and those that aren't local */
 			mark_complete_and_common_ref(negotiator, args, &ref);
 			filter_refs(args, &ref, sought, nr_sought);
-- 
2.37.1.455.g008518b4e5-goog


^ permalink raw reply related	[flat|nested] 103+ messages in thread

* [PATCH v5 3/6] protocol-caps: initialization bug fix
  2022-05-02 17:08     ` [PATCH v4 0/8] cat-file: add --batch-command remote-object-info command Calvin Wan
                         ` (10 preceding siblings ...)
  2022-07-28 23:02       ` [PATCH v5 2/6] fetch-pack: move fetch initialization Calvin Wan
@ 2022-07-28 23:02       ` Calvin Wan
  2022-07-29 17:51         ` Junio C Hamano
  2022-07-28 23:02       ` [PATCH v5 4/6] serve: advertise object-info feature Calvin Wan
                         ` (4 subsequent siblings)
  16 siblings, 1 reply; 103+ messages in thread
From: Calvin Wan @ 2022-07-28 23:02 UTC (permalink / raw)
  To: git; +Cc: Calvin Wan, gitster, jonathantanmy, philipoakley, johncai86

Initialize info. If info.size bit was on due to on-stack garbage,
we would have given our response with "size" attribute prefixed,
even when the client side never requested it.

Signed-off-by: Calvin Wan <calvinwan@google.com>
Helped-by: Jonathan Tan <jonathantanmy@google.com>
---
 protocol-caps.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/protocol-caps.c b/protocol-caps.c
index bbde91810a..2ad9f45c59 100644
--- a/protocol-caps.c
+++ b/protocol-caps.c
@@ -77,7 +77,7 @@ static void send_info(struct repository *r, struct packet_writer *writer,
 
 int cap_object_info(struct repository *r, struct packet_reader *request)
 {
-	struct requested_info info;
+	struct requested_info info = { 0 };
 	struct packet_writer writer;
 	struct string_list oid_str_list = STRING_LIST_INIT_DUP;
 
-- 
2.37.1.455.g008518b4e5-goog


^ permalink raw reply related	[flat|nested] 103+ messages in thread

* [PATCH v5 4/6] serve: advertise object-info feature
  2022-05-02 17:08     ` [PATCH v4 0/8] cat-file: add --batch-command remote-object-info command Calvin Wan
                         ` (11 preceding siblings ...)
  2022-07-28 23:02       ` [PATCH v5 3/6] protocol-caps: initialization bug fix Calvin Wan
@ 2022-07-28 23:02       ` Calvin Wan
  2022-07-29 17:57         ` Junio C Hamano
  2022-07-28 23:02       ` [PATCH v5 5/6] transport: add client support for object-info Calvin Wan
                         ` (3 subsequent siblings)
  16 siblings, 1 reply; 103+ messages in thread
From: Calvin Wan @ 2022-07-28 23:02 UTC (permalink / raw)
  To: git; +Cc: Calvin Wan, gitster, jonathantanmy, philipoakley, johncai86

In order for a client to know what object-info components a server can
provide, advertise supported object-info features. This will allow a
client to decide whether to query the server for object-info or fetch as
a fallback.

Signed-off-by: Calvin Wan <calvinwan@google.com>
Helped-by: Jonathan Tan <jonathantanmy@google.com>
---
 serve.c                      | 10 +++++++++-
 t/t5555-http-smart-common.sh |  2 +-
 t/t5701-git-serve.sh         |  2 +-
 3 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/serve.c b/serve.c
index 733347f602..1adf9df4a8 100644
--- a/serve.c
+++ b/serve.c
@@ -56,6 +56,14 @@ static int session_id_advertise(struct repository *r, struct strbuf *value)
 	return 1;
 }
 
+static int object_info_advertise(struct repository *r,
+				   struct strbuf *value)
+{
+	if (value)
+		strbuf_addstr(value, "size");
+	return 1;
+}
+
 static void session_id_receive(struct repository *r,
 			       const char *client_sid)
 {
@@ -132,7 +140,7 @@ static struct protocol_capability capabilities[] = {
 	},
 	{
 		.name = "object-info",
-		.advertise = always_advertise,
+		.advertise = object_info_advertise,
 		.command = cap_object_info,
 	},
 };
diff --git a/t/t5555-http-smart-common.sh b/t/t5555-http-smart-common.sh
index b1cfe8b7db..5a16d4259a 100755
--- a/t/t5555-http-smart-common.sh
+++ b/t/t5555-http-smart-common.sh
@@ -131,7 +131,7 @@ test_expect_success 'git upload-pack --advertise-refs: v2' '
 	fetch=shallow wait-for-done
 	server-option
 	object-format=$(test_oid algo)
-	object-info
+	object-info=size
 	0000
 	EOF
 
diff --git a/t/t5701-git-serve.sh b/t/t5701-git-serve.sh
index 1896f671cb..ebb32644e3 100755
--- a/t/t5701-git-serve.sh
+++ b/t/t5701-git-serve.sh
@@ -20,7 +20,7 @@ test_expect_success 'test capability advertisement' '
 	fetch=shallow wait-for-done
 	server-option
 	object-format=$(test_oid algo)
-	object-info
+	object-info=size
 	0000
 	EOF
 
-- 
2.37.1.455.g008518b4e5-goog


^ permalink raw reply related	[flat|nested] 103+ messages in thread

* [PATCH v5 5/6] transport: add client support for object-info
  2022-05-02 17:08     ` [PATCH v4 0/8] cat-file: add --batch-command remote-object-info command Calvin Wan
                         ` (12 preceding siblings ...)
  2022-07-28 23:02       ` [PATCH v5 4/6] serve: advertise object-info feature Calvin Wan
@ 2022-07-28 23:02       ` Calvin Wan
  2022-07-29 18:06         ` Junio C Hamano
                           ` (3 more replies)
  2022-07-28 23:02       ` [PATCH v5 6/6] cat-file: add remote-object-info to batch-command Calvin Wan
                         ` (2 subsequent siblings)
  16 siblings, 4 replies; 103+ messages in thread
From: Calvin Wan @ 2022-07-28 23:02 UTC (permalink / raw)
  To: git; +Cc: Calvin Wan, gitster, jonathantanmy, philipoakley, johncai86

Sometimes it is useful to get information about an object without having
to download it completely. The server logic has already been implemented
as “a2ba162cda (object-info: support for retrieving object info,
2021-04-20)”. This patch adds client functions to communicate with the
server.

The client currently supports requesting a list of object ids with
features 'size' and 'type' from a v2 server. If a server does not
advertise either of the requested features, then the client falls back
to making the request through 'fetch'.

Signed-off-by: Calvin Wan <calvinwan@google.com>
Helped-by: Jonathan Tan <jonathantanmy@google.com>
---
 fetch-pack.c       |  28 ++++++++++++
 fetch-pack.h       |  10 +++++
 transport-helper.c |   7 ++-
 transport.c        | 110 +++++++++++++++++++++++++++++++++++++++++++--
 transport.h        |  11 +++++
 5 files changed, 161 insertions(+), 5 deletions(-)

diff --git a/fetch-pack.c b/fetch-pack.c
index 8c862b017e..d373aed775 100644
--- a/fetch-pack.c
+++ b/fetch-pack.c
@@ -1293,6 +1293,31 @@ static void write_command_and_capabilities(struct strbuf *req_buf,
 	packet_buf_delim(req_buf);
 }
 
+void send_object_info_request(int fd_out, struct object_info_args *args)
+{
+	struct strbuf req_buf = STRBUF_INIT;
+	size_t i;
+
+	write_command_and_capabilities(&req_buf, args->server_options, "object-info");
+
+	if (unsorted_string_list_has_string(args->object_info_options, "size"))
+		packet_buf_write(&req_buf, "size");
+
+	if (unsorted_string_list_has_string(args->object_info_options, "type"))
+		packet_buf_write(&req_buf, "type");
+
+	if (args->oids) {
+		for (i = 0; i < args->oids->nr; i++)
+			packet_buf_write(&req_buf, "oid %s", oid_to_hex(&args->oids->oid[i]));
+	}
+
+	packet_buf_flush(&req_buf);
+	if (write_in_full(fd_out, req_buf.buf, req_buf.len) < 0)
+		die_errno(_("unable to write request to remote"));
+
+	strbuf_release(&req_buf);
+}
+
 static int send_fetch_request(struct fetch_negotiator *negotiator, int fd_out,
 			      struct fetch_pack_args *args,
 			      const struct ref *wants, struct oidset *common,
@@ -1634,6 +1659,9 @@ static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args,
 	if (args->depth > 0 || args->deepen_since || args->deepen_not)
 		args->deepen = 1;
 
+	if (args->object_info)
+		state = FETCH_SEND_REQUEST;
+
 	while (state != FETCH_DONE) {
 		switch (state) {
 		case FETCH_CHECK_LOCAL:
diff --git a/fetch-pack.h b/fetch-pack.h
index 8c7752fc82..11c513f748 100644
--- a/fetch-pack.h
+++ b/fetch-pack.h
@@ -17,6 +17,7 @@ struct fetch_pack_args {
 	const struct string_list *deepen_not;
 	struct list_objects_filter_options filter_options;
 	const struct string_list *server_options;
+	struct object_info **object_info_data;
 
 	/*
 	 * If not NULL, during packfile negotiation, fetch-pack will send "have"
@@ -43,6 +44,7 @@ struct fetch_pack_args {
 	unsigned reject_shallow_remote:1;
 	unsigned deepen:1;
 	unsigned refetch:1;
+	unsigned object_info:1;
 
 	/*
 	 * Indicate that the remote of this request is a promisor remote. The
@@ -69,6 +71,12 @@ struct fetch_pack_args {
 	unsigned connectivity_checked:1;
 };
 
+struct object_info_args {
+	struct string_list *object_info_options;
+	const struct string_list *server_options;
+	struct oid_array *oids;
+};
+
 /*
  * sought represents remote references that should be updated from.
  * On return, the names that were found on the remote will have been
@@ -102,4 +110,6 @@ void negotiate_using_fetch(const struct oid_array *negotiation_tips,
  */
 int report_unmatched_refs(struct ref **sought, int nr_sought);
 
+void send_object_info_request(int fd_out, struct object_info_args *args);
+
 #endif
diff --git a/transport-helper.c b/transport-helper.c
index 322c722478..48a6680200 100644
--- a/transport-helper.c
+++ b/transport-helper.c
@@ -686,13 +686,16 @@ static int fetch_refs(struct transport *transport,
 
 	/*
 	 * If we reach here, then the server, the client, and/or the transport
-	 * helper does not support protocol v2. --negotiate-only requires
-	 * protocol v2.
+	 * helper does not support protocol v2. --negotiate-only and --object-info
+	 * require protocol v2.
 	 */
 	if (data->transport_options.acked_commits) {
 		warning(_("--negotiate-only requires protocol v2"));
 		return -1;
 	}
+	if (transport->smart_options->object_info) {
+		die(_("--object-info requires protocol v2"));
+	}
 
 	if (!data->get_refs_list_called)
 		get_refs_list_using_list(transport, 0);
diff --git a/transport.c b/transport.c
index 52db7a3cb0..2d503e2fbd 100644
--- a/transport.c
+++ b/transport.c
@@ -353,6 +353,80 @@ static struct ref *handshake(struct transport *transport, int for_push,
 	return refs;
 }
 
+static int fetch_object_info(struct transport *transport, struct object_info **object_info_data)
+{
+	size_t i;
+	int size_index = -1;
+	struct git_transport_data *data = transport->data;
+	struct object_info_args args;
+	struct packet_reader reader;
+
+	memset(&args, 0, sizeof(args));
+	args.server_options = transport->server_options;
+	args.object_info_options = transport->smart_options->object_info_options;
+	args.oids = transport->smart_options->object_info_oids;
+
+	connect_setup(transport, 0);
+	packet_reader_init(&reader, data->fd[0], NULL, 0,
+			PACKET_READ_CHOMP_NEWLINE |
+			PACKET_READ_GENTLE_ON_EOF |
+			PACKET_READ_DIE_ON_ERR_PACKET);
+	data->version = discover_version(&reader);
+
+	transport->hash_algo = reader.hash_algo;
+
+	switch (data->version) {
+	case protocol_v2:
+		if (!server_supports_v2("object-info", 0))
+			return -1;
+		if (unsorted_string_list_has_string(args.object_info_options, "size")
+				&& !server_supports_feature("object-info", "size", 0)) {
+			return -1;
+		}
+		if (unsorted_string_list_has_string(args.object_info_options, "type")
+				&& !server_supports_feature("object-info", "type", 0)) {
+			return -1;
+		}
+		send_object_info_request(data->fd[1], &args);
+		break;
+	case protocol_v1:
+	case protocol_v0:
+		die(_("wrong protocol version. expected v2"));
+	case protocol_unknown_version:
+		BUG("unknown protocol version");
+	}
+
+	for (i = 0; i < args.object_info_options->nr; i++) {
+		if (packet_reader_read(&reader) != PACKET_READ_NORMAL) {
+			check_stateless_delimiter(transport->stateless_rpc, &reader, "stateless delimiter expected");
+			return -1;
+		}
+		if (unsorted_string_list_has_string(args.object_info_options, reader.line)) {
+			if (!strcmp(reader.line, "size"))
+				size_index = i;
+			continue;
+		}
+		return -1;
+	}
+
+	i = 0;
+	while (packet_reader_read(&reader) == PACKET_READ_NORMAL && i < args.oids->nr) {
+		struct string_list object_info_values = STRING_LIST_INIT_DUP;
+
+		string_list_split(&object_info_values, reader.line, ' ', -1);
+		if (0 <= size_index) {
+			if (!strcmp(object_info_values.items[1 + size_index].string, ""))
+				die("object-info: not our ref %s",
+					object_info_values.items[0].string);
+			*(*object_info_data)[i].sizep = strtoul(object_info_values.items[1 + size_index].string, NULL, 10);
+		}
+		i++;
+	}
+	check_stateless_delimiter(transport->stateless_rpc, &reader, "stateless delimiter expected");
+
+	return 0;
+}
+
 static struct ref *get_refs_via_connect(struct transport *transport, int for_push,
 					struct transport_ls_refs_options *options)
 {
@@ -363,10 +437,12 @@ static int fetch_refs_via_pack(struct transport *transport,
 			       int nr_heads, struct ref **to_fetch)
 {
 	int ret = 0;
+	size_t i;
 	struct git_transport_data *data = transport->data;
 	struct ref *refs = NULL;
 	struct fetch_pack_args args;
 	struct ref *refs_tmp = NULL;
+	struct ref *object_info_refs = xcalloc(1, sizeof (struct ref));
 
 	memset(&args, 0, sizeof(args));
 	args.uploadpack = data->options.uploadpack;
@@ -392,8 +468,25 @@ static int fetch_refs_via_pack(struct transport *transport,
 	args.server_options = transport->server_options;
 	args.negotiation_tips = data->options.negotiation_tips;
 	args.reject_shallow_remote = transport->smart_options->reject_shallow;
-
-	if (!data->got_remote_heads) {
+	args.object_info = transport->smart_options->object_info;
+
+	if (transport->smart_options && transport->smart_options->object_info) {
+		struct ref *ref = object_info_refs;
+
+		if (!fetch_object_info(transport, data->options.object_info_data))
+			goto cleanup;
+		args.object_info_data = data->options.object_info_data;
+		args.quiet = 1;
+		args.no_progress = 1;
+		for (i = 0; i < transport->smart_options->object_info_oids->nr; i++) {
+			struct ref *temp_ref = xcalloc(1, sizeof (struct ref));
+			temp_ref->old_oid = *(transport->smart_options->object_info_oids->oid + i);
+			temp_ref->exact_oid = 1;
+			ref->next = temp_ref;
+			ref = ref->next;
+		}
+		transport->remote_refs = object_info_refs->next;
+	} else if (!data->got_remote_heads) {
 		int i;
 		int must_list_refs = 0;
 		for (i = 0; i < nr_heads; i++) {
@@ -433,12 +526,22 @@ static int fetch_refs_via_pack(struct transport *transport,
 			  to_fetch, nr_heads, &data->shallow,
 			  &transport->pack_lockfiles, data->version);
 
+	if (args.object_info) {
+		struct ref *ref_cpy_reader = object_info_refs->next;
+		int i = 0;
+		while (ref_cpy_reader) {
+			oid_object_info_extended(the_repository, &ref_cpy_reader->old_oid, &(*args.object_info_data)[i], OBJECT_INFO_LOOKUP_REPLACE);
+			ref_cpy_reader = ref_cpy_reader->next;
+			i++;
+		}
+	}
+
 	data->got_remote_heads = 0;
 	data->options.self_contained_and_connected =
 		args.self_contained_and_connected;
 	data->options.connectivity_checked = args.connectivity_checked;
 
-	if (!refs)
+	if (refs == NULL && !args.object_info)
 		ret = -1;
 	if (report_unmatched_refs(to_fetch, nr_heads))
 		ret = -1;
@@ -453,6 +556,7 @@ static int fetch_refs_via_pack(struct transport *transport,
 
 	free_refs(refs_tmp);
 	free_refs(refs);
+	free_refs(object_info_refs);
 	return ret;
 }
 
diff --git a/transport.h b/transport.h
index b5bf7b3e70..5512fdb140 100644
--- a/transport.h
+++ b/transport.h
@@ -6,6 +6,7 @@
 #include "remote.h"
 #include "list-objects-filter-options.h"
 #include "string-list.h"
+#include "object-store.h"
 
 struct git_transport_options {
 	unsigned thin : 1;
@@ -31,6 +32,12 @@ struct git_transport_options {
 	 */
 	unsigned connectivity_checked:1;
 
+	/*
+	 * Transport will attempt to pull only object-info. Fallbacks
+	 * to pulling entire object if object-info is not supported
+	 */
+	unsigned object_info : 1;
+
 	int depth;
 	const char *deepen_since;
 	const struct string_list *deepen_not;
@@ -54,6 +61,10 @@ struct git_transport_options {
 	 * common commits to this oidset instead of fetching any packfiles.
 	 */
 	struct oidset *acked_commits;
+
+	struct oid_array *object_info_oids;
+	struct object_info **object_info_data;
+	struct string_list *object_info_options;
 };
 
 enum transport_family {
-- 
2.37.1.455.g008518b4e5-goog


^ permalink raw reply related	[flat|nested] 103+ messages in thread

* [PATCH v5 6/6] cat-file: add remote-object-info to batch-command
  2022-05-02 17:08     ` [PATCH v4 0/8] cat-file: add --batch-command remote-object-info command Calvin Wan
                         ` (13 preceding siblings ...)
  2022-07-28 23:02       ` [PATCH v5 5/6] transport: add client support for object-info Calvin Wan
@ 2022-07-28 23:02       ` Calvin Wan
  2022-07-29  6:25         ` Ævar Arnfjörð Bjarmason
                           ` (2 more replies)
  2022-07-31 15:02       ` [PATCH v4 0/8] cat-file: add --batch-command remote-object-info command ZheNing Hu
  2022-08-13 22:17       ` Junio C Hamano
  16 siblings, 3 replies; 103+ messages in thread
From: Calvin Wan @ 2022-07-28 23:02 UTC (permalink / raw)
  To: git; +Cc: Calvin Wan, gitster, jonathantanmy, philipoakley, johncai86

Since the `info` command in cat-file --batch-command prints object info
for a given object, it is natural to add another command in cat-file
--batch-command to print object info for a given object from a remote.
Add `remote-object-info` to cat-file --batch-command.

While `info` takes object ids one at a time, this creates overhead when
making requests to a server so `remote-object-info` instead can take
multiple object ids at once.

cat-file --batch-command is generally implemented in the following
manner:

 - Receive and parse input from user
 - Call respective function attached to command
 - Set batch mode state, get object info, print object info

In --buffer mode, this changes to:

 - Receive and parse input from user
 - Store respective function attached to command in a queue
 - After flush, loop through commands in queue
    - Call respective function attached to command
    - Set batch mode state, get object info, print object info

Notice how the getting and printing of object info is accomplished one
at a time. As described above, this creates a problem for making
requests to a server. Therefore, `remote-object-info` is implemented in
the following manner:

 - Receive and parse input from user
 If command is `remote-object-info`:
    - Get object info from remote
    - Loop through object info
        - Call respective function attached to `info`
        - Set batch mode state, use passed in object info, print object
          info
 Else:
    - Call respective function attached to command
    - Parse input, get object info, print object info

And finally for --buffer mode `remote-object-info`:
 - Receive and parse input from user
 - Store respective function attached to command in a queue
 - After flush, loop through commands in queue:
    If command is `remote-object-info`:
        - Get object info from remote
        - Loop through object info
            - Call respective function attached to `info`
            - Set batch mode state, use passed in object info, print
              object info
    Else:
        - Call respective function attached to command
        - Set batch mode state, get object info, print object info

To summarize, `remote-object-info` gets object info from the remote and
then generates multiple `info` commands with the object info passed in.

In order for remote-object-info to avoid remote communication overhead
in the non-buffer mode, the objects are passed in as such:

remote-object-info <remote> <oid> <oid> ... <oid>

rather than

remote-object-info <remote> <oid>
remote-object-info <remote> <oid>
...
remote-object-info <remote> <oid>

Signed-off-by: Calvin Wan <calvinwan@google.com>
Helped-by: Jonathan Tan <jonathantanmy@google.com>
---
 Documentation/git-cat-file.txt |  16 +-
 builtin/cat-file.c             | 225 ++++++++++++++++-----
 object-file.c                  |  11 ++
 object-store.h                 |   3 +
 t/t1006-cat-file.sh            | 348 +++++++++++++++++++++++++++++++++
 5 files changed, 549 insertions(+), 54 deletions(-)

diff --git a/Documentation/git-cat-file.txt b/Documentation/git-cat-file.txt
index 24a811f0ef..c073d5e50d 100644
--- a/Documentation/git-cat-file.txt
+++ b/Documentation/git-cat-file.txt
@@ -115,6 +115,10 @@ info <object>::
 	Print object info for object reference `<object>`. This corresponds to the
 	output of `--batch-check`.
 
+remote-object-info <remote> [<object>]::
+	Print object info for object references `<object>` at specified <remote>.
+	This command may be combined with `--buffer`.
+
 flush::
 	Used with `--buffer` to execute all preceding commands that were issued
 	since the beginning or since the last flush was issued. When `--buffer`
@@ -245,7 +249,8 @@ newline. The available atoms are:
 	The full hex representation of the object name.
 
 `objecttype`::
-	The type of the object (the same as `cat-file -t` reports).
+	The type of the object (the same as `cat-file -t` reports). See
+	`CAVEATS` below.
 
 `objectsize`::
 	The size, in bytes, of the object (the same as `cat-file -s`
@@ -253,13 +258,14 @@ newline. The available atoms are:
 
 `objectsize:disk`::
 	The size, in bytes, that the object takes up on disk. See the
-	note about on-disk sizes in the `CAVEATS` section below.
+	note about on-disk sizes in the `CAVEATS` section below. Not
+	supported by `remote-object-info`.
 
 `deltabase`::
 	If the object is stored as a delta on-disk, this expands to the
 	full hex representation of the delta base object name.
 	Otherwise, expands to the null OID (all zeroes). See `CAVEATS`
-	below.
+	below. Not supported by `remote-object-info`.
 
 `rest`::
 	If this atom is used in the output string, input lines are split
@@ -346,6 +352,10 @@ directory name.
 CAVEATS
 -------
 
+Note that since object type is currently not supported by the
+object-info command request, git fetches the entire object instead to
+get the object info.
+
 Note that the sizes of objects on disk are reported accurately, but care
 should be taken in drawing conclusions about which refs or objects are
 responsible for disk usage. The size of a packed non-delta object may be
diff --git a/builtin/cat-file.c b/builtin/cat-file.c
index 50cf38999d..68b72377e6 100644
--- a/builtin/cat-file.c
+++ b/builtin/cat-file.c
@@ -16,6 +16,9 @@
 #include "packfile.h"
 #include "object-store.h"
 #include "promisor-remote.h"
+#include "alias.h"
+#include "remote.h"
+#include "transport.h"
 
 enum batch_mode {
 	BATCH_MODE_CONTENTS,
@@ -32,9 +35,14 @@ struct batch_options {
 	int unordered;
 	int transform_mode; /* may be 'w' or 'c' for --filters or --textconv */
 	const char *format;
+	int use_remote_info;
 };
 
+#define DEFAULT_FORMAT "%(objectname) %(objecttype) %(objectsize)"
+
 static const char *force_path;
+static struct object_info *remote_object_info;
+static struct oid_array object_info_oids = OID_ARRAY_INIT;
 
 static int filter_object(const char *path, unsigned mode,
 			 const struct object_id *oid,
@@ -423,48 +431,115 @@ static void batch_one_object(const char *obj_name,
 	int flags = opt->follow_symlinks ? GET_OID_FOLLOW_SYMLINKS : 0;
 	enum get_oid_result result;
 
-	result = get_oid_with_context(the_repository, obj_name,
-				      flags, &data->oid, &ctx);
-	if (result != FOUND) {
-		switch (result) {
-		case MISSING_OBJECT:
-			printf("%s missing\n", obj_name);
-			break;
-		case SHORT_NAME_AMBIGUOUS:
-			printf("%s ambiguous\n", obj_name);
-			break;
-		case DANGLING_SYMLINK:
-			printf("dangling %"PRIuMAX"\n%s\n",
-			       (uintmax_t)strlen(obj_name), obj_name);
-			break;
-		case SYMLINK_LOOP:
-			printf("loop %"PRIuMAX"\n%s\n",
-			       (uintmax_t)strlen(obj_name), obj_name);
-			break;
-		case NOT_DIR:
-			printf("notdir %"PRIuMAX"\n%s\n",
-			       (uintmax_t)strlen(obj_name), obj_name);
-			break;
-		default:
-			BUG("unknown get_sha1_with_context result %d\n",
-			       result);
-			break;
+	if (!opt->use_remote_info) {
+		result = get_oid_with_context(the_repository, obj_name,
+						flags, &data->oid, &ctx);
+		if (result != FOUND) {
+			switch (result) {
+			case MISSING_OBJECT:
+				printf("%s missing\n", obj_name);
+				break;
+			case SHORT_NAME_AMBIGUOUS:
+				printf("%s ambiguous\n", obj_name);
+				break;
+			case DANGLING_SYMLINK:
+				printf("dangling %"PRIuMAX"\n%s\n",
+					(uintmax_t)strlen(obj_name), obj_name);
+				break;
+			case SYMLINK_LOOP:
+				printf("loop %"PRIuMAX"\n%s\n",
+					(uintmax_t)strlen(obj_name), obj_name);
+				break;
+			case NOT_DIR:
+				printf("notdir %"PRIuMAX"\n%s\n",
+					(uintmax_t)strlen(obj_name), obj_name);
+				break;
+			default:
+				BUG("unknown get_sha1_with_context result %d\n",
+					result);
+				break;
+			}
+			fflush(stdout);
+			return;
 		}
-		fflush(stdout);
-		return;
-	}
 
-	if (ctx.mode == 0) {
-		printf("symlink %"PRIuMAX"\n%s\n",
-		       (uintmax_t)ctx.symlink_path.len,
-		       ctx.symlink_path.buf);
-		fflush(stdout);
-		return;
+		if (ctx.mode == 0) {
+			printf("symlink %"PRIuMAX"\n%s\n",
+				(uintmax_t)ctx.symlink_path.len,
+				ctx.symlink_path.buf);
+			fflush(stdout);
+			return;
+		}
 	}
 
 	batch_object_write(obj_name, scratch, opt, data, NULL, 0);
 }
 
+static int get_remote_info(struct batch_options *opt, int argc, const char **argv)
+{
+	int retval = 0;
+	size_t i;
+	struct remote *remote = NULL;
+	struct object_id oid;
+	struct string_list object_info_options = STRING_LIST_INIT_NODUP;
+	static struct transport *gtransport;
+	char *format = DEFAULT_FORMAT;
+
+	if (opt->format)
+		format = xstrdup(opt->format);
+
+	remote = remote_get(argv[0]);
+	if (!remote)
+		die(_("must supply valid remote when using --object-info"));
+	oid_array_clear(&object_info_oids);
+	for (i = 1; i < argc; i++) {
+		if (get_oid(argv[i], &oid))
+			die(_("malformed object id '%s'"), argv[i]);
+		oid_array_append(&object_info_oids, &oid);
+	}
+
+	gtransport = transport_get(remote, NULL);
+	if (gtransport->smart_options) {
+		size_t j;
+		int include_size = 0, include_type = 0;
+
+		remote_object_info = xcalloc(object_info_oids.nr, sizeof(struct object_info));
+		gtransport->smart_options->object_info = 1;
+		gtransport->smart_options->object_info_oids = &object_info_oids;
+		/**
+		 * 'size' is the only option currently supported.
+		 * Other options that are passed in the format will fallback to a
+		 * standard fetch request rather than object-info.
+		 */
+		if (strstr(format, "%(objectsize)")) {
+			string_list_append(&object_info_options, "size");
+			include_size = 1;
+		}
+		if (strstr(format, "%(objecttype)")) {
+			string_list_append(&object_info_options, "type");
+			include_type = 1;
+		}
+		if (strstr(format, "%(objectsize:disk)"))
+			die(_("objectsize:disk is currently not supported with remote-object-info"));
+		if (strstr(format, "%(deltabase)"))
+			die(_("deltabase is currently not supported with remote-object-info"));
+		if (object_info_options.nr > 0) {
+			gtransport->smart_options->object_info_options = &object_info_options;
+			for (j = 0; j < object_info_oids.nr; j++) {
+				if (include_size)
+					remote_object_info[j].sizep = xcalloc(1, sizeof(long));
+				if (include_type)
+					remote_object_info[j].typep = xcalloc(1, sizeof(enum object_type));
+			}
+			gtransport->smart_options->object_info_data = &remote_object_info;
+			retval = transport_fetch_refs(gtransport, NULL);
+		}
+	} else {
+		retval = -1;
+	}
+	return retval;
+}
+
 struct object_cb_data {
 	struct batch_options *opt;
 	struct expand_data *expand;
@@ -536,6 +611,7 @@ typedef void (*parse_cmd_fn_t)(struct batch_options *, const char *,
 struct queued_cmd {
 	parse_cmd_fn_t fn;
 	char *line;
+	const char *name;
 };
 
 static void parse_cmd_contents(struct batch_options *opt,
@@ -556,6 +632,56 @@ static void parse_cmd_info(struct batch_options *opt,
 	batch_one_object(line, output, opt, data);
 }
 
+static const struct parse_cmd {
+	const char *name;
+	parse_cmd_fn_t fn;
+	unsigned takes_args;
+} commands[] = {
+	{ "contents", parse_cmd_contents, 1},
+	{ "info", parse_cmd_info, 1},
+	{ "remote-object-info", parse_cmd_info, 1},
+	{ "flush", NULL, 0},
+};
+
+static void parse_remote_info(struct batch_options *opt,
+			   char *line,
+			   struct strbuf *output,
+			   struct expand_data *data,
+			   const struct parse_cmd *p_cmd,
+			   struct queued_cmd *q_cmd)
+{
+	int count;
+	size_t i;
+	const char **argv;
+
+	count = split_cmdline(line, &argv);
+	if (get_remote_info(opt, count, argv))
+		goto cleanup;
+	opt->use_remote_info = 1;
+	data->skip_object_info = 1;
+	data->mark_query = 0;
+	for (i = 0; i < object_info_oids.nr; i++) {
+		if (remote_object_info[i].sizep)
+			data->size = *remote_object_info[i].sizep;
+		if (remote_object_info[i].typep)
+			data->type = *remote_object_info[i].typep;
+
+		data->oid = object_info_oids.oid[i];
+		if (p_cmd)
+			p_cmd->fn(opt, argv[i+1], output, data);
+		else
+			q_cmd->fn(opt, argv[i+1], output, data);
+	}
+	opt->use_remote_info = 0;
+	data->skip_object_info = 0;
+	data->mark_query = 1;
+
+cleanup:
+	for (i = 0; i < object_info_oids.nr; i++)
+		free_object_info_contents(&remote_object_info[i]);
+	free(remote_object_info);
+}
+
 static void dispatch_calls(struct batch_options *opt,
 		struct strbuf *output,
 		struct expand_data *data,
@@ -567,8 +693,12 @@ static void dispatch_calls(struct batch_options *opt,
 	if (!opt->buffer_output)
 		die(_("flush is only for --buffer mode"));
 
-	for (i = 0; i < nr; i++)
-		cmd[i].fn(opt, cmd[i].line, output, data);
+	for (i = 0; i < nr; i++) {
+		if (!strcmp(cmd[i].name, "remote-object-info"))
+			parse_remote_info(opt, cmd[i].line, output, data, NULL, &cmd[i]);
+		else
+			cmd[i].fn(opt, cmd[i].line, output, data);
+	}
 
 	fflush(stdout);
 }
@@ -583,17 +713,6 @@ static void free_cmds(struct queued_cmd *cmd, size_t *nr)
 	*nr = 0;
 }
 
-
-static const struct parse_cmd {
-	const char *name;
-	parse_cmd_fn_t fn;
-	unsigned takes_args;
-} commands[] = {
-	{ "contents", parse_cmd_contents, 1},
-	{ "info", parse_cmd_info, 1},
-	{ "flush", NULL, 0},
-};
-
 static void batch_objects_command(struct batch_options *opt,
 				    struct strbuf *output,
 				    struct expand_data *data)
@@ -639,11 +758,17 @@ static void batch_objects_command(struct batch_options *opt,
 			dispatch_calls(opt, output, data, queued_cmd, nr);
 			free_cmds(queued_cmd, &nr);
 		} else if (!opt->buffer_output) {
-			cmd->fn(opt, p, output, data);
+			if (!strcmp(cmd->name, "remote-object-info")) {
+				char *line = xstrdup_or_null(p);
+				parse_remote_info(opt, line, output, data, cmd, NULL);
+			} else {
+				cmd->fn(opt, p, output, data);
+			}
 		} else {
 			ALLOC_GROW(queued_cmd, nr + 1, alloc);
 			call.fn = cmd->fn;
 			call.line = xstrdup_or_null(p);
+			call.name = cmd->name;
 			queued_cmd[nr++] = call;
 		}
 	}
@@ -659,8 +784,6 @@ static void batch_objects_command(struct batch_options *opt,
 	strbuf_release(&input);
 }
 
-#define DEFAULT_FORMAT "%(objectname) %(objecttype) %(objectsize)"
-
 static int batch_objects(struct batch_options *opt)
 {
 	struct strbuf input = STRBUF_INIT;
diff --git a/object-file.c b/object-file.c
index 5b270f046d..417c76def3 100644
--- a/object-file.c
+++ b/object-file.c
@@ -2839,3 +2839,14 @@ int read_loose_object(const char *path,
 		munmap(map, mapsize);
 	return ret;
 }
+
+void free_object_info_contents(struct object_info *object_info)
+{
+	if (!object_info)
+		return;
+	free(object_info->typep);
+	free(object_info->sizep);
+	free(object_info->disk_sizep);
+	free(object_info->delta_base_oid);
+	free(object_info->type_name);
+}
diff --git a/object-store.h b/object-store.h
index 5222ee5460..015838eb08 100644
--- a/object-store.h
+++ b/object-store.h
@@ -547,4 +547,7 @@ int for_each_object_in_pack(struct packed_git *p,
 int for_each_packed_object(each_packed_object_fn, void *,
 			   enum for_each_object_flags flags);
 
+/* Free pointers inside of object_info, but not object_info itself */
+void free_object_info_contents(struct object_info *object_info);
+
 #endif /* OBJECT_STORE_H */
diff --git a/t/t1006-cat-file.sh b/t/t1006-cat-file.sh
index dadf3b1458..55b2a1f06f 100755
--- a/t/t1006-cat-file.sh
+++ b/t/t1006-cat-file.sh
@@ -2,6 +2,9 @@
 
 test_description='git cat-file'
 
+GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME=main
+export GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME
+
 . ./test-lib.sh
 
 test_cmdmode_usage () {
@@ -1093,4 +1096,349 @@ test_expect_success 'batch-command flush without --buffer' '
 	grep "^fatal:.*flush is only for --buffer mode.*" err
 '
 
+# This section tests --batch-command with remote-object-info command
+# If a filter is not set, the filter defaults to "%(objectname) %(objectsize) %(objecttype)"
+# Since "%(objecttype)" is currently not supported by the command request, object-info,
+# the filters are set to "%(objectname) %(objectsize)".
+# Tests with the default filter are used to test the fallback to 'fetch' command
+
+set_transport_variables () {
+    tree_sha1=$(git -C "$1" write-tree)
+	commit_sha1=$(echo_without_newline "$commit_message" | git -C "$1" commit-tree $tree_sha1)
+	tag_sha1=$(echo_without_newline "$tag_content" | git -C "$1" hash-object -t tag --stdin -w)
+	tag_size=$(strlen "$tag_content")
+}
+
+# Test --batch-command remote-object-info with 'git://' transport
+
+. "$TEST_DIRECTORY"/lib-git-daemon.sh
+start_git_daemon --export-all --enable=receive-pack
+daemon_parent=$GIT_DAEMON_DOCUMENT_ROOT_PATH/parent
+
+test_expect_success 'create repo to be served by git-daemon' '
+	git init "$daemon_parent" &&
+	echo_without_newline "$hello_content" > $daemon_parent/hello &&
+	git -C "$daemon_parent" update-index --add hello
+'
+
+set_transport_variables "$daemon_parent"
+
+test_expect_success 'batch-command remote-object-info git://' '
+	(
+		cd "$daemon_parent" &&
+
+		echo "$hello_sha1 $hello_size" >expect &&
+		echo "$tree_sha1 $tree_size" >>expect &&
+		echo "$commit_sha1 $commit_size" >>expect &&
+		echo "$tag_sha1 $tag_size" >>expect &&
+		git cat-file --batch-command="%(objectname) %(objectsize)" >actual <<-EOF &&
+		remote-object-info "$GIT_DAEMON_URL/parent" $hello_sha1
+		remote-object-info "$GIT_DAEMON_URL/parent" $tree_sha1
+		remote-object-info "$GIT_DAEMON_URL/parent" $commit_sha1
+		remote-object-info "$GIT_DAEMON_URL/parent" $tag_sha1
+		EOF
+		test_cmp expect actual
+	)
+'
+
+test_expect_success 'batch-command remote-object-info git:// multiple sha1 per line' '
+	(
+		cd "$daemon_parent" &&
+
+		echo "$hello_sha1 $hello_size" >expect &&
+		echo "$tree_sha1 $tree_size" >>expect &&
+		echo "$commit_sha1 $commit_size" >>expect &&
+		echo "$tag_sha1 $tag_size" >>expect &&
+		git cat-file --batch-command="%(objectname) %(objectsize)" >actual <<-EOF &&
+		remote-object-info "$GIT_DAEMON_URL/parent" $hello_sha1 $tree_sha1 $commit_sha1 $tag_sha1
+		EOF
+		test_cmp expect actual
+	)
+'
+
+test_expect_success 'batch-command remote-object-info http:// default filter' '
+	(
+		cd "$daemon_parent" &&
+
+		echo "$hello_sha1 blob $hello_size" >expect &&
+		echo "$tree_sha1 tree $tree_size" >>expect &&
+		echo "$commit_sha1 commit $commit_size" >>expect &&
+		echo "$tag_sha1 tag $tag_size" >>expect &&
+		GIT_TRACE_PACKET=1 git cat-file --batch-command >actual <<-EOF &&
+		remote-object-info "$GIT_DAEMON_URL/parent" $hello_sha1 $tree_sha1
+		remote-object-info "$GIT_DAEMON_URL/parent" $commit_sha1 $tag_sha1
+		EOF
+		test_cmp expect actual
+	)
+'
+
+test_expect_success 'batch-command --buffer remote-object-info git://' '
+	(
+		cd "$daemon_parent" &&
+
+		echo "$hello_sha1 $hello_size" >expect &&
+		echo "$tree_sha1 $tree_size" >>expect &&
+		echo "$commit_sha1 $commit_size" >>expect &&
+		echo "$tag_sha1 $tag_size" >>expect &&
+		git cat-file --batch-command="%(objectname) %(objectsize)" --buffer >actual <<-EOF &&
+		remote-object-info "$GIT_DAEMON_URL/parent" $hello_sha1 $tree_sha1
+		remote-object-info "$GIT_DAEMON_URL/parent" $commit_sha1 $tag_sha1
+		flush
+		EOF
+		test_cmp expect actual
+	)
+'
+
+stop_git_daemon
+
+# Test --batch-command remote-object-info with 'http://' transport
+
+. "$TEST_DIRECTORY"/lib-httpd.sh
+start_httpd
+
+test_expect_success 'create repo to be served by http:// transport' '
+	git init "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
+	git -C "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" config http.receivepack true &&
+	echo_without_newline "$hello_content" > $HTTPD_DOCUMENT_ROOT_PATH/http_parent/hello &&
+	git -C "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" update-index --add hello
+'
+set_transport_variables "$HTTPD_DOCUMENT_ROOT_PATH/http_parent"
+
+test_expect_success 'batch-command remote-object-info http://' '
+	(
+		cd "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
+
+		echo "$hello_sha1 $hello_size" >expect &&
+		echo "$tree_sha1 $tree_size" >>expect &&
+		echo "$commit_sha1 $commit_size" >>expect &&
+		echo "$tag_sha1 $tag_size" >>expect &&
+		git cat-file --batch-command="%(objectname) %(objectsize)" >actual <<-EOF &&
+		remote-object-info "$HTTPD_URL/smart/http_parent" $hello_sha1
+		remote-object-info "$HTTPD_URL/smart/http_parent" $tree_sha1
+		remote-object-info "$HTTPD_URL/smart/http_parent" $commit_sha1
+		remote-object-info "$HTTPD_URL/smart/http_parent" $tag_sha1
+		EOF
+		test_cmp expect actual
+	)
+'
+
+test_expect_success 'batch-command remote-object-info http:// one line' '
+	(
+		cd "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
+
+		echo "$hello_sha1 $hello_size" >expect &&
+		echo "$tree_sha1 $tree_size" >>expect &&
+		echo "$commit_sha1 $commit_size" >>expect &&
+		echo "$tag_sha1 $tag_size" >>expect &&
+		git cat-file --batch-command="%(objectname) %(objectsize)" >actual <<-EOF &&
+		remote-object-info "$HTTPD_URL/smart/http_parent" $hello_sha1 $tree_sha1 $commit_sha1 $tag_sha1
+		EOF
+		test_cmp expect actual
+	)
+'
+
+test_expect_success 'batch-command --buffer remote-object-info http://' '
+	(
+		cd "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
+
+		echo "$hello_sha1 $hello_size" >expect &&
+		echo "$tree_sha1 $tree_size" >>expect &&
+		echo "$commit_sha1 $commit_size" >>expect &&
+		echo "$tag_sha1 $tag_size" >>expect &&
+
+		git cat-file --batch-command="%(objectname) %(objectsize)" --buffer >actual <<-EOF &&
+		remote-object-info "$HTTPD_URL/smart/http_parent" $hello_sha1 $tree_sha1
+		remote-object-info "$HTTPD_URL/smart/http_parent" $commit_sha1 $tag_sha1
+		flush
+		EOF
+		test_cmp expect actual
+	)
+'
+
+test_expect_success 'batch-command remote-object-info http:// default filter' '
+	(
+		cd "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
+
+		echo "$hello_sha1 blob $hello_size" >expect &&
+		echo "$tree_sha1 tree $tree_size" >>expect &&
+		echo "$commit_sha1 commit $commit_size" >>expect &&
+		echo "$tag_sha1 tag $tag_size" >>expect &&
+
+		git cat-file --batch-command >actual <<-EOF &&
+		remote-object-info "$HTTPD_URL/smart/http_parent" $hello_sha1 $tree_sha1
+		remote-object-info "$HTTPD_URL/smart/http_parent" $commit_sha1 $tag_sha1
+		EOF
+		test_cmp expect actual
+	)
+'
+
+test_expect_success 'remote-object-info fails on unspported filter option (objectsize:disk)' '
+	(
+		cd "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
+
+		test_must_fail git cat-file --batch-command="%(objectsize:disk)" 2>err <<-EOF &&
+		remote-object-info "$HTTPD_URL/smart/http_parent" $hello_sha1
+		EOF
+		test_i18ngrep "objectsize:disk is currently not supported with remote-object-info" err
+	)
+'
+
+test_expect_success 'remote-object-info fails on unspported filter option (deltabase)' '
+	(
+		cd "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
+
+		test_must_fail git cat-file --batch-command="%(deltabase)" 2>err <<-EOF &&
+		remote-object-info "$HTTPD_URL/smart/http_parent" $hello_sha1
+		EOF
+		test_i18ngrep "deltabase is currently not supported with remote-object-info" err
+	)
+'
+
+test_expect_success 'remote-object-info fails on server with legacy protocol' '
+	(
+		cd "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
+
+		test_must_fail git -c protocol.version=0 cat-file --batch-command="%(objectname) %(objectsize)" 2>err <<-EOF &&
+		remote-object-info "$HTTPD_URL/smart/http_parent" $hello_sha1
+		EOF
+		test_i18ngrep "object-info requires protocol v2" err
+	)
+'
+
+test_expect_success 'remote-object-info fails on server with legacy protocol fallback' '
+	(
+		cd "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
+
+		test_must_fail git -c protocol.version=0 cat-file --batch-command 2>err <<-EOF &&
+		remote-object-info "$HTTPD_URL/smart/http_parent" $hello_sha1
+		EOF
+		test_i18ngrep "object-info requires protocol v2" err
+	)
+'
+
+test_expect_success 'remote-object-info fails on malformed OID' '
+	(
+		cd "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
+		malformed_object_id="this_id_is_not_valid" &&
+
+		test_must_fail git cat-file --batch-command="%(objectname) %(objectsize)" 2>err <<-EOF &&
+		remote-object-info "$HTTPD_URL/smart/http_parent" $malformed_object_id
+		EOF
+		test_i18ngrep "malformed object id '$malformed_object_id'" err
+	)
+'
+
+test_expect_success 'remote-object-info fails on malformed OID fallback' '
+	(
+		cd "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
+		malformed_object_id="this_id_is_not_valid" &&
+
+		test_must_fail git cat-file --batch-command 2>err <<-EOF &&
+		remote-object-info "$HTTPD_URL/smart/http_parent" $malformed_object_id
+		EOF
+		test_i18ngrep "malformed object id '$malformed_object_id'" err
+	)
+'
+
+test_expect_success 'remote-object-info fails on missing OID' '
+	git clone "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" missing_oid_repo &&
+	test_commit -C missing_oid_repo message1 c.txt &&
+	(
+		cd missing_oid_repo &&
+		object_id=$(git rev-parse message1:c.txt) &&
+		test_must_fail git cat-file --batch-command="%(objectname) %(objectsize)" 2>err <<-EOF &&
+		remote-object-info "$HTTPD_URL/smart/http_parent" $object_id
+		EOF
+		test_i18ngrep "object-info: not our ref $object_id" err
+	)
+'
+
+test_expect_success 'remote-object-info fails on missing OID fallback' '
+	(
+		cd missing_oid_repo &&
+		object_id=$(git rev-parse message1:c.txt) &&
+		test_must_fail git cat-file --batch-command 2>err <<-EOF &&
+		remote-object-info "$HTTPD_URL/smart/http_parent" $object_id
+		EOF
+		test_i18ngrep "fatal: remote error: upload-pack: not our ref $object_id" err
+	)
+'
+
+# Test --batch-command remote-object-info with 'file://' transport
+
+test_expect_success 'create repo to be served by file:// transport' '
+	git init server &&
+	git -C server config protocol.version 2 &&
+	echo_without_newline "$hello_content" > server/hello &&
+	git -C server update-index --add hello
+'
+
+set_transport_variables "server"
+
+test_expect_success 'batch-command remote-object-info file://' '
+	(
+		cd server &&
+
+		echo "$hello_sha1 $hello_size" >expect &&
+		echo "$tree_sha1 $tree_size" >>expect &&
+		echo "$commit_sha1 $commit_size" >>expect &&
+		echo "$tag_sha1 $tag_size" >>expect &&
+		git cat-file --batch-command="%(objectname) %(objectsize)" >actual <<-EOF &&
+		remote-object-info "file://$(pwd)" $hello_sha1
+		remote-object-info "file://$(pwd)" $tree_sha1
+		remote-object-info "file://$(pwd)" $commit_sha1
+		remote-object-info "file://$(pwd)" $tag_sha1
+		EOF
+		test_cmp expect actual
+	)
+'
+
+test_expect_success 'batch-command remote-object-info file:// multiple sha1 per line' '
+	(
+		cd server &&
+
+		echo "$hello_sha1 $hello_size" >expect &&
+		echo "$tree_sha1 $tree_size" >>expect &&
+		echo "$commit_sha1 $commit_size" >>expect &&
+		echo "$tag_sha1 $tag_size" >>expect &&
+		git cat-file --batch-command="%(objectname) %(objectsize)" >actual <<-EOF &&
+		remote-object-info "file://$(pwd)" $hello_sha1 $tree_sha1 $commit_sha1 $tag_sha1
+		EOF
+		test_cmp expect actual
+	)
+'
+
+test_expect_success 'batch-command --buffer remote-object-info file://' '
+	(
+		cd server &&
+
+		echo "$hello_sha1 $hello_size" >expect &&
+		echo "$tree_sha1 $tree_size" >>expect &&
+		echo "$commit_sha1 $commit_size" >>expect &&
+		echo "$tag_sha1 $tag_size" >>expect &&
+		git cat-file --batch-command="%(objectname) %(objectsize)" --buffer >actual <<-EOF &&
+		remote-object-info "file://$(pwd)" $hello_sha1 $tree_sha1
+		remote-object-info "file://$(pwd)" $commit_sha1 $tag_sha1
+		flush
+		EOF
+		test_cmp expect actual
+	)
+'
+
+test_expect_success 'batch-command remote-object-info file:// default filter' '
+	(
+		cd server &&
+
+		echo "$hello_sha1 blob $hello_size" >expect &&
+		echo "$tree_sha1 tree $tree_size" >>expect &&
+		echo "$commit_sha1 commit $commit_size" >>expect &&
+		echo "$tag_sha1 tag $tag_size" >>expect &&
+		git cat-file --batch-command >actual <<-EOF &&
+		remote-object-info "file://$(pwd)" $hello_sha1 $tree_sha1
+		remote-object-info "file://$(pwd)" $commit_sha1 $tag_sha1
+		EOF
+		test_cmp expect actual
+	)
+'
+
 test_done
-- 
2.37.1.455.g008518b4e5-goog


^ permalink raw reply related	[flat|nested] 103+ messages in thread

* Re: [PATCH v5 0/6] cat-file: add --batch-command remote-object-info command
  2022-07-28 23:02       ` [PATCH v5 0/6] " Calvin Wan
@ 2022-07-28 23:56         ` Junio C Hamano
  2022-07-29  0:02           ` Junio C Hamano
  2022-07-31  8:41         ` Phillip Wood
  2022-09-30 23:23         ` Junio C Hamano
  2 siblings, 1 reply; 103+ messages in thread
From: Junio C Hamano @ 2022-07-28 23:56 UTC (permalink / raw)
  To: Calvin Wan; +Cc: git, jonathantanmy, philipoakley, johncai86

Calvin Wan <calvinwan@google.com> writes:

> Sometimes it is useful to get information about an object without having
> to download it completely. The server logic has already been implemented
> as “a2ba162cda (object-info: support for retrieving object info,
> 2021-04-20)”. This patch implements the client option for it.
>
> Add `--object-info` option to `cat-file --batch-command`. This option
> allows the client to make an object-info command request to a server
> that supports protocol v2. If the server is v2, but does not allow for
> the object-info command request, the entire object is fetched and the
> relevant object info is returned.

Thanks.  Since this has changed significantly enough to render
range-diff less useful, I think a fresh round of full review may be
needed.  I'll use a more recent tip of 'master' as the base when I
queue these patches.


^ permalink raw reply	[flat|nested] 103+ messages in thread

* Re: [PATCH v5 0/6] cat-file: add --batch-command remote-object-info command
  2022-07-28 23:56         ` Junio C Hamano
@ 2022-07-29  0:02           ` Junio C Hamano
  0 siblings, 0 replies; 103+ messages in thread
From: Junio C Hamano @ 2022-07-29  0:02 UTC (permalink / raw)
  To: Calvin Wan; +Cc: git, jonathantanmy, philipoakley, johncai86

Junio C Hamano <gitster@pobox.com> writes:

> Thanks.  Since this has changed significantly enough to render
> range-diff less useful, I think a fresh round of full review may be
> needed.  I'll use a more recent tip of 'master' as the base when I
> queue these patches.

Having said that, here is a diff between

 a/ is previous round queued with fixups merged to 'master'
 b/ is the latest round queued on top of 'master'.

which may help people to see what got changed since the previous
round.

I notice that use of CALLOC_ARRAY() suggested during the review of
the last round was dropped (it should be easy to resurrect).


 Documentation/git-cat-file.txt |  2 +-
 builtin/cat-file.c             |  4 +--
 fetch-pack.c                   | 17 +++--------
 fetch-pack.h                   |  4 +--
 object-file.c                  |  1 -
 protocol-caps.c                | 12 ++++----
 serve.c                        | 10 +++++-
 t/t1006-cat-file.sh            |  3 +-
 t/t5555-http-smart-common.sh   |  2 +-
 t/t5701-git-serve.sh           |  2 +-
 transport.c                    | 69 +++++++++++++++++++++++++-----------------
 11 files changed, 70 insertions(+), 56 deletions(-)

diff --git a/Documentation/git-cat-file.txt b/Documentation/git-cat-file.txt
index 0d9e8e6214..c073d5e50d 100644
--- a/Documentation/git-cat-file.txt
+++ b/Documentation/git-cat-file.txt
@@ -117,7 +117,7 @@ info <object>::
 
 remote-object-info <remote> [<object>]::
 	Print object info for object references `<object>` at specified <remote>.
-	This command may only be combined with `--buffer`.
+	This command may be combined with `--buffer`.
 
 flush::
 	Used with `--buffer` to execute all preceding commands that were issued
diff --git a/builtin/cat-file.c b/builtin/cat-file.c
index 48bf0ee02f..57c090f249 100644
--- a/builtin/cat-file.c
+++ b/builtin/cat-file.c
@@ -515,12 +515,12 @@ static int get_remote_info(struct batch_options *opt, int argc, const char **arg
 		size_t j;
 		int include_size = 0, include_type = 0;
 
-		CALLOC_ARRAY(remote_object_info, object_info_oids.nr);
+		remote_object_info = xcalloc(object_info_oids.nr, sizeof(struct object_info));
 		gtransport->smart_options->object_info = 1;
 		gtransport->smart_options->object_info_oids = &object_info_oids;
 		/**
 		 * 'size' is the only option currently supported.
-		 * Other options that are passed in the format will default to a
+		 * Other options that are passed in the format will fallback to a
 		 * standard fetch request rather than object-info.
 		 */
 		if (strstr(format, "%(objectsize)")) {
diff --git a/fetch-pack.c b/fetch-pack.c
index ca06a7130f..d373aed775 100644
--- a/fetch-pack.c
+++ b/fetch-pack.c
@@ -1296,14 +1296,16 @@ static void write_command_and_capabilities(struct strbuf *req_buf,
 void send_object_info_request(int fd_out, struct object_info_args *args)
 {
 	struct strbuf req_buf = STRBUF_INIT;
-	struct string_list unsorted_object_info_options = *args->object_info_options;
 	size_t i;
 
 	write_command_and_capabilities(&req_buf, args->server_options, "object-info");
 
-	if (unsorted_string_list_has_string(&unsorted_object_info_options, "size"))
+	if (unsorted_string_list_has_string(args->object_info_options, "size"))
 		packet_buf_write(&req_buf, "size");
 
+	if (unsorted_string_list_has_string(args->object_info_options, "type"))
+		packet_buf_write(&req_buf, "type");
+
 	if (args->oids) {
 		for (i = 0; i < args->oids->nr; i++)
 			packet_buf_write(&req_buf, "oid %s", oid_to_hex(&args->oids->oid[i]));
@@ -1669,7 +1671,7 @@ static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args,
 			/* Filter 'ref' by 'sought' and those that aren't local */
 			mark_complete_and_common_ref(negotiator, args, &ref);
 			filter_refs(args, &ref, sought, nr_sought);
-			if (!args->refetch && !args->object_info && everything_local(args, &ref))
+			if (!args->refetch && everything_local(args, &ref))
 				state = FETCH_DONE;
 			else
 				state = FETCH_SEND_REQUEST;
@@ -2056,15 +2058,6 @@ struct ref *fetch_pack(struct fetch_pack_args *args,
 		args->connectivity_checked = 1;
 	}
 
-	if (args->object_info) {
-		struct ref *ref_cpy_reader = ref_cpy;
-		int i = 0;
-		while (ref_cpy_reader) {
-			oid_object_info_extended(the_repository, &ref_cpy_reader->old_oid, &(*args->object_info_data)[i], OBJECT_INFO_LOOKUP_REPLACE);
-			ref_cpy_reader = ref_cpy_reader->next;
-			i++;
-		}
-	}
 	update_shallow(args, sought, nr_sought, &si);
 cleanup:
 	clear_shallow_info(&si);
diff --git a/fetch-pack.h b/fetch-pack.h
index 552fd7bde0..11c513f748 100644
--- a/fetch-pack.h
+++ b/fetch-pack.h
@@ -72,9 +72,9 @@ struct fetch_pack_args {
 };
 
 struct object_info_args {
-	const struct string_list *object_info_options;
+	struct string_list *object_info_options;
 	const struct string_list *server_options;
-	const struct oid_array *oids;
+	struct oid_array *oids;
 };
 
 /*
diff --git a/object-file.c b/object-file.c
index 333fed9f68..417c76def3 100644
--- a/object-file.c
+++ b/object-file.c
@@ -2844,7 +2844,6 @@ void free_object_info_contents(struct object_info *object_info)
 {
 	if (!object_info)
 		return;
-
 	free(object_info->typep);
 	free(object_info->sizep);
 	free(object_info->disk_sizep);
diff --git a/protocol-caps.c b/protocol-caps.c
index bc7def0727..2ad9f45c59 100644
--- a/protocol-caps.c
+++ b/protocol-caps.c
@@ -11,7 +11,6 @@
 
 struct requested_info {
 	unsigned size : 1;
-	unsigned unknown : 1;
 };
 
 /*
@@ -41,12 +40,12 @@ static void send_info(struct repository *r, struct packet_writer *writer,
 	struct string_list_item *item;
 	struct strbuf send_buffer = STRBUF_INIT;
 
+	if (!oid_str_list->nr)
+		return;
+
 	if (info->size)
 		packet_writer_write(writer, "size");
 
-	if (info->unknown || !oid_str_list->nr)
-		goto release;
-
 	for_each_string_list_item (item, oid_str_list) {
 		const char *oid_str = item->string;
 		struct object_id oid;
@@ -73,7 +72,6 @@ static void send_info(struct repository *r, struct packet_writer *writer,
 		packet_writer_write(writer, "%s", send_buffer.buf);
 		strbuf_reset(&send_buffer);
 	}
-release:
 	strbuf_release(&send_buffer);
 }
 
@@ -94,7 +92,9 @@ int cap_object_info(struct repository *r, struct packet_reader *request)
 		if (parse_oid(request->line, &oid_str_list))
 			continue;
 
-		info.unknown = 1;
+		packet_writer_error(&writer,
+				    "object-info: unexpected line: '%s'",
+				    request->line);
 	}
 
 	if (request->status != PACKET_READ_FLUSH) {
diff --git a/serve.c b/serve.c
index 733347f602..1adf9df4a8 100644
--- a/serve.c
+++ b/serve.c
@@ -56,6 +56,14 @@ static int session_id_advertise(struct repository *r, struct strbuf *value)
 	return 1;
 }
 
+static int object_info_advertise(struct repository *r,
+				   struct strbuf *value)
+{
+	if (value)
+		strbuf_addstr(value, "size");
+	return 1;
+}
+
 static void session_id_receive(struct repository *r,
 			       const char *client_sid)
 {
@@ -132,7 +140,7 @@ static struct protocol_capability capabilities[] = {
 	},
 	{
 		.name = "object-info",
-		.advertise = always_advertise,
+		.advertise = object_info_advertise,
 		.command = cap_object_info,
 	},
 };
diff --git a/t/t1006-cat-file.sh b/t/t1006-cat-file.sh
index 7f0baea07c..55b2a1f06f 100755
--- a/t/t1006-cat-file.sh
+++ b/t/t1006-cat-file.sh
@@ -1100,6 +1100,7 @@ test_expect_success 'batch-command flush without --buffer' '
 # If a filter is not set, the filter defaults to "%(objectname) %(objectsize) %(objecttype)"
 # Since "%(objecttype)" is currently not supported by the command request, object-info,
 # the filters are set to "%(objectname) %(objectsize)".
+# Tests with the default filter are used to test the fallback to 'fetch' command
 
 set_transport_variables () {
     tree_sha1=$(git -C "$1" write-tree)
@@ -1163,7 +1164,7 @@ test_expect_success 'batch-command remote-object-info http:// default filter' '
 		echo "$tree_sha1 tree $tree_size" >>expect &&
 		echo "$commit_sha1 commit $commit_size" >>expect &&
 		echo "$tag_sha1 tag $tag_size" >>expect &&
-		git cat-file --batch-command >actual <<-EOF &&
+		GIT_TRACE_PACKET=1 git cat-file --batch-command >actual <<-EOF &&
 		remote-object-info "$GIT_DAEMON_URL/parent" $hello_sha1 $tree_sha1
 		remote-object-info "$GIT_DAEMON_URL/parent" $commit_sha1 $tag_sha1
 		EOF
diff --git a/t/t5555-http-smart-common.sh b/t/t5555-http-smart-common.sh
index b1cfe8b7db..5a16d4259a 100755
--- a/t/t5555-http-smart-common.sh
+++ b/t/t5555-http-smart-common.sh
@@ -131,7 +131,7 @@ test_expect_success 'git upload-pack --advertise-refs: v2' '
 	fetch=shallow wait-for-done
 	server-option
 	object-format=$(test_oid algo)
-	object-info
+	object-info=size
 	0000
 	EOF
 
diff --git a/t/t5701-git-serve.sh b/t/t5701-git-serve.sh
index 1896f671cb..ebb32644e3 100755
--- a/t/t5701-git-serve.sh
+++ b/t/t5701-git-serve.sh
@@ -20,7 +20,7 @@ test_expect_success 'test capability advertisement' '
 	fetch=shallow wait-for-done
 	server-option
 	object-format=$(test_oid algo)
-	object-info
+	object-info=size
 	0000
 	EOF
 
diff --git a/transport.c b/transport.c
index 1baa1a194a..64bcc311ff 100644
--- a/transport.c
+++ b/transport.c
@@ -360,15 +360,16 @@ static int fetch_object_info(struct transport *transport, struct object_info **o
 	struct git_transport_data *data = transport->data;
 	struct object_info_args args;
 	struct packet_reader reader;
-	struct string_list server_attributes = STRING_LIST_INIT_DUP;
 
 	memset(&args, 0, sizeof(args));
 	args.server_options = transport->server_options;
 	args.object_info_options = transport->smart_options->object_info_options;
+	args.oids = transport->smart_options->object_info_oids;
 
 	connect_setup(transport, 0);
 	packet_reader_init(&reader, data->fd[0], NULL, 0,
 			PACKET_READ_CHOMP_NEWLINE |
+			PACKET_READ_GENTLE_ON_EOF |
 			PACKET_READ_DIE_ON_ERR_PACKET);
 	data->version = discover_version(&reader);
 
@@ -378,25 +379,14 @@ static int fetch_object_info(struct transport *transport, struct object_info **o
 	case protocol_v2:
 		if (!server_supports_v2("object-info", 0))
 			return -1;
-		/**
-		 * Send a request with only attributes first. If server can return all
-		 * of the requested attributes, then send request with object ids
-		 */
-		send_object_info_request(data->fd[1], &args);
-		if (packet_reader_read(&reader) != PACKET_READ_NORMAL) {
-			check_stateless_delimiter(transport->stateless_rpc, &reader, "stateless delimiter expected");
+		if (unsorted_string_list_has_string(args.object_info_options, "size")
+				&& !server_supports_feature("object-info", "size", 0)) {
 			return -1;
 		}
-		string_list_split(&server_attributes, reader.line, ' ', -1);
-		packet_reader_read(&reader);
-		check_stateless_delimiter(transport->stateless_rpc, &reader, "stateless delimiter expected");
-		if (server_attributes.nr != args.object_info_options->nr)
+		if (unsorted_string_list_has_string(args.object_info_options, "type")
+				&& !server_supports_feature("object-info", "type", 0)) {
 			return -1;
-		for (i = 0; i < server_attributes.nr; i++) {
-			if (!strcmp(server_attributes.items[i].string, "size"))
-				size_index = i + 1;
 		}
-		args.oids = transport->smart_options->object_info_oids;
 		send_object_info_request(data->fd[1], &args);
 		break;
 	case protocol_v1:
@@ -406,18 +396,29 @@ static int fetch_object_info(struct transport *transport, struct object_info **o
 		BUG("unknown protocol version");
 	}
 
-	if (packet_reader_read(&reader) != PACKET_READ_NORMAL)
-		die(_("error reading object info header"));
+	for (i = 0; i < args.object_info_options->nr; i++) {
+		if (packet_reader_read(&reader) != PACKET_READ_NORMAL) {
+			check_stateless_delimiter(transport->stateless_rpc, &reader, "stateless delimiter expected");
+			return -1;
+		}
+		if (unsorted_string_list_has_string(args.object_info_options, reader.line)) {
+			if (!strcmp(reader.line, "size"))
+				size_index = i;
+			continue;
+		}
+		return -1;
+	}
+
 	i = 0;
-	while (packet_reader_read(&reader) == PACKET_READ_NORMAL) {
+	while (packet_reader_read(&reader) == PACKET_READ_NORMAL && i < args.oids->nr) {
 		struct string_list object_info_values = STRING_LIST_INIT_DUP;
 
 		string_list_split(&object_info_values, reader.line, ' ', -1);
-		if (size_index > 0) {
-			if (!strcmp(object_info_values.items[size_index].string, ""))
+		if (0 <= size_index) {
+			if (!strcmp(object_info_values.items[1 + size_index].string, ""))
 				die("object-info: not our ref %s",
 					object_info_values.items[0].string);
-			*(*object_info_data)[i].sizep = strtoul(object_info_values.items[size_index].string, NULL, 10);
+			*(*object_info_data)[i].sizep = strtoul(object_info_values.items[1 + size_index].string, NULL, 10);
 		}
 		i++;
 	}
@@ -441,7 +442,7 @@ static int fetch_refs_via_pack(struct transport *transport,
 	struct ref *refs = NULL;
 	struct fetch_pack_args args;
 	struct ref *refs_tmp = NULL;
-	struct ref *wanted_refs = xcalloc(1, sizeof (struct ref));
+	struct ref *object_info_refs = xcalloc(1, sizeof (struct ref));
 
 	memset(&args, 0, sizeof(args));
 	args.uploadpack = data->options.uploadpack;
@@ -470,11 +471,13 @@ static int fetch_refs_via_pack(struct transport *transport,
 	args.object_info = transport->smart_options->object_info;
 
 	if (transport->smart_options && transport->smart_options->object_info) {
-		struct ref *ref = wanted_refs;
+		struct ref *ref = object_info_refs;
 
 		if (!fetch_object_info(transport, data->options.object_info_data))
 			goto cleanup;
 		args.object_info_data = data->options.object_info_data;
+		args.quiet = 1;
+		args.no_progress = 1;
 		for (i = 0; i < transport->smart_options->object_info_oids->nr; i++) {
 			struct ref *temp_ref = xcalloc(1, sizeof (struct ref));
 			temp_ref->old_oid = *(transport->smart_options->object_info_oids->oid + i);
@@ -482,7 +485,7 @@ static int fetch_refs_via_pack(struct transport *transport,
 			ref->next = temp_ref;
 			ref = ref->next;
 		}
-		transport->remote_refs = wanted_refs->next;
+		transport->remote_refs = object_info_refs->next;
 	} else if (!data->got_remote_heads) {
 		int i;
 		int must_list_refs = 0;
@@ -500,7 +503,7 @@ static int fetch_refs_via_pack(struct transport *transport,
 	else if (data->version <= protocol_v1)
 		die_if_server_options(transport);
 
-	if (data->options.acked_commits && !transport->smart_options->object_info) {
+	if (data->options.acked_commits) {
 		if (data->version < protocol_v2) {
 			warning(_("--negotiate-only requires protocol v2"));
 			ret = -1;
@@ -523,12 +526,22 @@ static int fetch_refs_via_pack(struct transport *transport,
 			  to_fetch, nr_heads, &data->shallow,
 			  &transport->pack_lockfiles, data->version);
 
+	if (args.object_info) {
+		struct ref *ref_cpy_reader = object_info_refs->next;
+		int i = 0;
+		while (ref_cpy_reader) {
+			oid_object_info_extended(the_repository, &ref_cpy_reader->old_oid, &(*args.object_info_data)[i], OBJECT_INFO_LOOKUP_REPLACE);
+			ref_cpy_reader = ref_cpy_reader->next;
+			i++;
+		}
+	}
+
 	data->got_remote_heads = 0;
 	data->options.self_contained_and_connected =
 		args.self_contained_and_connected;
 	data->options.connectivity_checked = args.connectivity_checked;
 
-	if (!refs)
+	if (refs == NULL && !args.object_info)
 		ret = -1;
 	if (report_unmatched_refs(to_fetch, nr_heads))
 		ret = -1;
@@ -543,7 +556,7 @@ static int fetch_refs_via_pack(struct transport *transport,
 
 	free_refs(refs_tmp);
 	free_refs(refs);
-	free_refs(wanted_refs);
+	free_refs(object_info_refs);
 	return ret;
 }
 

^ permalink raw reply related	[flat|nested] 103+ messages in thread

* Re: [PATCH v5 6/6] cat-file: add remote-object-info to batch-command
  2022-07-28 23:02       ` [PATCH v5 6/6] cat-file: add remote-object-info to batch-command Calvin Wan
@ 2022-07-29  6:25         ` Ævar Arnfjörð Bjarmason
  2022-08-07  5:50           ` ZheNing Hu
  2022-08-08 18:07           ` Calvin Wan
  2022-07-29 18:21         ` Junio C Hamano
  2022-09-28 13:12         ` Ævar Arnfjörð Bjarmason
  2 siblings, 2 replies; 103+ messages in thread
From: Ævar Arnfjörð Bjarmason @ 2022-07-29  6:25 UTC (permalink / raw)
  To: Calvin Wan; +Cc: git, gitster, jonathantanmy, philipoakley, johncai86


On Thu, Jul 28 2022, Calvin Wan wrote:

> Since the `info` command in cat-file --batch-command prints object info
> for a given object, it is natural to add another command in cat-file
> --batch-command to print object info for a given object from a remote.

Is it ?:)

> Add `remote-object-info` to cat-file --batch-command.

I realize this bit of implementation changed in v4, i.e. it used to be
in "fetch", and I'm happy to have it moved out of there, we don't need
to overload it more.

But I remember thinking (and perhaps commenting on-list, I can't
remember) that the "object-info" server verb was a bit odd at the time
that it was implemented. I understand the motivation, but surely it was
stumbling its way towards being something more generic, i.e. being able
to just expose cmd_cat_file() in some form.

Which is one of the goals I've had in mind with working on fixing memory
leaks in various places, i.e. once you get common commands to clean up
after themselves it usually becomes to have a "command server".

So (and I don't mind if this is longer term, just asking), is there a
reason for why we wouldn't want to do away with object-info and this
"cat-file talks to a remote", in favor of just having support for
invoking arbitrary commands from a remote.

Of course that set of allowed RCE commands would be zero by default, but
if we had some way to define tha "cat-file" was allowed to be called,
and only if you invoked:

	cat-file --batch="%(objectsize)"

Or whatever, but over the v2 protocol, wouldn't we basically have
object-info in a more roundabout way?

^ permalink raw reply	[flat|nested] 103+ messages in thread

* Re: [PATCH v5 3/6] protocol-caps: initialization bug fix
  2022-07-28 23:02       ` [PATCH v5 3/6] protocol-caps: initialization bug fix Calvin Wan
@ 2022-07-29 17:51         ` Junio C Hamano
  0 siblings, 0 replies; 103+ messages in thread
From: Junio C Hamano @ 2022-07-29 17:51 UTC (permalink / raw)
  To: Calvin Wan; +Cc: git, jonathantanmy, philipoakley, johncai86

Calvin Wan <calvinwan@google.com> writes:

> Initialize info. If info.size bit was on due to on-stack garbage,
> we would have given our response with "size" attribute prefixed,
> even when the client side never requested it.
>
> Signed-off-by: Calvin Wan <calvinwan@google.com>
> Helped-by: Jonathan Tan <jonathantanmy@google.com>
> ---
>  protocol-caps.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)

OK, this is new in this round and it makes sense.

> diff --git a/protocol-caps.c b/protocol-caps.c
> index bbde91810a..2ad9f45c59 100644
> --- a/protocol-caps.c
> +++ b/protocol-caps.c
> @@ -77,7 +77,7 @@ static void send_info(struct repository *r, struct packet_writer *writer,
>  
>  int cap_object_info(struct repository *r, struct packet_reader *request)
>  {
> -	struct requested_info info;
> +	struct requested_info info = { 0 };
>  	struct packet_writer writer;
>  	struct string_list oid_str_list = STRING_LIST_INIT_DUP;

^ permalink raw reply	[flat|nested] 103+ messages in thread

* Re: [PATCH v5 4/6] serve: advertise object-info feature
  2022-07-28 23:02       ` [PATCH v5 4/6] serve: advertise object-info feature Calvin Wan
@ 2022-07-29 17:57         ` Junio C Hamano
  2022-08-01 18:28           ` Calvin Wan
  0 siblings, 1 reply; 103+ messages in thread
From: Junio C Hamano @ 2022-07-29 17:57 UTC (permalink / raw)
  To: Calvin Wan; +Cc: git, jonathantanmy, philipoakley, johncai86

Calvin Wan <calvinwan@google.com> writes:

> In order for a client to know what object-info components a server can
> provide, advertise supported object-info features. This will allow a
> client to decide whether to query the server for object-info or fetch as
> a fallback.

OK.  Now we will no longer advertise a bare "object-info", but
"object-info=size" (and possibly in the future things other than
"size").  How would this change affect older clients who knows what
to do with "object-info" but not "object-info=<values>" yet?

> +static int object_info_advertise(struct repository *r,
> +				   struct strbuf *value)
> +{
> +	if (value)
> +		strbuf_addstr(value, "size");
> +	return 1;
> +}
> +
>  static void session_id_receive(struct repository *r,
>  			       const char *client_sid)
>  {
> @@ -132,7 +140,7 @@ static struct protocol_capability capabilities[] = {
>  	},
>  	{
>  		.name = "object-info",
> -		.advertise = always_advertise,
> +		.advertise = object_info_advertise,
>  		.command = cap_object_info,
>  	},
>  };
> diff --git a/t/t5555-http-smart-common.sh b/t/t5555-http-smart-common.sh
> index b1cfe8b7db..5a16d4259a 100755
> --- a/t/t5555-http-smart-common.sh
> +++ b/t/t5555-http-smart-common.sh
> @@ -131,7 +131,7 @@ test_expect_success 'git upload-pack --advertise-refs: v2' '
>  	fetch=shallow wait-for-done
>  	server-option
>  	object-format=$(test_oid algo)
> -	object-info
> +	object-info=size
>  	0000
>  	EOF
>  
> diff --git a/t/t5701-git-serve.sh b/t/t5701-git-serve.sh
> index 1896f671cb..ebb32644e3 100755
> --- a/t/t5701-git-serve.sh
> +++ b/t/t5701-git-serve.sh
> @@ -20,7 +20,7 @@ test_expect_success 'test capability advertisement' '
>  	fetch=shallow wait-for-done
>  	server-option
>  	object-format=$(test_oid algo)
> -	object-info
> +	object-info=size
>  	0000
>  	EOF

^ permalink raw reply	[flat|nested] 103+ messages in thread

* Re: [PATCH v5 5/6] transport: add client support for object-info
  2022-07-28 23:02       ` [PATCH v5 5/6] transport: add client support for object-info Calvin Wan
@ 2022-07-29 18:06         ` Junio C Hamano
  2022-08-04 20:28           ` Calvin Wan
  2022-08-01 13:30         ` Phillip Wood
                           ` (2 subsequent siblings)
  3 siblings, 1 reply; 103+ messages in thread
From: Junio C Hamano @ 2022-07-29 18:06 UTC (permalink / raw)
  To: Calvin Wan; +Cc: git, jonathantanmy, philipoakley, johncai86

Calvin Wan <calvinwan@google.com> writes:

> +void send_object_info_request(int fd_out, struct object_info_args *args)
> +{
> +	struct strbuf req_buf = STRBUF_INIT;
> +	size_t i;
> +
> +	write_command_and_capabilities(&req_buf, args->server_options, "object-info");
> +
> +	if (unsorted_string_list_has_string(args->object_info_options, "size"))
> +		packet_buf_write(&req_buf, "size");
> +
> +	if (unsorted_string_list_has_string(args->object_info_options, "type"))
> +		packet_buf_write(&req_buf, "type");
> +
> +	if (args->oids) {
> +		for (i = 0; i < args->oids->nr; i++)
> +			packet_buf_write(&req_buf, "oid %s", oid_to_hex(&args->oids->oid[i]));
> +	}

If !args->oids then we say "we want to request object-info to learn
size and type for the following objects: oh, there are no objects we
are interested in".  I wonder if an early return

	if (!args->oids)
		return;

at the beginning of the function that turns it into a benign no-op,
may make more sense?  Calling "send_X()" helper and seeing nothing
come out on the wire might make it look awkward, though.

> @@ -363,10 +437,12 @@ static int fetch_refs_via_pack(struct transport *transport,
>  			       int nr_heads, struct ref **to_fetch)
>  {
>  	int ret = 0;
> +	size_t i;
>  	struct git_transport_data *data = transport->data;
>  	struct ref *refs = NULL;
>  	struct fetch_pack_args args;
>  	struct ref *refs_tmp = NULL;
> +	struct ref *object_info_refs = xcalloc(1, sizeof (struct ref));

Style: no SP between "sizeof" and "(".

^ permalink raw reply	[flat|nested] 103+ messages in thread

* Re: [PATCH v5 6/6] cat-file: add remote-object-info to batch-command
  2022-07-28 23:02       ` [PATCH v5 6/6] cat-file: add remote-object-info to batch-command Calvin Wan
  2022-07-29  6:25         ` Ævar Arnfjörð Bjarmason
@ 2022-07-29 18:21         ` Junio C Hamano
  2022-08-08 18:37           ` Calvin Wan
  2022-09-28 13:12         ` Ævar Arnfjörð Bjarmason
  2 siblings, 1 reply; 103+ messages in thread
From: Junio C Hamano @ 2022-07-29 18:21 UTC (permalink / raw)
  To: Calvin Wan; +Cc: git, jonathantanmy, philipoakley, johncai86

Calvin Wan <calvinwan@google.com> writes:

> While `info` takes object ids one at a time, this creates overhead when
> making requests to a server so `remote-object-info` instead can take
> multiple object ids at once.

I am not sure if the above is desirable---doesn't it make the
protocol (i.e. the requests the invoker of "cat-file" needs to
prepare) unnecessarily complex?  If we have "buffered" mode, the
implementation can collect and batch requests and talk with the
other side in a single communication with the other end, no?

> In --buffer mode, this changes to:
>
>  - Receive and parse input from user
>  - Store respective function attached to command in a queue
>  - After flush, loop through commands in queue
>     - Call respective function attached to command
>     - Set batch mode state, get object info, print object info

So, the existing buffered mode is poorly designed, in other words.
Isn't this something we can fix for performance without affecting
the existing callers?

^ permalink raw reply	[flat|nested] 103+ messages in thread

* Re: [PATCH v5 0/6] cat-file: add --batch-command remote-object-info command
  2022-07-28 23:02       ` [PATCH v5 0/6] " Calvin Wan
  2022-07-28 23:56         ` Junio C Hamano
@ 2022-07-31  8:41         ` Phillip Wood
  2022-08-04 22:57           ` Calvin Wan
  2022-09-30 23:23         ` Junio C Hamano
  2 siblings, 1 reply; 103+ messages in thread
From: Phillip Wood @ 2022-07-31  8:41 UTC (permalink / raw)
  To: Calvin Wan, git; +Cc: gitster, jonathantanmy, philipoakley, johncai86

Hi Calvin

On 29/07/2022 00:02, Calvin Wan wrote:
> Sometimes it is useful to get information about an object without having
> to download it completely. The server logic has already been implemented
> as “a2ba162cda (object-info: support for retrieving object info,
> 2021-04-20)”. This patch implements the client option for it.
> 
> Add `--object-info` option to `cat-file --batch-command`. 

This part of the cover letter seems to have been left over from a 
previous iteration. I noticed that in patch 6 there is a reference to 
'--object-info' in an error message as well.

This might be a daft question but could we teach cat-file to check if an 
object is available locally and if it is not retrieve just the metadata 
from the remote rather than adding a new command to --batch-command? 
That way it would be transparent to the user and they would not have to 
worry about whether they might be querying a remote object or not.

Best Wishes

Phillip

> This option
> allows the client to make an object-info command request to a server
> that supports protocol v2. If the server is v2, but does not allow for
> the object-info command request, the entire object is fetched and the
> relevant object info is returned.
> 
> === Changes since v4 ===
>   - Instead of making 2 rounds trips to determine whether the server can
>     handle the client object-info request, the server now advertises the
>     features it can send back so the client can immediately fallback to
>     fetch if any feature is not supported
>   - Fixed nits and clarified some documentation/commit messages
> 
> Signed-off-by: Calvin Wan <calvinwan@google.com>
> Helped-by: Jonathan Tan <jonathantanmy@google.com>
> 
> Calvin Wan (6):
>    fetch-pack: refactor packet writing
>    fetch-pack: move fetch initialization
>    protocol-caps: initialization bug fix
>    serve: advertise object-info feature
>    transport: add client support for object-info
>    cat-file: add remote-object-info to batch-command
> 
>   Documentation/git-cat-file.txt |  16 +-
>   builtin/cat-file.c             | 225 ++++++++++++++++-----
>   fetch-pack.c                   |  52 +++--
>   fetch-pack.h                   |  10 +
>   object-file.c                  |  11 ++
>   object-store.h                 |   3 +
>   protocol-caps.c                |   2 +-
>   serve.c                        |  10 +-
>   t/t1006-cat-file.sh            | 348 +++++++++++++++++++++++++++++++++
>   t/t5555-http-smart-common.sh   |   2 +-
>   t/t5701-git-serve.sh           |   2 +-
>   transport-helper.c             |   7 +-
>   transport.c                    | 110 ++++++++++-
>   transport.h                    |  11 ++
>   14 files changed, 734 insertions(+), 75 deletions(-)
> 
> 
> base-commit: 9dd64cb4d310986dd7b8ca7fff92f9b61e0bd21a

^ permalink raw reply	[flat|nested] 103+ messages in thread

* Re: [PATCH v4 0/8] cat-file: add --batch-command remote-object-info command
  2022-05-02 17:08     ` [PATCH v4 0/8] cat-file: add --batch-command remote-object-info command Calvin Wan
                         ` (14 preceding siblings ...)
  2022-07-28 23:02       ` [PATCH v5 6/6] cat-file: add remote-object-info to batch-command Calvin Wan
@ 2022-07-31 15:02       ` ZheNing Hu
  2022-08-08 17:32         ` Calvin Wan
  2022-08-13 22:17       ` Junio C Hamano
  16 siblings, 1 reply; 103+ messages in thread
From: ZheNing Hu @ 2022-07-31 15:02 UTC (permalink / raw)
  To: Calvin Wan
  Cc: Git List, Junio C Hamano, Jonathan Tan, Philip Oakley, johncai86,
	Taylor Blau

Hi, Calvin Wan,

Calvin Wan <calvinwan@google.com> 于2022年5月3日周二 08:14写道:
>
> Sometimes it is useful to get information about an object without having
> to download it completely. The server logic has already been implemented
> as “a2ba162cda (object-info: support for retrieving object info,
> 2021-04-20)”. This patch implements the client option for it.
>
> Add `--object-info` option to `cat-file --batch-command`. This option
> allows the client to make an object-info command request to a server
> that supports protocol v2. If the server is v2, but does not allow for
> the object-info command request, the entire object is fetched and the
> relevant object info is returned.
>
> Summary of changes ==================
>
> Patches 1, 2, 3, and 7 are small changes that setup the main
> implementation. Patch 4 sets up object-info to be backwards compatible
> for future patch series that adds additional attributes.  Patch 5 adds
> internal trasnport functions to send and receive object-info command
> request packets. Patch 6 adds the fallback if object-info is not
> supported or fails.  Patch 8 adds the cat-file implementation.
>

I have to say I am very curious about this feature. Since the current
git partial-clone interface supports only a few filters:

blob:limit
blob:none
sparse:oid
tree:0

Though these filters reduce the number of objects downloaded each time,
sometimes I just need *only* one blob object, git partial-clone will still
download some additional commits and tree objects from the remote.

This patch can get the remote-object-info by git cat-file, so can we go
further to get the remote-object-content of the object?

Something like:

$ git cat-file --batch-command
remote-object-content origin <oid>

> Changes since V3 ================
>
>  * Object-info is now implemented in cat-file --batch-command rather
>    than fetch (new base commit)
>  * Removed config option to advertise object-info
>  * Added forwards and backwards compability for object-info
>  * Split up some patches to better describe and visualize changes
>
> Signed-off-by: Calvin Wan <calvinwan@google.com>
> Helped-by: Jonathan Tan <jonathantanmy@google.com>
>
> Calvin Wan (8):
>   fetch-pack: refactor packet writing
>   fetch-pack: move fetch default settings
>   object-store: add function to free object_info contents
>   object-info: send attribute packet regardless of object ids
>   transport: add client side capability to request object-info
>   transport: add object-info fallback to fetch
>   cat-file: move parse_cmd and DEFAULT_FORMAT up
>   cat-file: add --batch-command remote-object-info command
>
>  Documentation/git-cat-file.txt |  16 +-
>  builtin/cat-file.c             | 225 ++++++++++++++++-----
>  fetch-pack.c                   |  61 ++++--
>  fetch-pack.h                   |  10 +
>  object-file.c                  |  16 ++
>  object-store.h                 |   3 +
>  protocol-caps.c                |  14 +-
>  t/t1006-cat-file.sh            | 347 +++++++++++++++++++++++++++++++++
>  transport-helper.c             |   7 +-
>  transport.c                    |  97 ++++++++-
>  transport.h                    |  11 ++
>  11 files changed, 728 insertions(+), 79 deletions(-)
>
> --
> 2.36.0.rc2.10170.gb555eefa6f
>

Thanks!

ZheNing Hu

^ permalink raw reply	[flat|nested] 103+ messages in thread

* Re: [PATCH v4 8/8] cat-file: add --batch-command remote-object-info command
  2022-05-02 17:09       ` [PATCH v4 8/8] cat-file: add --batch-command remote-object-info command Calvin Wan
  2022-05-04 21:27         ` Jonathan Tan
@ 2022-07-31 15:24         ` ZheNing Hu
  2022-08-08 17:43           ` Calvin Wan
  1 sibling, 1 reply; 103+ messages in thread
From: ZheNing Hu @ 2022-07-31 15:24 UTC (permalink / raw)
  To: Calvin Wan
  Cc: Git List, Junio C Hamano, Jonathan Tan, Philip Oakley, johncai86,
	Taylor Blau

Calvin Wan <calvinwan@google.com> 于2022年5月3日周二 06:33写道:
>
> Since the `info` command in cat-file --batch-command prints object info
> for a given object, it is natural to add another command in cat-file
> --batch-command to print object info for a given object from a remote.
> Add `remote-object-info` to cat-file --batch-command.
>
> While `info` takes object ids one at a time, this creates overhead when
> making requests to a server so `remote-object-info` instead can take
> multiple object ids at once.
>
> cat-file --batch-command is generally implemented in the following
> manner:
>
>  - Receive and parse input from user
>  - Call respective function attached to command
>  - Set batch mode state, get object info, print object info
>
> In --buffer mode, this changes to:
>
>  - Receive and parse input from user
>  - Store respective function attached to command in a queue
>  - After flush, loop through commands in queue
>     - Call respective function attached to command
>     - Set batch mode state, get object info, print object info
>
> Notice how the getting and printing of object info is accomplished one
> at a time. As described above, this creates a problem for making
> requests to a server. Therefore, `remote-object-info` is implemented in
> the following manner:
>
>  - Receive and parse input from user
>  If command is `remote-object-info`:
>     - Get object info from remote
>     - Loop through object info
>         - Call respective function attached to `info`
>         - Set batch mode state, use passed in object info, print object
>           info
>  Else:
>     - Call respective function attached to command
>     - Parse input, get object info, print object info
>
> And finally for --buffer mode `remote-object-info`:
>  - Receive and parse input from user
>  - Store respective function attached to command in a queue
>  - After flush, loop through commands in queue:
>     If command is `remote-object-info`:
>         - Get object info from remote
>         - Loop through object info
>             - Call respective function attached to `info`
>             - Set batch mode state, use passed in object info, print
>               object info
>     Else:
>         - Call respective function attached to command
>         - Set batch mode state, get object info, print object info
>
> To summarize, `remote-object-info` gets object info from the remote and
> then generates multiples `info` commands with the object info passed in.
> It cannot be implemented similarly to `info` and `content` because of
> the overhead with remote commenication.
>
> ---
>  Documentation/git-cat-file.txt |  16 +-
>  builtin/cat-file.c             | 200 +++++++++++++++----
>  t/t1006-cat-file.sh            | 347 +++++++++++++++++++++++++++++++++
>  3 files changed, 522 insertions(+), 41 deletions(-)
>
> diff --git a/Documentation/git-cat-file.txt b/Documentation/git-cat-file.txt
> index 24a811f0ef..0d9e8e6214 100644
> --- a/Documentation/git-cat-file.txt
> +++ b/Documentation/git-cat-file.txt
> @@ -115,6 +115,10 @@ info <object>::
>         Print object info for object reference `<object>`. This corresponds to the
>         output of `--batch-check`.
>
> +remote-object-info <remote> [<object>]::
> +       Print object info for object references `<object>` at specified <remote>.
> +       This command may only be combined with `--buffer`.
> +
>  flush::
>         Used with `--buffer` to execute all preceding commands that were issued
>         since the beginning or since the last flush was issued. When `--buffer`
> @@ -245,7 +249,8 @@ newline. The available atoms are:
>         The full hex representation of the object name.
>
>  `objecttype`::
> -       The type of the object (the same as `cat-file -t` reports).
> +       The type of the object (the same as `cat-file -t` reports). See
> +       `CAVEATS` below.
>
>  `objectsize`::
>         The size, in bytes, of the object (the same as `cat-file -s`
> @@ -253,13 +258,14 @@ newline. The available atoms are:
>
>  `objectsize:disk`::
>         The size, in bytes, that the object takes up on disk. See the
> -       note about on-disk sizes in the `CAVEATS` section below.
> +       note about on-disk sizes in the `CAVEATS` section below. Not
> +       supported by `remote-object-info`.
>
>  `deltabase`::
>         If the object is stored as a delta on-disk, this expands to the
>         full hex representation of the delta base object name.
>         Otherwise, expands to the null OID (all zeroes). See `CAVEATS`
> -       below.
> +       below. Not supported by `remote-object-info`.
>
>  `rest`::
>         If this atom is used in the output string, input lines are split

Why not forbidden %(rest) here too?

> +static int get_remote_info(struct batch_options *opt, int argc, const char **argv)
> +{
> +       int retval = 0;
> +       size_t i;
> +       struct remote *remote = NULL;
> +       struct object_id oid;
> +       struct string_list object_info_options = STRING_LIST_INIT_NODUP;
> +       static struct transport *gtransport;
> +       char *format = DEFAULT_FORMAT;
> +
> +       if (opt->format)
> +               format = xstrdup(opt->format);
> +
> +       remote = remote_get(argv[0]);
> +       if (!remote)
> +               die(_("must supply valid remote when using --object-info"));
> +       oid_array_clear(&object_info_oids);
> +       for (i = 1; i < argc; i++) {
> +               if (get_oid(argv[i], &oid))
> +                       die(_("malformed object id '%s'"), argv[i]);
> +               oid_array_append(&object_info_oids, &oid);
> +       }
> +
> +       gtransport = transport_get(remote, NULL);
> +       if (gtransport->smart_options) {
> +               size_t j;
> +               int include_size = 0, include_type = 0;
> +
> +               remote_object_info = xcalloc(object_info_oids.nr, sizeof(struct object_info));
> +               gtransport->smart_options->object_info = 1;
> +               gtransport->smart_options->object_info_oids = &object_info_oids;
> +               /**
> +                * 'size' is the only option currently supported.
> +                * Other options that are passed in the format will default to a
> +                * standard fetch request rather than object-info.
> +                */
> +               if (strstr(format, "%(objectsize)")) {
> +                       string_list_append(&object_info_options, "size");
> +                       include_size = 1;
> +               }
> +               if (strstr(format, "%(objecttype)")) {
> +                       string_list_append(&object_info_options, "type");
> +                       include_type = 1;
> +               }
> +               if (strstr(format, "%(objectsize:disk)"))
> +                       die(_("objectsize:disk is currently not supported with remote-object-info"));
> +               if (strstr(format, "%(deltabase)"))
> +                       die(_("deltabase is currently not supported with remote-object-info"));

%(rest) too?

> +               if (object_info_options.nr > 0) {
> +                       gtransport->smart_options->object_info_options = &object_info_options;
> +                       for (j = 0; j < object_info_oids.nr; j++) {
> +                               if (include_size)
> +                                       remote_object_info[j].sizep = xcalloc(1, sizeof(long));
> +                               if (include_type)
> +                                       remote_object_info[j].typep = xcalloc(1, sizeof(enum object_type));
> +                       }
> +                       gtransport->smart_options->object_info_data = &remote_object_info;
> +                       retval = transport_fetch_refs(gtransport, NULL);
> +               }
> +       } else {
> +               retval = -1;
> +       }
> +       return retval;
> +}

Maybe such code style will be better?

        if (!gtransport->smart_options) {
               return -1;
        }
        ...
        return transport_fetch_refs(gtransport, NULL);

> +
>  struct object_cb_data {
>         struct batch_options *opt;
>         struct expand_data *expand;
> @@ -538,6 +611,7 @@ typedef void (*parse_cmd_fn_t)(struct batch_options *, const char *,
>  struct queued_cmd {
>         parse_cmd_fn_t fn;
>         char *line;
> +       const char *name;
>  };
>
>  static void parse_cmd_contents(struct batch_options *opt,
> @@ -565,9 +639,49 @@ static const struct parse_cmd {
>  } commands[] = {
>         { "contents", parse_cmd_contents, 1},
>         { "info", parse_cmd_info, 1},
> +       { "remote-object-info", parse_cmd_info, 1},
>         { "flush", NULL, 0},
>  };
>
> +static void parse_remote_info(struct batch_options *opt,
> +                          char *line,
> +                          struct strbuf *output,
> +                          struct expand_data *data,
> +                          const struct parse_cmd *p_cmd,
> +                          struct queued_cmd *q_cmd)
> +{
> +       int count;
> +       size_t i;
> +       const char **argv;
> +
> +       count = split_cmdline(line, &argv);
> +       if (get_remote_info(opt, count, argv))
> +               goto cleanup;
> +       opt->use_remote_info = 1;
> +       data->skip_object_info = 1;
> +       data->mark_query = 0;
> +       for (i = 0; i < object_info_oids.nr; i++) {
> +               if (remote_object_info[i].sizep)
> +                       data->size = *remote_object_info[i].sizep;
> +               if (remote_object_info[i].typep)
> +                       data->type = *remote_object_info[i].typep;
> +
> +               data->oid = object_info_oids.oid[i];
> +               if (p_cmd)
> +                       p_cmd->fn(opt, argv[i+1], output, data);
> +               else
> +                       q_cmd->fn(opt, argv[i+1], output, data);
> +       }
> +       opt->use_remote_info = 0;
> +       data->skip_object_info = 0;
> +       data->mark_query = 1;
> +
> +cleanup:
> +       for (i = 0; i < object_info_oids.nr; i++)
> +               free_object_info_contents(&remote_object_info[i]);
> +       free(remote_object_info);
> +}
> +
>  static void dispatch_calls(struct batch_options *opt,
>                 struct strbuf *output,
>                 struct expand_data *data,
> @@ -579,8 +693,12 @@ static void dispatch_calls(struct batch_options *opt,
>         if (!opt->buffer_output)
>                 die(_("flush is only for --buffer mode"));
>
> -       for (i = 0; i < nr; i++)
> -               cmd[i].fn(opt, cmd[i].line, output, data);
> +       for (i = 0; i < nr; i++) {
> +               if (!strcmp(cmd[i].name, "remote-object-info"))
> +                       parse_remote_info(opt, cmd[i].line, output, data, NULL, &cmd[i]);
> +               else
> +                       cmd[i].fn(opt, cmd[i].line, output, data);
> +       }
>
>         fflush(stdout);
>  }
> @@ -640,11 +758,17 @@ static void batch_objects_command(struct batch_options *opt,
>                         dispatch_calls(opt, output, data, queued_cmd, nr);
>                         free_cmds(queued_cmd, &nr);
>                 } else if (!opt->buffer_output) {
> -                       cmd->fn(opt, p, output, data);
> +                       if (!strcmp(cmd->name, "remote-object-info")) {
> +                               char *line = xstrdup_or_null(p);
> +                               parse_remote_info(opt, line, output, data, cmd, NULL);

memory leak: "line".

> +                       } else {
> +                               cmd->fn(opt, p, output, data);
> +                       }
>                 } else {
>                         ALLOC_GROW(queued_cmd, nr + 1, alloc);
>                         call.fn = cmd->fn;
>                         call.line = xstrdup_or_null(p);
> +                       call.name = cmd->name;
>                         queued_cmd[nr++] = call;
>                 }
>         }
> +
> +test_expect_success 'remote-object-info fails on unspported filter option (objectsize:disk)' '
> +       (
> +               cd "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
> +
> +               test_must_fail git cat-file --batch-command="%(objectsize:disk)" 2>err <<-EOF &&
> +               remote-object-info "$HTTPD_URL/smart/http_parent" $hello_sha1
> +               EOF
> +               test_i18ngrep "objectsize:disk is currently not supported with remote-object-info" err
> +       )
> +'
> +
> +test_expect_success 'remote-object-info fails on unspported filter option (deltabase)' '
> +       (
> +               cd "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
> +
> +               test_must_fail git cat-file --batch-command="%(deltabase)" 2>err <<-EOF &&
> +               remote-object-info "$HTTPD_URL/smart/http_parent" $hello_sha1
> +               EOF
> +               test_i18ngrep "deltabase is currently not supported with remote-object-info" err
> +       )
> +'
> +

%(rest) too?

> +set_transport_variables "server"
> +
> +test_expect_success 'batch-command remote-object-info file://' '
> +       (
> +               cd server &&
> +
> +               echo "$hello_sha1 $hello_size" >expect &&
> +               echo "$tree_sha1 $tree_size" >>expect &&
> +               echo "$commit_sha1 $commit_size" >>expect &&
> +               echo "$tag_sha1 $tag_size" >>expect &&
> +               git cat-file --batch-command="%(objectname) %(objectsize)" >actual <<-EOF &&
> +               remote-object-info "file://$(pwd)" $hello_sha1
> +               remote-object-info "file://$(pwd)" $tree_sha1
> +               remote-object-info "file://$(pwd)" $commit_sha1
> +               remote-object-info "file://$(pwd)" $tag_sha1
> +               EOF
> +               test_cmp expect actual
> +       )

Can we support <rev> instead of only <oid> here?

$ git cat-file --batch-check
HEAD
28583b8d8ca72730d7c9e0ea50861ad431a6dea4 commit 3038
master
ab336e8f1c8009c8b1aab8deb592148e69217085 commit 281
origin/master
23b219f8e3f2adfb0441e135f0a880e6124f766c commit 282
origin/master:git.c
e5d62fa5a92e95e1ede041ebf913d841744c31f8 blob 28398

So cat-file --batch-check can support it.

$git cat-file --batch-commands
remote-object-info "file://$(pwd)" master:git.c

I guess it cannot work now, right?

^ permalink raw reply	[flat|nested] 103+ messages in thread

* Re: [PATCH v5 5/6] transport: add client support for object-info
  2022-07-28 23:02       ` [PATCH v5 5/6] transport: add client support for object-info Calvin Wan
  2022-07-29 18:06         ` Junio C Hamano
@ 2022-08-01 13:30         ` Phillip Wood
  2022-08-04 22:20           ` Calvin Wan
  2022-08-01 14:26         ` Phillip Wood
  2022-08-08  9:16         ` Phillip Wood
  3 siblings, 1 reply; 103+ messages in thread
From: Phillip Wood @ 2022-08-01 13:30 UTC (permalink / raw)
  To: Calvin Wan, git; +Cc: gitster, jonathantanmy, philipoakley, johncai86

Hi Calvin

On 29/07/2022 00:02, Calvin Wan wrote:
> Sometimes it is useful to get information about an object without having
> to download it completely. The server logic has already been implemented
> as “a2ba162cda (object-info: support for retrieving object info,
> 2021-04-20)”. This patch adds client functions to communicate with the
> server.
> 
> The client currently supports requesting a list of object ids with
> features 'size' and 'type' from a v2 server. If a server does not
> advertise either of the requested features, then the client falls back
> to making the request through 'fetch'.

I'm confused by the 'type' support, I might have missed something as I'm 
not familiar with this code but I couldn't see where we parse the type 
returned by the server.

> +	for (i = 0; i < args.object_info_options->nr; i++) {
> +		if (packet_reader_read(&reader) != PACKET_READ_NORMAL) {
> +			check_stateless_delimiter(transport->stateless_rpc, &reader, "stateless delimiter expected");

This is one of a number of lines in this series that are way over the 80 
column limit.

> +			return -1;
> +		}
> +		if (unsorted_string_list_has_string(args.object_info_options, reader.line)) {
> +			if (!strcmp(reader.line, "size"))
> +				size_index = i;

Should we be checking for "type" as well? Also does protocol-v2.txt need 
updating as it only mentions "size" as an attribute.

> +			continue;
> +		}
> +		return -1;
> +	}
> +
> +	i = 0;
> +	while (packet_reader_read(&reader) == PACKET_READ_NORMAL && i < args.oids->nr) {
> +		struct string_list object_info_values = STRING_LIST_INIT_DUP;
> +
> +		string_list_split(&object_info_values, reader.line, ' ', -1);
> +		if (0 <= size_index) {

To avoid a possible out-of-bounds access we need to check that 
size_index + 1 < object_info_value.nr in case the server response is 
malformed

> +			if (!strcmp(object_info_values.items[1 + size_index].string, ""))
> +				die("object-info: not our ref %s",

I'm a bit confused by this message is it trying to say "object %s is 
missing on the server"?

> +					object_info_values.items[0].string);
> +			*(*object_info_data)[i].sizep = strtoul(object_info_values.items[1 + size_index].string, NULL, 10);

As Junio pointed out in his comments in v4 there is no error checking 
here - we should check the server has actually returned a number. Note 
that strtoul() will happily parse negative numbers so we probably want 
to do something like

const char *endp
errno = 0
if (!isdigit(*object_info_values.items[1 + size_index].string))
   die("...")
*(*object_info_data)[i].sizep = strtoul(object_info_values.items[1 + 
size_index].string, &endp, 10);
if (errno || *endp)
   die("...")

Should be we checking the object id matches what we asked for? (I'm not 
sure if protocol-v2.txt mentions the order in which the objects are 
returned)

Should we be parsing the object type here as well?

> @@ -392,8 +468,25 @@ static int fetch_refs_via_pack(struct transport *transport,
>   	args.server_options = transport->server_options;
>   	args.negotiation_tips = data->options.negotiation_tips;
>   	args.reject_shallow_remote = transport->smart_options->reject_shallow;
> -
> -	if (!data->got_remote_heads) {
> +	args.object_info = transport->smart_options->object_info;
> +
> +	if (transport->smart_options && transport->smart_options->object_info) {
> +		struct ref *ref = object_info_refs;
> +
> +		if (!fetch_object_info(transport, data->options.object_info_data))
> +			goto cleanup;
> +		args.object_info_data = data->options.object_info_data;
> +		args.quiet = 1;
> +		args.no_progress = 1;
> +		for (i = 0; i < transport->smart_options->object_info_oids->nr; i++) {
> +			struct ref *temp_ref = xcalloc(1, sizeof (struct ref));

Using CALLOC_ARRAY() or p = xcalloc(1, sizeof(*p)) would be safer here 
(and everywhere else where you have used xcalloc()) as it ensures we 
allocate the correct size.

> diff --git a/transport.h b/transport.h
> index b5bf7b3e70..5512fdb140 100644
> --- a/transport.h
> +++ b/transport.h
> @@ -31,6 +32,12 @@ struct git_transport_options {
>   	 */
>   	unsigned connectivity_checked:1;
>   
> +	/*
> +	 * Transport will attempt to pull only object-info. Fallbacks
> +	 * to pulling entire object if object-info is not supported
> +	 */

Is it definitely true that we fallback to pulling the entire object? - 
there is at least one place above where we do

 > +	if (transport->smart_options->object_info) {
 > +		die(_("--object-info requires protocol v2"));
 > +	}
 >

Best Wishes

Phillip

^ permalink raw reply	[flat|nested] 103+ messages in thread

* Re: [PATCH v5 5/6] transport: add client support for object-info
  2022-07-28 23:02       ` [PATCH v5 5/6] transport: add client support for object-info Calvin Wan
  2022-07-29 18:06         ` Junio C Hamano
  2022-08-01 13:30         ` Phillip Wood
@ 2022-08-01 14:26         ` Phillip Wood
  2022-08-08  9:16         ` Phillip Wood
  3 siblings, 0 replies; 103+ messages in thread
From: Phillip Wood @ 2022-08-01 14:26 UTC (permalink / raw)
  To: Calvin Wan, git; +Cc: gitster, jonathantanmy, philipoakley, johncai86

Hi Calvin

On 29/07/2022 00:02, Calvin Wan wrote:
> diff --git a/transport.c b/transport.c
> index 52db7a3cb0..2d503e2fbd 100644
> --- a/transport.c
> +++ b/transport.c

> +	i = 0;
> +	while (packet_reader_read(&reader) == PACKET_READ_NORMAL && i < args.oids->nr) {
> +		struct string_list object_info_values = STRING_LIST_INIT_DUP;

I forget to say earlier that this is leaked

Best Wishes

Phillip

^ permalink raw reply	[flat|nested] 103+ messages in thread

* Re: [PATCH v5 4/6] serve: advertise object-info feature
  2022-07-29 17:57         ` Junio C Hamano
@ 2022-08-01 18:28           ` Calvin Wan
  2022-08-01 18:44             ` Ævar Arnfjörð Bjarmason
  2022-08-01 18:47             ` Junio C Hamano
  0 siblings, 2 replies; 103+ messages in thread
From: Calvin Wan @ 2022-08-01 18:28 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git, jonathantanmy, philipoakley, johncai86

> OK.  Now we will no longer advertise a bare "object-info", but
> "object-info=size" (and possibly in the future things other than
> "size").  How would this change affect older clients who knows what
> to do with "object-info" but not "object-info=<values>" yet?

This was a tricky tradeoff that I definitely think I should have
discussed more in the commit message. The issue with how object
info is currently implemented is that it is very inflexible for adding
new parameters.

This is how object-info currently parses a client request:

while (packet_reader_read(request) == PACKET_READ_NORMAL) {
    if (!strcmp("size", request->line)) {
        info.size = 1;
        continue;
    }

    if (parse_oid(request->line, &oid_str_list))
        continue;

    packet_writer_error(&writer,
    "object-info: unexpected line: '%s'",
    request->line);
}

Object-info supports "size" right now but, let's say I want to add
"type" as a parameter. OK I add another if statement like:

if (!strcmp("type", request->line)) {
    info.type = 1;
    continue;
}

And we update the docs to say "type" is now supported by
object-info. If a user now attempts to request "size" and "type"
from a server that has not been updated to support "type",
then the user gets an error message "object-info: unexpected
line: 'type'", which is another situation that is a bad experience
for older clients. The client has no way of knowing that their
failure is caused by a server version issue.

Essentially I think at some point we have to bite the bullet and say
we need to rework some part of the object-info advertisement (or
if anyone has a better idea of achieving the same goal) so that we
can make future incremental changes to object-info. If the supported
parameters are posted in the advertisement, then the client doesn't
have to first make a request to find out that their requested
parameter isn't support by the server. While you noted that we can't
make the assumption now that nobody is using the current
object-info feature, I think the benefit of the change outweighs
the cost of affecting the possibly small amount of users of this
feature. (A quick search on stack overflow for "object-info" tagged
under [git] returned no questions about it so that's what I used as
a cursory estimate for how popular this feature is).

Curious to hear what your thoughts on this are Junio, since as
much as I'd like to create a seamless upgrade experience for
older clients, I'm out of ideas as to how I would do so.

^ permalink raw reply	[flat|nested] 103+ messages in thread

* Re: [PATCH v5 4/6] serve: advertise object-info feature
  2022-08-01 18:28           ` Calvin Wan
@ 2022-08-01 18:44             ` Ævar Arnfjörð Bjarmason
  2022-08-01 18:47             ` Junio C Hamano
  1 sibling, 0 replies; 103+ messages in thread
From: Ævar Arnfjörð Bjarmason @ 2022-08-01 18:44 UTC (permalink / raw)
  To: Calvin Wan; +Cc: Junio C Hamano, git, jonathantanmy, philipoakley, johncai86


On Mon, Aug 01 2022, Calvin Wan wrote:

>> OK.  Now we will no longer advertise a bare "object-info", but
>> "object-info=size" (and possibly in the future things other than
>> "size").  How would this change affect older clients who knows what
>> to do with "object-info" but not "object-info=<values>" yet?
>
> This was a tricky tradeoff that I definitely think I should have
> discussed more in the commit message. The issue with how object
> info is currently implemented is that it is very inflexible for adding
> new parameters.
>
> This is how object-info currently parses a client request:
>
> while (packet_reader_read(request) == PACKET_READ_NORMAL) {
>     if (!strcmp("size", request->line)) {
>         info.size = 1;
>         continue;
>     }
>
>     if (parse_oid(request->line, &oid_str_list))
>         continue;
>
>     packet_writer_error(&writer,
>     "object-info: unexpected line: '%s'",
>     request->line);
> }
>
> Object-info supports "size" right now but, let's say I want to add
> "type" as a parameter. OK I add another if statement like:
>
> if (!strcmp("type", request->line)) {
>     info.type = 1;
>     continue;
> }
>
> And we update the docs to say "type" is now supported by
> object-info. If a user now attempts to request "size" and "type"
> from a server that has not been updated to support "type",
> then the user gets an error message "object-info: unexpected
> line: 'type'", which is another situation that is a bad experience
> for older clients. The client has no way of knowing that their
> failure is caused by a server version issue.
>
> Essentially I think at some point we have to bite the bullet and say
> we need to rework some part of the object-info advertisement (or
> if anyone has a better idea of achieving the same goal) so that we
> can make future incremental changes to object-info. If the supported
> parameters are posted in the advertisement, then the client doesn't
> have to first make a request to find out that their requested
> parameter isn't support by the server. While you noted that we can't
> make the assumption now that nobody is using the current
> object-info feature, I think the benefit of the change outweighs
> the cost of affecting the possibly small amount of users of this
> feature. (A quick search on stack overflow for "object-info" tagged
> under [git] returned no questions about it so that's what I used as
> a cursory estimate for how popular this feature is).
>
> Curious to hear what your thoughts on this are Junio, since as
> much as I'd like to create a seamless upgrade experience for
> older clients, I'm out of ideas as to how I would do so.

I haven't looked deeply into this case, but in general for such protcol
incompatibilities we could just create an object-info2, and have that
use some extensible calling convention we wish we'd have used from day
1.

Then have new clients understand both (and prefer the new verb), and
older clients use object-info without breakage.

Or we could call the new thing "cat-file", and have it accept any
arbitrary options it does, and then limit it to some sensible subset for
now :)

^ permalink raw reply	[flat|nested] 103+ messages in thread

* Re: [PATCH v5 4/6] serve: advertise object-info feature
  2022-08-01 18:28           ` Calvin Wan
  2022-08-01 18:44             ` Ævar Arnfjörð Bjarmason
@ 2022-08-01 18:47             ` Junio C Hamano
  2022-08-01 18:58               ` Calvin Wan
  1 sibling, 1 reply; 103+ messages in thread
From: Junio C Hamano @ 2022-08-01 18:47 UTC (permalink / raw)
  To: Calvin Wan; +Cc: git, jonathantanmy, philipoakley, johncai86

Calvin Wan <calvinwan@google.com> writes:

> And we update the docs to say "type" is now supported by
> object-info. If a user now attempts to request "size" and "type"
> from a server that has not been updated to support "type",
> then the user gets an error message "object-info: unexpected
> line: 'type'", which is another situation that is a bad experience
> for older clients.

Is it?  older clients by definition does not know about "type", no?

I am perfectly happy with the capability advertisement to say not
just "object-info", but also what attributes of objects you can be
queried, by the way.  If clients right now (i.e. without this
series) expect that the other side who advertises "object-info"
without "=<these>,<attributes>,<are>,<supported>" supports only
"size", for example, perhaps treat the bare "object-info" capability
just as a synonym of "object-info=size", or something?

I do not deeply care either way, to be honest, as the deployed
clients with bare "object-info" will not survive for a long time
and will quickly be deprecated anyway.  I just wanted to see the
reasoning behind the decision to ignore them.

Thanks.


^ permalink raw reply	[flat|nested] 103+ messages in thread

* Re: [PATCH v5 4/6] serve: advertise object-info feature
  2022-08-01 18:47             ` Junio C Hamano
@ 2022-08-01 18:58               ` Calvin Wan
  0 siblings, 0 replies; 103+ messages in thread
From: Calvin Wan @ 2022-08-01 18:58 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git, jonathantanmy, philipoakley, johncai86

> Is it?  older clients by definition does not know about "type", no?

Newer client, older remote server was the situation I was thinking
about

> I am perfectly happy with the capability advertisement to say not
> just "object-info", but also what attributes of objects you can be
> queried, by the way.  If clients right now (i.e. without this
> series) expect that the other side who advertises "object-info"
> without "=<these>,<attributes>,<are>,<supported>" supports only
> "size", for example, perhaps treat the bare "object-info" capability
> just as a synonym of "object-info=size", or something?

This is a good idea! Thanks

^ permalink raw reply	[flat|nested] 103+ messages in thread

* Re: [PATCH v5 5/6] transport: add client support for object-info
  2022-07-29 18:06         ` Junio C Hamano
@ 2022-08-04 20:28           ` Calvin Wan
  0 siblings, 0 replies; 103+ messages in thread
From: Calvin Wan @ 2022-08-04 20:28 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git, jonathantanmy, philipoakley, johncai86

> If !args->oids then we say "we want to request object-info to learn
> size and type for the following objects: oh, there are no objects we
> are interested in".  I wonder if an early return
>
>         if (!args->oids)
>                 return;
>
> at the beginning of the function that turns it into a benign no-op,
> may make more sense?  Calling "send_X()" helper and seeing nothing
> come out on the wire might make it look awkward, though.

If I add this change, then I should also add a check to ensure
args->object_info_options is also not empty. The question I think we
should answer is whether checking for oids and options should
happen inside this function or not. I'm leaning towards outside
the function for the reason you stated above about nothing
coming out of the wire being awkward.

> > +     struct ref *object_info_refs = xcalloc(1, sizeof (struct ref));
>
> Style: no SP between "sizeof" and "(".

ack

^ permalink raw reply	[flat|nested] 103+ messages in thread

* Re: [PATCH v5 5/6] transport: add client support for object-info
  2022-08-01 13:30         ` Phillip Wood
@ 2022-08-04 22:20           ` Calvin Wan
  2022-08-08 10:07             ` Phillip Wood
  0 siblings, 1 reply; 103+ messages in thread
From: Calvin Wan @ 2022-08-04 22:20 UTC (permalink / raw)
  To: phillip.wood; +Cc: git, gitster, jonathantanmy, philipoakley, johncai86

> > The client currently supports requesting a list of object ids with
> > features 'size' and 'type' from a v2 server. If a server does not
> > advertise either of the requested features, then the client falls back
> > to making the request through 'fetch'.
>
> I'm confused by the 'type' support, I might have missed something as I'm
> not familiar with this code but I couldn't see where we parse the type
> returned by the server.

I should clarify that the server does not support 'type', only the client.
Since the client falls back to 'fetch' to grab the object info if the server
doesn't support a requested option (e.g. type), I have 'type' included
as part of the supported client options.

> > +     for (i = 0; i < args.object_info_options->nr; i++) {
> > +             if (packet_reader_read(&reader) != PACKET_READ_NORMAL) {
> > +                     check_stateless_delimiter(transport->stateless_rpc, &reader, "stateless delimiter expected");
>
> This is one of a number of lines in this series that are way over the 80
> column limit.

ack

> > +             if (unsorted_string_list_has_string(args.object_info_options, reader.line)) {
> > +                     if (!strcmp(reader.line, "size"))
> > +                             size_index = i;
>
> Should we be checking for "type" as well? Also does protocol-v2.txt need
> updating as it only mentions "size" as an attribute.

I gave context above -- the server does not accept 'type' so protocol-v2.txt
does not need to be updated.

> To avoid a possible out-of-bounds access we need to check that
> size_index + 1 < object_info_value.nr in case the server response is
> malformed

ack

> > +                     if (!strcmp(object_info_values.items[1 + size_index].string, ""))
> > +                             die("object-info: not our ref %s",
>
> I'm a bit confused by this message is it trying to say "object %s is
> missing on the server"?

Correct. You'll find the same error message in upload-pack.c

> > +                                     object_info_values.items[0].string);
> > +                     *(*object_info_data)[i].sizep = strtoul(object_info_values.items[1 + size_index].string, NULL, 10);
>
> As Junio pointed out in his comments in v4 there is no error checking
> here - we should check the server has actually returned a number. Note
> that strtoul() will happily parse negative numbers so we probably want
> to do something like
>
> const char *endp
> errno = 0
> if (!isdigit(*object_info_values.items[1 + size_index].string))
>    die("...")
> *(*object_info_data)[i].sizep = strtoul(object_info_values.items[1 +
> size_index].string, &endp, 10);
> if (errno || *endp)
>    die("...")



> Should be we checking the object id matches what we asked for? (I'm not
> sure if protocol-v2.txt mentions the order in which the objects are
> returned)

Hmmmm I think I either check for an object id match or update
protocol-v2.txt to mention order is consistent.

> Should we be parsing the object type here as well?

When the server starts supporting it.

> > +             for (i = 0; i < transport->smart_options->object_info_oids->nr; i++) {
> > +                     struct ref *temp_ref = xcalloc(1, sizeof (struct ref));
>
> Using CALLOC_ARRAY() or p = xcalloc(1, sizeof(*p)) would be safer here
> (and everywhere else where you have used xcalloc()) as it ensures we
> allocate the correct size.

ack

> > +     /*
> > +      * Transport will attempt to pull only object-info. Fallbacks
> > +      * to pulling entire object if object-info is not supported
> > +      */
>
> Is it definitely true that we fallback to pulling the entire object? -
> there is at least one place above where we do

Yes, fetch_refs_via_pack() is where fetch_object_info() is called,
making it easy to fallback if fetch_object_info() fails.

>> +     while (packet_reader_read(&reader) == PACKET_READ_NORMAL && i < args.oids->nr) {
>> +             struct string_list object_info_values = STRING_LIST_INIT_DUP;
>
> I forget to say earlier that this is leaked

ack

Thank you for the review!

^ permalink raw reply	[flat|nested] 103+ messages in thread

* Re: [PATCH v5 0/6] cat-file: add --batch-command remote-object-info command
  2022-07-31  8:41         ` Phillip Wood
@ 2022-08-04 22:57           ` Calvin Wan
  0 siblings, 0 replies; 103+ messages in thread
From: Calvin Wan @ 2022-08-04 22:57 UTC (permalink / raw)
  To: phillip.wood; +Cc: git, gitster, jonathantanmy, philipoakley, johncai86

> This might be a daft question but could we teach cat-file to check if an
> object is available locally and if it is not retrieve just the metadata
> from the remote rather than adding a new command to --batch-command?
> That way it would be transparent to the user and they would not have to
> worry about whether they might be querying a remote object or not.

Definitely a good question! If we did this over a large amount of objects and
we didn't have those objects locally, then we would have to query for each
of them, resulting in a lot of transport overhead. Therefore, I needed to use
--batch-command to allow me to query all at once. This also begs the
question of why I don't check for the objects locally before querying the
server. The intention of my patch assumes that the user querying for
object-info knows they don't have the object locally and would therefore
like to see the size of an object they could potentially fetch. This could
definitely be potential future work (which I should mention in my commit
message!)

^ permalink raw reply	[flat|nested] 103+ messages in thread

* Re: [PATCH v5 6/6] cat-file: add remote-object-info to batch-command
  2022-07-29  6:25         ` Ævar Arnfjörð Bjarmason
@ 2022-08-07  5:50           ` ZheNing Hu
  2022-08-08 18:07           ` Calvin Wan
  1 sibling, 0 replies; 103+ messages in thread
From: ZheNing Hu @ 2022-08-07  5:50 UTC (permalink / raw)
  To: Ævar Arnfjörð Bjarmason
  Cc: Calvin Wan, Git List, Junio C Hamano, Jonathan Tan,
	Philip Oakley, johncai86

Hi, Ævar,

Ævar Arnfjörð Bjarmason <avarab@gmail.com> 于2022年7月29日周五 14:33写道:
>
>
> On Thu, Jul 28 2022, Calvin Wan wrote:
>
> > Since the `info` command in cat-file --batch-command prints object info
> > for a given object, it is natural to add another command in cat-file
> > --batch-command to print object info for a given object from a remote.
>
> Is it ?:)
>
> > Add `remote-object-info` to cat-file --batch-command.
>
> I realize this bit of implementation changed in v4, i.e. it used to be
> in "fetch", and I'm happy to have it moved out of there, we don't need
> to overload it more.
>
> But I remember thinking (and perhaps commenting on-list, I can't
> remember) that the "object-info" server verb was a bit odd at the time
> that it was implemented. I understand the motivation, but surely it was
> stumbling its way towards being something more generic, i.e. being able
> to just expose cmd_cat_file() in some form.
>
> Which is one of the goals I've had in mind with working on fixing memory
> leaks in various places, i.e. once you get common commands to clean up
> after themselves it usually becomes to have a "command server".
>

Now I'm starting to agree with you on this:

Maybe now git doesn't have a good interface to execute normal git commands
(except git-upload-pack, git-receive-pack...) on remote git server.

This patch wants to get remote object-info by the git-upload-pack interface.
But this thing can easily work by some RPC server e.g. Gitaly in Gitlab.

I don't know if git itself has the need to reimplement these remote
calls In some
secure environment?... Is it perhaps possible to get better
performance and versatility
than Gitaly? I donno.

> So (and I don't mind if this is longer term, just asking), is there a
> reason for why we wouldn't want to do away with object-info and this
> "cat-file talks to a remote", in favor of just having support for
> invoking arbitrary commands from a remote.
>
> Of course that set of allowed RCE commands would be zero by default, but
> if we had some way to define tha "cat-file" was allowed to be called,
> and only if you invoked:
>
>         cat-file --batch="%(objectsize)"
>
> Or whatever, but over the v2 protocol, wouldn't we basically have
> object-info in a more roundabout way?

ZheNing Hu

^ permalink raw reply	[flat|nested] 103+ messages in thread

* Re: [PATCH v5 5/6] transport: add client support for object-info
  2022-07-28 23:02       ` [PATCH v5 5/6] transport: add client support for object-info Calvin Wan
                           ` (2 preceding siblings ...)
  2022-08-01 14:26         ` Phillip Wood
@ 2022-08-08  9:16         ` Phillip Wood
  3 siblings, 0 replies; 103+ messages in thread
From: Phillip Wood @ 2022-08-08  9:16 UTC (permalink / raw)
  To: Calvin Wan, git; +Cc: gitster, jonathantanmy, philipoakley, johncai86

Hi Calvin

On 29/07/2022 00:02, Calvin Wan wrote:
> diff --git a/transport.c b/transport.c
> index 52db7a3cb0..2d503e2fbd 100644
> --- a/transport.c
> +++ b/transport.c
> @@ -363,10 +437,12 @@ static int fetch_refs_via_pack(struct transport *transport,
>   			       int nr_heads, struct ref **to_fetch)
>   {
>   	int ret = 0;
> +	size_t i;
>   	struct git_transport_data *data = transport->data;
>   	struct ref *refs = NULL;
>   	struct fetch_pack_args args;
>   	struct ref *refs_tmp = NULL;
> +	struct ref *object_info_refs = xcalloc(1, sizeof (struct ref));

When git is compiled with SANITIZE=address then one of the cat-file 
tests added in patch 6 fails with an out of bounds access. The problem 
is that the last member of struct ref is a flexible array that is 
expected to contain a NUL terminated string but we don't allocate any 
memory for the string here. We could just add one to the size of the 
allocation but as there is a dedicated allocation function it would be 
better to use alloc_ref("").

I think it would also be worth delaying this allocation until we're sure 
it is going to be needed.

>   	memset(&args, 0, sizeof(args));
>   	args.uploadpack = data->options.uploadpack;
> @@ -392,8 +468,25 @@ static int fetch_refs_via_pack(struct transport *transport,
>   	args.server_options = transport->server_options;
>   	args.negotiation_tips = data->options.negotiation_tips;
>   	args.reject_shallow_remote = transport->smart_options->reject_shallow;
> -
> -	if (!data->got_remote_heads) {
> +	args.object_info = transport->smart_options->object_info;
> +
> +	if (transport->smart_options && transport->smart_options->object_info) {
> +		struct ref *ref = object_info_refs;
> +
> +		if (!fetch_object_info(transport, data->options.object_info_data))
> +			goto cleanup;

If we allocate object_info_refs and initialize ref here then we avoid 
allocating it when it is not needed.

> +		args.object_info_data = data->options.object_info_data;
> +		args.quiet = 1;
> +		args.no_progress = 1;
> +		for (i = 0; i < transport->smart_options->object_info_oids->nr; i++) {
> +			struct ref *temp_ref = xcalloc(1, sizeof (struct ref));

This needs to use alloc_ref("") as well

> +			temp_ref->old_oid = *(transport->smart_options->object_info_oids->oid + i);

This would be clearer as ...oids->oid[i] I think

Best Wishes

Phillip

^ permalink raw reply	[flat|nested] 103+ messages in thread

* Re: [PATCH v5 5/6] transport: add client support for object-info
  2022-08-04 22:20           ` Calvin Wan
@ 2022-08-08 10:07             ` Phillip Wood
  0 siblings, 0 replies; 103+ messages in thread
From: Phillip Wood @ 2022-08-08 10:07 UTC (permalink / raw)
  To: Calvin Wan, phillip.wood
  Cc: git, gitster, jonathantanmy, philipoakley, johncai86

Hi Calvin

On 04/08/2022 23:20, Calvin Wan wrote:
>>> The client currently supports requesting a list of object ids with
>>> features 'size' and 'type' from a v2 server. If a server does not
>>> advertise either of the requested features, then the client falls back
>>> to making the request through 'fetch'.
>>
>> I'm confused by the 'type' support, I might have missed something as I'm
>> not familiar with this code but I couldn't see where we parse the type
>> returned by the server.
> 
> I should clarify that the server does not support 'type', only the client.
> Since the client falls back to 'fetch' to grab the object info if the server
> doesn't support a requested option (e.g. type), I have 'type' included
> as part of the supported client options.

Does that mean all the type client code is effectively unused and so 
untested?

>>> +                     if (!strcmp(object_info_values.items[1 + size_index].string, ""))
>>> +                             die("object-info: not our ref %s",
>>
>> I'm a bit confused by this message is it trying to say "object %s is
>> missing on the server"?
> 
> Correct. You'll find the same error message in upload-pack.c

I find the message confusing as we're talking about oids not refs. I 
have also realized that dying here is incompatible with the normal 
cat-file behavior of printing "<objectname> missing" rather than dying 
when a missing object is queried.

>> Should be we checking the object id matches what we asked for? (I'm not
>> sure if protocol-v2.txt mentions the order in which the objects are
>> returned)
> 
> Hmmmm I think I either check for an object id match or update
> protocol-v2.txt to mention order is consistent.

If we can then updating the protocol makes sense. Even if we do that a 
buggy or malicious server could return the objects in a different order. 
A malicious server can also lie about the size so I'm not sure how much 
benefit there is in checking the oids in terms of validating the 
response but I think we should definitely check that the server returns 
the expected number of sizes.

Best Wishes

Phillip

^ permalink raw reply	[flat|nested] 103+ messages in thread

* Re: [PATCH v4 0/8] cat-file: add --batch-command remote-object-info command
  2022-07-31 15:02       ` [PATCH v4 0/8] cat-file: add --batch-command remote-object-info command ZheNing Hu
@ 2022-08-08 17:32         ` Calvin Wan
  0 siblings, 0 replies; 103+ messages in thread
From: Calvin Wan @ 2022-08-08 17:32 UTC (permalink / raw)
  To: ZheNing Hu
  Cc: Git List, Junio C Hamano, Jonathan Tan, Philip Oakley, johncai86,
	Taylor Blau

> I have to say I am very curious about this feature. Since the current
> git partial-clone interface supports only a few filters:
>
> blob:limit
> blob:none
> sparse:oid
> tree:0
>
> Though these filters reduce the number of objects downloaded each time,
> sometimes I just need *only* one blob object, git partial-clone will still
> download some additional commits and tree objects from the remote.
>
> This patch can get the remote-object-info by git cat-file, so can we go
> further to get the remote-object-content of the object?
>
> Something like:
>
> $ git cat-file --batch-command
> remote-object-content origin <oid>

I think this can be potentially future work. Part of my hesitation stems
from the fact that there is no need to print out any object info when
downloading an object. Therefore, it seems better suited to first be
implemented in git fetch (or added as a filter to partial-clone). And
then remote-object-content can be a combination of fetch +
remote-object-info.

^ permalink raw reply	[flat|nested] 103+ messages in thread

* Re: [PATCH v4 8/8] cat-file: add --batch-command remote-object-info command
  2022-07-31 15:24         ` ZheNing Hu
@ 2022-08-08 17:43           ` Calvin Wan
  0 siblings, 0 replies; 103+ messages in thread
From: Calvin Wan @ 2022-08-08 17:43 UTC (permalink / raw)
  To: ZheNing Hu
  Cc: Git List, Junio C Hamano, Jonathan Tan, Philip Oakley, johncai86,
	Taylor Blau

> >  `rest`::
> >         If this atom is used in the output string, input lines are split
>
> Why not forbidden %(rest) here too?

Good catch. This should definitely be forbidden. I thought at first
this was inconsequential, but since I also have remote as part of
the input, this would no longer hold true.

> > +               if (strstr(format, "%(objectsize:disk)"))
> > +                       die(_("objectsize:disk is currently not supported with remote-object-info"));
> > +               if (strstr(format, "%(deltabase)"))
> > +                       die(_("deltabase is currently not supported with remote-object-info"));
>
> %(rest) too?

ditto

> Maybe such code style will be better?
>
>         if (!gtransport->smart_options) {
>                return -1;
>         }
>         ...
>         return transport_fetch_refs(gtransport, NULL);

That does look better!

> > -                       cmd->fn(opt, p, output, data);
> > +                       if (!strcmp(cmd->name, "remote-object-info")) {
> > +                               char *line = xstrdup_or_null(p);
> > +                               parse_remote_info(opt, line, output, data, cmd, NULL);
>
> memory leak: "line".

ack

> > +test_expect_success 'remote-object-info fails on unspported filter option (deltabase)' '
> > +       (
> > +               cd "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
> > +
> > +               test_must_fail git cat-file --batch-command="%(deltabase)" 2>err <<-EOF &&
> > +               remote-object-info "$HTTPD_URL/smart/http_parent" $hello_sha1
> > +               EOF
> > +               test_i18ngrep "deltabase is currently not supported with remote-object-info" err
> > +       )
> > +'
> > +
>
> %(rest) too?

ditto

> > +test_expect_success 'batch-command remote-object-info file://' '
> > +       (
> > +               cd server &&
> > +
> > +               echo "$hello_sha1 $hello_size" >expect &&
> > +               echo "$tree_sha1 $tree_size" >>expect &&
> > +               echo "$commit_sha1 $commit_size" >>expect &&
> > +               echo "$tag_sha1 $tag_size" >>expect &&
> > +               git cat-file --batch-command="%(objectname) %(objectsize)" >actual <<-EOF &&
> > +               remote-object-info "file://$(pwd)" $hello_sha1
> > +               remote-object-info "file://$(pwd)" $tree_sha1
> > +               remote-object-info "file://$(pwd)" $commit_sha1
> > +               remote-object-info "file://$(pwd)" $tag_sha1
> > +               EOF
> > +               test_cmp expect actual
> > +       )
>
> Can we support <rev> instead of only <oid> here?

Not at the current moment. The server is unable to handle
anything besides oids.

>
> $ git cat-file --batch-check
> HEAD
> 28583b8d8ca72730d7c9e0ea50861ad431a6dea4 commit 3038
> master
> ab336e8f1c8009c8b1aab8deb592148e69217085 commit 281
> origin/master
> 23b219f8e3f2adfb0441e135f0a880e6124f766c commit 282
> origin/master:git.c
> e5d62fa5a92e95e1ede041ebf913d841744c31f8 blob 28398
>
> So cat-file --batch-check can support it.
>
> $git cat-file --batch-commands
> remote-object-info "file://$(pwd)" master:git.c
>
> I guess it cannot work now, right?

Correct.

^ permalink raw reply	[flat|nested] 103+ messages in thread

* Re: [PATCH v5 6/6] cat-file: add remote-object-info to batch-command
  2022-07-29  6:25         ` Ævar Arnfjörð Bjarmason
  2022-08-07  5:50           ` ZheNing Hu
@ 2022-08-08 18:07           ` Calvin Wan
  2022-08-11 10:58             ` Ævar Arnfjörð Bjarmason
  1 sibling, 1 reply; 103+ messages in thread
From: Calvin Wan @ 2022-08-08 18:07 UTC (permalink / raw)
  To: Ævar Arnfjörð Bjarmason
  Cc: git, gitster, jonathantanmy, philipoakley, johncai86

> > Since the `info` command in cat-file --batch-command prints object info
> > for a given object, it is natural to add another command in cat-file
> > --batch-command to print object info for a given object from a remote.
>
> Is it ?:)

Haha yes this could use a little rewording

> > Add `remote-object-info` to cat-file --batch-command.
>
> I realize this bit of implementation changed in v4, i.e. it used to be
> in "fetch", and I'm happy to have it moved out of there, we don't need
> to overload it more.
>
> But I remember thinking (and perhaps commenting on-list, I can't
> remember) that the "object-info" server verb was a bit odd at the time
> that it was implemented. I understand the motivation, but surely it was
> stumbling its way towards being something more generic, i.e. being able
> to just expose cmd_cat_file() in some form.
>
> Which is one of the goals I've had in mind with working on fixing memory
> leaks in various places, i.e. once you get common commands to clean up
> after themselves it usually becomes to have a "command server".
>
> So (and I don't mind if this is longer term, just asking), is there a
> reason for why we wouldn't want to do away with object-info and this
> "cat-file talks to a remote", in favor of just having support for
> invoking arbitrary commands from a remote.
>
> Of course that set of allowed RCE commands would be zero by default, but
> if we had some way to define tha "cat-file" was allowed to be called,
> and only if you invoked:
>
>         cat-file --batch="%(objectsize)"
>
> Or whatever, but over the v2 protocol, wouldn't we basically have
> object-info in a more roundabout way?

While I do think that if we did have a set of allowed RCE commands, this
would be a good candidate to be one of those commands. I am worried
about security, maintainability, and server performance risks this change
would also carry. Figuring out which commands are secure and would
not overload the server, and then maintaining that set seems like a much
more worrisome design than having a secure git server.

^ permalink raw reply	[flat|nested] 103+ messages in thread

* Re: [PATCH v5 6/6] cat-file: add remote-object-info to batch-command
  2022-07-29 18:21         ` Junio C Hamano
@ 2022-08-08 18:37           ` Calvin Wan
  0 siblings, 0 replies; 103+ messages in thread
From: Calvin Wan @ 2022-08-08 18:37 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git, jonathantanmy, philipoakley, johncai86

> > While `info` takes object ids one at a time, this creates overhead when
> > making requests to a server so `remote-object-info` instead can take
> > multiple object ids at once.
>
> I am not sure if the above is desirable---doesn't it make the
> protocol (i.e. the requests the invoker of "cat-file" needs to
> prepare) unnecessarily complex?  If we have "buffered" mode, the
> implementation can collect and batch requests and talk with the
> other side in a single communication with the other end, no?

I'm a little bit confused as to what you are suggesting here, in
terms of how a user would want to call this command. Are you
saying this functionality should only work in "buffered" mode?
Or are you saying that `remote-object-info` should only take
one object id at a time, and in buffer mode, those requests are
batched together?

>
> > In --buffer mode, this changes to:
> >
> >  - Receive and parse input from user
> >  - Store respective function attached to command in a queue
> >  - After flush, loop through commands in queue
> >     - Call respective function attached to command
> >     - Set batch mode state, get object info, print object info
>
> So, the existing buffered mode is poorly designed, in other words.
> Isn't this something we can fix for performance without affecting
> the existing callers?

Much of the existing cat-file code seems to be designed around
taking in one object/command at a time, and --buffer can be
thought of as a wrapper around that preexisting code that simply
stores a queue of commands rather than running them immediately.
I think the existing buffered mode is well designed -- this part of the
commit message is to explain why at some point I need to preprocess
what is in the buffer

^ permalink raw reply	[flat|nested] 103+ messages in thread

* Re: [PATCH v5 6/6] cat-file: add remote-object-info to batch-command
  2022-08-08 18:07           ` Calvin Wan
@ 2022-08-11 10:58             ` Ævar Arnfjörð Bjarmason
  0 siblings, 0 replies; 103+ messages in thread
From: Ævar Arnfjörð Bjarmason @ 2022-08-11 10:58 UTC (permalink / raw)
  To: Calvin Wan; +Cc: git, gitster, jonathantanmy, philipoakley, johncai86


On Mon, Aug 08 2022, Calvin Wan wrote:

>> > Since the `info` command in cat-file --batch-command prints object info
>> > for a given object, it is natural to add another command in cat-file
>> > --batch-command to print object info for a given object from a remote.
>>
>> Is it ?:)
>
> Haha yes this could use a little rewording
>
>> > Add `remote-object-info` to cat-file --batch-command.
>>
>> I realize this bit of implementation changed in v4, i.e. it used to be
>> in "fetch", and I'm happy to have it moved out of there, we don't need
>> to overload it more.
>>
>> But I remember thinking (and perhaps commenting on-list, I can't
>> remember) that the "object-info" server verb was a bit odd at the time
>> that it was implemented. I understand the motivation, but surely it was
>> stumbling its way towards being something more generic, i.e. being able
>> to just expose cmd_cat_file() in some form.
>>
>> Which is one of the goals I've had in mind with working on fixing memory
>> leaks in various places, i.e. once you get common commands to clean up
>> after themselves it usually becomes to have a "command server".
>>
>> So (and I don't mind if this is longer term, just asking), is there a
>> reason for why we wouldn't want to do away with object-info and this
>> "cat-file talks to a remote", in favor of just having support for
>> invoking arbitrary commands from a remote.
>>
>> Of course that set of allowed RCE commands would be zero by default, but
>> if we had some way to define tha "cat-file" was allowed to be called,
>> and only if you invoked:
>>
>>         cat-file --batch="%(objectsize)"
>>
>> Or whatever, but over the v2 protocol, wouldn't we basically have
>> object-info in a more roundabout way?
>
> While I do think that if we did have a set of allowed RCE commands, this
> would be a good candidate to be one of those commands. I am worried
> about security, maintainability, and server performance risks this change
> would also carry. Figuring out which commands are secure and would
> not overload the server, and then maintaining that set seems like a much
> more worrisome design than having a secure git server.

I'm only suggesting that the interface be extendable enough to allow for
that, not that we do it right now, i.e. that the protocol command could
be:

	# With appropriate \0 delimiting etc.
	cmd cat-file -e <SHA1>

As opposed to something like:

	object-exists <SHA1>

But that we would *not* for now expose some mechanism where the server
operator could configure which arbitrary commands to allow, we'd just
intercept that specifically, and dispatch it to the appropriate code in
builtin/cat-file.c (or libify them enough to expose them, and then call
that).

Of course one of the points of doing so would be to eventually expose
the ability to run a larger set of safe commands remotely, if the server
operator agrees, but we'd leave that for the future.

Right now we'd get the benefit of not duplicating various parts of
existing code, and having a plain one-to-one mapping to existing
commands.


^ permalink raw reply	[flat|nested] 103+ messages in thread

* Re: [PATCH v4 0/8] cat-file: add --batch-command remote-object-info command
  2022-05-02 17:08     ` [PATCH v4 0/8] cat-file: add --batch-command remote-object-info command Calvin Wan
                         ` (15 preceding siblings ...)
  2022-07-31 15:02       ` [PATCH v4 0/8] cat-file: add --batch-command remote-object-info command ZheNing Hu
@ 2022-08-13 22:17       ` Junio C Hamano
  16 siblings, 0 replies; 103+ messages in thread
From: Junio C Hamano @ 2022-08-13 22:17 UTC (permalink / raw)
  To: Calvin Wan; +Cc: git, jonathantanmy, philipoakley, johncai86, me

Style and coccinelle fixes; please squash in when you reroll.

 * var = xcalloc(count, sizeof(*var)) --> CALLOC_ARRAY(var, count)
   for count != 1

 * sizeof (type) --> sizeof(type)

Thanks.

 builtin/cat-file.c | 2 +-
 transport.c        | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/builtin/cat-file.c b/builtin/cat-file.c
index 57c090f249..4afe82322f 100644
--- a/builtin/cat-file.c
+++ b/builtin/cat-file.c
@@ -515,7 +515,7 @@ static int get_remote_info(struct batch_options *opt, int argc, const char **arg
 		size_t j;
 		int include_size = 0, include_type = 0;
 
-		remote_object_info = xcalloc(object_info_oids.nr, sizeof(struct object_info));
+		CALLOC_ARRAY(remote_object_info, object_info_oids.nr);
 		gtransport->smart_options->object_info = 1;
 		gtransport->smart_options->object_info_oids = &object_info_oids;
 		/**
diff --git a/transport.c b/transport.c
index 64bcc311ff..87197f0ec7 100644
--- a/transport.c
+++ b/transport.c
@@ -442,7 +442,7 @@ static int fetch_refs_via_pack(struct transport *transport,
 	struct ref *refs = NULL;
 	struct fetch_pack_args args;
 	struct ref *refs_tmp = NULL;
-	struct ref *object_info_refs = xcalloc(1, sizeof (struct ref));
+	struct ref *object_info_refs = xcalloc(1, sizeof(struct ref));
 
 	memset(&args, 0, sizeof(args));
 	args.uploadpack = data->options.uploadpack;
@@ -479,7 +479,7 @@ static int fetch_refs_via_pack(struct transport *transport,
 		args.quiet = 1;
 		args.no_progress = 1;
 		for (i = 0; i < transport->smart_options->object_info_oids->nr; i++) {
-			struct ref *temp_ref = xcalloc(1, sizeof (struct ref));
+			struct ref *temp_ref = xcalloc(1, sizeof(struct ref));
 			temp_ref->old_oid = *(transport->smart_options->object_info_oids->oid + i);
 			temp_ref->exact_oid = 1;
 			ref->next = temp_ref;
-- 
2.37.2-479-gedbcd27e9d


^ permalink raw reply related	[flat|nested] 103+ messages in thread

* Re: [PATCH v5 6/6] cat-file: add remote-object-info to batch-command
  2022-07-28 23:02       ` [PATCH v5 6/6] cat-file: add remote-object-info to batch-command Calvin Wan
  2022-07-29  6:25         ` Ævar Arnfjörð Bjarmason
  2022-07-29 18:21         ` Junio C Hamano
@ 2022-09-28 13:12         ` Ævar Arnfjörð Bjarmason
  2 siblings, 0 replies; 103+ messages in thread
From: Ævar Arnfjörð Bjarmason @ 2022-09-28 13:12 UTC (permalink / raw)
  To: Calvin Wan; +Cc: git, gitster, jonathantanmy, philipoakley, johncai86


On Thu, Jul 28 2022, Calvin Wan wrote:

> +			gtransport->smart_options->object_info_data = &remote_object_info;
> +			retval = transport_fetch_refs(gtransport, NULL);

I see Phillip Wood noted that this fails SANITIZE=address in
https://lore.kernel.org/git/00a58dbb-32e8-98dd-6c72-235e9c33af3b@gmail.com/;
just a *bump* about this issue. I noticed "seen" failed, bisected down
to 6/6 here, then saw his >month old report...

^ permalink raw reply	[flat|nested] 103+ messages in thread

* Re: [PATCH v5 0/6] cat-file: add --batch-command remote-object-info command
  2022-07-28 23:02       ` [PATCH v5 0/6] " Calvin Wan
  2022-07-28 23:56         ` Junio C Hamano
  2022-07-31  8:41         ` Phillip Wood
@ 2022-09-30 23:23         ` Junio C Hamano
  2 siblings, 0 replies; 103+ messages in thread
From: Junio C Hamano @ 2022-09-30 23:23 UTC (permalink / raw)
  To: Calvin Wan; +Cc: git, jonathantanmy, philipoakley, johncai86

Calvin Wan <calvinwan@google.com> writes:

> Sometimes it is useful to get information about an object without having
> to download it completely. The server logic has already been implemented
> as “a2ba162cda (object-info: support for retrieving object info,
> 2021-04-20)”. This patch implements the client option for it.
>
> Add `--object-info` option to `cat-file --batch-command`. This option
> allows the client to make an object-info command request to a server
> that supports protocol v2. If the server is v2, but does not allow for
> the object-info command request, the entire object is fetched and the
> relevant object info is returned.
>
> === Changes since v4 ===
>  - Instead of making 2 rounds trips to determine whether the server can
>    handle the client object-info request, the server now advertises the
>    features it can send back so the client can immediately fallback to
>    fetch if any feature is not supported
>  - Fixed nits and clarified some documentation/commit messages
>
> Signed-off-by: Calvin Wan <calvinwan@google.com>
> Helped-by: Jonathan Tan <jonathantanmy@google.com>

FWIW, this one seems to make t1006 fail under SANITIZE=address, i.e.

    $ SANITIZE=address make test

I thought it would be better to report this before you send out v6
after the release.

Thanks.

^ permalink raw reply	[flat|nested] 103+ messages in thread

end of thread, other threads:[~2022-09-30 23:24 UTC | newest]

Thread overview: 103+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-02-08 23:19 [PATCH] fetch —object-info-format: client option for object-info Calvin Wan
2022-02-08 23:56 ` [PATCH v2] " Calvin Wan
2022-02-09 12:48   ` Philip Oakley
2022-02-10 22:32     ` Calvin Wan
2022-02-09 20:41   ` [PATCH v2] fetch object-info-format: " Jonathan Tan
2022-02-10 22:58     ` Calvin Wan
2022-03-28 19:11   ` [PATCH v3 0/3] object-info: add option for retrieving object info Calvin Wan
2022-03-28 19:11     ` [PATCH v3 1/3] fetch-pack: refactor packet writing and fetch options Calvin Wan
2022-03-29 22:54       ` Junio C Hamano
2022-03-29 23:01       ` Taylor Blau
2022-03-30 21:55       ` Jonathan Tan
2022-03-28 19:11     ` [PATCH v3 2/3] transfer.advertiseObjectInfo: add object-info config Calvin Wan
2022-03-29 22:34       ` Junio C Hamano
2022-03-29 22:48         ` Calvin Wan
2022-03-29 23:09           ` Taylor Blau
2022-03-28 19:11     ` [PATCH v3 3/3] object-info: add option for retrieving object info Calvin Wan
2022-03-29 19:57       ` Junio C Hamano
2022-03-29 22:54       ` Junio C Hamano
2022-03-30  0:49         ` Junio C Hamano
2022-03-30 22:31           ` Calvin Wan
2022-03-30 22:43         ` Jonathan Tan
2022-03-30 23:42           ` Junio C Hamano
2022-03-29 23:19       ` Taylor Blau
2022-03-30 22:47         ` Calvin Wan
2022-03-30 22:06       ` John Cai
2022-03-31 19:56         ` Calvin Wan
2022-04-01 16:16           ` Junio C Hamano
2022-03-30 22:07       ` Jonathan Tan
2022-03-30 22:12       ` Josh Steadmon
2022-03-30 22:46         ` Calvin Wan
2022-03-29 20:35     ` [PATCH v3 0/3] " Junio C Hamano
2022-03-29 22:40       ` Calvin Wan
2022-03-31  1:50     ` Junio C Hamano
2022-05-02 17:08     ` [PATCH v4 0/8] cat-file: add --batch-command remote-object-info command Calvin Wan
2022-05-02 17:08       ` [PATCH v4 1/8] fetch-pack: refactor packet writing Calvin Wan
2022-05-02 17:08       ` [PATCH v4 2/8] fetch-pack: move fetch default settings Calvin Wan
2022-05-02 22:58         ` Junio C Hamano
2022-05-03 23:06           ` Jonathan Tan
2022-05-05 18:13             ` Calvin Wan
2022-05-02 17:08       ` [PATCH v4 3/8] object-store: add function to free object_info contents Calvin Wan
2022-05-02 23:23         ` Junio C Hamano
2022-05-04 19:09           ` Junio C Hamano
2022-05-05  0:15           ` Junio C Hamano
2022-05-05 16:47             ` Calvin Wan
2022-05-05 17:01               ` Junio C Hamano
2022-05-02 17:09       ` [PATCH v4 4/8] object-info: send attribute packet regardless of object ids Calvin Wan
2022-05-03  0:05         ` Junio C Hamano
2022-05-03 19:21           ` Calvin Wan
2022-05-03 23:11         ` Jonathan Tan
2022-05-02 17:09       ` [PATCH v4 5/8] transport: add client side capability to request object-info Calvin Wan
2022-05-03  0:54         ` Junio C Hamano
2022-05-03 18:58           ` Calvin Wan
2022-05-04 15:42             ` Junio C Hamano
2022-05-03 23:15         ` Jonathan Tan
2022-05-04 15:50           ` Junio C Hamano
2022-05-02 17:09       ` [PATCH v4 6/8] transport: add object-info fallback to fetch Calvin Wan
2022-05-03 23:27         ` Jonathan Tan
2022-05-02 17:09       ` [PATCH v4 7/8] cat-file: move parse_cmd and DEFAULT_FORMAT up Calvin Wan
2022-05-02 17:09       ` [PATCH v4 8/8] cat-file: add --batch-command remote-object-info command Calvin Wan
2022-05-04 21:27         ` Jonathan Tan
2022-05-05 18:13           ` Calvin Wan
2022-05-05 18:44             ` Junio C Hamano
2022-05-05 19:09               ` Junio C Hamano
2022-05-05 19:19                 ` Calvin Wan
2022-07-31 15:24         ` ZheNing Hu
2022-08-08 17:43           ` Calvin Wan
2022-07-28 23:02       ` [PATCH v5 0/6] " Calvin Wan
2022-07-28 23:56         ` Junio C Hamano
2022-07-29  0:02           ` Junio C Hamano
2022-07-31  8:41         ` Phillip Wood
2022-08-04 22:57           ` Calvin Wan
2022-09-30 23:23         ` Junio C Hamano
2022-07-28 23:02       ` [PATCH v5 1/6] fetch-pack: refactor packet writing Calvin Wan
2022-07-28 23:02       ` [PATCH v5 2/6] fetch-pack: move fetch initialization Calvin Wan
2022-07-28 23:02       ` [PATCH v5 3/6] protocol-caps: initialization bug fix Calvin Wan
2022-07-29 17:51         ` Junio C Hamano
2022-07-28 23:02       ` [PATCH v5 4/6] serve: advertise object-info feature Calvin Wan
2022-07-29 17:57         ` Junio C Hamano
2022-08-01 18:28           ` Calvin Wan
2022-08-01 18:44             ` Ævar Arnfjörð Bjarmason
2022-08-01 18:47             ` Junio C Hamano
2022-08-01 18:58               ` Calvin Wan
2022-07-28 23:02       ` [PATCH v5 5/6] transport: add client support for object-info Calvin Wan
2022-07-29 18:06         ` Junio C Hamano
2022-08-04 20:28           ` Calvin Wan
2022-08-01 13:30         ` Phillip Wood
2022-08-04 22:20           ` Calvin Wan
2022-08-08 10:07             ` Phillip Wood
2022-08-01 14:26         ` Phillip Wood
2022-08-08  9:16         ` Phillip Wood
2022-07-28 23:02       ` [PATCH v5 6/6] cat-file: add remote-object-info to batch-command Calvin Wan
2022-07-29  6:25         ` Ævar Arnfjörð Bjarmason
2022-08-07  5:50           ` ZheNing Hu
2022-08-08 18:07           ` Calvin Wan
2022-08-11 10:58             ` Ævar Arnfjörð Bjarmason
2022-07-29 18:21         ` Junio C Hamano
2022-08-08 18:37           ` Calvin Wan
2022-09-28 13:12         ` Ævar Arnfjörð Bjarmason
2022-07-31 15:02       ` [PATCH v4 0/8] cat-file: add --batch-command remote-object-info command ZheNing Hu
2022-08-08 17:32         ` Calvin Wan
2022-08-13 22:17       ` Junio C Hamano
2022-02-09 19:09 ` [PATCH] fetch —object-info-format: client option for object-info John Cai
2022-02-10 22:49   ` Calvin Wan

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.