git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Jacob Vosmaer <jacob@gitlab.com>
To: peff@peff.net, me@ttaylorr.com, git@vger.kernel.org,
	gitster@pobox.com, ps@pks.im
Cc: Jacob Vosmaer <jacob@gitlab.com>
Subject: [PATCH 2/2] upload-pack: use stdio in send_ref callbacks
Date: Thu, 26 Aug 2021 12:06:48 +0200	[thread overview]
Message-ID: <20210826100648.10333-2-jacob@gitlab.com> (raw)
In-Reply-To: <20210826100648.10333-1-jacob@gitlab.com>

In both protocol v0 and v2, upload-pack writes one pktline packet per
advertised ref to stdout. That means one or two write(2) syscalls per
ref. This is problematic if these writes become network sends with
high overhead.

This commit changes both send_ref callbacks to use buffered IO using
stdio. The default buffer size is usually 4KB, but with musl libc it
is only 1KB. So for consistent results we need to set a buffer size
ourselves. During startup of upload-pack, we set the stdout buffer
size to LARGE_PACKET_MAX, i.e. just shy of 64KB.

To give an example of the impact: I set up a single-threaded loop that
calls ls-remote (with HTTP and protocol v2) on a local GitLab
instance, on a repository with 11K refs. When I switch from Git
v2.32.0 to this patch, I see a 50% reduction in CPU time for Git, and
75% for Gitaly (GitLab's Git RPC service).

So using buffered IO not only saves syscalls in upload-pack, it also
saves time in things that consume upload-pack's output.

Signed-off-by: Jacob Vosmaer <jacob@gitlab.com>
---
 builtin/upload-pack.c | 10 ++++++++++
 ls-refs.c             |  5 ++++-
 upload-pack.c         | 15 ++++++++++++---
 3 files changed, 26 insertions(+), 4 deletions(-)

diff --git a/builtin/upload-pack.c b/builtin/upload-pack.c
index 6da8fa2607..8033f84124 100644
--- a/builtin/upload-pack.c
+++ b/builtin/upload-pack.c
@@ -48,6 +48,16 @@ int cmd_upload_pack(int argc, const char **argv, const char *prefix)
 	if (!enter_repo(dir, strict))
 		die("'%s' does not appear to be a git repository", dir);
 
+	/*
+	 * Increase the stdio buffer size for stdout, for the benefit of ref
+	 * advertisement writes. We are only allowed to call setvbuf(3) "after
+	 * opening a stream and before any other operations have been performed
+	 * on it", so let's call it before we have written anything to stdout.
+	 */
+	if (setvbuf(stdout, xmalloc(LARGE_PACKET_MAX), _IOFBF,
+			LARGE_PACKET_MAX))
+		die_errno("failed to grow stdout buffer");
+
 	switch (determine_protocol_version_server()) {
 	case protocol_v2:
 		serve_opts.advertise_capabilities = opts.advertise_refs;
diff --git a/ls-refs.c b/ls-refs.c
index 88f6c3f60d..83f2948fc3 100644
--- a/ls-refs.c
+++ b/ls-refs.c
@@ -105,7 +105,7 @@ static int send_ref(const char *refname, const struct object_id *oid,
 	}
 
 	strbuf_addch(&refline, '\n');
-	packet_write(1, refline.buf, refline.len);
+	packet_fwrite(stdout, refline.buf, refline.len);
 
 	strbuf_release(&refline);
 	return 0;
@@ -171,6 +171,9 @@ int ls_refs(struct repository *r, struct strvec *keys,
 		strvec_push(&data.prefixes, "");
 	for_each_fullref_in_prefixes(get_git_namespace(), data.prefixes.v,
 				     send_ref, &data, 0);
+	/* Call fflush because send_ref uses stdio. */
+	if (fflush(stdout))
+		die_errno(_("write failure on standard output"));
 	packet_flush(1);
 	strvec_clear(&data.prefixes);
 	return 0;
diff --git a/upload-pack.c b/upload-pack.c
index 297b76fcb4..b592ac6cfb 100644
--- a/upload-pack.c
+++ b/upload-pack.c
@@ -58,6 +58,7 @@ enum allow_uor {
  */
 struct upload_pack_data {
 	struct string_list symref;				/* v0 only */
+	struct strbuf send_ref_buf;				/* v0 only */
 	struct object_array want_obj;
 	struct object_array have_obj;
 	struct oid_array haves;					/* v2 only */
@@ -126,6 +127,7 @@ static void upload_pack_data_init(struct upload_pack_data *data)
 	struct string_list uri_protocols = STRING_LIST_INIT_DUP;
 	struct object_array extra_edge_obj = OBJECT_ARRAY_INIT;
 	struct string_list allowed_filters = STRING_LIST_INIT_DUP;
+	struct strbuf send_ref_buf = STRBUF_INIT;
 
 	memset(data, 0, sizeof(*data));
 	data->symref = symref;
@@ -141,6 +143,7 @@ static void upload_pack_data_init(struct upload_pack_data *data)
 	data->allow_filter_fallback = 1;
 	data->tree_filter_max_depth = ULONG_MAX;
 	packet_writer_init(&data->writer, 1);
+	data->send_ref_buf = send_ref_buf;
 
 	data->keepalive = 5;
 	data->advertise_sid = 0;
@@ -158,6 +161,7 @@ static void upload_pack_data_clear(struct upload_pack_data *data)
 	object_array_clear(&data->extra_edge_obj);
 	list_objects_filter_release(&data->filter_options);
 	string_list_clear(&data->allowed_filters, 0);
+	strbuf_release(&data->send_ref_buf);
 
 	free((char *)data->pack_objects_hook);
 }
@@ -1201,13 +1205,14 @@ static int send_ref(const char *refname, const struct object_id *oid,
 	if (mark_our_ref(refname_nons, refname, oid))
 		return 0;
 
+	strbuf_reset(&data->send_ref_buf);
 	if (capabilities) {
 		struct strbuf symref_info = STRBUF_INIT;
 		struct strbuf session_id = STRBUF_INIT;
 
 		format_symref_info(&symref_info, &data->symref);
 		format_session_id(&session_id, data);
-		packet_write_fmt(1, "%s %s%c%s%s%s%s%s%s%s object-format=%s agent=%s\n",
+		packet_buf_write(&data->send_ref_buf, "%s %s%c%s%s%s%s%s%s%s object-format=%s agent=%s\n",
 			     oid_to_hex(oid), refname_nons,
 			     0, capabilities,
 			     (data->allow_uor & ALLOW_TIP_SHA1) ?
@@ -1223,11 +1228,12 @@ static int send_ref(const char *refname, const struct object_id *oid,
 		strbuf_release(&symref_info);
 		strbuf_release(&session_id);
 	} else {
-		packet_write_fmt(1, "%s %s\n", oid_to_hex(oid), refname_nons);
+		packet_buf_write(&data->send_ref_buf, "%s %s\n", oid_to_hex(oid), refname_nons);
 	}
 	capabilities = NULL;
 	if (!peel_iterated_oid(oid, &peeled))
-		packet_write_fmt(1, "%s %s^{}\n", oid_to_hex(&peeled), refname_nons);
+		packet_buf_write(&data->send_ref_buf, "%s %s^{}\n", oid_to_hex(&peeled), refname_nons);
+	fwrite_or_die(stdout, data->send_ref_buf.buf, data->send_ref_buf.len);
 	return 0;
 }
 
@@ -1348,6 +1354,9 @@ void upload_pack(struct upload_pack_options *options)
 		reset_timeout(data.timeout);
 		head_ref_namespaced(send_ref, &data);
 		for_each_namespaced_ref(send_ref, &data);
+		/* Call fflush because send_ref uses stdio. */
+		if (fflush(stdout))
+			die_errno(_("write failure on standard output"));
 		advertise_shallow_grafts(1);
 		packet_flush(1);
 	} else {
-- 
2.32.0


  reply	other threads:[~2021-08-26 10:09 UTC|newest]

Thread overview: 27+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-08-24 14:02 [PATCH 1/1] upload-pack: buffer ref advertisement writes Jacob Vosmaer
2021-08-24 21:07 ` Taylor Blau
2021-08-24 21:42   ` Junio C Hamano
2021-08-25  0:44     ` Jeff King
2021-08-26 10:02       ` Jacob Vosmaer
2021-08-26 10:06         ` [PATCH 1/2] pkt-line: add packet_fwrite Jacob Vosmaer
2021-08-26 10:06           ` Jacob Vosmaer [this message]
2021-08-26 16:33             ` [PATCH 2/2] upload-pack: use stdio in send_ref callbacks Junio C Hamano
2021-08-26 20:21               ` Junio C Hamano
2021-08-26 22:35               ` Taylor Blau
2021-08-26 23:24               ` Jeff King
2021-08-27 16:15                 ` Junio C Hamano
2021-08-31  9:34                   ` [PATCH v3 0/2] send_ref buffering Jacob Vosmaer
2021-08-31  9:34                     ` [PATCH v3 1/2] pkt-line: add stdio packet write functions Jacob Vosmaer
2021-08-31 10:37                       ` Jeff King
2021-08-31 18:13                       ` Junio C Hamano
2021-09-01 12:54                         ` [PATCH v4 0/2] send_ref buffering Jacob Vosmaer
2021-09-01 12:54                           ` [PATCH v4 1/2] pkt-line: add stdio packet write functions Jacob Vosmaer
2021-09-01 12:54                           ` [PATCH v4 2/2] upload-pack: use stdio in send_ref callbacks Jacob Vosmaer
2021-09-02  9:18                           ` [PATCH v4 0/2] send_ref buffering Jeff King
2021-08-31  9:34                     ` [PATCH v3 2/2] upload-pack: use stdio in send_ref callbacks Jacob Vosmaer
2021-08-31 10:25                     ` [PATCH v3 0/2] send_ref buffering Jeff King
2021-08-31 13:08                       ` Jacob Vosmaer
2021-08-31 17:44                         ` Jacob Vosmaer
2021-09-01  0:15                         ` Jeff King
2021-08-26 23:32             ` [PATCH 2/2] upload-pack: use stdio in send_ref callbacks Jeff King
2021-08-26 16:33           ` [PATCH 1/2] pkt-line: add packet_fwrite Junio C Hamano

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210826100648.10333-2-jacob@gitlab.com \
    --to=jacob@gitlab.com \
    --cc=git@vger.kernel.org \
    --cc=gitster@pobox.com \
    --cc=me@ttaylorr.com \
    --cc=peff@peff.net \
    --cc=ps@pks.im \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).