All of lore.kernel.org
 help / color / mirror / Atom feed
From: Patrick Steinhardt <ps@pks.im>
To: git@vger.kernel.org
Subject: [PATCH 4/9] refs/reftable: don't recompute committer ident
Date: Tue, 2 Apr 2024 19:30:04 +0200	[thread overview]
Message-ID: <a9a6795c025b23035bfdd3e23b0113df9f6c5e4b.1712078736.git.ps@pks.im> (raw)
In-Reply-To: <cover.1712078736.git.ps@pks.im>

[-- Attachment #1: Type: text/plain, Size: 6630 bytes --]

In order to write reflog entries we need to compute the committer's
identity as it becomes encoded in the log record itself. In the reftable
backend, computing the identity is repeated for every single reflog
entry which we are about to write in a transaction. Needless to say,
this can be quite a waste of effort when writing many refs with reflog
entries in a single transaction.

Refactor the code to pre-compute the committer information. This results
in a small speedup when writing 100000 refs in a single transaction:

  Benchmark 1: update-ref: create many refs (HEAD~)
    Time (mean ± σ):      2.895 s ±  0.020 s    [User: 1.516 s, System: 1.374 s]
    Range (min … max):    2.868 s …  2.983 s    100 runs

  Benchmark 2: update-ref: create many refs (HEAD)
    Time (mean ± σ):      2.845 s ±  0.017 s    [User: 1.461 s, System: 1.379 s]
    Range (min … max):    2.803 s …  2.913 s    100 runs

  Summary
    update-ref: create many refs (HEAD) ran
      1.02 ± 0.01 times faster than update-ref: create many refs (HEAD~)

Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
 refs/reftable-backend.c | 52 +++++++++++++++++++++++++++--------------
 1 file changed, 34 insertions(+), 18 deletions(-)

diff --git a/refs/reftable-backend.c b/refs/reftable-backend.c
index 7515dd3019..9e8967e82f 100644
--- a/refs/reftable-backend.c
+++ b/refs/reftable-backend.c
@@ -171,32 +171,30 @@ static int should_write_log(struct ref_store *refs, const char *refname)
 	}
 }
 
-static void fill_reftable_log_record(struct reftable_log_record *log)
+static void fill_reftable_log_record(struct reftable_log_record *log, const struct ident_split *split)
 {
-	const char *info = git_committer_info(0);
-	struct ident_split split = {0};
+	const char *tz_begin;
 	int sign = 1;
 
-	if (split_ident_line(&split, info, strlen(info)))
-		BUG("failed splitting committer info");
-
 	reftable_log_record_release(log);
 	log->value_type = REFTABLE_LOG_UPDATE;
 	log->value.update.name =
-		xstrndup(split.name_begin, split.name_end - split.name_begin);
+		xstrndup(split->name_begin, split->name_end - split->name_begin);
 	log->value.update.email =
-		xstrndup(split.mail_begin, split.mail_end - split.mail_begin);
-	log->value.update.time = atol(split.date_begin);
-	if (*split.tz_begin == '-') {
+		xstrndup(split->mail_begin, split->mail_end - split->mail_begin);
+	log->value.update.time = atol(split->date_begin);
+
+	tz_begin = split->tz_begin;
+	if (*tz_begin == '-') {
 		sign = -1;
-		split.tz_begin++;
+		tz_begin++;
 	}
-	if (*split.tz_begin == '+') {
+	if (*tz_begin == '+') {
 		sign = 1;
-		split.tz_begin++;
+		tz_begin++;
 	}
 
-	log->value.update.tz_offset = sign * atoi(split.tz_begin);
+	log->value.update.tz_offset = sign * atoi(tz_begin);
 }
 
 static int read_ref_without_reload(struct reftable_stack *stack,
@@ -1023,9 +1021,15 @@ static int write_transaction_table(struct reftable_writer *writer, void *cb_data
 		reftable_stack_merged_table(arg->stack);
 	uint64_t ts = reftable_stack_next_update_index(arg->stack);
 	struct reftable_log_record *logs = NULL;
+	struct ident_split committer_ident = {0};
 	size_t logs_nr = 0, logs_alloc = 0, i;
+	const char *committer_info;
 	int ret = 0;
 
+	committer_info = git_committer_info(0);
+	if (split_ident_line(&committer_ident, committer_info, strlen(committer_info)))
+		BUG("failed splitting committer info");
+
 	QSORT(arg->updates, arg->updates_nr, transaction_update_cmp);
 
 	reftable_writer_set_limits(writer, ts, ts);
@@ -1091,7 +1095,7 @@ static int write_transaction_table(struct reftable_writer *writer, void *cb_data
 			log = &logs[logs_nr++];
 			memset(log, 0, sizeof(*log));
 
-			fill_reftable_log_record(log);
+			fill_reftable_log_record(log, &committer_ident);
 			log->update_index = ts;
 			log->refname = xstrdup(u->refname);
 			memcpy(log->value.update.new_hash, u->new_oid.hash, GIT_MAX_RAWSZ);
@@ -1238,9 +1242,11 @@ static int write_create_symref_table(struct reftable_writer *writer, void *cb_da
 		.value.symref = (char *)create->target,
 		.update_index = ts,
 	};
+	struct ident_split committer_ident = {0};
 	struct reftable_log_record log = {0};
 	struct object_id new_oid;
 	struct object_id old_oid;
+	const char *committer_info;
 	int ret;
 
 	reftable_writer_set_limits(writer, ts, ts);
@@ -1268,7 +1274,11 @@ static int write_create_symref_table(struct reftable_writer *writer, void *cb_da
 	    !should_write_log(&create->refs->base, create->refname))
 		return 0;
 
-	fill_reftable_log_record(&log);
+	committer_info = git_committer_info(0);
+	if (split_ident_line(&committer_ident, committer_info, strlen(committer_info)))
+		BUG("failed splitting committer info");
+
+	fill_reftable_log_record(&log, &committer_ident);
 	log.refname = xstrdup(create->refname);
 	log.update_index = ts;
 	log.value.update.message = xstrndup(create->logmsg,
@@ -1344,10 +1354,16 @@ static int write_copy_table(struct reftable_writer *writer, void *cb_data)
 	struct reftable_log_record old_log = {0}, *logs = NULL;
 	struct reftable_iterator it = {0};
 	struct string_list skip = STRING_LIST_INIT_NODUP;
+	struct ident_split committer_ident = {0};
 	struct strbuf errbuf = STRBUF_INIT;
 	size_t logs_nr = 0, logs_alloc = 0, i;
+	const char *committer_info;
 	int ret;
 
+	committer_info = git_committer_info(0);
+	if (split_ident_line(&committer_ident, committer_info, strlen(committer_info)))
+		BUG("failed splitting committer info");
+
 	if (reftable_stack_read_ref(arg->stack, arg->oldname, &old_ref)) {
 		ret = error(_("refname %s not found"), arg->oldname);
 		goto done;
@@ -1422,7 +1438,7 @@ static int write_copy_table(struct reftable_writer *writer, void *cb_data)
 
 		ALLOC_GROW(logs, logs_nr + 1, logs_alloc);
 		memset(&logs[logs_nr], 0, sizeof(logs[logs_nr]));
-		fill_reftable_log_record(&logs[logs_nr]);
+		fill_reftable_log_record(&logs[logs_nr], &committer_ident);
 		logs[logs_nr].refname = (char *)arg->newname;
 		logs[logs_nr].update_index = deletion_ts;
 		logs[logs_nr].value.update.message =
@@ -1454,7 +1470,7 @@ static int write_copy_table(struct reftable_writer *writer, void *cb_data)
 	 */
 	ALLOC_GROW(logs, logs_nr + 1, logs_alloc);
 	memset(&logs[logs_nr], 0, sizeof(logs[logs_nr]));
-	fill_reftable_log_record(&logs[logs_nr]);
+	fill_reftable_log_record(&logs[logs_nr], &committer_ident);
 	logs[logs_nr].refname = (char *)arg->newname;
 	logs[logs_nr].update_index = creation_ts;
 	logs[logs_nr].value.update.message =
-- 
2.44.GIT


[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

  parent reply	other threads:[~2024-04-02 17:30 UTC|newest]

Thread overview: 49+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-04-02 17:29 [PATCH 0/9] reftable: optimize write performance Patrick Steinhardt
2024-04-02 17:29 ` [PATCH 1/9] refs/reftable: fix D/F conflict error message on ref copy Patrick Steinhardt
2024-04-03 18:28   ` Junio C Hamano
2024-04-02 17:29 ` [PATCH 2/9] refs/reftable: perform explicit D/F check when writing symrefs Patrick Steinhardt
2024-04-02 17:30 ` [PATCH 3/9] refs/reftable: skip duplicate name checks Patrick Steinhardt
2024-04-02 17:30 ` Patrick Steinhardt [this message]
2024-04-03 18:58   ` [PATCH 4/9] refs/reftable: don't recompute committer ident Junio C Hamano
2024-04-04  5:36     ` Patrick Steinhardt
2024-04-02 17:30 ` [PATCH 5/9] reftable/writer: refactorings for `writer_add_record()` Patrick Steinhardt
2024-04-02 17:30 ` [PATCH 6/9] reftable/writer: refactorings for `writer_flush_nonempty_block()` Patrick Steinhardt
2024-04-02 17:30 ` [PATCH 7/9] reftable/block: reuse zstream when writing log blocks Patrick Steinhardt
2024-04-03 19:35   ` Junio C Hamano
2024-04-04  5:36     ` Patrick Steinhardt
2024-04-02 17:30 ` [PATCH 8/9] reftable/block: reuse compressed array Patrick Steinhardt
2024-04-02 17:30 ` [PATCH 9/9] reftable/writer: reset `last_key` instead of releasing it Patrick Steinhardt
2024-04-04  5:48 ` [PATCH v2 00/11] reftable: optimize write performance Patrick Steinhardt
2024-04-04  5:48   ` [PATCH v2 01/11] refs/reftable: fix D/F conflict error message on ref copy Patrick Steinhardt
2024-04-04  5:48   ` [PATCH v2 02/11] refs/reftable: perform explicit D/F check when writing symrefs Patrick Steinhardt
2024-04-04  5:48   ` [PATCH v2 03/11] refs/reftable: skip duplicate name checks Patrick Steinhardt
2024-04-04  5:48   ` [PATCH v2 04/11] reftable: remove " Patrick Steinhardt
2024-04-04  5:48   ` [PATCH v2 05/11] refs/reftable: don't recompute committer ident Patrick Steinhardt
2024-04-04  5:48   ` [PATCH v2 06/11] reftable/writer: refactorings for `writer_add_record()` Patrick Steinhardt
2024-04-04  6:58     ` Han-Wen Nienhuys
2024-04-04  7:32       ` Patrick Steinhardt
2024-04-04  5:48   ` [PATCH v2 07/11] reftable/writer: refactorings for `writer_flush_nonempty_block()` Patrick Steinhardt
2024-04-04  5:48   ` [PATCH v2 08/11] reftable/writer: unify releasing memory Patrick Steinhardt
2024-04-04  7:08     ` Han-Wen Nienhuys
2024-04-04  7:32       ` Patrick Steinhardt
2024-04-04  9:00         ` Han-Wen Nienhuys
2024-04-04 11:43           ` Patrick Steinhardt
2024-04-04  5:48   ` [PATCH v2 09/11] reftable/writer: reset `last_key` instead of releasing it Patrick Steinhardt
2024-04-04  5:48   ` [PATCH v2 10/11] reftable/block: reuse zstream when writing log blocks Patrick Steinhardt
2024-04-04  5:48   ` [PATCH v2 11/11] reftable/block: reuse compressed array Patrick Steinhardt
2024-04-04  7:09   ` [PATCH v2 00/11] reftable: optimize write performance Han-Wen Nienhuys
2024-04-04  7:32     ` Patrick Steinhardt
2024-04-08 12:23 ` [PATCH v3 " Patrick Steinhardt
2024-04-08 12:23   ` [PATCH v3 01/11] refs/reftable: fix D/F conflict error message on ref copy Patrick Steinhardt
2024-04-08 12:23   ` [PATCH v3 02/11] refs/reftable: perform explicit D/F check when writing symrefs Patrick Steinhardt
2024-04-08 12:24   ` [PATCH v3 03/11] refs/reftable: skip duplicate name checks Patrick Steinhardt
2024-04-08 12:24   ` [PATCH v3 04/11] reftable: remove " Patrick Steinhardt
2024-04-08 12:24   ` [PATCH v3 05/11] refs/reftable: don't recompute committer ident Patrick Steinhardt
2024-04-08 12:24   ` [PATCH v3 06/11] reftable/writer: refactorings for `writer_add_record()` Patrick Steinhardt
2024-04-08 12:24   ` [PATCH v3 07/11] reftable/writer: refactorings for `writer_flush_nonempty_block()` Patrick Steinhardt
2024-04-08 12:24   ` [PATCH v3 08/11] reftable/writer: unify releasing memory Patrick Steinhardt
2024-04-08 12:24   ` [PATCH v3 09/11] reftable/writer: reset `last_key` instead of releasing it Patrick Steinhardt
2024-04-08 12:24   ` [PATCH v3 10/11] reftable/block: reuse zstream when writing log blocks Patrick Steinhardt
2024-04-08 12:24   ` [PATCH v3 11/11] reftable/block: reuse compressed array Patrick Steinhardt
2024-04-09  0:09   ` [PATCH v3 00/11] reftable: optimize write performance Junio C Hamano
2024-04-09  3:16     ` Patrick Steinhardt

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=a9a6795c025b23035bfdd3e23b0113df9f6c5e4b.1712078736.git.ps@pks.im \
    --to=ps@pks.im \
    --cc=git@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.