Git Mailing List Archive on lore.kernel.org
 help / color / Atom feed
From: "Abhishek Kumar via GitGitGadget" <gitgitgadget@gmail.com>
To: git@vger.kernel.org
Cc: "Derrick Stolee" <stolee@gmail.com>,
	"Jakub Narębski" <jnareb@gmail.com>,
	"Taylor Blau" <me@ttaylor.com>,
	"Abhishek Kumar" <abhishekkumar8222@gmail.com>,
	"Abhishek Kumar" <abhishekkumar8222@gmail.com>
Subject: [PATCH v2 07/10] commit-graph: implement corrected commit date
Date: Sun, 09 Aug 2020 02:53:41 +0000
Message-ID: <bfe14732014807ff19f943cdf51068f0d3043c30.1596941625.git.gitgitgadget@gmail.com> (raw)
In-Reply-To: <pull.676.v2.git.1596941624.gitgitgadget@gmail.com>

From: Abhishek Kumar <abhishekkumar8222@gmail.com>

With most of preparations done, let's implement corrected commit date
offset. We add a new commit-slab to store topogical levels while
writing commit graph and upgrade the generation member in struct
commit_graph_data to a 64-bit timestamp. We store topological levels to
ensure that older versions of Git will still have the performance
benefits from generation number v2.

Signed-off-by: Abhishek Kumar <abhishekkumar8222@gmail.com>
---
 commit-graph.c | 89 ++++++++++++++++++++++++++++----------------------
 commit.h       |  1 +
 2 files changed, 51 insertions(+), 39 deletions(-)

diff --git a/commit-graph.c b/commit-graph.c
index 42f3ec5460..d0f977852b 100644
--- a/commit-graph.c
+++ b/commit-graph.c
@@ -65,6 +65,8 @@ void git_test_write_commit_graph_or_die(void)
 /* Remember to update object flag allocation in object.h */
 #define REACHABLE       (1u<<15)
 
+define_commit_slab(topo_level_slab, uint32_t);
+
 /* Keep track of the order in which commits are added to our list. */
 define_commit_slab(commit_pos, int);
 static struct commit_pos commit_pos = COMMIT_SLAB_INIT(1, commit_pos);
@@ -168,11 +170,6 @@ static int commit_gen_cmp(const void *va, const void *vb)
 	else if (generation_a > generation_b)
 		return 1;
 
-	/* use date as a heuristic when generations are equal */
-	if (a->date < b->date)
-		return -1;
-	else if (a->date > b->date)
-		return 1;
 	return 0;
 }
 
@@ -767,7 +764,10 @@ static void fill_commit_graph_info(struct commit *item, struct commit_graph *g,
 	item->date = (timestamp_t)((date_high << 32) | date_low);
 
 	if (g->chunk_generation_data)
-		graph_data->generation = get_be32(g->chunk_generation_data + sizeof(uint32_t) * lex_index);
+	{
+		graph_data->generation = item->date +
+			(timestamp_t) get_be32(g->chunk_generation_data + sizeof(uint32_t) * lex_index);
+	}
 	else
 		graph_data->generation = get_be32(commit_data + g->hash_len + 8) >> 2;
 }
@@ -948,6 +948,7 @@ struct write_commit_graph_context {
 	struct progress *progress;
 	int progress_done;
 	uint64_t progress_cnt;
+	struct topo_level_slab *topo_levels;
 
 	char *base_graph_name;
 	int num_commit_graphs_before;
@@ -1106,7 +1107,7 @@ static int write_graph_chunk_data(struct hashfile *f,
 		else
 			packedDate[0] = 0;
 
-		packedDate[0] |= htonl(commit_graph_data_at(*list)->generation << 2);
+		packedDate[0] |= htonl(*topo_level_slab_at(ctx->topo_levels, *list) << 2);
 
 		packedDate[1] = htonl((*list)->date);
 		hashwrite(f, packedDate, 8);
@@ -1123,8 +1124,13 @@ static int write_graph_chunk_generation_data(struct hashfile *f,
 	int i;
 	for (i = 0; i < ctx->commits.nr; i++) {
 		struct commit *c = ctx->commits.list[i];
+		timestamp_t offset = commit_graph_data_at(c)->generation - c->date;
 		display_progress(ctx->progress, ++ctx->progress_cnt);
-		hashwrite_be32(f, commit_graph_data_at(c)->generation);
+
+		if (offset > GENERATION_NUMBER_V2_OFFSET_MAX)
+			offset = GENERATION_NUMBER_V2_OFFSET_MAX;
+
+		hashwrite_be32(f, offset);
 	}
 
 	return 0;
@@ -1360,11 +1366,11 @@ static void compute_generation_numbers(struct write_commit_graph_context *ctx)
 					_("Computing commit graph generation numbers"),
 					ctx->commits.nr);
 	for (i = 0; i < ctx->commits.nr; i++) {
-		uint32_t generation = commit_graph_data_at(ctx->commits.list[i])->generation;
+		uint32_t topo_level = *topo_level_slab_at(ctx->topo_levels, ctx->commits.list[i]);
 
 		display_progress(ctx->progress, i + 1);
-		if (generation != GENERATION_NUMBER_V1_INFINITY &&
-		    generation != GENERATION_NUMBER_ZERO)
+		if (topo_level != GENERATION_NUMBER_V1_INFINITY &&
+		    topo_level != GENERATION_NUMBER_ZERO)
 			continue;
 
 		commit_list_insert(ctx->commits.list[i], &list);
@@ -1372,29 +1378,38 @@ static void compute_generation_numbers(struct write_commit_graph_context *ctx)
 			struct commit *current = list->item;
 			struct commit_list *parent;
 			int all_parents_computed = 1;
-			uint32_t max_generation = 0;
+			uint32_t max_level = 0;
+			timestamp_t max_corrected_commit_date = current->date - 1;
 
 			for (parent = current->parents; parent; parent = parent->next) {
-				generation = commit_graph_data_at(parent->item)->generation;
+				topo_level = *topo_level_slab_at(ctx->topo_levels, parent->item);
 
-				if (generation == GENERATION_NUMBER_V1_INFINITY ||
-				    generation == GENERATION_NUMBER_ZERO) {
+				if (topo_level == GENERATION_NUMBER_V1_INFINITY ||
+				    topo_level == GENERATION_NUMBER_ZERO) {
 					all_parents_computed = 0;
 					commit_list_insert(parent->item, &list);
 					break;
-				} else if (generation > max_generation) {
-					max_generation = generation;
+				} else {
+					struct commit_graph_data *data = commit_graph_data_at(parent->item);
+
+					if (topo_level > max_level)
+						max_level = topo_level;
+
+					if (data->generation > max_corrected_commit_date)
+						max_corrected_commit_date = data->generation;
 				}
 			}
 
 			if (all_parents_computed) {
 				struct commit_graph_data *data = commit_graph_data_at(current);
 
-				data->generation = max_generation + 1;
-				pop_commit(&list);
+				if (max_level > GENERATION_NUMBER_MAX - 1)
+					max_level = GENERATION_NUMBER_MAX - 1;
+
+				*topo_level_slab_at(ctx->topo_levels, current) = max_level + 1;
+				data->generation = max_corrected_commit_date + 1;
 
-				if (data->generation > GENERATION_NUMBER_MAX)
-					data->generation = GENERATION_NUMBER_MAX;
+				pop_commit(&list);
 			}
 		}
 	}
@@ -2132,6 +2147,7 @@ int write_commit_graph(struct object_directory *odb,
 	uint32_t i, count_distinct = 0;
 	int res = 0;
 	int replace = 0;
+	struct topo_level_slab topo_levels;
 
 	if (!commit_graph_compatible(the_repository))
 		return 0;
@@ -2146,6 +2162,9 @@ int write_commit_graph(struct object_directory *odb,
 	ctx->total_bloom_filter_data_size = 0;
 	ctx->write_generation_data = !git_env_bool(GIT_TEST_COMMIT_GRAPH_NO_GDAT, 0);
 
+	init_topo_level_slab(&topo_levels);
+	ctx->topo_levels = &topo_levels;
+
 	if (flags & COMMIT_GRAPH_WRITE_BLOOM_FILTERS)
 		ctx->changed_paths = 1;
 	if (!(flags & COMMIT_GRAPH_NO_WRITE_BLOOM_FILTERS)) {
@@ -2387,8 +2406,8 @@ int verify_commit_graph(struct repository *r, struct commit_graph *g, int flags)
 	for (i = 0; i < g->num_commits; i++) {
 		struct commit *graph_commit, *odb_commit;
 		struct commit_list *graph_parents, *odb_parents;
-		timestamp_t max_generation = 0;
-		timestamp_t generation;
+		timestamp_t max_parent_corrected_commit_date = 0;
+		timestamp_t corrected_commit_date;
 
 		display_progress(progress, i + 1);
 		hashcpy(cur_oid.hash, g->chunk_oid_lookup + g->hash_len * i);
@@ -2427,9 +2446,9 @@ int verify_commit_graph(struct repository *r, struct commit_graph *g, int flags)
 					     oid_to_hex(&graph_parents->item->object.oid),
 					     oid_to_hex(&odb_parents->item->object.oid));
 
-			generation = commit_graph_generation(graph_parents->item);
-			if (generation > max_generation)
-				max_generation = generation;
+			corrected_commit_date = commit_graph_generation(graph_parents->item);
+			if (corrected_commit_date > max_parent_corrected_commit_date)
+				max_parent_corrected_commit_date = corrected_commit_date;
 
 			graph_parents = graph_parents->next;
 			odb_parents = odb_parents->next;
@@ -2451,20 +2470,12 @@ int verify_commit_graph(struct repository *r, struct commit_graph *g, int flags)
 		if (generation_zero == GENERATION_ZERO_EXISTS)
 			continue;
 
-		/*
-		 * If one of our parents has generation GENERATION_NUMBER_MAX, then
-		 * our generation is also GENERATION_NUMBER_MAX. Decrement to avoid
-		 * extra logic in the following condition.
-		 */
-		if (max_generation == GENERATION_NUMBER_MAX)
-			max_generation--;
-
-		generation = commit_graph_generation(graph_commit);
-		if (generation != max_generation + 1)
-			graph_report(_("commit-graph generation for commit %s is %u != %u"),
+		corrected_commit_date = commit_graph_generation(graph_commit);
+		if (corrected_commit_date < max_parent_corrected_commit_date + 1)
+			graph_report(_("commit-graph generation for commit %s is %"PRItime" < %"PRItime),
 				     oid_to_hex(&cur_oid),
-				     generation,
-				     max_generation + 1);
+				     corrected_commit_date,
+				     max_parent_corrected_commit_date + 1);
 
 		if (graph_commit->date != odb_commit->date)
 			graph_report(_("commit date for commit %s in commit-graph is %"PRItime" != %"PRItime),
diff --git a/commit.h b/commit.h
index bc0732a4fe..bb846e0025 100644
--- a/commit.h
+++ b/commit.h
@@ -15,6 +15,7 @@
 #define GENERATION_NUMBER_V1_INFINITY 0xFFFFFFFF
 #define GENERATION_NUMBER_MAX 0x3FFFFFFF
 #define GENERATION_NUMBER_ZERO 0
+#define GENERATION_NUMBER_V2_OFFSET_MAX 0xFFFFFFFF
 
 struct commit_list {
 	struct commit *item;
-- 
gitgitgadget


  parent reply index

Thread overview: 107+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-07-28  9:13 [PATCH 0/6] [GSoC] Implement Corrected Commit Date Abhishek Kumar via GitGitGadget
2020-07-28  9:13 ` [PATCH 1/6] commit-graph: fix regression when computing bloom filter Abhishek Kumar via GitGitGadget
2020-07-28 15:28   ` Taylor Blau
2020-07-30  5:24     ` Abhishek Kumar
2020-08-04  0:46   ` Jakub Narębski
2020-08-04  0:56     ` Taylor Blau
2020-08-04 10:10       ` Jakub Narębski
2020-08-04  7:55     ` Jakub Narębski
2020-07-28  9:13 ` [PATCH 2/6] revision: parse parent in indegree_walk_step() Abhishek Kumar via GitGitGadget
2020-07-28 13:00   ` Derrick Stolee
2020-07-28 15:30     ` Taylor Blau
2020-08-05 23:16   ` Jakub Narębski
2020-07-28  9:13 ` [PATCH 3/6] commit-graph: consolidate fill_commit_graph_info Abhishek Kumar via GitGitGadget
2020-07-28 13:14   ` Derrick Stolee
2020-07-28 15:19     ` René Scharfe
2020-07-28 15:58       ` Derrick Stolee
2020-07-28 16:01     ` Taylor Blau
2020-07-30  6:07     ` Abhishek Kumar
2020-07-28  9:13 ` [PATCH 4/6] commit-graph: consolidate compare_commits_by_gen Abhishek Kumar via GitGitGadget
2020-07-28 16:03   ` Taylor Blau
2020-07-28  9:13 ` [PATCH 5/6] commit-graph: implement generation data chunk Abhishek Kumar via GitGitGadget
2020-07-28 16:12   ` Taylor Blau
2020-07-30  6:52     ` Abhishek Kumar
2020-07-28  9:13 ` [PATCH 6/6] commit-graph: implement corrected commit date offset Abhishek Kumar via GitGitGadget
2020-07-28 15:55   ` Derrick Stolee
2020-07-28 16:23     ` Taylor Blau
2020-07-30  7:27     ` Abhishek Kumar
2020-07-28 14:54 ` [PATCH 0/6] [GSoC] Implement Corrected Commit Date Taylor Blau
2020-07-30  7:47   ` Abhishek Kumar
2020-07-28 16:35 ` Derrick Stolee
2020-08-09  2:53 ` [PATCH v2 00/10] " Abhishek Kumar via GitGitGadget
2020-08-09  2:53   ` [PATCH v2 01/10] commit-graph: fix regression when computing bloom filter Abhishek Kumar via GitGitGadget
2020-08-09  2:53   ` [PATCH v2 02/10] revision: parse parent in indegree_walk_step() Abhishek Kumar via GitGitGadget
2020-08-09  2:53   ` [PATCH v2 03/10] commit-graph: consolidate fill_commit_graph_info Abhishek Kumar via GitGitGadget
2020-08-09  2:53   ` [PATCH v2 04/10] commit-graph: consolidate compare_commits_by_gen Abhishek Kumar via GitGitGadget
2020-08-09  2:53   ` [PATCH v2 05/10] commit-graph: implement generation data chunk Abhishek Kumar via GitGitGadget
2020-08-10 16:28     ` Derrick Stolee
2020-08-11 11:03       ` Abhishek Kumar
2020-08-11 12:27         ` Derrick Stolee
2020-08-11 18:58           ` Taylor Blau
2020-08-09  2:53   ` [PATCH v2 06/10] commit-graph: return 64-bit generation number Abhishek Kumar via GitGitGadget
2020-08-09  2:53   ` Abhishek Kumar via GitGitGadget [this message]
2020-08-10 14:23     ` [PATCH v2 07/10] commit-graph: implement corrected commit date Derrick Stolee
2020-08-14  4:59       ` Abhishek Kumar
2020-08-14 12:24         ` Derrick Stolee
2020-08-09  2:53   ` [PATCH v2 08/10] commit-graph: handle mixed generation commit chains Abhishek Kumar via GitGitGadget
2020-08-10 16:42     ` Derrick Stolee
2020-08-11 11:36       ` Abhishek Kumar
2020-08-11 12:43         ` Derrick Stolee
2020-08-09  2:53   ` [PATCH v2 09/10] commit-reach: use corrected commit dates in paint_down_to_common() Abhishek Kumar via GitGitGadget
2020-08-09  2:53   ` [PATCH v2 10/10] doc: add corrected commit date info Abhishek Kumar via GitGitGadget
2020-08-10 16:47   ` [PATCH v2 00/10] [GSoC] Implement Corrected Commit Date Derrick Stolee
2020-08-15 16:39   ` [PATCH v3 00/11] " Abhishek Kumar via GitGitGadget
2020-08-15 16:39     ` [PATCH v3 01/11] commit-graph: fix regression when computing bloom filter Abhishek Kumar via GitGitGadget
2020-08-17 22:30       ` Jakub Narębski
2020-08-15 16:39     ` [PATCH v3 02/11] revision: parse parent in indegree_walk_step() Abhishek Kumar via GitGitGadget
2020-08-18 14:18       ` Jakub Narębski
2020-08-15 16:39     ` [PATCH v3 03/11] commit-graph: consolidate fill_commit_graph_info Abhishek Kumar via GitGitGadget
2020-08-19 17:54       ` Jakub Narębski
2020-08-21  4:11         ` Abhishek Kumar
2020-08-25 11:11           ` Jakub Narębski
2020-09-01 11:35             ` Abhishek Kumar
2020-08-15 16:39     ` [PATCH v3 04/11] commit-graph: consolidate compare_commits_by_gen Abhishek Kumar via GitGitGadget
2020-08-17 13:22       ` Derrick Stolee
2020-08-21 11:05       ` Jakub Narębski
2020-08-15 16:39     ` [PATCH v3 05/11] commit-graph: return 64-bit generation number Abhishek Kumar via GitGitGadget
2020-08-21 13:14       ` Jakub Narębski
2020-08-25  5:04         ` Abhishek Kumar
2020-08-25 12:18           ` Jakub Narębski
2020-09-01 12:06             ` Abhishek Kumar
2020-09-03 13:42               ` Jakub Narębski
2020-09-05 17:21                 ` Abhishek Kumar
2020-09-13 15:39                   ` Jakub Narębski
2020-08-15 16:39     ` [PATCH v3 06/11] commit-graph: add a slab to store topological levels Abhishek Kumar via GitGitGadget
2020-08-21 18:43       ` Jakub Narębski
2020-08-25  6:14         ` Abhishek Kumar
2020-08-25  7:33           ` Jakub Narębski
2020-08-25  7:56             ` Jakub Narębski
2020-09-01 10:26               ` Abhishek Kumar
2020-09-03  9:25                 ` Jakub Narębski
2020-08-15 16:39     ` [PATCH v3 07/11] commit-graph: implement corrected commit date Abhishek Kumar via GitGitGadget
2020-08-22  0:05       ` Jakub Narębski
2020-08-25  6:49         ` Abhishek Kumar
2020-08-25 10:07           ` Jakub Narębski
2020-09-01 11:01             ` Abhishek Kumar
2020-08-15 16:39     ` [PATCH v3 08/11] commit-graph: implement generation data chunk Abhishek Kumar via GitGitGadget
2020-08-22 13:09       ` Jakub Narębski
2020-08-15 16:39     ` [PATCH v3 09/11] commit-graph: use generation v2 only if entire chain does Abhishek Kumar via GitGitGadget
2020-08-22 17:14       ` Jakub Narębski
2020-08-26  7:15         ` Abhishek Kumar
2020-08-26 10:38           ` Jakub Narębski
2020-08-15 16:39     ` [PATCH v3 10/11] commit-reach: use corrected commit dates in paint_down_to_common() Abhishek Kumar via GitGitGadget
2020-08-22 19:09       ` Jakub Narębski
2020-09-01 10:08         ` Abhishek Kumar
2020-09-03 19:11           ` Jakub Narębski
2020-08-15 16:39     ` [PATCH v3 11/11] doc: add corrected commit date info Abhishek Kumar via GitGitGadget
2020-08-22 22:20       ` Jakub Narębski
2020-08-27  6:39         ` Abhishek Kumar
2020-08-27 12:43           ` Jakub Narębski
2020-08-27 13:15           ` Derrick Stolee
2020-09-01 13:01             ` Abhishek Kumar
2020-08-17  0:13     ` [PATCH v3 00/11] [GSoC] Implement Corrected Commit Date Jakub Narębski
     [not found]       ` <CANQwDwdKp7oKy9BeKdvKhwPUiq0R5MS8TCw-eWGCYCoMGv=G-g@mail.gmail.com>
2020-08-17  1:32         ` Fwd: " Taylor Blau
2020-08-17  7:56           ` Jakub Narębski
2020-08-18  6:12       ` Abhishek Kumar
2020-08-23 15:27       ` Jakub Narębski
2020-08-24  2:49         ` Abhishek Kumar

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=bfe14732014807ff19f943cdf51068f0d3043c30.1596941625.git.gitgitgadget@gmail.com \
    --to=gitgitgadget@gmail.com \
    --cc=abhishekkumar8222@gmail.com \
    --cc=git@vger.kernel.org \
    --cc=jnareb@gmail.com \
    --cc=me@ttaylor.com \
    --cc=stolee@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Git Mailing List Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/git/0 git/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 git git/ https://lore.kernel.org/git \
		git@vger.kernel.org
	public-inbox-index git

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.git


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git