All of lore.kernel.org
 help / color / mirror / Atom feed
From: Jonathan Tan <jonathantanmy@google.com>
To: git@vger.kernel.org
Cc: Jonathan Tan <jonathantanmy@google.com>,
	peff@peff.net, steadmon@google.com
Subject: [PATCH 5/7] index-pack: calculate {ref,ofs}_{first,last} early
Date: Mon, 24 Aug 2020 12:16:36 -0700	[thread overview]
Message-ID: <91459ab4119a33d40d39a565c96282db7865c946.1598296530.git.jonathantanmy@google.com> (raw)
In-Reply-To: <cover.1598296530.git.jonathantanmy@google.com>

This is refactoring 2 of 2 to simplify struct base_data.

Whenever we make a struct base_data, immediately calculate its delta
children. This eliminates confusion as to when the
{ref,ofs}_{first,last} fields are initialized.

Before this patch, the delta children were calculated at the last
possible moment. This allowed the members of struct base_data to be
populated in any order, which is superficially useful when we have the
object contents before the struct object_entry. But it also makes
reasoning about the state of struct base_data more complicated, hence
this patch.

Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
---
 builtin/index-pack.c | 123 +++++++++++++++++++++----------------------
 1 file changed, 60 insertions(+), 63 deletions(-)

diff --git a/builtin/index-pack.c b/builtin/index-pack.c
index 032716553c..e98b11ab37 100644
--- a/builtin/index-pack.c
+++ b/builtin/index-pack.c
@@ -33,12 +33,15 @@ struct object_stat {
 };
 
 struct base_data {
+	/* Initialized by make_base(). */
 	struct base_data *base;
 	struct object_entry *obj;
-	void *data;
-	unsigned long size;
 	int ref_first, ref_last;
 	int ofs_first, ofs_last;
+
+	/* Not initialized by make_base(). */
+	void *data;
+	unsigned long size;
 };
 
 struct thread_local {
@@ -362,14 +365,6 @@ static void set_thread_data(struct thread_local *data)
 		pthread_setspecific(key, data);
 }
 
-static struct base_data *alloc_base_data(void)
-{
-	struct base_data *base = xcalloc(1, sizeof(struct base_data));
-	base->ref_last = -1;
-	base->ofs_last = -1;
-	return base;
-}
-
 static void free_base_data(struct base_data *c)
 {
 	if (c->data) {
@@ -406,19 +401,6 @@ static void prune_base_data(struct base_data *youngest_child)
 	free(ancestry);
 }
 
-static void link_base_data(struct base_data *base, struct base_data *c)
-{
-	c->base = base;
-	if (c->data)
-		get_thread_data()->base_cache_used += c->size;
-	prune_base_data(c);
-}
-
-static void unlink_base_data(struct base_data *c)
-{
-	free_base_data(c);
-}
-
 static int is_delta_type(enum object_type type)
 {
 	return (type == OBJ_REF_DELTA || type == OBJ_OFS_DELTA);
@@ -929,10 +911,25 @@ static void *get_base_data(struct base_data *c)
 	return c->data;
 }
 
-static void resolve_delta(struct object_entry *delta_obj,
-			  struct base_data *base, struct base_data *result)
+static struct base_data *make_base(struct object_entry *obj,
+				   struct base_data *parent)
 {
-	void *base_data, *delta_data;
+	struct base_data *base = xcalloc(1, sizeof(struct base_data));
+	base->base = parent;
+	base->obj = obj;
+	find_ref_delta_children(&obj->idx.oid,
+				&base->ref_first, &base->ref_last);
+	find_ofs_delta_children(obj->idx.offset,
+				&base->ofs_first, &base->ofs_last);
+	return base;
+}
+
+static struct base_data *resolve_delta(struct object_entry *delta_obj,
+				       struct base_data *base)
+{
+	void *base_data, *delta_data, *result_data;
+	struct base_data *result;
+	unsigned long result_size;
 
 	if (show_stat) {
 		int i = delta_obj - objects;
@@ -946,19 +943,31 @@ static void resolve_delta(struct object_entry *delta_obj,
 	}
 	delta_data = get_data_from_pack(delta_obj);
 	base_data = get_base_data(base);
-	result->obj = delta_obj;
-	result->data = patch_delta(base_data, base->size,
-				   delta_data, delta_obj->size, &result->size);
+	result_data = patch_delta(base_data, base->size,
+				  delta_data, delta_obj->size, &result_size);
 	free(delta_data);
-	if (!result->data)
+	if (!result_data)
 		bad_object(delta_obj->idx.offset, _("failed to apply delta"));
-	hash_object_file(the_hash_algo, result->data, result->size,
+	hash_object_file(the_hash_algo, result_data, result_size,
 			 type_name(delta_obj->real_type), &delta_obj->idx.oid);
-	sha1_object(result->data, NULL, result->size, delta_obj->real_type,
+	sha1_object(result_data, NULL, result_size, delta_obj->real_type,
 		    &delta_obj->idx.oid);
+
+	result = make_base(delta_obj, base);
+	if (result->ref_last == -1 && result->ofs_last == -1) {
+		free(result_data);
+	} else {
+		result->data = result_data;
+		result->size = result_size;
+		get_thread_data()->base_cache_used += result->size;
+		prune_base_data(result);
+	}
+
 	counter_lock();
 	nr_resolved_deltas++;
 	counter_unlock();
+
+	return result;
 }
 
 /*
@@ -984,24 +993,9 @@ static int compare_and_swap_type(signed char *type,
 static struct base_data *find_unresolved_deltas_1(struct base_data *base,
 						  struct base_data *prev_base)
 {
-	if (base->ref_last == -1 && base->ofs_last == -1) {
-		find_ref_delta_children(&base->obj->idx.oid,
-					&base->ref_first, &base->ref_last);
-
-		find_ofs_delta_children(base->obj->idx.offset,
-					&base->ofs_first, &base->ofs_last);
-
-		if (base->ref_last == -1 && base->ofs_last == -1) {
-			free(base->data);
-			return NULL;
-		}
-
-		link_base_data(prev_base, base);
-	}
-
 	if (base->ref_first <= base->ref_last) {
 		struct object_entry *child = objects + ref_deltas[base->ref_first].obj_no;
-		struct base_data *result = alloc_base_data();
+		struct base_data *result;
 
 		if (!compare_and_swap_type(&child->real_type, OBJ_REF_DELTA,
 					   base->obj->real_type))
@@ -1009,7 +1003,7 @@ static struct base_data *find_unresolved_deltas_1(struct base_data *base,
 			    (uintmax_t)child->idx.offset,
 			    oid_to_hex(&base->obj->idx.oid));
 
-		resolve_delta(child, base, result);
+		result = resolve_delta(child, base);
 		if (base->ref_first == base->ref_last && base->ofs_last == -1)
 			free_base_data(base);
 
@@ -1019,11 +1013,11 @@ static struct base_data *find_unresolved_deltas_1(struct base_data *base,
 
 	if (base->ofs_first <= base->ofs_last) {
 		struct object_entry *child = objects + ofs_deltas[base->ofs_first].obj_no;
-		struct base_data *result = alloc_base_data();
+		struct base_data *result;
 
 		assert(child->real_type == OBJ_OFS_DELTA);
 		child->real_type = base->obj->real_type;
-		resolve_delta(child, base, result);
+		result = resolve_delta(child, base);
 		if (base->ofs_first == base->ofs_last)
 			free_base_data(base);
 
@@ -1031,7 +1025,7 @@ static struct base_data *find_unresolved_deltas_1(struct base_data *base,
 		return result;
 	}
 
-	unlink_base_data(base);
+	free_base_data(base);
 	return NULL;
 }
 
@@ -1074,9 +1068,8 @@ static int compare_ref_delta_entry(const void *a, const void *b)
 
 static void resolve_base(struct object_entry *obj)
 {
-	struct base_data *base_obj = alloc_base_data();
-	base_obj->obj = obj;
-	base_obj->data = NULL;
+	struct base_data *base_obj = make_base(obj, NULL);
+
 	find_unresolved_deltas(base_obj);
 }
 
@@ -1369,22 +1362,26 @@ static void fix_unresolved_deltas(struct hashfile *f)
 	for (i = 0; i < nr_ref_deltas; i++) {
 		struct ref_delta_entry *d = sorted_by_pos[i];
 		enum object_type type;
-		struct base_data *base_obj = alloc_base_data();
+		struct base_data *base;
+		void *data;
+		unsigned long size;
+		struct object_entry *obj;
 
 		if (objects[d->obj_no].real_type != OBJ_REF_DELTA)
 			continue;
-		base_obj->data = read_object_file(&d->oid, &type,
-						  &base_obj->size);
-		if (!base_obj->data)
+		data = read_object_file(&d->oid, &type, &size);
+		if (!data)
 			continue;
 
 		if (check_object_signature(the_repository, &d->oid,
-					   base_obj->data, base_obj->size,
+					   data, size,
 					   type_name(type)))
 			die(_("local object %s is corrupt"), oid_to_hex(&d->oid));
-		base_obj->obj = append_obj_to_pack(f, d->oid.hash,
-					base_obj->data, base_obj->size, type);
-		find_unresolved_deltas(base_obj);
+		obj = append_obj_to_pack(f, d->oid.hash, data, size, type);
+		base = make_base(obj, NULL);
+		base->data = data;
+		base->size = size;
+		find_unresolved_deltas(base);
 		display_progress(progress, nr_resolved_deltas);
 	}
 	free(sorted_by_pos);
-- 
2.28.0.297.g1956fa8f8d-goog


  parent reply	other threads:[~2020-08-24 19:17 UTC|newest]

Thread overview: 28+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-08-24 19:16 [PATCH 0/7] Better threaded delta resolution in index-pack (another try) Jonathan Tan
2020-08-24 19:16 ` [PATCH 1/7] Documentation: deltaBaseCacheLimit is per-thread Jonathan Tan
2020-08-24 19:16 ` [PATCH] fetch-pack: in partial clone, pass --promisor Jonathan Tan
2020-08-24 19:36   ` Jonathan Tan
2020-08-24 19:16 ` [PATCH 2/7] index-pack: remove redundant parameter Jonathan Tan
2020-08-24 21:01   ` Junio C Hamano
2020-08-24 19:16 ` [PATCH 3/7] index-pack: unify threaded and unthreaded code Jonathan Tan
2020-08-24 21:11   ` Junio C Hamano
2020-08-24 19:16 ` [PATCH 4/7] index-pack: remove redundant child field Jonathan Tan
2020-08-24 19:16 ` Jonathan Tan [this message]
2020-08-24 19:16 ` [PATCH 6/7] index-pack: make resolve_delta() assume base data Jonathan Tan
2020-08-24 19:16 ` [PATCH 7/7] index-pack: make quantum of work smaller Jonathan Tan
2020-08-24 21:19   ` Junio C Hamano
2020-08-24 20:47 ` [PATCH 0/7] Better threaded delta resolution in index-pack (another try) Junio C Hamano
2020-08-24 21:27 ` [PATCH] fixup! index-pack: make quantum of work smaller Jonathan Tan
2020-08-24 22:08 ` [PATCH 0/7] Better threaded delta resolution in index-pack (another try) Jeff King
2020-08-25 18:11   ` Jonathan Tan
2020-08-25 21:18     ` Jeff King
2020-08-25 21:46       ` Jeff King
2020-09-08 19:48 ` [PATCH v2 " Jonathan Tan
2020-09-08 19:48   ` [PATCH v2 1/7] Documentation: deltaBaseCacheLimit is per-thread Jonathan Tan
2020-09-08 19:48   ` [PATCH v2 2/7] index-pack: remove redundant parameter Jonathan Tan
2020-09-08 19:48   ` [PATCH v2 3/7] index-pack: unify threaded and unthreaded code Jonathan Tan
2020-09-08 19:48   ` [PATCH v2 4/7] index-pack: remove redundant child field Jonathan Tan
2020-09-08 19:48   ` [PATCH v2 5/7] index-pack: calculate {ref,ofs}_{first,last} early Jonathan Tan
2020-09-08 19:48   ` [PATCH v2 6/7] index-pack: make resolve_delta() assume base data Jonathan Tan
2020-09-08 19:48   ` [PATCH v2 7/7] index-pack: make quantum of work smaller Jonathan Tan
2020-09-08 22:53   ` [PATCH v2 0/7] Better threaded delta resolution in index-pack (another try) Junio C Hamano

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=91459ab4119a33d40d39a565c96282db7865c946.1598296530.git.jonathantanmy@google.com \
    --to=jonathantanmy@google.com \
    --cc=git@vger.kernel.org \
    --cc=peff@peff.net \
    --cc=steadmon@google.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes;
see the mirroring instructions for how to clone and mirror
all data and code used by this external index.