git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 1/3] Eliminate recursion in setting/clearing marks in commit list
@ 2011-12-26 12:04 Nguyễn Thái Ngọc Duy
  2011-12-26 12:04 ` [PATCH 2/3] index-pack: eliminate recursion in find_unresolved_deltas Nguyễn Thái Ngọc Duy
                   ` (5 more replies)
  0 siblings, 6 replies; 21+ messages in thread
From: Nguyễn Thái Ngọc Duy @ 2011-12-26 12:04 UTC (permalink / raw)
  To: git; +Cc: Junio C Hamano, Shawn O. Pearce, Nguyen Thai Ngoc Duy

From: Nguyen Thai Ngoc Duy <pclouds@gmail.com>

Recursion in a DAG is generally a bad idea because it could be very
deep. Be defensive and avoid recursion in mark_parents_uninteresting()
and clear_commit_marks().

mark_parents_uninteresting() learns a trick from clear_commit_marks()
to avoid malloc() in (dorminant) single-parent case.

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
 commit.c   |   13 +++++++++++--
 revision.c |   45 +++++++++++++++++++++++++++++----------------
 2 files changed, 40 insertions(+), 18 deletions(-)

diff --git a/commit.c b/commit.c
index 73b7e00..628066c 100644
--- a/commit.c
+++ b/commit.c
@@ -421,7 +421,8 @@ struct commit *pop_most_recent_commit(struct commit_list **list,
 	return ret;
 }
 
-void clear_commit_marks(struct commit *commit, unsigned int mark)
+static void clear_commit_marks_1(struct commit_list **plist,
+				 struct commit *commit, unsigned int mark)
 {
 	while (commit) {
 		struct commit_list *parents;
@@ -436,12 +437,20 @@ void clear_commit_marks(struct commit *commit, unsigned int mark)
 			return;
 
 		while ((parents = parents->next))
-			clear_commit_marks(parents->item, mark);
+			commit_list_insert(parents->item, plist);
 
 		commit = commit->parents->item;
 	}
 }
 
+void clear_commit_marks(struct commit *commit, unsigned int mark)
+{
+	struct commit_list *list = NULL;
+	commit_list_insert(commit, &list);
+	while (list)
+		clear_commit_marks_1(&list, pop_commit(&list), mark);
+}
+
 void clear_commit_marks_for_object_array(struct object_array *a, unsigned mark)
 {
 	struct object *object;
diff --git a/revision.c b/revision.c
index 8764dde..7cc72fc 100644
--- a/revision.c
+++ b/revision.c
@@ -139,11 +139,32 @@ void mark_tree_uninteresting(struct tree *tree)
 
 void mark_parents_uninteresting(struct commit *commit)
 {
-	struct commit_list *parents = commit->parents;
+	struct commit_list *parents = NULL, *l;
+
+	for (l = commit->parents; l; l = l->next)
+		commit_list_insert(l->item, &parents);
 
 	while (parents) {
 		struct commit *commit = parents->item;
-		if (!(commit->object.flags & UNINTERESTING)) {
+		l = parents;
+		parents = parents->next;
+		free(l);
+
+		while (commit) {
+			/*
+			 * A missing commit is ok iff its parent is marked
+			 * uninteresting.
+			 *
+			 * We just mark such a thing parsed, so that when
+			 * it is popped next time around, we won't be trying
+			 * to parse it and get an error.
+			 */
+			if (!has_sha1_file(commit->object.sha1))
+				commit->object.parsed = 1;
+
+			if (commit->object.flags & UNINTERESTING)
+				break;
+
 			commit->object.flags |= UNINTERESTING;
 
 			/*
@@ -154,21 +175,13 @@ void mark_parents_uninteresting(struct commit *commit)
 			 * wasn't uninteresting), in which case we need
 			 * to mark its parents recursively too..
 			 */
-			if (commit->parents)
-				mark_parents_uninteresting(commit);
-		}
+			if (!commit->parents)
+				break;
 
-		/*
-		 * A missing commit is ok iff its parent is marked
-		 * uninteresting.
-		 *
-		 * We just mark such a thing parsed, so that when
-		 * it is popped next time around, we won't be trying
-		 * to parse it and get an error.
-		 */
-		if (!has_sha1_file(commit->object.sha1))
-			commit->object.parsed = 1;
-		parents = parents->next;
+			for (l = commit->parents->next; l; l = l->next)
+				commit_list_insert(l->item, &parents);
+			commit = commit->parents->item;
+		}
 	}
 }
 
-- 
1.7.8.36.g69ee2

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [PATCH 2/3] index-pack: eliminate recursion in find_unresolved_deltas
  2011-12-26 12:04 [PATCH 1/3] Eliminate recursion in setting/clearing marks in commit list Nguyễn Thái Ngọc Duy
@ 2011-12-26 12:04 ` Nguyễn Thái Ngọc Duy
  2011-12-26 12:04 ` [PATCH 3/3] index-pack: eliminate unlimited recursion in get_delta_base() Nguyễn Thái Ngọc Duy
                   ` (4 subsequent siblings)
  5 siblings, 0 replies; 21+ messages in thread
From: Nguyễn Thái Ngọc Duy @ 2011-12-26 12:04 UTC (permalink / raw)
  To: git
  Cc: Junio C Hamano, Shawn O. Pearce, Nguyễn Thái Ngọc Duy


Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
 builtin/index-pack.c |  112 +++++++++++++++++++++++++++++++------------------
 1 files changed, 71 insertions(+), 41 deletions(-)

diff --git a/builtin/index-pack.c b/builtin/index-pack.c
index 98025da..d311c05 100644
--- a/builtin/index-pack.c
+++ b/builtin/index-pack.c
@@ -34,6 +34,8 @@ struct base_data {
 	struct object_entry *obj;
 	void *data;
 	unsigned long size;
+	int ref_first, ref_last;
+	int ofs_first, ofs_last;
 };
 
 /*
@@ -221,6 +223,15 @@ static NORETURN void bad_object(unsigned long offset, const char *format, ...)
 	die("pack has bad object at offset %lu: %s", offset, buf);
 }
 
+static struct base_data *alloc_base_data()
+{
+	struct base_data *base = xmalloc(sizeof(struct base_data));
+	memset(base, 0, sizeof(*base));
+	base->ref_last = -1;
+	base->ofs_last = -1;
+	return base;
+}
+
 static void free_base_data(struct base_data *c)
 {
 	if (c->data) {
@@ -553,58 +564,77 @@ static void resolve_delta(struct object_entry *delta_obj,
 	nr_resolved_deltas++;
 }
 
-static void find_unresolved_deltas(struct base_data *base,
-				   struct base_data *prev_base)
+static struct base_data *find_unresolved_deltas_1(struct base_data *base,
+						  struct base_data *prev_base)
 {
-	int i, ref_first, ref_last, ofs_first, ofs_last;
-
-	/*
-	 * This is a recursive function. Those brackets should help reducing
-	 * stack usage by limiting the scope of the delta_base union.
-	 */
-	{
+	if (base->ref_last == -1 && base->ofs_last == -1) {
 		union delta_base base_spec;
 
 		hashcpy(base_spec.sha1, base->obj->idx.sha1);
 		find_delta_children(&base_spec,
-				    &ref_first, &ref_last, OBJ_REF_DELTA);
+				    &base->ref_first, &base->ref_last, OBJ_REF_DELTA);
 
 		memset(&base_spec, 0, sizeof(base_spec));
 		base_spec.offset = base->obj->idx.offset;
 		find_delta_children(&base_spec,
-				    &ofs_first, &ofs_last, OBJ_OFS_DELTA);
-	}
+				    &base->ofs_first, &base->ofs_last, OBJ_OFS_DELTA);
 
-	if (ref_last == -1 && ofs_last == -1) {
-		free(base->data);
-		return;
-	}
+		if (base->ref_last == -1 && base->ofs_last == -1) {
+			free(base->data);
+			return NULL;
+		}
 
-	link_base_data(prev_base, base);
+		link_base_data(prev_base, base);
+	}
 
-	for (i = ref_first; i <= ref_last; i++) {
-		struct object_entry *child = objects + deltas[i].obj_no;
-		struct base_data result;
+	if (base->ref_first <= base->ref_last) {
+		struct object_entry *child = objects + deltas[base->ref_first].obj_no;
+		struct base_data *result = alloc_base_data();
 
 		assert(child->real_type == OBJ_REF_DELTA);
-		resolve_delta(child, base, &result);
-		if (i == ref_last && ofs_last == -1)
+		resolve_delta(child, base, result);
+		if (base->ref_first == base->ref_last && base->ofs_last == -1)
 			free_base_data(base);
-		find_unresolved_deltas(&result, base);
+
+		base->ref_first++;
+		return result;
 	}
 
-	for (i = ofs_first; i <= ofs_last; i++) {
-		struct object_entry *child = objects + deltas[i].obj_no;
-		struct base_data result;
+	if (base->ofs_first <= base->ofs_last) {
+		struct object_entry *child = objects + deltas[base->ofs_first].obj_no;
+		struct base_data *result = alloc_base_data();
 
 		assert(child->real_type == OBJ_OFS_DELTA);
-		resolve_delta(child, base, &result);
-		if (i == ofs_last)
+		resolve_delta(child, base, result);
+		if (base->ofs_first == base->ofs_last)
 			free_base_data(base);
-		find_unresolved_deltas(&result, base);
+
+		base->ofs_first++;
+		return result;
 	}
 
 	unlink_base_data(base);
+	return NULL;
+}
+
+static void find_unresolved_deltas(struct base_data *base)
+{
+	struct base_data *new_base, *prev_base = NULL;
+	for (;;) {
+		new_base = find_unresolved_deltas_1(base, prev_base);
+
+		if (new_base) {
+			prev_base = base;
+			base = new_base;
+		}
+		else {
+			free(base);
+			base = prev_base;
+			if (!base)
+				return;
+			prev_base = base->base;
+		}
+	}
 }
 
 static int compare_delta_entry(const void *a, const void *b)
@@ -684,13 +714,13 @@ static void parse_pack_objects(unsigned char *sha1)
 		progress = start_progress("Resolving deltas", nr_deltas);
 	for (i = 0; i < nr_objects; i++) {
 		struct object_entry *obj = &objects[i];
-		struct base_data base_obj;
+		struct base_data *base_obj = alloc_base_data();
 
 		if (is_delta_type(obj->type))
 			continue;
-		base_obj.obj = obj;
-		base_obj.data = NULL;
-		find_unresolved_deltas(&base_obj, NULL);
+		base_obj->obj = obj;
+		base_obj->data = NULL;
+		find_unresolved_deltas(base_obj);
 		display_progress(progress, nr_resolved_deltas);
 	}
 }
@@ -783,20 +813,20 @@ static void fix_unresolved_deltas(struct sha1file *f, int nr_unresolved)
 	for (i = 0; i < n; i++) {
 		struct delta_entry *d = sorted_by_pos[i];
 		enum object_type type;
-		struct base_data base_obj;
+		struct base_data *base_obj = alloc_base_data();
 
 		if (objects[d->obj_no].real_type != OBJ_REF_DELTA)
 			continue;
-		base_obj.data = read_sha1_file(d->base.sha1, &type, &base_obj.size);
-		if (!base_obj.data)
+		base_obj->data = read_sha1_file(d->base.sha1, &type, &base_obj->size);
+		if (!base_obj->data)
 			continue;
 
-		if (check_sha1_signature(d->base.sha1, base_obj.data,
-				base_obj.size, typename(type)))
+		if (check_sha1_signature(d->base.sha1, base_obj->data,
+				base_obj->size, typename(type)))
 			die("local object %s is corrupt", sha1_to_hex(d->base.sha1));
-		base_obj.obj = append_obj_to_pack(f, d->base.sha1,
-					base_obj.data, base_obj.size, type);
-		find_unresolved_deltas(&base_obj, NULL);
+		base_obj->obj = append_obj_to_pack(f, d->base.sha1,
+					base_obj->data, base_obj->size, type);
+		find_unresolved_deltas(base_obj);
 		display_progress(progress, nr_resolved_deltas);
 	}
 	free(sorted_by_pos);
-- 
1.7.8.36.g69ee2

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [PATCH 3/3] index-pack: eliminate unlimited recursion in get_delta_base()
  2011-12-26 12:04 [PATCH 1/3] Eliminate recursion in setting/clearing marks in commit list Nguyễn Thái Ngọc Duy
  2011-12-26 12:04 ` [PATCH 2/3] index-pack: eliminate recursion in find_unresolved_deltas Nguyễn Thái Ngọc Duy
@ 2011-12-26 12:04 ` Nguyễn Thái Ngọc Duy
  2012-01-09  3:59 ` [PATCH v2 0/3] nd/index-pack-no-recurse Nguyễn Thái Ngọc Duy
                   ` (3 subsequent siblings)
  5 siblings, 0 replies; 21+ messages in thread
From: Nguyễn Thái Ngọc Duy @ 2011-12-26 12:04 UTC (permalink / raw)
  To: git
  Cc: Junio C Hamano, Shawn O. Pearce, Nguyễn Thái Ngọc Duy

Revert the order of delta applying so that by the time a delta is
applied, its base is either non-delta or already inflated.
get_delta_base() is still recursive, but because base's data is always
ready, the inner get_delta_base() call never has any chance to call
itself again.

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
 builtin/index-pack.c |   30 +++++++++++++++++++++---------
 1 files changed, 21 insertions(+), 9 deletions(-)

diff --git a/builtin/index-pack.c b/builtin/index-pack.c
index d311c05..e8801bb 100644
--- a/builtin/index-pack.c
+++ b/builtin/index-pack.c
@@ -519,10 +519,25 @@ static void *get_base_data(struct base_data *c)
 {
 	if (!c->data) {
 		struct object_entry *obj = c->obj;
+		struct base_data **delta = NULL;
+		int delta_nr = 0, delta_alloc = 0;
 
-		if (is_delta_type(obj->type)) {
-			void *base = get_base_data(c->base);
-			void *raw = get_data_from_pack(obj);
+		for (; is_delta_type(c->obj->type); c = c->base) {
+			ALLOC_GROW(delta, delta_nr + 1, delta_alloc);
+			delta[delta_nr++] = c;
+		}
+		if (!delta_nr) {
+			c->data = get_data_from_pack(obj);
+			c->size = obj->size;
+			base_cache_used += c->size;
+			prune_base_data(c);
+		}
+		for (; delta_nr > 0; delta_nr--) {
+			void *base, *raw;
+			c = delta[delta_nr - 1];
+			obj = c->obj;
+			base = get_base_data(c->base);
+			raw = get_data_from_pack(obj);
 			c->data = patch_delta(
 				base, c->base->size,
 				raw, obj->size,
@@ -530,13 +545,10 @@ static void *get_base_data(struct base_data *c)
 			free(raw);
 			if (!c->data)
 				bad_object(obj->idx.offset, "failed to apply delta");
-		} else {
-			c->data = get_data_from_pack(obj);
-			c->size = obj->size;
+			base_cache_used += c->size;
+			prune_base_data(c);
 		}
-
-		base_cache_used += c->size;
-		prune_base_data(c);
+		free(delta);
 	}
 	return c->data;
 }
-- 
1.7.8.36.g69ee2

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [PATCH v2 0/3] nd/index-pack-no-recurse
  2011-12-26 12:04 [PATCH 1/3] Eliminate recursion in setting/clearing marks in commit list Nguyễn Thái Ngọc Duy
  2011-12-26 12:04 ` [PATCH 2/3] index-pack: eliminate recursion in find_unresolved_deltas Nguyễn Thái Ngọc Duy
  2011-12-26 12:04 ` [PATCH 3/3] index-pack: eliminate unlimited recursion in get_delta_base() Nguyễn Thái Ngọc Duy
@ 2012-01-09  3:59 ` Nguyễn Thái Ngọc Duy
  2012-01-09 19:30   ` Junio C Hamano
  2012-01-14 12:19   ` [PATCH v3 " Nguyễn Thái Ngọc Duy
  2012-01-09  3:59 ` [PATCH v2 1/3] Eliminate recursion in setting/clearing marks in commit list Nguyễn Thái Ngọc Duy
                   ` (2 subsequent siblings)
  5 siblings, 2 replies; 21+ messages in thread
From: Nguyễn Thái Ngọc Duy @ 2012-01-09  3:59 UTC (permalink / raw)
  To: git
  Cc: Junio C Hamano, Shawn O. Pearce, Nguyễn Thái Ngọc Duy

Resend to incorporate the fixup commit from pu, no other changes.

Nguyễn Thái Ngọc Duy (3):
  Eliminate recursion in setting/clearing marks in commit list
  index-pack: eliminate recursion in find_unresolved_deltas
  index-pack: eliminate unlimited recursion in get_delta_base()

 builtin/index-pack.c |  141 ++++++++++++++++++++++++++++++++------------------
 commit.c             |   13 ++++-
 revision.c           |   45 ++++++++++------
 3 files changed, 131 insertions(+), 68 deletions(-)

-- 
1.7.3.1.256.g2539c.dirty

^ permalink raw reply	[flat|nested] 21+ messages in thread

* [PATCH v2 1/3] Eliminate recursion in setting/clearing marks in commit list
  2011-12-26 12:04 [PATCH 1/3] Eliminate recursion in setting/clearing marks in commit list Nguyễn Thái Ngọc Duy
                   ` (2 preceding siblings ...)
  2012-01-09  3:59 ` [PATCH v2 0/3] nd/index-pack-no-recurse Nguyễn Thái Ngọc Duy
@ 2012-01-09  3:59 ` Nguyễn Thái Ngọc Duy
  2012-01-09 22:09   ` Junio C Hamano
  2012-01-09  3:59 ` [PATCH v2 2/3] index-pack: eliminate recursion in find_unresolved_deltas Nguyễn Thái Ngọc Duy
  2012-01-09  3:59 ` [PATCH v2 3/3] index-pack: eliminate unlimited recursion in get_delta_base() Nguyễn Thái Ngọc Duy
  5 siblings, 1 reply; 21+ messages in thread
From: Nguyễn Thái Ngọc Duy @ 2012-01-09  3:59 UTC (permalink / raw)
  To: git
  Cc: Junio C Hamano, Shawn O. Pearce, Nguyễn Thái Ngọc Duy

Recursion in a DAG is generally a bad idea because it could be very
deep. Be defensive and avoid recursion in mark_parents_uninteresting()
and clear_commit_marks().

mark_parents_uninteresting() learns a trick from clear_commit_marks()
to avoid malloc() in (dorminant) single-parent case.

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
 commit.c   |   13 +++++++++++--
 revision.c |   45 +++++++++++++++++++++++++++++----------------
 2 files changed, 40 insertions(+), 18 deletions(-)

diff --git a/commit.c b/commit.c
index 44bc96d..c7aefbf 100644
--- a/commit.c
+++ b/commit.c
@@ -421,7 +421,8 @@ struct commit *pop_most_recent_commit(struct commit_list **list,
 	return ret;
 }
 
-void clear_commit_marks(struct commit *commit, unsigned int mark)
+static void clear_commit_marks_1(struct commit_list **plist,
+				 struct commit *commit, unsigned int mark)
 {
 	while (commit) {
 		struct commit_list *parents;
@@ -436,12 +437,20 @@ void clear_commit_marks(struct commit *commit, unsigned int mark)
 			return;
 
 		while ((parents = parents->next))
-			clear_commit_marks(parents->item, mark);
+			commit_list_insert(parents->item, plist);
 
 		commit = commit->parents->item;
 	}
 }
 
+void clear_commit_marks(struct commit *commit, unsigned int mark)
+{
+	struct commit_list *list = NULL;
+	commit_list_insert(commit, &list);
+	while (list)
+		clear_commit_marks_1(&list, pop_commit(&list), mark);
+}
+
 void clear_commit_marks_for_object_array(struct object_array *a, unsigned mark)
 {
 	struct object *object;
diff --git a/revision.c b/revision.c
index 8764dde..7cc72fc 100644
--- a/revision.c
+++ b/revision.c
@@ -139,11 +139,32 @@ void mark_tree_uninteresting(struct tree *tree)
 
 void mark_parents_uninteresting(struct commit *commit)
 {
-	struct commit_list *parents = commit->parents;
+	struct commit_list *parents = NULL, *l;
+
+	for (l = commit->parents; l; l = l->next)
+		commit_list_insert(l->item, &parents);
 
 	while (parents) {
 		struct commit *commit = parents->item;
-		if (!(commit->object.flags & UNINTERESTING)) {
+		l = parents;
+		parents = parents->next;
+		free(l);
+
+		while (commit) {
+			/*
+			 * A missing commit is ok iff its parent is marked
+			 * uninteresting.
+			 *
+			 * We just mark such a thing parsed, so that when
+			 * it is popped next time around, we won't be trying
+			 * to parse it and get an error.
+			 */
+			if (!has_sha1_file(commit->object.sha1))
+				commit->object.parsed = 1;
+
+			if (commit->object.flags & UNINTERESTING)
+				break;
+
 			commit->object.flags |= UNINTERESTING;
 
 			/*
@@ -154,21 +175,13 @@ void mark_parents_uninteresting(struct commit *commit)
 			 * wasn't uninteresting), in which case we need
 			 * to mark its parents recursively too..
 			 */
-			if (commit->parents)
-				mark_parents_uninteresting(commit);
-		}
+			if (!commit->parents)
+				break;
 
-		/*
-		 * A missing commit is ok iff its parent is marked
-		 * uninteresting.
-		 *
-		 * We just mark such a thing parsed, so that when
-		 * it is popped next time around, we won't be trying
-		 * to parse it and get an error.
-		 */
-		if (!has_sha1_file(commit->object.sha1))
-			commit->object.parsed = 1;
-		parents = parents->next;
+			for (l = commit->parents->next; l; l = l->next)
+				commit_list_insert(l->item, &parents);
+			commit = commit->parents->item;
+		}
 	}
 }
 
-- 
1.7.3.1.256.g2539c.dirty

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [PATCH v2 2/3] index-pack: eliminate recursion in find_unresolved_deltas
  2011-12-26 12:04 [PATCH 1/3] Eliminate recursion in setting/clearing marks in commit list Nguyễn Thái Ngọc Duy
                   ` (3 preceding siblings ...)
  2012-01-09  3:59 ` [PATCH v2 1/3] Eliminate recursion in setting/clearing marks in commit list Nguyễn Thái Ngọc Duy
@ 2012-01-09  3:59 ` Nguyễn Thái Ngọc Duy
  2012-01-09 22:10   ` Junio C Hamano
  2012-01-09  3:59 ` [PATCH v2 3/3] index-pack: eliminate unlimited recursion in get_delta_base() Nguyễn Thái Ngọc Duy
  5 siblings, 1 reply; 21+ messages in thread
From: Nguyễn Thái Ngọc Duy @ 2012-01-09  3:59 UTC (permalink / raw)
  To: git
  Cc: Junio C Hamano, Shawn O. Pearce, Nguyễn Thái Ngọc Duy

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
 builtin/index-pack.c |  111 +++++++++++++++++++++++++++++++------------------
 1 files changed, 70 insertions(+), 41 deletions(-)

diff --git a/builtin/index-pack.c b/builtin/index-pack.c
index af7dc37..38ff03a 100644
--- a/builtin/index-pack.c
+++ b/builtin/index-pack.c
@@ -34,6 +34,8 @@ struct base_data {
 	struct object_entry *obj;
 	void *data;
 	unsigned long size;
+	int ref_first, ref_last;
+	int ofs_first, ofs_last;
 };
 
 /*
@@ -221,6 +223,15 @@ static NORETURN void bad_object(unsigned long offset, const char *format, ...)
 	die("pack has bad object at offset %lu: %s", offset, buf);
 }
 
+static struct base_data *alloc_base_data(void)
+{
+	struct base_data *base = xmalloc(sizeof(struct base_data));
+	memset(base, 0, sizeof(*base));
+	base->ref_last = -1;
+	base->ofs_last = -1;
+	return base;
+}
+
 static void free_base_data(struct base_data *c)
 {
 	if (c->data) {
@@ -553,58 +564,76 @@ static void resolve_delta(struct object_entry *delta_obj,
 	nr_resolved_deltas++;
 }
 
-static void find_unresolved_deltas(struct base_data *base,
-				   struct base_data *prev_base)
+static struct base_data *find_unresolved_deltas_1(struct base_data *base,
+						  struct base_data *prev_base)
 {
-	int i, ref_first, ref_last, ofs_first, ofs_last;
-
-	/*
-	 * This is a recursive function. Those brackets should help reducing
-	 * stack usage by limiting the scope of the delta_base union.
-	 */
-	{
+	if (base->ref_last == -1 && base->ofs_last == -1) {
 		union delta_base base_spec;
 
 		hashcpy(base_spec.sha1, base->obj->idx.sha1);
 		find_delta_children(&base_spec,
-				    &ref_first, &ref_last, OBJ_REF_DELTA);
+				    &base->ref_first, &base->ref_last, OBJ_REF_DELTA);
 
 		memset(&base_spec, 0, sizeof(base_spec));
 		base_spec.offset = base->obj->idx.offset;
 		find_delta_children(&base_spec,
-				    &ofs_first, &ofs_last, OBJ_OFS_DELTA);
-	}
+				    &base->ofs_first, &base->ofs_last, OBJ_OFS_DELTA);
 
-	if (ref_last == -1 && ofs_last == -1) {
-		free(base->data);
-		return;
-	}
+		if (base->ref_last == -1 && base->ofs_last == -1) {
+			free(base->data);
+			return NULL;
+		}
 
-	link_base_data(prev_base, base);
+		link_base_data(prev_base, base);
+	}
 
-	for (i = ref_first; i <= ref_last; i++) {
-		struct object_entry *child = objects + deltas[i].obj_no;
-		struct base_data result;
+	if (base->ref_first <= base->ref_last) {
+		struct object_entry *child = objects + deltas[base->ref_first].obj_no;
+		struct base_data *result = alloc_base_data();
 
 		assert(child->real_type == OBJ_REF_DELTA);
-		resolve_delta(child, base, &result);
-		if (i == ref_last && ofs_last == -1)
+		resolve_delta(child, base, result);
+		if (base->ref_first == base->ref_last && base->ofs_last == -1)
 			free_base_data(base);
-		find_unresolved_deltas(&result, base);
+
+		base->ref_first++;
+		return result;
 	}
 
-	for (i = ofs_first; i <= ofs_last; i++) {
-		struct object_entry *child = objects + deltas[i].obj_no;
-		struct base_data result;
+	if (base->ofs_first <= base->ofs_last) {
+		struct object_entry *child = objects + deltas[base->ofs_first].obj_no;
+		struct base_data *result = alloc_base_data();
 
 		assert(child->real_type == OBJ_OFS_DELTA);
-		resolve_delta(child, base, &result);
-		if (i == ofs_last)
+		resolve_delta(child, base, result);
+		if (base->ofs_first == base->ofs_last)
 			free_base_data(base);
-		find_unresolved_deltas(&result, base);
+
+		base->ofs_first++;
+		return result;
 	}
 
 	unlink_base_data(base);
+	return NULL;
+}
+
+static void find_unresolved_deltas(struct base_data *base)
+{
+	struct base_data *new_base, *prev_base = NULL;
+	for (;;) {
+		new_base = find_unresolved_deltas_1(base, prev_base);
+
+		if (new_base) {
+			prev_base = base;
+			base = new_base;
+		} else {
+			free(base);
+			base = prev_base;
+			if (!base)
+				return;
+			prev_base = base->base;
+		}
+	}
 }
 
 static int compare_delta_entry(const void *a, const void *b)
@@ -684,13 +713,13 @@ static void parse_pack_objects(unsigned char *sha1)
 		progress = start_progress("Resolving deltas", nr_deltas);
 	for (i = 0; i < nr_objects; i++) {
 		struct object_entry *obj = &objects[i];
-		struct base_data base_obj;
+		struct base_data *base_obj = alloc_base_data();
 
 		if (is_delta_type(obj->type))
 			continue;
-		base_obj.obj = obj;
-		base_obj.data = NULL;
-		find_unresolved_deltas(&base_obj, NULL);
+		base_obj->obj = obj;
+		base_obj->data = NULL;
+		find_unresolved_deltas(base_obj);
 		display_progress(progress, nr_resolved_deltas);
 	}
 }
@@ -783,20 +812,20 @@ static void fix_unresolved_deltas(struct sha1file *f, int nr_unresolved)
 	for (i = 0; i < n; i++) {
 		struct delta_entry *d = sorted_by_pos[i];
 		enum object_type type;
-		struct base_data base_obj;
+		struct base_data *base_obj = alloc_base_data();
 
 		if (objects[d->obj_no].real_type != OBJ_REF_DELTA)
 			continue;
-		base_obj.data = read_sha1_file(d->base.sha1, &type, &base_obj.size);
-		if (!base_obj.data)
+		base_obj->data = read_sha1_file(d->base.sha1, &type, &base_obj->size);
+		if (!base_obj->data)
 			continue;
 
-		if (check_sha1_signature(d->base.sha1, base_obj.data,
-				base_obj.size, typename(type)))
+		if (check_sha1_signature(d->base.sha1, base_obj->data,
+				base_obj->size, typename(type)))
 			die("local object %s is corrupt", sha1_to_hex(d->base.sha1));
-		base_obj.obj = append_obj_to_pack(f, d->base.sha1,
-					base_obj.data, base_obj.size, type);
-		find_unresolved_deltas(&base_obj, NULL);
+		base_obj->obj = append_obj_to_pack(f, d->base.sha1,
+					base_obj->data, base_obj->size, type);
+		find_unresolved_deltas(base_obj);
 		display_progress(progress, nr_resolved_deltas);
 	}
 	free(sorted_by_pos);
-- 
1.7.3.1.256.g2539c.dirty

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [PATCH v2 3/3] index-pack: eliminate unlimited recursion in get_delta_base()
  2011-12-26 12:04 [PATCH 1/3] Eliminate recursion in setting/clearing marks in commit list Nguyễn Thái Ngọc Duy
                   ` (4 preceding siblings ...)
  2012-01-09  3:59 ` [PATCH v2 2/3] index-pack: eliminate recursion in find_unresolved_deltas Nguyễn Thái Ngọc Duy
@ 2012-01-09  3:59 ` Nguyễn Thái Ngọc Duy
  2012-01-09 22:51   ` Junio C Hamano
  5 siblings, 1 reply; 21+ messages in thread
From: Nguyễn Thái Ngọc Duy @ 2012-01-09  3:59 UTC (permalink / raw)
  To: git
  Cc: Junio C Hamano, Shawn O. Pearce, Nguyễn Thái Ngọc Duy

Revert the order of delta applying so that by the time a delta is
applied, its base is either non-delta or already inflated.
get_delta_base() is still recursive, but because base's data is always
ready, the inner get_delta_base() call never has any chance to call
itself again.

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
 builtin/index-pack.c |   30 +++++++++++++++++++++---------
 1 files changed, 21 insertions(+), 9 deletions(-)

diff --git a/builtin/index-pack.c b/builtin/index-pack.c
index 38ff03a..8c1f5d9 100644
--- a/builtin/index-pack.c
+++ b/builtin/index-pack.c
@@ -519,10 +519,25 @@ static void *get_base_data(struct base_data *c)
 {
 	if (!c->data) {
 		struct object_entry *obj = c->obj;
+		struct base_data **delta = NULL;
+		int delta_nr = 0, delta_alloc = 0;
 
-		if (is_delta_type(obj->type)) {
-			void *base = get_base_data(c->base);
-			void *raw = get_data_from_pack(obj);
+		for (; is_delta_type(c->obj->type); c = c->base) {
+			ALLOC_GROW(delta, delta_nr + 1, delta_alloc);
+			delta[delta_nr++] = c;
+		}
+		if (!delta_nr) {
+			c->data = get_data_from_pack(obj);
+			c->size = obj->size;
+			base_cache_used += c->size;
+			prune_base_data(c);
+		}
+		for (; delta_nr > 0; delta_nr--) {
+			void *base, *raw;
+			c = delta[delta_nr - 1];
+			obj = c->obj;
+			base = get_base_data(c->base);
+			raw = get_data_from_pack(obj);
 			c->data = patch_delta(
 				base, c->base->size,
 				raw, obj->size,
@@ -530,13 +545,10 @@ static void *get_base_data(struct base_data *c)
 			free(raw);
 			if (!c->data)
 				bad_object(obj->idx.offset, "failed to apply delta");
-		} else {
-			c->data = get_data_from_pack(obj);
-			c->size = obj->size;
+			base_cache_used += c->size;
+			prune_base_data(c);
 		}
-
-		base_cache_used += c->size;
-		prune_base_data(c);
+		free(delta);
 	}
 	return c->data;
 }
-- 
1.7.3.1.256.g2539c.dirty

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* Re: [PATCH v2 0/3] nd/index-pack-no-recurse
  2012-01-09  3:59 ` [PATCH v2 0/3] nd/index-pack-no-recurse Nguyễn Thái Ngọc Duy
@ 2012-01-09 19:30   ` Junio C Hamano
  2012-01-14 12:19   ` [PATCH v3 " Nguyễn Thái Ngọc Duy
  1 sibling, 0 replies; 21+ messages in thread
From: Junio C Hamano @ 2012-01-09 19:30 UTC (permalink / raw)
  To: Nguyễn Thái Ngọc Duy; +Cc: git, Shawn O. Pearce

Nguyễn Thái Ngọc Duy <pclouds@gmail.com> writes:

> Resend to incorporate the fixup commit from pu, no other changes.
>
> Nguyễn Thái Ngọc Duy (3):
>   Eliminate recursion in setting/clearing marks in commit list
>   index-pack: eliminate recursion in find_unresolved_deltas
>   index-pack: eliminate unlimited recursion in get_delta_base()
>
>  builtin/index-pack.c |  141 ++++++++++++++++++++++++++++++++------------------
>  commit.c             |   13 ++++-
>  revision.c           |   45 ++++++++++------
>  3 files changed, 131 insertions(+), 68 deletions(-)

Thanks.

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v2 1/3] Eliminate recursion in setting/clearing marks in commit list
  2012-01-09  3:59 ` [PATCH v2 1/3] Eliminate recursion in setting/clearing marks in commit list Nguyễn Thái Ngọc Duy
@ 2012-01-09 22:09   ` Junio C Hamano
  0 siblings, 0 replies; 21+ messages in thread
From: Junio C Hamano @ 2012-01-09 22:09 UTC (permalink / raw)
  To: Nguyễn Thái Ngọc Duy; +Cc: git, Shawn O. Pearce

Nguyễn Thái Ngọc Duy  <pclouds@gmail.com> writes:

> Recursion in a DAG is generally a bad idea because it could be very
> deep. Be defensive and avoid recursion in mark_parents_uninteresting()
> and clear_commit_marks().
>
> mark_parents_uninteresting() learns a trick from clear_commit_marks()
> to avoid malloc() in (dorminant) single-parent case.

Looks cleanly done.

This retains the depth-firstness of the (supposedly less common case)
recursion in mark_parents_uninteresting() from the original code, by
adding the already parsed parent at the beginning of the queue. I suspect
that the original went depth-first primarily because that was the most
straightforward way to code it, but now you have more flexibility, I
wonder if there is a difference if we made it width-first, and if so, if
the difference is positive or detrimental.

Thanks.

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v2 2/3] index-pack: eliminate recursion in find_unresolved_deltas
  2012-01-09  3:59 ` [PATCH v2 2/3] index-pack: eliminate recursion in find_unresolved_deltas Nguyễn Thái Ngọc Duy
@ 2012-01-09 22:10   ` Junio C Hamano
  2012-01-10 12:23     ` Nguyen Thai Ngoc Duy
  0 siblings, 1 reply; 21+ messages in thread
From: Junio C Hamano @ 2012-01-09 22:10 UTC (permalink / raw)
  To: Nguyễn Thái Ngọc Duy; +Cc: git, Shawn O. Pearce

Nguyễn Thái Ngọc Duy  <pclouds@gmail.com> writes:

> Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>

I find both the original and the updated code rather dense to read without
annotation, but from a cursory look all changes look good.

Thanks.

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v2 3/3] index-pack: eliminate unlimited recursion in get_delta_base()
  2012-01-09  3:59 ` [PATCH v2 3/3] index-pack: eliminate unlimited recursion in get_delta_base() Nguyễn Thái Ngọc Duy
@ 2012-01-09 22:51   ` Junio C Hamano
  2012-01-10 13:03     ` Nguyen Thai Ngoc Duy
  0 siblings, 1 reply; 21+ messages in thread
From: Junio C Hamano @ 2012-01-09 22:51 UTC (permalink / raw)
  To: Nguyễn Thái Ngọc Duy; +Cc: git, Shawn O. Pearce

Nguyễn Thái Ngọc Duy  <pclouds@gmail.com> writes:

> Revert the order of delta applying so that by the time a delta is
> applied, its base is either non-delta or already inflated.
>
> get_delta_base() is still recursive, but because base's data is always
> ready, the inner get_delta_base() call never has any chance to call
> itself again.

I suspect s/Revert/Reverse/ here, but I have a feeling that the structure
of the resulting code is a bit too complex and subtle.

In parse_pack_objects(), we have two passes. The first pass scans to
enumerate the objects that appear in the pack and sift them into base and
delta objects, and the second one starts from a base object, resolves its
immediate children with find_unresolved_daltas(), but that function recurses
many times, bound only by the number of objects in the pack, which is the
issue you are trying to address with this series.

I wonder if a cleaner approach is to change the loop in the second pass in
such a way that (1) the function it calls resolves _only_ the immediate
children of the object we know its final shape (either because the object
was recorded in the deflated form in the pack, or we have already resolved
it in earlier iteration), and (2) the loop goes over the objects[] array
not just once, but until we stopped making progress.

It would require us to be able to tell, by looking at objects[i], if the
object itself has already been handled (perhaps you can look at its
idx.sha1 field for this purpose) and if we have already handled its
immediate delta children (you may need to add a bit to struct object_entry
for this).

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v2 2/3] index-pack: eliminate recursion in find_unresolved_deltas
  2012-01-09 22:10   ` Junio C Hamano
@ 2012-01-10 12:23     ` Nguyen Thai Ngoc Duy
  2012-01-12 20:32       ` Junio C Hamano
  0 siblings, 1 reply; 21+ messages in thread
From: Nguyen Thai Ngoc Duy @ 2012-01-10 12:23 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git, Shawn O. Pearce

2012/1/10 Junio C Hamano <gitster@pobox.com>:
> Nguyễn Thái Ngọc Duy  <pclouds@gmail.com> writes:
>
>> Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
>
> I find both the original and the updated code rather dense to read without
> annotation, but from a cursory look all changes look good.

Maybe I stared at it for too long it seems obvious to me (hence no
further description in commit message). Let me describe it (and put in
commit message later if it makes sense)

Current code already links all bases together in a form of tree, using
struct base_data, with prev_base pointer to point to parent node. The
only problem is that struct base_data is all allocated on stack. So we
need to put all on heap (parse_pack_objects and
fix_unresolved_deltas). After that, it's simple depth-first traversal
where each node also maintains its own state (ofs and ref indices to
iterate over all children nodes).

So we process one node:

 - if it returns a new (child) node (a parent base), we link it to our
tree, then process the new node.
 - if it returns nothing, the node is done, free it. We go back to
parent node and resume whatever it's doing.

and do it until we have no nodes to process.
-- 
Duy

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v2 3/3] index-pack: eliminate unlimited recursion in get_delta_base()
  2012-01-09 22:51   ` Junio C Hamano
@ 2012-01-10 13:03     ` Nguyen Thai Ngoc Duy
  2012-01-10 13:16       ` Nguyen Thai Ngoc Duy
  0 siblings, 1 reply; 21+ messages in thread
From: Nguyen Thai Ngoc Duy @ 2012-01-10 13:03 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git, Shawn O. Pearce

2012/1/10 Junio C Hamano <gitster@pobox.com>:
> In parse_pack_objects(), we have two passes. The first pass scans to
> enumerate the objects that appear in the pack and sift them into base and
> delta objects, and the second one starts from a base object, resolves its
> immediate children with find_unresolved_daltas(), but that function recurses
> many times, bound only by the number of objects in the pack, which is the
> issue you are trying to address with this series.
>
> I wonder if a cleaner approach is to change the loop in the second pass in
> such a way that (1) the function it calls resolves _only_ the immediate
> children of the object we know its final shape (either because the object
> was recorded in the deflated form in the pack, or we have already resolved
> it in earlier iteration), and (2) the loop goes over the objects[] array
> not just once, but until we stopped making progress.

So basically:

 - remove the recursion code from get_base_data()
 - update find_unresolved_deltas() to mark resolved objects handled
 - put find_unresolve_deltas() call into a loop to go through all
unhandled objects

correct? Yeah I think it's simpler than current code.

> It would require us to be able to tell, by looking at objects[i], if the
> object itself has already been handled (perhaps you can look at its
> idx.sha1 field for this purpose) and if we have already handled its
> immediate delta children (you may need to add a bit to struct object_entry
> for this).
-- 
Duy

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v2 3/3] index-pack: eliminate unlimited recursion in get_delta_base()
  2012-01-10 13:03     ` Nguyen Thai Ngoc Duy
@ 2012-01-10 13:16       ` Nguyen Thai Ngoc Duy
  0 siblings, 0 replies; 21+ messages in thread
From: Nguyen Thai Ngoc Duy @ 2012-01-10 13:16 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git, Shawn O. Pearce

On Tue, Jan 10, 2012 at 8:03 PM, Nguyen Thai Ngoc Duy <pclouds@gmail.com> wrote:
> correct? Yeah I think it's simpler than current code.

I wrote about core.deltabasecachelimit, then deleted it, thinking it
should be ok, but how do handle the case when you use up cache limit?
Some handled objects may be freed when we're short on cache and need
to be resolved again before we could resolve an unhandled object.

Assume we're at the limit, have resolved A, which has two children B
and C. When we resolve B we need to free A to keep it fit in cache. By
the time we look at C, A needs to be resolved again. Without tracing
back (i.e. what get_base_data() does), we don't know what we need to
re-resolve among all handled objects.
-- 
Duy

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v2 2/3] index-pack: eliminate recursion in find_unresolved_deltas
  2012-01-10 12:23     ` Nguyen Thai Ngoc Duy
@ 2012-01-12 20:32       ` Junio C Hamano
  0 siblings, 0 replies; 21+ messages in thread
From: Junio C Hamano @ 2012-01-12 20:32 UTC (permalink / raw)
  To: Nguyen Thai Ngoc Duy; +Cc: git, Shawn O. Pearce

Nguyen Thai Ngoc Duy <pclouds@gmail.com> writes:

> 2012/1/10 Junio C Hamano <gitster@pobox.com>:
>> Nguyễn Thái Ngọc Duy  <pclouds@gmail.com> writes:
>>
>>> Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
>>
>> I find both the original and the updated code rather dense to read without
>> annotation, but from a cursory look all changes look good.
>
> Maybe I stared at it for too long it seems obvious to me (hence no
> further description in commit message). Let me describe it (and put in
> commit message later if it makes sense)
>
> Current code already links all bases together in a form of tree, using
> struct base_data, with prev_base pointer to point to parent node. The
> only problem is that struct base_data is all allocated on stack. So we
> need to put all on heap (parse_pack_objects and
> fix_unresolved_deltas). After that, it's simple depth-first traversal
> where each node also maintains its own state (ofs and ref indices to
> iterate over all children nodes).
>
> So we process one node:
>
>  - if it returns a new (child) node (a parent base), we link it to our
> tree, then process the new node.
>  - if it returns nothing, the node is done, free it. We go back to
> parent node and resume whatever it's doing.
>
> and do it until we have no nodes to process.

If you have the current path (base to another that is recorded as a delta
to it to yet another that is recorded as a delta to that delitified
object) on the stack, it is obvious that as you have done with the objects
on the deeper end of the delta chain, the data that becomes unnecessary
will be gone by simply returning from the recursion, but if you "put all
on heap", you would have to do the same freeing as part of the hand-rolled
recursion. It is unclear if, where and how the patch takes care of that
in the above.

Other than that, I find the description very readable.

Thanks.

^ permalink raw reply	[flat|nested] 21+ messages in thread

* [PATCH v3 0/3] nd/index-pack-no-recurse
  2012-01-09  3:59 ` [PATCH v2 0/3] nd/index-pack-no-recurse Nguyễn Thái Ngọc Duy
  2012-01-09 19:30   ` Junio C Hamano
@ 2012-01-14 12:19   ` Nguyễn Thái Ngọc Duy
  2012-01-14 12:19     ` [PATCH v3 1/3] Eliminate recursion in setting/clearing marks in commit list Nguyễn Thái Ngọc Duy
                       ` (2 more replies)
  1 sibling, 3 replies; 21+ messages in thread
From: Nguyễn Thái Ngọc Duy @ 2012-01-14 12:19 UTC (permalink / raw)
  To: git
  Cc: Junio C Hamano, Shawn O. Pearce, Nguyễn Thái Ngọc Duy

This round adds more explanation in commit message of 2/3 and comment
before get_base_data() in 3/3. Changes in 3/3 does not really address
Junio's concern regarding maintainability though.

It also fixes a regression in 3/3. In current code, get_base_data()
goes up as far as the first deflated parent. v2 of this series always
goes up to top parent. v3 fixes this.

Junio raised a point about depth-first vs breadth-first search in 1/3. I
have not addressed that either, but it makes me wonder if we may
benefit from using bfs in find_unresolved_deltas(), 2/3. If the delta
chains form a fork-like figure (e.g. long delta chains sharing common
base), then we may run out of cache doing dfs on one chain, by the
time we get back on the common base, we would need to deflate them
again.

Another observation is recursion in get_base_data() is unlikely to be
called in real life. With 16M default delta base cache, git.git does
not trigger it at all. Perhaps repos with large blobs have better chance..

Nguyễn Thái Ngọc Duy (3):
  Eliminate recursion in setting/clearing marks in commit list
  index-pack: eliminate recursion in find_unresolved_deltas
  index-pack: eliminate unlimited recursion in get_base_data()

 builtin/index-pack.c |  164 ++++++++++++++++++++++++++++++++++---------------
 commit.c             |   13 ++++-
 revision.c           |   45 +++++++++-----
 3 files changed, 154 insertions(+), 68 deletions(-)

-- 
1.7.8.36.g69ee2

^ permalink raw reply	[flat|nested] 21+ messages in thread

* [PATCH v3 1/3] Eliminate recursion in setting/clearing marks in commit list
  2012-01-14 12:19   ` [PATCH v3 " Nguyễn Thái Ngọc Duy
@ 2012-01-14 12:19     ` Nguyễn Thái Ngọc Duy
  2012-01-14 15:23       ` Peter Baumann
  2012-01-14 12:19     ` [PATCH v3 2/3] index-pack: eliminate recursion in find_unresolved_deltas Nguyễn Thái Ngọc Duy
  2012-01-14 12:19     ` [PATCH v3 3/3] index-pack: eliminate unlimited recursion in get_base_data() Nguyễn Thái Ngọc Duy
  2 siblings, 1 reply; 21+ messages in thread
From: Nguyễn Thái Ngọc Duy @ 2012-01-14 12:19 UTC (permalink / raw)
  To: git
  Cc: Junio C Hamano, Shawn O. Pearce, Nguyễn Thái Ngọc Duy

Recursion in a DAG is generally a bad idea because it could be very
deep. Be defensive and avoid recursion in mark_parents_uninteresting()
and clear_commit_marks().

mark_parents_uninteresting() learns a trick from clear_commit_marks()
to avoid malloc() in (dorminant) single-parent case.

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 commit.c   |   13 +++++++++++--
 revision.c |   45 +++++++++++++++++++++++++++++----------------
 2 files changed, 40 insertions(+), 18 deletions(-)

diff --git a/commit.c b/commit.c
index 44bc96d..c7aefbf 100644
--- a/commit.c
+++ b/commit.c
@@ -421,7 +421,8 @@ struct commit *pop_most_recent_commit(struct commit_list **list,
 	return ret;
 }
 
-void clear_commit_marks(struct commit *commit, unsigned int mark)
+static void clear_commit_marks_1(struct commit_list **plist,
+				 struct commit *commit, unsigned int mark)
 {
 	while (commit) {
 		struct commit_list *parents;
@@ -436,12 +437,20 @@ void clear_commit_marks(struct commit *commit, unsigned int mark)
 			return;
 
 		while ((parents = parents->next))
-			clear_commit_marks(parents->item, mark);
+			commit_list_insert(parents->item, plist);
 
 		commit = commit->parents->item;
 	}
 }
 
+void clear_commit_marks(struct commit *commit, unsigned int mark)
+{
+	struct commit_list *list = NULL;
+	commit_list_insert(commit, &list);
+	while (list)
+		clear_commit_marks_1(&list, pop_commit(&list), mark);
+}
+
 void clear_commit_marks_for_object_array(struct object_array *a, unsigned mark)
 {
 	struct object *object;
diff --git a/revision.c b/revision.c
index 8764dde..7cc72fc 100644
--- a/revision.c
+++ b/revision.c
@@ -139,11 +139,32 @@ void mark_tree_uninteresting(struct tree *tree)
 
 void mark_parents_uninteresting(struct commit *commit)
 {
-	struct commit_list *parents = commit->parents;
+	struct commit_list *parents = NULL, *l;
+
+	for (l = commit->parents; l; l = l->next)
+		commit_list_insert(l->item, &parents);
 
 	while (parents) {
 		struct commit *commit = parents->item;
-		if (!(commit->object.flags & UNINTERESTING)) {
+		l = parents;
+		parents = parents->next;
+		free(l);
+
+		while (commit) {
+			/*
+			 * A missing commit is ok iff its parent is marked
+			 * uninteresting.
+			 *
+			 * We just mark such a thing parsed, so that when
+			 * it is popped next time around, we won't be trying
+			 * to parse it and get an error.
+			 */
+			if (!has_sha1_file(commit->object.sha1))
+				commit->object.parsed = 1;
+
+			if (commit->object.flags & UNINTERESTING)
+				break;
+
 			commit->object.flags |= UNINTERESTING;
 
 			/*
@@ -154,21 +175,13 @@ void mark_parents_uninteresting(struct commit *commit)
 			 * wasn't uninteresting), in which case we need
 			 * to mark its parents recursively too..
 			 */
-			if (commit->parents)
-				mark_parents_uninteresting(commit);
-		}
+			if (!commit->parents)
+				break;
 
-		/*
-		 * A missing commit is ok iff its parent is marked
-		 * uninteresting.
-		 *
-		 * We just mark such a thing parsed, so that when
-		 * it is popped next time around, we won't be trying
-		 * to parse it and get an error.
-		 */
-		if (!has_sha1_file(commit->object.sha1))
-			commit->object.parsed = 1;
-		parents = parents->next;
+			for (l = commit->parents->next; l; l = l->next)
+				commit_list_insert(l->item, &parents);
+			commit = commit->parents->item;
+		}
 	}
 }
 
-- 
1.7.8.36.g69ee2

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [PATCH v3 2/3] index-pack: eliminate recursion in find_unresolved_deltas
  2012-01-14 12:19   ` [PATCH v3 " Nguyễn Thái Ngọc Duy
  2012-01-14 12:19     ` [PATCH v3 1/3] Eliminate recursion in setting/clearing marks in commit list Nguyễn Thái Ngọc Duy
@ 2012-01-14 12:19     ` Nguyễn Thái Ngọc Duy
  2012-01-14 12:19     ` [PATCH v3 3/3] index-pack: eliminate unlimited recursion in get_base_data() Nguyễn Thái Ngọc Duy
  2 siblings, 0 replies; 21+ messages in thread
From: Nguyễn Thái Ngọc Duy @ 2012-01-14 12:19 UTC (permalink / raw)
  To: git
  Cc: Junio C Hamano, Shawn O. Pearce, Nguyễn Thái Ngọc Duy

Current find_unresolved_deltas() links all bases together in a form of
tree, using struct base_data, with prev_base pointer to point to
parent node. Then it traverses down from parent to children in
recursive manner with all base_data allocated on stack.

To eliminate recursion, we simply need to put all on heap
(parse_pack_objects and fix_unresolved_deltas). After that, it's
simple non-recursive depth-first traversal loop. Each node also
maintains its own state (ofs and ref indices) to iterate over all
children nodes.

So we process one node:

 - if it returns a new (child) node (a parent base), we link it to our
   tree, then process the new node.

 - if it returns nothing, the node is done, free it. We go back to
   parent node and resume whatever it's doing.

and do it until we have no nodes to process.

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
 builtin/index-pack.c |  111 +++++++++++++++++++++++++++++++------------------
 1 files changed, 70 insertions(+), 41 deletions(-)

diff --git a/builtin/index-pack.c b/builtin/index-pack.c
index af7dc37..38ff03a 100644
--- a/builtin/index-pack.c
+++ b/builtin/index-pack.c
@@ -34,6 +34,8 @@ struct base_data {
 	struct object_entry *obj;
 	void *data;
 	unsigned long size;
+	int ref_first, ref_last;
+	int ofs_first, ofs_last;
 };
 
 /*
@@ -221,6 +223,15 @@ static NORETURN void bad_object(unsigned long offset, const char *format, ...)
 	die("pack has bad object at offset %lu: %s", offset, buf);
 }
 
+static struct base_data *alloc_base_data(void)
+{
+	struct base_data *base = xmalloc(sizeof(struct base_data));
+	memset(base, 0, sizeof(*base));
+	base->ref_last = -1;
+	base->ofs_last = -1;
+	return base;
+}
+
 static void free_base_data(struct base_data *c)
 {
 	if (c->data) {
@@ -553,58 +564,76 @@ static void resolve_delta(struct object_entry *delta_obj,
 	nr_resolved_deltas++;
 }
 
-static void find_unresolved_deltas(struct base_data *base,
-				   struct base_data *prev_base)
+static struct base_data *find_unresolved_deltas_1(struct base_data *base,
+						  struct base_data *prev_base)
 {
-	int i, ref_first, ref_last, ofs_first, ofs_last;
-
-	/*
-	 * This is a recursive function. Those brackets should help reducing
-	 * stack usage by limiting the scope of the delta_base union.
-	 */
-	{
+	if (base->ref_last == -1 && base->ofs_last == -1) {
 		union delta_base base_spec;
 
 		hashcpy(base_spec.sha1, base->obj->idx.sha1);
 		find_delta_children(&base_spec,
-				    &ref_first, &ref_last, OBJ_REF_DELTA);
+				    &base->ref_first, &base->ref_last, OBJ_REF_DELTA);
 
 		memset(&base_spec, 0, sizeof(base_spec));
 		base_spec.offset = base->obj->idx.offset;
 		find_delta_children(&base_spec,
-				    &ofs_first, &ofs_last, OBJ_OFS_DELTA);
-	}
+				    &base->ofs_first, &base->ofs_last, OBJ_OFS_DELTA);
 
-	if (ref_last == -1 && ofs_last == -1) {
-		free(base->data);
-		return;
-	}
+		if (base->ref_last == -1 && base->ofs_last == -1) {
+			free(base->data);
+			return NULL;
+		}
 
-	link_base_data(prev_base, base);
+		link_base_data(prev_base, base);
+	}
 
-	for (i = ref_first; i <= ref_last; i++) {
-		struct object_entry *child = objects + deltas[i].obj_no;
-		struct base_data result;
+	if (base->ref_first <= base->ref_last) {
+		struct object_entry *child = objects + deltas[base->ref_first].obj_no;
+		struct base_data *result = alloc_base_data();
 
 		assert(child->real_type == OBJ_REF_DELTA);
-		resolve_delta(child, base, &result);
-		if (i == ref_last && ofs_last == -1)
+		resolve_delta(child, base, result);
+		if (base->ref_first == base->ref_last && base->ofs_last == -1)
 			free_base_data(base);
-		find_unresolved_deltas(&result, base);
+
+		base->ref_first++;
+		return result;
 	}
 
-	for (i = ofs_first; i <= ofs_last; i++) {
-		struct object_entry *child = objects + deltas[i].obj_no;
-		struct base_data result;
+	if (base->ofs_first <= base->ofs_last) {
+		struct object_entry *child = objects + deltas[base->ofs_first].obj_no;
+		struct base_data *result = alloc_base_data();
 
 		assert(child->real_type == OBJ_OFS_DELTA);
-		resolve_delta(child, base, &result);
-		if (i == ofs_last)
+		resolve_delta(child, base, result);
+		if (base->ofs_first == base->ofs_last)
 			free_base_data(base);
-		find_unresolved_deltas(&result, base);
+
+		base->ofs_first++;
+		return result;
 	}
 
 	unlink_base_data(base);
+	return NULL;
+}
+
+static void find_unresolved_deltas(struct base_data *base)
+{
+	struct base_data *new_base, *prev_base = NULL;
+	for (;;) {
+		new_base = find_unresolved_deltas_1(base, prev_base);
+
+		if (new_base) {
+			prev_base = base;
+			base = new_base;
+		} else {
+			free(base);
+			base = prev_base;
+			if (!base)
+				return;
+			prev_base = base->base;
+		}
+	}
 }
 
 static int compare_delta_entry(const void *a, const void *b)
@@ -684,13 +713,13 @@ static void parse_pack_objects(unsigned char *sha1)
 		progress = start_progress("Resolving deltas", nr_deltas);
 	for (i = 0; i < nr_objects; i++) {
 		struct object_entry *obj = &objects[i];
-		struct base_data base_obj;
+		struct base_data *base_obj = alloc_base_data();
 
 		if (is_delta_type(obj->type))
 			continue;
-		base_obj.obj = obj;
-		base_obj.data = NULL;
-		find_unresolved_deltas(&base_obj, NULL);
+		base_obj->obj = obj;
+		base_obj->data = NULL;
+		find_unresolved_deltas(base_obj);
 		display_progress(progress, nr_resolved_deltas);
 	}
 }
@@ -783,20 +812,20 @@ static void fix_unresolved_deltas(struct sha1file *f, int nr_unresolved)
 	for (i = 0; i < n; i++) {
 		struct delta_entry *d = sorted_by_pos[i];
 		enum object_type type;
-		struct base_data base_obj;
+		struct base_data *base_obj = alloc_base_data();
 
 		if (objects[d->obj_no].real_type != OBJ_REF_DELTA)
 			continue;
-		base_obj.data = read_sha1_file(d->base.sha1, &type, &base_obj.size);
-		if (!base_obj.data)
+		base_obj->data = read_sha1_file(d->base.sha1, &type, &base_obj->size);
+		if (!base_obj->data)
 			continue;
 
-		if (check_sha1_signature(d->base.sha1, base_obj.data,
-				base_obj.size, typename(type)))
+		if (check_sha1_signature(d->base.sha1, base_obj->data,
+				base_obj->size, typename(type)))
 			die("local object %s is corrupt", sha1_to_hex(d->base.sha1));
-		base_obj.obj = append_obj_to_pack(f, d->base.sha1,
-					base_obj.data, base_obj.size, type);
-		find_unresolved_deltas(&base_obj, NULL);
+		base_obj->obj = append_obj_to_pack(f, d->base.sha1,
+					base_obj->data, base_obj->size, type);
+		find_unresolved_deltas(base_obj);
 		display_progress(progress, nr_resolved_deltas);
 	}
 	free(sorted_by_pos);
-- 
1.7.8.36.g69ee2

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [PATCH v3 3/3] index-pack: eliminate unlimited recursion in get_base_data()
  2012-01-14 12:19   ` [PATCH v3 " Nguyễn Thái Ngọc Duy
  2012-01-14 12:19     ` [PATCH v3 1/3] Eliminate recursion in setting/clearing marks in commit list Nguyễn Thái Ngọc Duy
  2012-01-14 12:19     ` [PATCH v3 2/3] index-pack: eliminate recursion in find_unresolved_deltas Nguyễn Thái Ngọc Duy
@ 2012-01-14 12:19     ` Nguyễn Thái Ngọc Duy
  2 siblings, 0 replies; 21+ messages in thread
From: Nguyễn Thái Ngọc Duy @ 2012-01-14 12:19 UTC (permalink / raw)
  To: git
  Cc: Junio C Hamano, Shawn O. Pearce, Nguyễn Thái Ngọc Duy

Revese the order of delta applying so that by the time a delta is
applied, its base is either non-delta or already inflated.
get_base_data() is still recursive, but because base's data is always
ready, the inner get_base_data() call never has any chance to call
itself again.

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
 builtin/index-pack.c |   53 +++++++++++++++++++++++++++++++++++++++++--------
 1 files changed, 44 insertions(+), 9 deletions(-)

diff --git a/builtin/index-pack.c b/builtin/index-pack.c
index 38ff03a..dc6a584 100644
--- a/builtin/index-pack.c
+++ b/builtin/index-pack.c
@@ -515,14 +515,52 @@ static int is_delta_type(enum object_type type)
 	return (type == OBJ_REF_DELTA || type == OBJ_OFS_DELTA);
 }
 
+/*
+ * This function is part of find_unresolved_deltas(). There are two
+ * walkers going in the opposite ways.
+ *
+ * The first one in find_unresolved_deltas() traverses down from
+ * parent node to children, deflating nodes along the way. However,
+ * memory for deflated nodes is limited by delta_base_cache_limit, so
+ * at some point parent node's deflated content may be freed.
+ *
+ * The second walker is this function, which goes from current node up
+ * to top parent if necessary to deflate the node. In normal
+ * situation, its parent node would be already deflated, so it just
+ * needs to apply delta.
+ *
+ * In worst case scenario, parent node is no longer deflated because
+ * we're running out of delta_base_cache_limit, then we need to
+ * re-deflate parents, possibly up to the top base.
+ *
+ * All deflated objecsts here are subject to be freed if we exceed
+ * delta_base_cache_limit, just like in find_unresolved_deltas(), we
+ * just need to make sure the last node is not freed.
+ */
 static void *get_base_data(struct base_data *c)
 {
 	if (!c->data) {
 		struct object_entry *obj = c->obj;
+		struct base_data **delta = NULL;
+		int delta_nr = 0, delta_alloc = 0;
 
-		if (is_delta_type(obj->type)) {
-			void *base = get_base_data(c->base);
-			void *raw = get_data_from_pack(obj);
+		while (is_delta_type(c->obj->type) && !c->data) {
+			ALLOC_GROW(delta, delta_nr + 1, delta_alloc);
+			delta[delta_nr++] = c;
+			c = c->base;
+		}
+		if (!delta_nr) {
+			c->data = get_data_from_pack(obj);
+			c->size = obj->size;
+			base_cache_used += c->size;
+			prune_base_data(c);
+		}
+		for (; delta_nr > 0; delta_nr--) {
+			void *base, *raw;
+			c = delta[delta_nr - 1];
+			obj = c->obj;
+			base = get_base_data(c->base);
+			raw = get_data_from_pack(obj);
 			c->data = patch_delta(
 				base, c->base->size,
 				raw, obj->size,
@@ -530,13 +568,10 @@ static void *get_base_data(struct base_data *c)
 			free(raw);
 			if (!c->data)
 				bad_object(obj->idx.offset, "failed to apply delta");
-		} else {
-			c->data = get_data_from_pack(obj);
-			c->size = obj->size;
+			base_cache_used += c->size;
+			prune_base_data(c);
 		}
-
-		base_cache_used += c->size;
-		prune_base_data(c);
+		free(delta);
 	}
 	return c->data;
 }
-- 
1.7.8.36.g69ee2

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* Re: [PATCH v3 1/3] Eliminate recursion in setting/clearing marks in commit list
  2012-01-14 12:19     ` [PATCH v3 1/3] Eliminate recursion in setting/clearing marks in commit list Nguyễn Thái Ngọc Duy
@ 2012-01-14 15:23       ` Peter Baumann
  2012-01-15  9:25         ` Nguyen Thai Ngoc Duy
  0 siblings, 1 reply; 21+ messages in thread
From: Peter Baumann @ 2012-01-14 15:23 UTC (permalink / raw)
  To: Nguyễn Thái Ngọc Duy
  Cc: git, Junio C Hamano, Shawn O. Pearce

On Sat, Jan 14, 2012 at 07:19:53PM +0700, Nguyễn Thái Ngọc Duy wrote:
> Recursion in a DAG is generally a bad idea because it could be very
> deep. Be defensive and avoid recursion in mark_parents_uninteresting()
> and clear_commit_marks().
> 
> mark_parents_uninteresting() learns a trick from clear_commit_marks()
> to avoid malloc() in (dorminant) single-parent case.
                        ^^^^^^^^^

I think you ment dominant here.

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v3 1/3] Eliminate recursion in setting/clearing marks in commit list
  2012-01-14 15:23       ` Peter Baumann
@ 2012-01-15  9:25         ` Nguyen Thai Ngoc Duy
  0 siblings, 0 replies; 21+ messages in thread
From: Nguyen Thai Ngoc Duy @ 2012-01-15  9:25 UTC (permalink / raw)
  To: Peter Baumann; +Cc: git, Junio C Hamano, Shawn O. Pearce

2012/1/14 Peter Baumann <waste.manager@gmx.de>:
> On Sat, Jan 14, 2012 at 07:19:53PM +0700, Nguyễn Thái Ngọc Duy wrote:
>> Recursion in a DAG is generally a bad idea because it could be very
>> deep. Be defensive and avoid recursion in mark_parents_uninteresting()
>> and clear_commit_marks().
>>
>> mark_parents_uninteresting() learns a trick from clear_commit_marks()
>> to avoid malloc() in (dorminant) single-parent case.
>                        ^^^^^^^^^
>
> I think you ment dominant here.

Yes I meant dominant.
-- 
Duy

^ permalink raw reply	[flat|nested] 21+ messages in thread

end of thread, other threads:[~2012-01-15  9:26 UTC | newest]

Thread overview: 21+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2011-12-26 12:04 [PATCH 1/3] Eliminate recursion in setting/clearing marks in commit list Nguyễn Thái Ngọc Duy
2011-12-26 12:04 ` [PATCH 2/3] index-pack: eliminate recursion in find_unresolved_deltas Nguyễn Thái Ngọc Duy
2011-12-26 12:04 ` [PATCH 3/3] index-pack: eliminate unlimited recursion in get_delta_base() Nguyễn Thái Ngọc Duy
2012-01-09  3:59 ` [PATCH v2 0/3] nd/index-pack-no-recurse Nguyễn Thái Ngọc Duy
2012-01-09 19:30   ` Junio C Hamano
2012-01-14 12:19   ` [PATCH v3 " Nguyễn Thái Ngọc Duy
2012-01-14 12:19     ` [PATCH v3 1/3] Eliminate recursion in setting/clearing marks in commit list Nguyễn Thái Ngọc Duy
2012-01-14 15:23       ` Peter Baumann
2012-01-15  9:25         ` Nguyen Thai Ngoc Duy
2012-01-14 12:19     ` [PATCH v3 2/3] index-pack: eliminate recursion in find_unresolved_deltas Nguyễn Thái Ngọc Duy
2012-01-14 12:19     ` [PATCH v3 3/3] index-pack: eliminate unlimited recursion in get_base_data() Nguyễn Thái Ngọc Duy
2012-01-09  3:59 ` [PATCH v2 1/3] Eliminate recursion in setting/clearing marks in commit list Nguyễn Thái Ngọc Duy
2012-01-09 22:09   ` Junio C Hamano
2012-01-09  3:59 ` [PATCH v2 2/3] index-pack: eliminate recursion in find_unresolved_deltas Nguyễn Thái Ngọc Duy
2012-01-09 22:10   ` Junio C Hamano
2012-01-10 12:23     ` Nguyen Thai Ngoc Duy
2012-01-12 20:32       ` Junio C Hamano
2012-01-09  3:59 ` [PATCH v2 3/3] index-pack: eliminate unlimited recursion in get_delta_base() Nguyễn Thái Ngọc Duy
2012-01-09 22:51   ` Junio C Hamano
2012-01-10 13:03     ` Nguyen Thai Ngoc Duy
2012-01-10 13:16       ` Nguyen Thai Ngoc Duy

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).