[PATCH] git tag --contains : avoid stack overflow

* [PATCH] git tag --contains : avoid stack overflow
@ 2014-04-16 14:15 Stepan Kasal
  2014-04-16 15:46 ` Jeff King
  0 siblings, 1 reply; 24+ messages in thread
From: Stepan Kasal @ 2014-04-16 14:15 UTC (permalink / raw)
  To: git

From: Jean-Jacques Lafay <jeanjacques.lafay@gmail.com>
Date: Sat, 10 Nov 2012 18:36:10 +0100

In large repos, the recursion implementation of contains(commit,
commit_list) may result in a stack overflow. Replace the recursion with
a loop to fix it.

This problem is more apparent on Windows than on Linux, where the stack
is more limited by default.

See also this thread on the msysGit list:

	https://groups.google.com/d/topic/msysgit/FqT6boJrb2g/discussion

[jes: re-written to imitate the original recursion more closely]

Thomas Braun pointed out several documentation shortcomings.

Signed-off-by: Jean-Jacques Lafay <jeanjacques.lafay@gmail.com>
Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
Tested-by: Stepan Kasal <kasal@ucw.cz>
Thanks-to: Thomas Braun <thomas.braun@byte-physics.de>
---
 builtin/tag.c  | 81 ++++++++++++++++++++++++++++++++++++++++++++++++----------
 t/t7004-tag.sh | 21 +++++++++++++++
 2 files changed, 88 insertions(+), 14 deletions(-)

diff --git a/builtin/tag.c b/builtin/tag.c
index 74d3780..79c8c28 100644
--- a/builtin/tag.c
+++ b/builtin/tag.c
@@ -73,11 +73,13 @@ static int in_commit_list(const struct commit_list *want, struct commit *c)
 	return 0;
 }
 
-static int contains_recurse(struct commit *candidate,
+/*
+ * Test whether the candidate or one of its parents is contained in the list.
+ * Do not recurse to find out, though, but return -1 if inconclusive.
+ */
+static int contains_test(struct commit *candidate,
 			    const struct commit_list *want)
 {
-	struct commit_list *p;
-
 	/* was it previously marked as containing a want commit? */
 	if (candidate->object.flags & TMP_MARK)
 		return 1;
@@ -85,26 +87,77 @@ static int contains_recurse(struct commit *candidate,
 	if (candidate->object.flags & UNINTERESTING)
 		return 0;
 	/* or are we it? */
-	if (in_commit_list(want, candidate))
+	if (in_commit_list(want, candidate)) {
+		candidate->object.flags |= TMP_MARK;
 		return 1;
+	}
 
 	if (parse_commit(candidate) < 0)
 		return 0;
 
-	/* Otherwise recurse and mark ourselves for future traversals. */
-	for (p = candidate->parents; p; p = p->next) {
-		if (contains_recurse(p->item, want)) {
-			candidate->object.flags |= TMP_MARK;
-			return 1;
-		}
-	}
-	candidate->object.flags |= UNINTERESTING;
-	return 0;
+	return -1;
+}
+
+/*
+ * Mimicking the real stack, this stack lives on the heap, avoiding stack
+ * overflows.
+ *
+ * At each recursion step, the stack items points to the commits whose
+ * ancestors are to be inspected.
+ */
+struct stack {
+	int nr, alloc;
+	struct stack_entry {
+		struct commit *commit;
+		struct commit_list *parents;
+	} *stack;
+};
+
+static void push_to_stack(struct commit *candidate, struct stack *stack)
+{
+	int index = stack->nr++;
+	ALLOC_GROW(stack->stack, stack->nr, stack->alloc);
+	stack->stack[index].commit = candidate;
+	stack->stack[index].parents = candidate->parents;
 }
 
 static int contains(struct commit *candidate, const struct commit_list *want)
 {
-	return contains_recurse(candidate, want);
+	struct stack stack = { 0, 0, NULL };
+	int result = contains_test(candidate, want);
+
+	if (result >= 0)
+		return result;
+
+	push_to_stack(candidate, &stack);
+	while (stack.nr) {
+		struct stack_entry *entry = &stack.stack[stack.nr - 1];
+		struct commit *commit = entry->commit;
+		struct commit_list *parents = entry->parents;
+
+		if (!parents) {
+			commit->object.flags = UNINTERESTING;
+			stack.nr--;
+		}
+		/*
+		 * If we just popped the stack, parents->item has been marked,
+		 * therefore contains_test will return a meaningful 0 or 1.
+		 */
+		else switch (contains_test(parents->item, want)) {
+		case 1:
+			commit->object.flags |= TMP_MARK;
+			stack.nr--;
+			break;
+		case 0:
+			entry->parents = parents->next;
+			break;
+		default:
+			push_to_stack(parents->item, &stack);
+			break;
+		}
+	}
+	free(stack.stack);
+	return contains_test(candidate, want);
 }
 
 static void show_tag_lines(const unsigned char *sha1, int lines)
diff --git a/t/t7004-tag.sh b/t/t7004-tag.sh
index c8d6e9f..edaff13 100755
--- a/t/t7004-tag.sh
+++ b/t/t7004-tag.sh
@@ -1380,4 +1380,25 @@ test_expect_success 'multiple --points-at are OR-ed together' '
 	test_cmp expect actual
 '
 
+>expect
+# ulimit is a bash builtin; we can rely on that in MinGW, but nowhere else
+test_expect_success MINGW '--contains works in a deep repo' '
+	ulimit -s 64
+	i=1 &&
+	while test $i -lt 1000
+	do
+		echo "commit refs/heads/master
+committer A U Thor <author@example.com> $((1000000000 + $i * 100)) +0200
+data <<EOF
+commit #$i
+EOF"
+		test $i = 1 && echo "from refs/heads/master^0"
+		i=$(($i + 1))
+	done | git fast-import &&
+	git checkout master &&
+	git tag far-far-away HEAD^ &&
+	git tag --contains HEAD >actual &&
+	test_cmp expect actual
+'
+
 test_done
-- 
1.9.2.msysgit.0.154.g978f18d

^ permalink raw reply related	[flat|nested] 24+ messages in thread