git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] git tag --contains : avoid stack overflow
@ 2014-04-16 14:15 Stepan Kasal
  2014-04-16 15:46 ` Jeff King
  0 siblings, 1 reply; 31+ messages in thread
From: Stepan Kasal @ 2014-04-16 14:15 UTC (permalink / raw)
  To: git

From: Jean-Jacques Lafay <jeanjacques.lafay@gmail.com>
Date: Sat, 10 Nov 2012 18:36:10 +0100

In large repos, the recursion implementation of contains(commit,
commit_list) may result in a stack overflow. Replace the recursion with
a loop to fix it.

This problem is more apparent on Windows than on Linux, where the stack
is more limited by default.

See also this thread on the msysGit list:

	https://groups.google.com/d/topic/msysgit/FqT6boJrb2g/discussion

[jes: re-written to imitate the original recursion more closely]

Thomas Braun pointed out several documentation shortcomings.

Signed-off-by: Jean-Jacques Lafay <jeanjacques.lafay@gmail.com>
Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
Tested-by: Stepan Kasal <kasal@ucw.cz>
Thanks-to: Thomas Braun <thomas.braun@byte-physics.de>
---
 builtin/tag.c  | 81 ++++++++++++++++++++++++++++++++++++++++++++++++----------
 t/t7004-tag.sh | 21 +++++++++++++++
 2 files changed, 88 insertions(+), 14 deletions(-)

diff --git a/builtin/tag.c b/builtin/tag.c
index 74d3780..79c8c28 100644
--- a/builtin/tag.c
+++ b/builtin/tag.c
@@ -73,11 +73,13 @@ static int in_commit_list(const struct commit_list *want, struct commit *c)
 	return 0;
 }
 
-static int contains_recurse(struct commit *candidate,
+/*
+ * Test whether the candidate or one of its parents is contained in the list.
+ * Do not recurse to find out, though, but return -1 if inconclusive.
+ */
+static int contains_test(struct commit *candidate,
 			    const struct commit_list *want)
 {
-	struct commit_list *p;
-
 	/* was it previously marked as containing a want commit? */
 	if (candidate->object.flags & TMP_MARK)
 		return 1;
@@ -85,26 +87,77 @@ static int contains_recurse(struct commit *candidate,
 	if (candidate->object.flags & UNINTERESTING)
 		return 0;
 	/* or are we it? */
-	if (in_commit_list(want, candidate))
+	if (in_commit_list(want, candidate)) {
+		candidate->object.flags |= TMP_MARK;
 		return 1;
+	}
 
 	if (parse_commit(candidate) < 0)
 		return 0;
 
-	/* Otherwise recurse and mark ourselves for future traversals. */
-	for (p = candidate->parents; p; p = p->next) {
-		if (contains_recurse(p->item, want)) {
-			candidate->object.flags |= TMP_MARK;
-			return 1;
-		}
-	}
-	candidate->object.flags |= UNINTERESTING;
-	return 0;
+	return -1;
+}
+
+/*
+ * Mimicking the real stack, this stack lives on the heap, avoiding stack
+ * overflows.
+ *
+ * At each recursion step, the stack items points to the commits whose
+ * ancestors are to be inspected.
+ */
+struct stack {
+	int nr, alloc;
+	struct stack_entry {
+		struct commit *commit;
+		struct commit_list *parents;
+	} *stack;
+};
+
+static void push_to_stack(struct commit *candidate, struct stack *stack)
+{
+	int index = stack->nr++;
+	ALLOC_GROW(stack->stack, stack->nr, stack->alloc);
+	stack->stack[index].commit = candidate;
+	stack->stack[index].parents = candidate->parents;
 }
 
 static int contains(struct commit *candidate, const struct commit_list *want)
 {
-	return contains_recurse(candidate, want);
+	struct stack stack = { 0, 0, NULL };
+	int result = contains_test(candidate, want);
+
+	if (result >= 0)
+		return result;
+
+	push_to_stack(candidate, &stack);
+	while (stack.nr) {
+		struct stack_entry *entry = &stack.stack[stack.nr - 1];
+		struct commit *commit = entry->commit;
+		struct commit_list *parents = entry->parents;
+
+		if (!parents) {
+			commit->object.flags = UNINTERESTING;
+			stack.nr--;
+		}
+		/*
+		 * If we just popped the stack, parents->item has been marked,
+		 * therefore contains_test will return a meaningful 0 or 1.
+		 */
+		else switch (contains_test(parents->item, want)) {
+		case 1:
+			commit->object.flags |= TMP_MARK;
+			stack.nr--;
+			break;
+		case 0:
+			entry->parents = parents->next;
+			break;
+		default:
+			push_to_stack(parents->item, &stack);
+			break;
+		}
+	}
+	free(stack.stack);
+	return contains_test(candidate, want);
 }
 
 static void show_tag_lines(const unsigned char *sha1, int lines)
diff --git a/t/t7004-tag.sh b/t/t7004-tag.sh
index c8d6e9f..edaff13 100755
--- a/t/t7004-tag.sh
+++ b/t/t7004-tag.sh
@@ -1380,4 +1380,25 @@ test_expect_success 'multiple --points-at are OR-ed together' '
 	test_cmp expect actual
 '
 
+>expect
+# ulimit is a bash builtin; we can rely on that in MinGW, but nowhere else
+test_expect_success MINGW '--contains works in a deep repo' '
+	ulimit -s 64
+	i=1 &&
+	while test $i -lt 1000
+	do
+		echo "commit refs/heads/master
+committer A U Thor <author@example.com> $((1000000000 + $i * 100)) +0200
+data <<EOF
+commit #$i
+EOF"
+		test $i = 1 && echo "from refs/heads/master^0"
+		i=$(($i + 1))
+	done | git fast-import &&
+	git checkout master &&
+	git tag far-far-away HEAD^ &&
+	git tag --contains HEAD >actual &&
+	test_cmp expect actual
+'
+
 test_done
-- 
1.9.2.msysgit.0.154.g978f18d

^ permalink raw reply related	[flat|nested] 31+ messages in thread

* Re: [PATCH] git tag --contains : avoid stack overflow
  2014-04-16 14:15 [PATCH] git tag --contains : avoid stack overflow Stepan Kasal
@ 2014-04-16 15:46 ` Jeff King
  2014-04-17 17:31   ` Johannes Schindelin
  0 siblings, 1 reply; 31+ messages in thread
From: Jeff King @ 2014-04-16 15:46 UTC (permalink / raw)
  To: git

On Wed, Apr 16, 2014 at 04:15:19PM +0200, Stepan Kasal wrote:

> From: Jean-Jacques Lafay <jeanjacques.lafay@gmail.com>
> Date: Sat, 10 Nov 2012 18:36:10 +0100
> 
> In large repos, the recursion implementation of contains(commit,
> commit_list) may result in a stack overflow. Replace the recursion with
> a loop to fix it.
> 
> This problem is more apparent on Windows than on Linux, where the stack
> is more limited by default.

I think this is a good thing to be doing, and it looks mostly good to
me. A few comments:

> -static int contains_recurse(struct commit *candidate,
> +/*
> + * Test whether the candidate or one of its parents is contained in the list.
> + * Do not recurse to find out, though, but return -1 if inconclusive.
> + */
> +static int contains_test(struct commit *candidate,
>  			    const struct commit_list *want)

Can we turn this return value into

  enum {
	CONTAINS_UNKNOWN = -1,
	CONTAINS_NO = 0,
	CONTAINS_YES = 1,
  } contains_result;

to make the code a little more self-documenting?

>  static int contains(struct commit *candidate, const struct commit_list *want)
>  {
> -	return contains_recurse(candidate, want);
> +	struct stack stack = { 0, 0, NULL };
> +	int result = contains_test(candidate, want);
> +
> +	if (result >= 0)
> +		return result;

Then this can become:

  if (result != CONTAINS_UNKNOWN)
	return result;

> +		if (!parents) {
> +			commit->object.flags = UNINTERESTING;
> +			stack.nr--;
> +		}

Shouldn't this be "|=" when setting the flag?

> +		/*
> +		 * If we just popped the stack, parents->item has been marked,
> +		 * therefore contains_test will return a meaningful 0 or 1.
> +		 */
> +		else switch (contains_test(parents->item, want)) {
> +		case 1:
> +			commit->object.flags |= TMP_MARK;
> +			stack.nr--;
> +			break;
> +		case 0:
> +			entry->parents = parents->next;
> +			break;
> +		default:
> +			push_to_stack(parents->item, &stack);
> +			break;
> +		}

And if we have an enum, this switch() becomes more readable (the
"default" here threw me off initially, because it is actually just
looking for "-1").

> +>expect
> +# ulimit is a bash builtin; we can rely on that in MinGW, but nowhere else
> +test_expect_success MINGW '--contains works in a deep repo' '
> +	ulimit -s 64

It would be nice to test this on Linux.

Can we do something like:

  test_lazy_prereq BASH 'bash --version'

  test_expect_success BASH '--contains works in a deep repo' '
	... setup repo ...
	bash -c "ulimit -s 64 && git tag --contains HEAD" >actual &&
	test_cmp expect actual
  '

As a bonus, then our "ulimit" call does not pollute the environment of
subsequent tests.

-Peff

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH] git tag --contains : avoid stack overflow
  2014-04-16 15:46 ` Jeff King
@ 2014-04-17 17:31   ` Johannes Schindelin
  2014-04-17 21:32     ` Jeff King
  0 siblings, 1 reply; 31+ messages in thread
From: Johannes Schindelin @ 2014-04-17 17:31 UTC (permalink / raw)
  To: Jeff King; +Cc: git, Stepan Kasal, Jean-Jacques Lafay

Hi Peff,

On Wed, 16 Apr 2014, Jeff King wrote:

> On Wed, Apr 16, 2014 at 04:15:19PM +0200, Stepan Kasal wrote:
> 
> > From: Jean-Jacques Lafay at Sat, 10 Nov 2012 18:36:10 +0100
> > 
> > In large repos, the recursion implementation of contains(commit,
> > commit_list) may result in a stack overflow. Replace the recursion
> > with a loop to fix it.
> > 
> > This problem is more apparent on Windows than on Linux, where the
> > stack is more limited by default.
> 
> I think this is a good thing to be doing, and it looks mostly good to
> me. A few comments:
> 
> > -static int contains_recurse(struct commit *candidate,
> > +/*
> > + * Test whether the candidate or one of its parents is contained in the list.
> > + * Do not recurse to find out, though, but return -1 if inconclusive.
> > + */
> > +static int contains_test(struct commit *candidate,
> >  			    const struct commit_list *want)
> 
> Can we turn this return value into
> 
>   enum {
> 	CONTAINS_UNKNOWN = -1,
> 	CONTAINS_NO = 0,
> 	CONTAINS_YES = 1,
>   } contains_result;
> 
> to make the code a little more self-documenting?

Good idea!

> [... detailed instructions what changes are implied by the enum ...]
> 
> > +>expect
> > +# ulimit is a bash builtin; we can rely on that in MinGW, but nowhere else
> > +test_expect_success MINGW '--contains works in a deep repo' '
> > +	ulimit -s 64
> 
> It would be nice to test this on Linux.
> 
> Can we do something like:
> 
>   test_lazy_prereq BASH 'bash --version'
> 
>   test_expect_success BASH '--contains works in a deep repo' '
> 	... setup repo ...
> 	bash -c "ulimit -s 64 && git tag --contains HEAD" >actual &&
> 	test_cmp expect actual
>   '
> 
> As a bonus, then our "ulimit" call does not pollute the environment of
> subsequent tests.

That's a very good idea! We mulled it over a bit and did not come up with
this excellent solution.

Please see https://github.com/msysgit/git/c63d196 for the fixup, and
https://github.com/msysgit/git/compare/tag-contains%5E...tag-contains for
the updated patch.

Thanks,
Dscho

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH] git tag --contains : avoid stack overflow
  2014-04-17 17:31   ` Johannes Schindelin
@ 2014-04-17 21:32     ` Jeff King
  2014-04-17 21:52       ` Johannes Schindelin
  0 siblings, 1 reply; 31+ messages in thread
From: Jeff King @ 2014-04-17 21:32 UTC (permalink / raw)
  To: Johannes Schindelin; +Cc: git, Stepan Kasal, Jean-Jacques Lafay

On Thu, Apr 17, 2014 at 07:31:54PM +0200, Johannes Schindelin wrote:

> > 	bash -c "ulimit -s 64 && git tag --contains HEAD" >actual &&
> [...]
> Please see https://github.com/msysgit/git/c63d196 for the fixup, and
> https://github.com/msysgit/git/compare/tag-contains%5E...tag-contains for
> the updated patch.

I tried running the test on my Linux box, but it doesn't fail with the
existing recursive code. So I tried a few different stack sizes, like:

  for i in `seq 1 64`; do
    bash -c "
      ulimit -s $i &&
      ../../git tag --contains HEAD ||
      echo fail $i"
  done

The results are strangely non-deterministic, but with -O0, we generally
die reliably below about 60. With -O2, though, it's more like 43. We
can't go _too_ low here, though, as lots of things start breaking around
32.

If we instead bump the size of the history to 2000 commits, then I
reliably fail with a 64k stack (even with -O2, it needs around 80k).

Of course those numbers are all black magic, and are going to vary based
on the system, the compiler, settings, etc. My system is 64-bit, and the
current code needs at least 3 pointers per recursive invocation. So
we're spending ~46K on those variables, not counting any calling
convention overhead (and presumably we at least need a function return
pointer there). So a 32-bit system might actually get by, as it would
need half as much.

So we can bump the depth further; probably 4000 is enough for any system
to fail with a 64k stack. The deeper we make it, the longer it takes to
run the test, though. At 4000, my machine seems to take about 300ms to
run it. That's may be OK.

-Peff

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH] git tag --contains : avoid stack overflow
  2014-04-17 21:32     ` Jeff King
@ 2014-04-17 21:52       ` Johannes Schindelin
  2014-04-17 21:58         ` Jeff King
  0 siblings, 1 reply; 31+ messages in thread
From: Johannes Schindelin @ 2014-04-17 21:52 UTC (permalink / raw)
  To: Jeff King; +Cc: git, Stepan Kasal, Jean-Jacques Lafay

Hi Peff,

On Thu, 17 Apr 2014, Jeff King wrote:

> On Thu, Apr 17, 2014 at 07:31:54PM +0200, Johannes Schindelin wrote:
> 
> > > 	bash -c "ulimit -s 64 && git tag --contains HEAD" >actual &&
> > [...]
> > Please see https://github.com/msysgit/git/c63d196 for the fixup, and
> > https://github.com/msysgit/git/compare/tag-contains%5E...tag-contains for
> > the updated patch.
> 
> I tried running the test on my Linux box, but it doesn't fail with the
> existing recursive code.

I cannot recall how I came to choose 64, but I *think* I only tested on
Windows, and I *think* I reduced the number of tags in order to make
things faster (Windows is *unbearably* slow with spawn-happy programs such
as Git's tests -- literally every single line in a shell script tests the
patience of this developer, running the complete test suite with 15
parallel threads takes several hours, no kidding).

> The results are strangely non-deterministic, but with -O0, we generally
> die reliably below about 60. With -O2, though, it's more like 43. We
> can't go _too_ low here, though, as lots of things start breaking around
> 32.

How about using 40, then? I am more interested in reducing the runtime
than reducing the number of false negatives. The problem will be exercised
enough on Windows, but not if the test suite becomes even slower than it
already is.

Ciao,
Johannes

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH] git tag --contains : avoid stack overflow
  2014-04-17 21:52       ` Johannes Schindelin
@ 2014-04-17 21:58         ` Jeff King
  2014-04-23  7:53           ` [PATCH v2] " Stepan Kasal
  0 siblings, 1 reply; 31+ messages in thread
From: Jeff King @ 2014-04-17 21:58 UTC (permalink / raw)
  To: Johannes Schindelin; +Cc: git, Stepan Kasal, Jean-Jacques Lafay

On Thu, Apr 17, 2014 at 11:52:56PM +0200, Johannes Schindelin wrote:

> > I tried running the test on my Linux box, but it doesn't fail with the
> > existing recursive code.
> 
> I cannot recall how I came to choose 64, but I *think* I only tested on
> Windows, and I *think* I reduced the number of tags in order to make
> things faster (Windows is *unbearably* slow with spawn-happy programs such
> as Git's tests -- literally every single line in a shell script tests the
> patience of this developer, running the complete test suite with 15
> parallel threads takes several hours, no kidding).

Yeah, I figured speed had something to do with it. However, since you
are using a bash loop to generate the input (and it should all be done
as builtins in bash, I think), and fast-import to create the objects, I
don't think bumping it will actually increase your process count.

> > The results are strangely non-deterministic, but with -O0, we generally
> > die reliably below about 60. With -O2, though, it's more like 43. We
> > can't go _too_ low here, though, as lots of things start breaking around
> > 32.
> 
> How about using 40, then? I am more interested in reducing the runtime
> than reducing the number of false negatives. The problem will be exercised
> enough on Windows, but not if the test suite becomes even slower than it
> already is.

I'm OK with doing that. My biggest concern is that it will cause false
positives on systems that are hungrier for stack space, but we can
address that if it happens.

-Peff

^ permalink raw reply	[flat|nested] 31+ messages in thread

* [PATCH v2] git tag --contains : avoid stack overflow
  2014-04-17 21:58         ` Jeff King
@ 2014-04-23  7:53           ` Stepan Kasal
  2014-04-23 14:28             ` Johannes Schindelin
                               ` (2 more replies)
  0 siblings, 3 replies; 31+ messages in thread
From: Stepan Kasal @ 2014-04-23  7:53 UTC (permalink / raw)
  To: Jeff King; +Cc: Johannes Schindelin, git, Jean-Jacques Lafay

From: Jean-Jacques Lafay <jeanjacques.lafay@gmail.com>

In large repos, the recursion implementation of contains(commit,
commit_list) may result in a stack overflow. Replace the recursion with
a loop to fix it.

This problem is more apparent on Windows than on Linux, where the stack
is more limited by default.

See also this thread on the msysGit list:

	https://groups.google.com/d/topic/msysgit/FqT6boJrb2g/discussion

[jes: re-written to imitate the original recursion more closely]

Thomas Braun pointed out several documentation shortcomings.

Tests are run only if ulimit -s is available.  This means they cannot
be run on Windows.

Signed-off-by: Jean-Jacques Lafay <jeanjacques.lafay@gmail.com>
Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
Tested-by: Stepan Kasal <kasal@ucw.cz>
---
Hello,
I have found out that "ulimit -s" does not work on Windows.
Adding this as a prerequisite, we will skip the test there.

On Thu, Apr 17, 2014 at 05:32:38PM -0400, Jeff King wrote:
> So we can bump the depth further; probably 4000 is enough for any system
> to fail with a 64k stack. The deeper we make it, the longer it takes to
> run the test, though. At 4000, my machine seems to take about 300ms to
> run it. That's may be OK.

Consequently, we can accept this proposal.

Stepan Kasal

 builtin/tag.c  | 90 ++++++++++++++++++++++++++++++++++++++++++++++++----------
 t/t7004-tag.sh | 23 +++++++++++++++
 2 files changed, 98 insertions(+), 15 deletions(-)

diff --git a/builtin/tag.c b/builtin/tag.c
index 6c7c6bd..f344002 100644
--- a/builtin/tag.c
+++ b/builtin/tag.c
@@ -80,11 +80,19 @@ static int in_commit_list(const struct commit_list *want, struct commit *c)
 	return 0;
 }
 
-static int contains_recurse(struct commit *candidate,
+enum contains_result {
+	CONTAINS_UNKNOWN = -1,
+	CONTAINS_NO = 0,
+	CONTAINS_YES = 1,
+};
+
+/*
+ * Test whether the candidate or one of its parents is contained in the list.
+ * Do not recurse to find out, though, but return -1 if inconclusive.
+ */
+static enum contains_result contains_test(struct commit *candidate,
 			    const struct commit_list *want)
 {
-	struct commit_list *p;
-
 	/* was it previously marked as containing a want commit? */
 	if (candidate->object.flags & TMP_MARK)
 		return 1;
@@ -92,26 +100,78 @@ static int contains_recurse(struct commit *candidate,
 	if (candidate->object.flags & UNINTERESTING)
 		return 0;
 	/* or are we it? */
-	if (in_commit_list(want, candidate))
+	if (in_commit_list(want, candidate)) {
+		candidate->object.flags |= TMP_MARK;
 		return 1;
+	}
 
 	if (parse_commit(candidate) < 0)
 		return 0;
 
-	/* Otherwise recurse and mark ourselves for future traversals. */
-	for (p = candidate->parents; p; p = p->next) {
-		if (contains_recurse(p->item, want)) {
-			candidate->object.flags |= TMP_MARK;
-			return 1;
-		}
-	}
-	candidate->object.flags |= UNINTERESTING;
-	return 0;
+	return -1;
 }
 
-static int contains(struct commit *candidate, const struct commit_list *want)
+/*
+ * Mimicking the real stack, this stack lives on the heap, avoiding stack
+ * overflows.
+ *
+ * At each recursion step, the stack items points to the commits whose
+ * ancestors are to be inspected.
+ */
+struct stack {
+	int nr, alloc;
+	struct stack_entry {
+		struct commit *commit;
+		struct commit_list *parents;
+	} *stack;
+};
+
+static void push_to_stack(struct commit *candidate, struct stack *stack)
+{
+	int index = stack->nr++;
+	ALLOC_GROW(stack->stack, stack->nr, stack->alloc);
+	stack->stack[index].commit = candidate;
+	stack->stack[index].parents = candidate->parents;
+}
+
+static enum contains_result contains(struct commit *candidate,
+		const struct commit_list *want)
 {
-	return contains_recurse(candidate, want);
+	struct stack stack = { 0, 0, NULL };
+	int result = contains_test(candidate, want);
+
+	if (result != CONTAINS_UNKNOWN)
+		return result;
+
+	push_to_stack(candidate, &stack);
+	while (stack.nr) {
+		struct stack_entry *entry = &stack.stack[stack.nr - 1];
+		struct commit *commit = entry->commit;
+		struct commit_list *parents = entry->parents;
+
+		if (!parents) {
+			commit->object.flags |= UNINTERESTING;
+			stack.nr--;
+		}
+		/*
+		 * If we just popped the stack, parents->item has been marked,
+		 * therefore contains_test will return a meaningful 0 or 1.
+		 */
+		else switch (contains_test(parents->item, want)) {
+		case CONTAINS_YES:
+			commit->object.flags |= TMP_MARK;
+			stack.nr--;
+			break;
+		case CONTAINS_NO:
+			entry->parents = parents->next;
+			break;
+		case CONTAINS_UNKNOWN:
+			push_to_stack(parents->item, &stack);
+			break;
+		}
+	}
+	free(stack.stack);
+	return contains_test(candidate, want);
 }
 
 static void show_tag_lines(const unsigned char *sha1, int lines)
diff --git a/t/t7004-tag.sh b/t/t7004-tag.sh
index 143a8ea..db82f6d 100755
--- a/t/t7004-tag.sh
+++ b/t/t7004-tag.sh
@@ -1423,4 +1423,27 @@ EOF
 	test_cmp expect actual
 '
 
+ulimit_stack="ulimit -s 64"
+test_lazy_prereq ULIMIT 'bash -c "'"$ulimit_stack"'"'
+
+>expect
+# we require bash and ulimit, this excludes Windows
+test_expect_success ULIMIT '--contains works in a deep repo' '
+	i=1 &&
+	while test $i -lt 4000
+	do
+		echo "commit refs/heads/master
+committer A U Thor <author@example.com> $((1000000000 + $i * 100)) +0200
+data <<EOF
+commit #$i
+EOF"
+		test $i = 1 && echo "from refs/heads/master^0"
+		i=$(($i + 1))
+	done | git fast-import &&
+	git checkout master &&
+	git tag far-far-away HEAD^ &&
+	bash -c "'"$ulimit_stack"' && git tag --contains HEAD >actual" &&
+	test_cmp expect actual
+'
+
 test_done
-- 
1.9.2.msysgit.0.158.g49754fb

^ permalink raw reply related	[flat|nested] 31+ messages in thread

* Re: [PATCH v2] git tag --contains : avoid stack overflow
  2014-04-23  7:53           ` [PATCH v2] " Stepan Kasal
@ 2014-04-23 14:28             ` Johannes Schindelin
  2014-04-23 15:45               ` Stepan Kasal
  2014-04-23 19:12             ` Junio C Hamano
  2014-04-23 19:17             ` Jeff King
  2 siblings, 1 reply; 31+ messages in thread
From: Johannes Schindelin @ 2014-04-23 14:28 UTC (permalink / raw)
  To: Stepan Kasal; +Cc: Jeff King, git, Jean-Jacques Lafay

Hi,

On Wed, 23 Apr 2014, Stepan Kasal wrote:

> I have found out that "ulimit -s" does not work on Windows.
> Adding this as a prerequisite, we will skip the test there.

The interdiff can be seen here:

	https://github.com/msysgit/git/commit/c68e27d5

Ciao,
Johannes

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH v2] git tag --contains : avoid stack overflow
  2014-04-23 14:28             ` Johannes Schindelin
@ 2014-04-23 15:45               ` Stepan Kasal
  0 siblings, 0 replies; 31+ messages in thread
From: Stepan Kasal @ 2014-04-23 15:45 UTC (permalink / raw)
  To: Johannes Schindelin; +Cc: Jeff King, git, Jean-Jacques Lafay

Hello,

On Wed, Apr 23, 2014 at 04:28:39PM +0200, Johannes Schindelin wrote:
> The interdiff can be seen here:
> 	https://github.com/msysgit/git/commit/c68e27d5

not exatly, is also changes the number of commits in the "deep repo"
from 1000 to 4000, as peff proposed.

Stepan

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH v2] git tag --contains : avoid stack overflow
  2014-04-23  7:53           ` [PATCH v2] " Stepan Kasal
  2014-04-23 14:28             ` Johannes Schindelin
@ 2014-04-23 19:12             ` Junio C Hamano
  2014-04-23 19:16               ` Jeff King
  2014-04-23 19:59               ` [PATCH v2] git tag --contains : " Stepan Kasal
  2014-04-23 19:17             ` Jeff King
  2 siblings, 2 replies; 31+ messages in thread
From: Junio C Hamano @ 2014-04-23 19:12 UTC (permalink / raw)
  To: Stepan Kasal; +Cc: Jeff King, Johannes Schindelin, git, Jean-Jacques Lafay

Stepan Kasal <kasal@ucw.cz> writes:

[Administrivia: please refrain from using Mail-Followup-To to
deflect an attempt to directly respond to you; it will waste time of
other people while it may be saving your time].

> From: Jean-Jacques Lafay <jeanjacques.lafay@gmail.com>
>
> In large repos, the recursion implementation of contains(commit,
> commit_list) may result in a stack overflow. Replace the recursion with
> a loop to fix it.
>
> This problem is more apparent on Windows than on Linux, where the stack
> is more limited by default.
>
> See also this thread on the msysGit list:
>
> 	https://groups.google.com/d/topic/msysgit/FqT6boJrb2g/discussion
>
> [jes: re-written to imitate the original recursion more closely]
>
> Thomas Braun pointed out several documentation shortcomings.
>
> Tests are run only if ulimit -s is available.  This means they cannot
> be run on Windows.
>
> Signed-off-by: Jean-Jacques Lafay <jeanjacques.lafay@gmail.com>
> Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
> Tested-by: Stepan Kasal <kasal@ucw.cz>

Thanks.

> diff --git a/t/t7004-tag.sh b/t/t7004-tag.sh
> index 143a8ea..db82f6d 100755
> --- a/t/t7004-tag.sh
> +++ b/t/t7004-tag.sh
> @@ -1423,4 +1423,27 @@ EOF
>  	test_cmp expect actual
>  '
>  
> +ulimit_stack="ulimit -s 64"
> +test_lazy_prereq ULIMIT 'bash -c "'"$ulimit_stack"'"'

With this implementaion, ULIMIT implies bash, and we use bash that
appears on user's PATH that may not be the one the user chose to run
git with.  Can't we fix both of them by using $SHELL_PATH?

> +>expect

Move this inside test_expect_success?

> +# we require bash and ulimit, this excludes Windows
> +test_expect_success ULIMIT '--contains works in a deep repo' '
> +	i=1 &&
> +	while test $i -lt 4000
> +	do
> +		echo "commit refs/heads/master
> +committer A U Thor <author@example.com> $((1000000000 + $i * 100)) +0200
> +data <<EOF
> +commit #$i
> +EOF"
> +		test $i = 1 && echo "from refs/heads/master^0"
> +		i=$(($i + 1))
> +	done | git fast-import &&
> +	git checkout master &&
> +	git tag far-far-away HEAD^ &&
> +	bash -c "'"$ulimit_stack"' && git tag --contains HEAD >actual" &&

So this runs a separate "bash", the only thing which does is to run
a small script that gives a small stack to itself and exit, and then
run "git tag" in the original shell?

Ahh, no, I am mis-pairing the quotes.

How about doing it along this line instead?

	run_with_limited_stack () {
		"$SHELL_PATH" -c "ulimit -s 64 && $*"
	}

	test_lazy_prereq ULIMIT "run_with_limited_stack true"

	test_expect_success ULIMIT '...' '
        	>expect &&
		i=1 &&
                ...
                done | git-fast-import &&
                git tag far-far-away HEAD^ &&
                run_with_limited_stack "git tag --contains HEAD" >actual &&
		test_cmp expect actual
	'

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH v2] git tag --contains : avoid stack overflow
  2014-04-23 19:12             ` Junio C Hamano
@ 2014-04-23 19:16               ` Jeff King
  2014-04-23 20:48                 ` Junio C Hamano
  2014-04-23 19:59               ` [PATCH v2] git tag --contains : " Stepan Kasal
  1 sibling, 1 reply; 31+ messages in thread
From: Jeff King @ 2014-04-23 19:16 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Stepan Kasal, Johannes Schindelin, git, Jean-Jacques Lafay

On Wed, Apr 23, 2014 at 12:12:14PM -0700, Junio C Hamano wrote:

> > +ulimit_stack="ulimit -s 64"
> > +test_lazy_prereq ULIMIT 'bash -c "'"$ulimit_stack"'"'
> 
> With this implementaion, ULIMIT implies bash, and we use bash that
> appears on user's PATH that may not be the one the user chose to run
> git with.  Can't we fix both of them by using $SHELL_PATH?

I don't think so. The point is that we _must_ use bash here, not any
POSIX shell. So my $SHELL_PATH is /bin/sh, which is dash, and would not
run the test.

We want to run "some bash" if we can. We may pick a bash on the user's
PATH that is not what they put into $SHELL_PATH, but that should be
relatively rare. And the consequence is that either that bash works fine
and we run the test, or it does not, and we skip the test.

> How about doing it along this line instead?
> 
> 	run_with_limited_stack () {
> 		"$SHELL_PATH" -c "ulimit -s 64 && $*"
> 	}
> 
> 	test_lazy_prereq ULIMIT "run_with_limited_stack true"

That's a much more direct test. I like it (aside from the $SHELL_PATH
thing as described above).

-Peff

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH v2] git tag --contains : avoid stack overflow
  2014-04-23  7:53           ` [PATCH v2] " Stepan Kasal
  2014-04-23 14:28             ` Johannes Schindelin
  2014-04-23 19:12             ` Junio C Hamano
@ 2014-04-23 19:17             ` Jeff King
  2014-04-23 21:14               ` Johannes Schindelin
  2 siblings, 1 reply; 31+ messages in thread
From: Jeff King @ 2014-04-23 19:17 UTC (permalink / raw)
  To: Johannes Schindelin, git, Jean-Jacques Lafay

On Wed, Apr 23, 2014 at 09:53:25AM +0200, Stepan Kasal wrote:

> I have found out that "ulimit -s" does not work on Windows.
> Adding this as a prerequisite, we will skip the test there.

I found this bit weird, as the test originated on Windows. Did it never
actually cause a failure there (i.e., the "ulimit -s" doesn't do
anything)? Or does "ulimit" fail?

-Peff

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH v2] git tag --contains : avoid stack overflow
  2014-04-23 19:12             ` Junio C Hamano
  2014-04-23 19:16               ` Jeff King
@ 2014-04-23 19:59               ` Stepan Kasal
  1 sibling, 0 replies; 31+ messages in thread
From: Stepan Kasal @ 2014-04-23 19:59 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Jeff King, Johannes Schindelin, git, Jean-Jacques Lafay

Hi,

On Wed, Apr 23, 2014 at 12:12:14PM -0700, Junio C Hamano wrote:
> [Administrivia: please refrain from using Mail-Followup-To to
> deflect an attempt to directly respond to you;

thanks a lot for telling me.
Actually, this was a mistake: I added git to the list of discussion
lists, without realizing the consequences.
I'm glad to have separate copies of "my threads" that do not fall
to the git-list folder.

> > +>expect
> Move this inside test_expect_success?

Of course.  Had this in mind, then forgot.

> So this runs a separate "bash", [...]
> run "git tag" in the original shell?
> 
> Ahh, no, I am mis-pairing the quotes.

Point taken.  I admire how nicely you explained that!

> 	run_with_limited_stack () {
> 		"$SHELL_PATH" -c "ulimit -s 64 && $*"
> 	}

Elegant. But I agree with Peff that we shall run "a bash" instead.

I'll mail an updated patch tomorrow.

Stepan

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH v2] git tag --contains : avoid stack overflow
  2014-04-23 19:16               ` Jeff King
@ 2014-04-23 20:48                 ` Junio C Hamano
  2014-04-23 20:55                   ` Jeff King
  0 siblings, 1 reply; 31+ messages in thread
From: Junio C Hamano @ 2014-04-23 20:48 UTC (permalink / raw)
  To: Jeff King; +Cc: Stepan Kasal, Johannes Schindelin, git, Jean-Jacques Lafay

Jeff King <peff@peff.net> writes:

> On Wed, Apr 23, 2014 at 12:12:14PM -0700, Junio C Hamano wrote:
>
>> > +ulimit_stack="ulimit -s 64"
>> > +test_lazy_prereq ULIMIT 'bash -c "'"$ulimit_stack"'"'
>> 
>> With this implementaion, ULIMIT implies bash, and we use bash that
>> appears on user's PATH that may not be the one the user chose to run
>> git with.  Can't we fix both of them by using $SHELL_PATH?
>
> I don't think so. The point is that we _must_ use bash here, not any
> POSIX shell.

Sorry, but I do not understand.  Isn't what you want "any POSIX
shell with 'ulimit -s 64' supported"?

    $ dash -c 'ulimit -s && ulimit -s 64 && ulimit -s'
    8192
    64

> We want to run "some bash" if we can. We may pick a bash on the user's
> PATH that is not what they put into $SHELL_PATH, but that should be
> relatively rare. And the consequence is that either that bash works fine
> and we run the test, or it does not, and we skip the test.
>
>> How about doing it along this line instead?
>> 
>> 	run_with_limited_stack () {
>> 		"$SHELL_PATH" -c "ulimit -s 64 && $*"
>> 	}
>> 
>> 	test_lazy_prereq ULIMIT "run_with_limited_stack true"
>
> That's a much more direct test. I like it (aside from the $SHELL_PATH
> thing as described above).

Still puzzled.

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH v2] git tag --contains : avoid stack overflow
  2014-04-23 20:48                 ` Junio C Hamano
@ 2014-04-23 20:55                   ` Jeff King
  2014-04-23 21:05                     ` Junio C Hamano
  0 siblings, 1 reply; 31+ messages in thread
From: Jeff King @ 2014-04-23 20:55 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Stepan Kasal, Johannes Schindelin, git, Jean-Jacques Lafay

On Wed, Apr 23, 2014 at 01:48:05PM -0700, Junio C Hamano wrote:

> > I don't think so. The point is that we _must_ use bash here, not any
> > POSIX shell.
> 
> Sorry, but I do not understand.  Isn't what you want "any POSIX
> shell with 'ulimit -s 64' supported"?

Sure, that would be fine, but the original patch which started this
thread claimed that bash was required. I had assumed that to be true,
but it seems like it's not:

>     $ dash -c 'ulimit -s && ulimit -s 64 && ulimit -s'
>     8192
>     64

If we are just using the same shell we are already running, then why
invoke it by name in the first place? IOW, why not:

  run_with_limited_stack () {
	(ulimit -s 64 && "$@")
  }

-Peff

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH v2] git tag --contains : avoid stack overflow
  2014-04-23 20:55                   ` Jeff King
@ 2014-04-23 21:05                     ` Junio C Hamano
  2014-04-24 12:20                       ` Stepan Kasal
  0 siblings, 1 reply; 31+ messages in thread
From: Junio C Hamano @ 2014-04-23 21:05 UTC (permalink / raw)
  To: Jeff King; +Cc: Stepan Kasal, Johannes Schindelin, git, Jean-Jacques Lafay

Jeff King <peff@peff.net> writes:

> On Wed, Apr 23, 2014 at 01:48:05PM -0700, Junio C Hamano wrote:
>
>> > I don't think so. The point is that we _must_ use bash here, not any
>> > POSIX shell.
>> 
>> Sorry, but I do not understand.  Isn't what you want "any POSIX
>> shell with 'ulimit -s 64' supported"?
>
> Sure, that would be fine, but the original patch which started this
> thread claimed that bash was required. I had assumed that to be true,
> but it seems like it's not:
>
>>     $ dash -c 'ulimit -s && ulimit -s 64 && ulimit -s'
>>     8192
>>     64
>
> If we are just using the same shell we are already running, then why
> invoke it by name in the first place? IOW, why not:
>
>   run_with_limited_stack () {
> 	(ulimit -s 64 && "$@")
>   }

That is certainly more preferrable than an explicit "run this piece
with $SHELL_PATH".

I think the choice between "Any bash that is on user's PATH" vs "The
shell the user told us to use when working with Git" is a trade-off
between

 - those who choose a shell that does not support "ulimit -s" to
   work with Git (which is fine, because our scripted Porcelains
   would not have any need for that); for these people, this test
   would be skipped unnecessarily if we insist on SHELL_PATH; and

 - those who run on a box without any bash on their PATH, chose a
   shell that is not bash but does support "ulimit -s" as their
   SHELL_PATH to build Git with; for these people, this test would
   be skipped unnecessarily if we insist on "bash".

and I do not think of a good reason to favor one over the other.

If I have to pick, I'd take your "don't name any shell, and let the
current one run it" approach, solely for the simplicity of the
solution (it ends up favoring the latter class of people as a
side-effect, though).

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH v2] git tag --contains : avoid stack overflow
  2014-04-23 19:17             ` Jeff King
@ 2014-04-23 21:14               ` Johannes Schindelin
  0 siblings, 0 replies; 31+ messages in thread
From: Johannes Schindelin @ 2014-04-23 21:14 UTC (permalink / raw)
  To: Jeff King; +Cc: git, Jean-Jacques Lafay

Hi Peff,

On Wed, 23 Apr 2014, Jeff King wrote:

> On Wed, Apr 23, 2014 at 09:53:25AM +0200, Stepan Kasal wrote:
> 
> > I have found out that "ulimit -s" does not work on Windows.  Adding
> > this as a prerequisite, we will skip the test there.
> 
> I found this bit weird, as the test originated on Windows. Did it never
> actually cause a failure there (i.e., the "ulimit -s" doesn't do
> anything)? Or does "ulimit" fail?

I must have forgotten to test on Windows. For performance reasons (you
know that I only have a Git time budget of about 15min/day), I developed
the test and the patch on Linux.

Ciao,
Johannes

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH v2] git tag --contains : avoid stack overflow
  2014-04-23 21:05                     ` Junio C Hamano
@ 2014-04-24 12:20                       ` Stepan Kasal
  2014-04-24 12:24                         ` [PATCH v3] git tag --contains: " Stepan Kasal
  0 siblings, 1 reply; 31+ messages in thread
From: Stepan Kasal @ 2014-04-24 12:20 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Jeff King, Johannes Schindelin, git, Jean-Jacques Lafay

Thanks for all suggestions and explanations.
The diff against PATCH v2 is below, PATCH v3 follows.

Have a nice day,
	Stepan

Subject: [PATCH] fixup! git tag --contains : avoid stack overflow

---
 t/t7004-tag.sh | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/t/t7004-tag.sh b/t/t7004-tag.sh
index db82f6d..a911df0 100755
--- a/t/t7004-tag.sh
+++ b/t/t7004-tag.sh
@@ -1423,12 +1423,15 @@ EOF
 	test_cmp expect actual
 '
 
-ulimit_stack="ulimit -s 64"
-test_lazy_prereq ULIMIT 'bash -c "'"$ulimit_stack"'"'
+run_with_limited_stack () {
+	(ulimit -s 64 && "$@")
+}
+
+test_lazy_prereq ULIMIT 'run_with_limited_stack true'
 
->expect
 # we require bash and ulimit, this excludes Windows
 test_expect_success ULIMIT '--contains works in a deep repo' '
+	>expect &&
 	i=1 &&
 	while test $i -lt 4000
 	do
@@ -1442,7 +1445,7 @@ EOF"
 	done | git fast-import &&
 	git checkout master &&
 	git tag far-far-away HEAD^ &&
-	bash -c "'"$ulimit_stack"' && git tag --contains HEAD >actual" &&
+	run_with_limited_stack git tag --contains HEAD >actual &&
 	test_cmp expect actual
 '
 
-- 
1.9.0.msysgit.0.119.g722efef

^ permalink raw reply related	[flat|nested] 31+ messages in thread

* Re: [PATCH v3] git tag --contains: avoid stack overflow
  2014-04-24 12:20                       ` Stepan Kasal
@ 2014-04-24 12:24                         ` Stepan Kasal
  2014-04-25  5:54                           ` Jeff King
  2014-09-20 18:18                           ` Andreas Schwab
  0 siblings, 2 replies; 31+ messages in thread
From: Stepan Kasal @ 2014-04-24 12:24 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Jeff King, Johannes Schindelin, git, Jean-Jacques Lafay

From: Jean-Jacques Lafay <jeanjacques.lafay@gmail.com>

In large repos, the recursion implementation of contains(commit,
commit_list) may result in a stack overflow. Replace the recursion with
a loop to fix it.

This problem is more apparent on Windows than on Linux, where the stack
is more limited by default.

See also this thread on the msysGit list:

	https://groups.google.com/d/topic/msysgit/FqT6boJrb2g/discussion

[jes: re-written to imitate the original recursion more closely]

Thomas Braun pointed out several documentation shortcomings.

Tests are run only if ulimit -s is available.  This means they cannot
be run on Windows.

Signed-off-by: Jean-Jacques Lafay <jeanjacques.lafay@gmail.com>
Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
Tested-by: Stepan Kasal <kasal@ucw.cz>
---

Oops, actually there is one more omission, the comments needs to be
fixed:
   -# we require bash and ulimit, this excludes Windows
   +# we require ulimit, this excludes Windows

I forgot to add this to the preceding diff, but the final version
here has this fixed.

 builtin/tag.c  | 90 ++++++++++++++++++++++++++++++++++++++++++++++++----------
 t/t7004-tag.sh | 26 +++++++++++++++++
 2 files changed, 101 insertions(+), 15 deletions(-)

diff --git a/builtin/tag.c b/builtin/tag.c
index 6c7c6bd..f344002 100644
--- a/builtin/tag.c
+++ b/builtin/tag.c
@@ -80,11 +80,19 @@ static int in_commit_list(const struct commit_list *want, struct commit *c)
 	return 0;
 }
 
-static int contains_recurse(struct commit *candidate,
+enum contains_result {
+	CONTAINS_UNKNOWN = -1,
+	CONTAINS_NO = 0,
+	CONTAINS_YES = 1,
+};
+
+/*
+ * Test whether the candidate or one of its parents is contained in the list.
+ * Do not recurse to find out, though, but return -1 if inconclusive.
+ */
+static enum contains_result contains_test(struct commit *candidate,
 			    const struct commit_list *want)
 {
-	struct commit_list *p;
-
 	/* was it previously marked as containing a want commit? */
 	if (candidate->object.flags & TMP_MARK)
 		return 1;
@@ -92,26 +100,78 @@ static int contains_recurse(struct commit *candidate,
 	if (candidate->object.flags & UNINTERESTING)
 		return 0;
 	/* or are we it? */
-	if (in_commit_list(want, candidate))
+	if (in_commit_list(want, candidate)) {
+		candidate->object.flags |= TMP_MARK;
 		return 1;
+	}
 
 	if (parse_commit(candidate) < 0)
 		return 0;
 
-	/* Otherwise recurse and mark ourselves for future traversals. */
-	for (p = candidate->parents; p; p = p->next) {
-		if (contains_recurse(p->item, want)) {
-			candidate->object.flags |= TMP_MARK;
-			return 1;
-		}
-	}
-	candidate->object.flags |= UNINTERESTING;
-	return 0;
+	return -1;
 }
 
-static int contains(struct commit *candidate, const struct commit_list *want)
+/*
+ * Mimicking the real stack, this stack lives on the heap, avoiding stack
+ * overflows.
+ *
+ * At each recursion step, the stack items points to the commits whose
+ * ancestors are to be inspected.
+ */
+struct stack {
+	int nr, alloc;
+	struct stack_entry {
+		struct commit *commit;
+		struct commit_list *parents;
+	} *stack;
+};
+
+static void push_to_stack(struct commit *candidate, struct stack *stack)
+{
+	int index = stack->nr++;
+	ALLOC_GROW(stack->stack, stack->nr, stack->alloc);
+	stack->stack[index].commit = candidate;
+	stack->stack[index].parents = candidate->parents;
+}
+
+static enum contains_result contains(struct commit *candidate,
+		const struct commit_list *want)
 {
-	return contains_recurse(candidate, want);
+	struct stack stack = { 0, 0, NULL };
+	int result = contains_test(candidate, want);
+
+	if (result != CONTAINS_UNKNOWN)
+		return result;
+
+	push_to_stack(candidate, &stack);
+	while (stack.nr) {
+		struct stack_entry *entry = &stack.stack[stack.nr - 1];
+		struct commit *commit = entry->commit;
+		struct commit_list *parents = entry->parents;
+
+		if (!parents) {
+			commit->object.flags |= UNINTERESTING;
+			stack.nr--;
+		}
+		/*
+		 * If we just popped the stack, parents->item has been marked,
+		 * therefore contains_test will return a meaningful 0 or 1.
+		 */
+		else switch (contains_test(parents->item, want)) {
+		case CONTAINS_YES:
+			commit->object.flags |= TMP_MARK;
+			stack.nr--;
+			break;
+		case CONTAINS_NO:
+			entry->parents = parents->next;
+			break;
+		case CONTAINS_UNKNOWN:
+			push_to_stack(parents->item, &stack);
+			break;
+		}
+	}
+	free(stack.stack);
+	return contains_test(candidate, want);
 }
 
 static void show_tag_lines(const unsigned char *sha1, int lines)
diff --git a/t/t7004-tag.sh b/t/t7004-tag.sh
index 143a8ea..a911df0 100755
--- a/t/t7004-tag.sh
+++ b/t/t7004-tag.sh
@@ -1423,4 +1423,30 @@ EOF
 	test_cmp expect actual
 '
 
+run_with_limited_stack () {
+	(ulimit -s 64 && "$@")
+}
+
+test_lazy_prereq ULIMIT 'run_with_limited_stack true'
+
+# we require ulimit, this excludes Windows
+test_expect_success ULIMIT '--contains works in a deep repo' '
+	>expect &&
+	i=1 &&
+	while test $i -lt 4000
+	do
+		echo "commit refs/heads/master
+committer A U Thor <author@example.com> $((1000000000 + $i * 100)) +0200
+data <<EOF
+commit #$i
+EOF"
+		test $i = 1 && echo "from refs/heads/master^0"
+		i=$(($i + 1))
+	done | git fast-import &&
+	git checkout master &&
+	git tag far-far-away HEAD^ &&
+	run_with_limited_stack git tag --contains HEAD >actual &&
+	test_cmp expect actual
+'
+
 test_done
-- 
1.9.0.msysgit.0.119.g722efef

^ permalink raw reply related	[flat|nested] 31+ messages in thread

* Re: [PATCH v3] git tag --contains: avoid stack overflow
  2014-04-24 12:24                         ` [PATCH v3] git tag --contains: " Stepan Kasal
@ 2014-04-25  5:54                           ` Jeff King
  2014-09-20 18:18                           ` Andreas Schwab
  1 sibling, 0 replies; 31+ messages in thread
From: Jeff King @ 2014-04-25  5:54 UTC (permalink / raw)
  To: Stepan Kasal; +Cc: Junio C Hamano, Johannes Schindelin, git, Jean-Jacques Lafay

On Thu, Apr 24, 2014 at 02:24:39PM +0200, Stepan Kasal wrote:

> From: Jean-Jacques Lafay <jeanjacques.lafay@gmail.com>
> 
> In large repos, the recursion implementation of contains(commit,
> commit_list) may result in a stack overflow. Replace the recursion with
> a loop to fix it.
> 
> This problem is more apparent on Windows than on Linux, where the stack
> is more limited by default.
> 
> See also this thread on the msysGit list:
> 
> 	https://groups.google.com/d/topic/msysgit/FqT6boJrb2g/discussion
> 
> [jes: re-written to imitate the original recursion more closely]
> 
> Thomas Braun pointed out several documentation shortcomings.
> 
> Tests are run only if ulimit -s is available.  This means they cannot
> be run on Windows.
> 
> Signed-off-by: Jean-Jacques Lafay <jeanjacques.lafay@gmail.com>
> Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
> Tested-by: Stepan Kasal <kasal@ucw.cz>

Thanks, this version looks good to me.

-Peff

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH v3] git tag --contains: avoid stack overflow
  2014-04-24 12:24                         ` [PATCH v3] git tag --contains: " Stepan Kasal
  2014-04-25  5:54                           ` Jeff King
@ 2014-09-20 18:18                           ` Andreas Schwab
  2014-09-23 16:05                             ` Jeff King
  1 sibling, 1 reply; 31+ messages in thread
From: Andreas Schwab @ 2014-09-20 18:18 UTC (permalink / raw)
  To: Stepan Kasal
  Cc: Junio C Hamano, Jeff King, Johannes Schindelin, git, Jean-Jacques Lafay

Stepan Kasal <kasal@ucw.cz> writes:

> diff --git a/t/t7004-tag.sh b/t/t7004-tag.sh
> index 143a8ea..a911df0 100755
> --- a/t/t7004-tag.sh
> +++ b/t/t7004-tag.sh
> @@ -1423,4 +1423,30 @@ EOF
>  	test_cmp expect actual
>  '
>  
> +run_with_limited_stack () {
> +	(ulimit -s 64 && "$@")
> +}

That is way too small.

https://build.opensuse.org/package/live_build_log/openSUSE:Factory:PowerPC/git/standard/ppc64le

[  492s] not ok 136 - --contains works in a deep repo
[  492s] ok 11 - log using absolute path names
[  492s] #      
[  492s] #              >expect &&
[  492s] #              i=1 &&
[  492s] #              while test $i -lt 4000
[  492s] #              do
[  492s] #                      echo "commit refs/heads/master
[  492s] #      committer A U Thor <author@example.com> $((1000000000 + $i * 100)) +0200
[  492s] #      data <<EOF
[  492s] #      commit #$i
[  492s] #      EOF"
[  492s] #                      test $i = 1 && echo "from refs/heads/master^0"
[  492s] #                      i=$(($i + 1))
[  492s] #              done | git fast-import &&
[  492s] #              git checkout master &&
[  492s] #              git tag far-far-away HEAD^ &&
[  492s] #              run_with_limited_stack git tag --contains HEAD >actual &&
[  492s] #              test_cmp expect actual
[  492s] #      
[  492s] # failed 1 among 136 test(s)
[  492s] 1..136
[  492s] Makefile:44: recipe for target 't7004-tag.sh' failed
[  492s] make[2]: *** [t7004-tag.sh] Error 1

Andreas.

-- 
Andreas Schwab, schwab@linux-m68k.org
GPG Key fingerprint = 58CA 54C7 6D53 942B 1756  01D3 44D5 214B 8276 4ED5
"And now for something completely different."

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH v3] git tag --contains: avoid stack overflow
  2014-09-20 18:18                           ` Andreas Schwab
@ 2014-09-23 16:05                             ` Jeff King
  2014-09-23 21:48                               ` Andreas Schwab
  0 siblings, 1 reply; 31+ messages in thread
From: Jeff King @ 2014-09-23 16:05 UTC (permalink / raw)
  To: Andreas Schwab
  Cc: Stepan Kasal, Junio C Hamano, Johannes Schindelin, git,
	Jean-Jacques Lafay

On Sat, Sep 20, 2014 at 08:18:59PM +0200, Andreas Schwab wrote:

> Stepan Kasal <kasal@ucw.cz> writes:
> 
> > diff --git a/t/t7004-tag.sh b/t/t7004-tag.sh
> > index 143a8ea..a911df0 100755
> > --- a/t/t7004-tag.sh
> > +++ b/t/t7004-tag.sh
> > @@ -1423,4 +1423,30 @@ EOF
> >  	test_cmp expect actual
> >  '
> >  
> > +run_with_limited_stack () {
> > +	(ulimit -s 64 && "$@")
> > +}
> 
> That is way too small.
> 
> https://build.opensuse.org/package/live_build_log/openSUSE:Factory:PowerPC/git/standard/ppc64le

Thanks for the report. I'd be OK with just giving up and dropping this
test as too flaky and system-specific to be worth the trouble.

But if we do want to keep it, does bumping it to 128 (and bumping the
4000 to 8000 in the test below it) work?

-Peff

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH v3] git tag --contains: avoid stack overflow
  2014-09-23 16:05                             ` Jeff King
@ 2014-09-23 21:48                               ` Andreas Schwab
  2014-09-23 22:41                                 ` Junio C Hamano
  0 siblings, 1 reply; 31+ messages in thread
From: Andreas Schwab @ 2014-09-23 21:48 UTC (permalink / raw)
  To: Jeff King
  Cc: Stepan Kasal, Junio C Hamano, Johannes Schindelin, git,
	Jean-Jacques Lafay

Jeff King <peff@peff.net> writes:

> But if we do want to keep it, does bumping it to 128 (and bumping the
> 4000 to 8000 in the test below it) work?

It works for all architectures supported by the openSUSE build service.

https://build.opensuse.org/package/show/home:AndreasSchwab:f/git

Andreas.

-- 
Andreas Schwab, schwab@linux-m68k.org
GPG Key fingerprint = 58CA 54C7 6D53 942B 1756  01D3 44D5 214B 8276 4ED5
"And now for something completely different."

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH v3] git tag --contains: avoid stack overflow
  2014-09-23 21:48                               ` Andreas Schwab
@ 2014-09-23 22:41                                 ` Junio C Hamano
  0 siblings, 0 replies; 31+ messages in thread
From: Junio C Hamano @ 2014-09-23 22:41 UTC (permalink / raw)
  To: Andreas Schwab
  Cc: Jeff King, Stepan Kasal, Johannes Schindelin, git, Jean-Jacques Lafay

Andreas Schwab <schwab@linux-m68k.org> writes:

> Jeff King <peff@peff.net> writes:
>
>> But if we do want to keep it, does bumping it to 128 (and bumping the
>> 4000 to 8000 in the test below it) work?
>
> It works for all architectures supported by the openSUSE build service.
>
> https://build.opensuse.org/package/show/home:AndreasSchwab:f/git
>
> Andreas.

So this on top of cbc60b67 (git tag --contains: avoid stack
overflow, 2014-04-24) and merge it to maint and upwards?

 t/t7004-tag.sh | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/t/t7004-tag.sh b/t/t7004-tag.sh
index e4ab0f5..c564197 100755
--- a/t/t7004-tag.sh
+++ b/t/t7004-tag.sh
@@ -1424,7 +1424,7 @@ EOF
 '
 
 run_with_limited_stack () {
-	(ulimit -s 64 && "$@")
+	(ulimit -s 128 && "$@")
 }
 
 test_lazy_prereq ULIMIT 'run_with_limited_stack true'
@@ -1433,7 +1433,7 @@ test_lazy_prereq ULIMIT 'run_with_limited_stack true'
 test_expect_success ULIMIT '--contains works in a deep repo' '
 	>expect &&
 	i=1 &&
-	while test $i -lt 4000
+	while test $i -lt 8000
 	do
 		echo "commit refs/heads/master
 committer A U Thor <author@example.com> $((1000000000 + $i * 100)) +0200

^ permalink raw reply related	[flat|nested] 31+ messages in thread

* Re: [PATCH] git tag --contains : avoid stack overflow
  2012-11-12 22:27         ` Jean-Jacques Lafay
@ 2012-11-12 23:14           ` Jeff King
  0 siblings, 0 replies; 31+ messages in thread
From: Jeff King @ 2012-11-12 23:14 UTC (permalink / raw)
  To: Jean-Jacques Lafay; +Cc: René Scharfe, msysgit, Git List, Philip Oakley

On Mon, Nov 12, 2012 at 11:27:14PM +0100, Jean-Jacques Lafay wrote:

> 2012/11/11 Jeff King <peff@peff.net>:
> > On Sun, Nov 11, 2012 at 05:46:32PM +0100, René Scharfe wrote:
> >
> > Ultimately, I have some ideas for doing this in a breadth-first way,
> > which would make it more naturally iterative. It would involve having N
> > bits of storage per commit to check N tags, but it would mean that we
> > could get accurate answers in the face of clock skew (like the
> > merge-base calculation, it would merely get slower in the face of skew).
> 
> I guess the optimal algorithm may also depend on the commit graph
> general shape, but intuitively, I'd say that the critical factor is
> the number and distribution of tags. As soon as you have a significant
> number of tags (let's say 1% of the commits are tagged, evenly
> distributed), you'll quickly end up with every commit marked as
> containing or not the target commit, so that each additional tag check
> is cheap.
> 
> This suggests a complexity of O(number of commits) more often then
> not, however you choose to traverse the graph.

We can do much better than O(number of commits), though, if we stop
traversing down a path when its timestamp shows that it is too old to
contain the commits we are searching for. The problem is that the
timestamps cannot always be trusted, because they are generated on
machines with wrong clocks, or by buggy software. This could be solved
by calculating and caching a "generation" number, but last time it was
discussed there was a lot of arguing and nothing got done.

Another approach, used by the merge-base calculation, is to treat
parents in a breadth-first way, but sort them by timestamp, and walk
backwards to find the merge-base (and if you get to a merge-base, you
can stop). In that case, bad timestamps may cause you to look at extra
commits (because you process a commit prematurely and end up going
deeper than the merge base), but it can never give you a wrong answer.

Thinking on it more, though, the merge-base style of computation would
mean you always have to walk backwards to the oldest tag. Which is in
the same order of magnitude as the number of commits, assuming you have
tags near the start of history. So I think we will always want to keep a
cutoff, anyway, and there is not much point in switching off of a
depth-first approach (but of course it does make sense to use iteration
instead of recursion to do so).

> At least on my almost-real-life repo*, with ~140,000 commits and
> ~2,000 tags, tag --contains takes a few seconds, of course more than
> the worst-case test on a 40,000 commit / 1 tag repo, but still in the
> same order of magnitude.

Try "git rev-list --count --all" to get a sense of how long O(# of
commits) takes. Before the depth-first implementation, "tag --contains"
was O(commits * tags) in the worst case. With depth first, it's
O(commits), and then with the timestamp cutoff, it's really O(commits
since needle), where "needle" is the oldest thing you're looking for.
Here are those numbers on linux-2.6 (with linux-stable tags):

  [to get a sense of the repo size]
  $ git for-each-ref refs/tags | wc -l
  909
  $ git rev-list --count --all
  363413

  [this is O(commits)]
  $ time git rev-list --count --all
  real    0m4.034s
  user    0m3.960s
  sys     0m0.056s

  [before depth-first, ffc4b80^; you can see that it is much worse than
   O(commits), though not as bad as the worst case (because finding the
   merge bases for recent tags is not O(commits)]
  $ time git tag --contains HEAD~200
  real    0m42.838s
  user    0m42.527s
  sys     0m0.156s

  [after depth-first, ffc4b80; you can see that this is O(commits),
   because we will go depth-first down to the roots, but do only a
   single traversal]
  $ time git tag --contains HEAD~200
  real    0m3.939s
  user    0m3.784s
  sys     0m0.140s

  [with my generation patches; much faster]
  $ time git tag --contains HEAD~200
  real    0m0.037s
  user    0m0.020s
  sys     0m0.012s

I was thinking we had merged the timestamp cutoff (which has the same
performance characteristics as generations, just with the skew issue) to
master, but it looks like we didn't.

-Peff

-- 
*** Please reply-to-all at all times ***
*** (do not pretend to know who is subscribed and who is not) ***
*** Please avoid top-posting. ***
The msysGit Wiki is here: https://github.com/msysgit/msysgit/wiki - Github accounts are free.

You received this message because you are subscribed to the Google
Groups "msysGit" group.
To post to this group, send email to msysgit@googlegroups.com
To unsubscribe from this group, send email to
msysgit+unsubscribe@googlegroups.com
For more options, and view previous threads, visit this group at
http://groups.google.com/group/msysgit?hl=en_US?hl=en

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH] git tag --contains : avoid stack overflow
  2012-11-11 16:54       ` Jeff King
@ 2012-11-12 22:27         ` Jean-Jacques Lafay
  2012-11-12 23:14           ` Jeff King
  0 siblings, 1 reply; 31+ messages in thread
From: Jean-Jacques Lafay @ 2012-11-12 22:27 UTC (permalink / raw)
  To: Jeff King; +Cc: René Scharfe, msysgit, Git List, Philip Oakley

2012/11/11 Jeff King <peff@peff.net>:
> On Sun, Nov 11, 2012 at 05:46:32PM +0100, René Scharfe wrote:
>
> Ultimately, I have some ideas for doing this in a breadth-first way,
> which would make it more naturally iterative. It would involve having N
> bits of storage per commit to check N tags, but it would mean that we
> could get accurate answers in the face of clock skew (like the
> merge-base calculation, it would merely get slower in the face of skew).

I guess the optimal algorithm may also depend on the commit graph
general shape, but intuitively, I'd say that the critical factor is
the number and distribution of tags. As soon as you have a significant
number of tags (let's say 1% of the commits are tagged, evenly
distributed), you'll quickly end up with every commit marked as
containing or not the target commit, so that each additional tag check
is cheap.

This suggests a complexity of O(number of commits) more often then
not, however you choose to traverse the graph.

At least on my almost-real-life repo*, with ~140,000 commits and
~2,000 tags, tag --contains takes a few seconds, of course more than
the worst-case test on a 40,000 commit / 1 tag repo, but still in the
same order of magnitude.

* : meaning we're in the process of migrating from clearcase (deep
sighs allowed !), and for now I kept almost everything in a single git
repo, which may not be the eventual choice

Jean-Jacques.

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH] git tag --contains : avoid stack overflow
  2012-11-11 16:46     ` René Scharfe
@ 2012-11-11 16:54       ` Jeff King
  2012-11-12 22:27         ` Jean-Jacques Lafay
  0 siblings, 1 reply; 31+ messages in thread
From: Jeff King @ 2012-11-11 16:54 UTC (permalink / raw)
  To: René Scharfe; +Cc: Jean-Jacques Lafay, msysgit, Git List, Philip Oakley

On Sun, Nov 11, 2012 at 05:46:32PM +0100, René Scharfe wrote:

> >However, I couldn't reproduce it on Linux : where the windows
> >implementations crashes at a ~32000 depth (*not* exactly 32768, mind
> >you), on linux it happily went through 100000 commits. I didn't take
> >time to look much further, but maybe on my 64 bit Linux VM, the process
> >can afford to reserve a much bigger address range for the stack of each
> >thread than the 1Mb given to 32 bit processes on windows.
> >Jean-Jacques.
> 
> I can reproduce it on Linux (Debian testing amd64) with ulimit -s
> 1000 to reduce the stack size from its default value of 8MB.
> 
> After reverting ffc4b8012d9a4f92ef238ff72c0d15e9e1b402ed (tag: speed
> up --contains calculation) the test passes even with the smaller
> stack, but it makes "git tag --contains" take thrice the time as
> before.

Right, I am not too surprised.  That function replaced the original
algorithm with a much faster depth-first recursive one. I haven't looked
closely yet at Jean-Jacques' iterative adaptation, but that direction
seems like a good fix for now.

Ultimately, I have some ideas for doing this in a breadth-first way,
which would make it more naturally iterative. It would involve having N
bits of storage per commit to check N tags, but it would mean that we
could get accurate answers in the face of clock skew (like the
merge-base calculation, it would merely get slower in the face of skew).

But since I haven't worked on that at all, fixing the depth-first
algorithm to be iterative makes sense to me.

-Peff

-- 
*** Please reply-to-all at all times ***
*** (do not pretend to know who is subscribed and who is not) ***
*** Please avoid top-posting. ***
The msysGit Wiki is here: https://github.com/msysgit/msysgit/wiki - Github accounts are free.

You received this message because you are subscribed to the Google
Groups "msysGit" group.
To post to this group, send email to msysgit@googlegroups.com
To unsubscribe from this group, send email to
msysgit+unsubscribe@googlegroups.com
For more options, and view previous threads, visit this group at
http://groups.google.com/group/msysgit?hl=en_US?hl=en

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH] git tag --contains : avoid stack overflow
  2012-11-10 21:13   ` Jean-Jacques Lafay
  2012-11-10 21:33     ` Pat Thoyts
@ 2012-11-11 16:46     ` René Scharfe
  2012-11-11 16:54       ` Jeff King
  1 sibling, 1 reply; 31+ messages in thread
From: René Scharfe @ 2012-11-11 16:46 UTC (permalink / raw)
  To: Jean-Jacques Lafay; +Cc: msysgit, Git List, Philip Oakley, Jeff King

Am 10.11.2012 22:13, schrieb Jean-Jacques Lafay:
> Le samedi 10 novembre 2012 21:00:10 UTC+1, Philip Oakley a écrit :
>
>     From: "Jean-Jacques Lafay" <jeanjacq...@gmail.com <javascript:>>
>     Sent: Saturday,
>     November 10, 2012 5:36 PM
>      > In large repos, the recursion implementation of contains(commit,
>      > commit_list)
>      > may result in a stack overflow. Replace the recursion with a loop to
>      > fix it.
>      >
>      > Signed-off-by: Jean-Jacques Lafay <jeanjacq...@gmail.com
>     <javascript:>>
>
>     This is a change to the main git code so it is better to submit it to
>     the main git list at g...@vger.kernel.org <javascript:> (plain text,
>     no HTML, first
>     post usually has a delay ;-)
>
>     It sounds reasonable but others may have opinions and comments.
>
>     Have you actually suffered from the stack overflow issue? You only
>     suggest it as a possibility, rather than a real problem.
>
>     Philip
>
>
> Yes, it actually crashed on me, and since the call is made by
> GitExtension while browsing the commit history, it was quickly annoying
> to have windows popping a "git.exe stopped working" message box at my
> face every time I clicked on a line of the history ;-)  (well, you can
> sort of work around it by having the "file tree" or "diff" tab active)
>
> Coincidentally, as I was having a glance at the recent topics, I saw
> that someone else experienced it.
>
> However, I couldn't reproduce it on Linux : where the windows
> implementations crashes at a ~32000 depth (*not* exactly 32768, mind
> you), on linux it happily went through 100000 commits. I didn't take
> time to look much further, but maybe on my 64 bit Linux VM, the process
> can afford to reserve a much bigger address range for the stack of each
> thread than the 1Mb given to 32 bit processes on windows.
> Jean-Jacques.

I can reproduce it on Linux (Debian testing amd64) with ulimit -s 1000 
to reduce the stack size from its default value of 8MB.

After reverting ffc4b8012d9a4f92ef238ff72c0d15e9e1b402ed (tag: speed up 
--contains calculation) the test passes even with the smaller stack, but 
it makes "git tag --contains" take thrice the time as before.

René

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH] git tag --contains : avoid stack overflow
  2012-11-10 21:13   ` Jean-Jacques Lafay
@ 2012-11-10 21:33     ` Pat Thoyts
  2012-11-11 16:46     ` René Scharfe
  1 sibling, 0 replies; 31+ messages in thread
From: Pat Thoyts @ 2012-11-10 21:33 UTC (permalink / raw)
  To: Jean-Jacques Lafay; +Cc: msysgit, Git List, Philip Oakley

On 10 November 2012 21:13, Jean-Jacques Lafay
<jeanjacques.lafay@gmail.com> wrote:
> Le samedi 10 novembre 2012 21:00:10 UTC+1, Philip Oakley a écrit :
>>
>> From: "Jean-Jacques Lafay" <jeanjacq...@gmail.com> Sent: Saturday,
>> November 10, 2012 5:36 PM
>> > In large repos, the recursion implementation of contains(commit,
>> > commit_list)
>> > may result in a stack overflow. Replace the recursion with a loop to
>> > fix it.
>> >
>> > Signed-off-by: Jean-Jacques Lafay <jeanjacq...@gmail.com>
>>
>> This is a change to the main git code so it is better to submit it to
>> the main git list at g...@vger.kernel.org (plain text, no HTML, first
>> post usually has a delay ;-)
>>
>> It sounds reasonable but others may have opinions and comments.
>>
>> Have you actually suffered from the stack overflow issue? You only
>> suggest it as a possibility, rather than a real problem.
>>
>> Philip
>
>
> Yes, it actually crashed on me, and since the call is made by GitExtension
> while browsing the commit history, it was quickly annoying to have windows
> popping a "git.exe stopped working" message box at my face every time I
> clicked on a line of the history ;-)  (well, you can sort of work around it
> by having the "file tree" or "diff" tab active)
>
> Coincidentally, as I was having a glance at the recent topics, I saw that
> someone else experienced it.
>
> However, I couldn't reproduce it on Linux : where the windows
> implementations crashes at a ~32000 depth (*not* exactly 32768, mind you),
> on linux it happily went through 100000 commits. I didn't take time to look
> much further, but maybe on my 64 bit Linux VM, the process can afford to
> reserve a much bigger address range for the stack of each thread than the
> 1Mb given to 32 bit processes on windows.
>
> Jean-Jacques.

I checked this on msysGit - the test causes a crash on Windows when
using the 1.8.0 release and recompiling with this patch applied fixes
this. As mentioned, its best to have this reviewed upstream too as its
likely that windows just has a smaller stack so causes the crash
earlier.

-- 
*** Please reply-to-all at all times ***
*** (do not pretend to know who is subscribed and who is not) ***
*** Please avoid top-posting. ***
The msysGit Wiki is here: https://github.com/msysgit/msysgit/wiki - Github accounts are free.

You received this message because you are subscribed to the Google
Groups "msysGit" group.
To post to this group, send email to msysgit@googlegroups.com
To unsubscribe from this group, send email to
msysgit+unsubscribe@googlegroups.com
For more options, and view previous threads, visit this group at
http://groups.google.com/group/msysgit?hl=en_US?hl=en

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH] git tag --contains : avoid stack overflow
  2012-11-10 20:00 ` [PATCH] " Philip Oakley
@ 2012-11-10 21:13   ` Jean-Jacques Lafay
  2012-11-10 21:33     ` Pat Thoyts
  2012-11-11 16:46     ` René Scharfe
  0 siblings, 2 replies; 31+ messages in thread
From: Jean-Jacques Lafay @ 2012-11-10 21:13 UTC (permalink / raw)
  To: msysgit; +Cc: Jean-Jacques Lafay, Git List, Philip Oakley

[-- Attachment #1: Type: text/plain, Size: 2385 bytes --]

Le samedi 10 novembre 2012 21:00:10 UTC+1, Philip Oakley a écrit :

> From: "Jean-Jacques Lafay" <jeanjacq...@gmail.com <javascript:>> Sent: 
> Saturday, 
> November 10, 2012 5:36 PM 
> > In large repos, the recursion implementation of contains(commit, 
> > commit_list) 
> > may result in a stack overflow. Replace the recursion with a loop to 
> > fix it. 
> > 
> > Signed-off-by: Jean-Jacques Lafay <jeanjacq...@gmail.com <javascript:>> 
>
> This is a change to the main git code so it is better to submit it to 
> the main git list at g...@vger.kernel.org <javascript:> (plain text, no 
> HTML, first 
> post usually has a delay ;-) 
>
> It sounds reasonable but others may have opinions and comments. 
>
> Have you actually suffered from the stack overflow issue? You only 
> suggest it as a possibility, rather than a real problem. 
>
> Philip 
>

Yes, it actually crashed on me, and since the call is made by GitExtension 
while browsing the commit history, it was quickly annoying to have windows 
popping a "git.exe stopped working" message box at my face every time I 
clicked on a line of the history ;-)  (well, you can sort of work around it 
by having the "file tree" or "diff" tab active)

Coincidentally, as I was having a glance at the recent topics, I saw that 
someone else experienced it.

However, I couldn't reproduce it on Linux : where the windows 
implementations crashes at a ~32000 depth (*not* exactly 32768, mind you), 
on linux it happily went through 100000 commits. I didn't take time to look 
much further, but maybe on my 64 bit Linux VM, the process can afford to 
reserve a much bigger address range for the stack of each thread than the 
1Mb given to 32 bit processes on windows.
 
Jean-Jacques.

-- 
*** Please reply-to-all at all times ***
*** (do not pretend to know who is subscribed and who is not) ***
*** Please avoid top-posting. ***
The msysGit Wiki is here: https://github.com/msysgit/msysgit/wiki - Github accounts are free.

You received this message because you are subscribed to the Google
Groups "msysGit" group.
To post to this group, send email to msysgit@googlegroups.com
To unsubscribe from this group, send email to
msysgit+unsubscribe@googlegroups.com
For more options, and view previous threads, visit this group at
http://groups.google.com/group/msysgit?hl=en_US?hl=en

[-- Attachment #2: Type: text/html, Size: 3089 bytes --]

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH] git tag --contains : avoid stack overflow
       [not found] <1352568970-4669-1-git-send-email-jeanjacques.lafay@gmail.com>
@ 2012-11-10 20:00 ` Philip Oakley
  2012-11-10 21:13   ` Jean-Jacques Lafay
  0 siblings, 1 reply; 31+ messages in thread
From: Philip Oakley @ 2012-11-10 20:00 UTC (permalink / raw)
  To: Jean-Jacques Lafay, msysgit; +Cc: Git List

From: "Jean-Jacques Lafay" <jeanjacques.lafay@gmail.com> Sent: Saturday, 
November 10, 2012 5:36 PM
> In large repos, the recursion implementation of contains(commit, 
> commit_list)
> may result in a stack overflow. Replace the recursion with a loop to 
> fix it.
>
> Signed-off-by: Jean-Jacques Lafay <jeanjacques.lafay@gmail.com>

This is a change to the main git code so it is better to submit it to 
the main git list at git@vger.kernel.org (plain text, no HTML, first 
post usually has a delay ;-)

It sounds reasonable but others may have opinions and comments.

Have you actually suffered from the stack overflow issue? You only 
suggest it as a possibility, rather than a real problem.

Philip

> ---
> builtin/tag.c  | 83 
> ++++++++++++++++++++++++++++++++++++++++------------------
> t/t7004-tag.sh | 21 +++++++++++++++
> 2 files changed, 78 insertions(+), 26 deletions(-)
>
> diff --git a/builtin/tag.c b/builtin/tag.c
> index 9c3e067..4be67dd 100644
> --- a/builtin/tag.c
> +++ b/builtin/tag.c
> @@ -73,40 +73,71 @@ static int in_commit_list(const struct commit_list 
> *want, struct commit *c)
>  return 0;
> }
>
> -static int contains_recurse(struct commit *candidate,
> -     const struct commit_list *want)
> -{
> - struct commit_list *p;
> -
> - /* was it previously marked as containing a want commit? */
> - if (candidate->object.flags & TMP_MARK)
> - return 1;
> - /* or marked as not possibly containing a want commit? */
> - if (candidate->object.flags & UNINTERESTING)
> - return 0;
> - /* or are we it? */
> - if (in_commit_list(want, candidate))
> - return 1;
> +struct commit_list_list {
> + struct commit *item;
> + struct commit_list *next_item;
> + struct commit_list_list *next;
> +};
>
> - if (parse_commit(candidate) < 0)
> - return 0;
> +static void mark_path(struct commit_list_list *path, int status)
> +{
> + struct commit_list_list *temp;
> + while (path) {
> + path->item->object.flags |= status;
> + temp = path;
> + path = temp->next;
> + free(temp);
> + }
> +}
>
> - /* Otherwise recurse and mark ourselves for future traversals. */
> - for (p = candidate->parents; p; p = p->next) {
> - if (contains_recurse(p->item, want)) {
> - candidate->object.flags |= TMP_MARK;
> +static int contains(struct commit *candidate,
> +     const struct commit_list *want)
> +{
> + /* previously implemented with a recursion, but could stack overflow 
> in large repos */
> + struct commit_list_list *p, *tmp;
> + p = xmalloc(sizeof(struct commit_list_list));
> + p->item = candidate;
> + p->next_item = NULL;
> + p->next = NULL;
> +
> + while (p) {
> + candidate = p->item;
> +
> + /* is it ok : */
> + /* was it previously marked as containing a want commit? */
> + /* or are we it? */
> + if (candidate->object.flags & TMP_MARK || in_commit_list(want, 
> candidate)) {
> + mark_path(p, TMP_MARK);
>  return 1;
>  }
> + /* is it not ok : */
> + /* was it previously marked as not possibly containing a want 
> commit? */
> + /* do we have parents? */
> + if (candidate->object.flags & UNINTERESTING || 
> parse_commit(candidate) < 0 || !candidate->parents) {
> + candidate->object.flags |= UNINTERESTING;
> + /* then backtrack, marking as UNINTERESTING along the way */
> + while (p && !p->next_item) {
> + p->item->object.flags |= UNINTERESTING;
> + tmp = p;
> + p = tmp->next;
> + free(tmp);
> + }
> + if (p) {
> + p->item = p->next_item->item;
> + p->next_item = p->next_item->next;
> + }
> + } else {
> + /* let's check the parents */
> + tmp = xmalloc(sizeof(struct commit_list_list));
> + tmp->item = candidate->parents->item;
> + tmp->next_item = candidate->parents->next;
> + tmp->next = p;
> + p = tmp;
> + }
>  }
> - candidate->object.flags |= UNINTERESTING;
>  return 0;
> }
>
> -static int contains(struct commit *candidate, const struct 
> commit_list *want)
> -{
> - return contains_recurse(candidate, want);
> -}
> -
> static void show_tag_lines(const unsigned char *sha1, int lines)
> {
>  int i;
> diff --git a/t/t7004-tag.sh b/t/t7004-tag.sh
> index 5189446..196ac54 100755
> --- a/t/t7004-tag.sh
> +++ b/t/t7004-tag.sh
> @@ -1365,4 +1365,25 @@ test_expect_success 'multiple --points-at are 
> OR-ed together' '
>  test_cmp expect actual
> '
>
> +# what about a deep repo ?
> +
> +>expect
> +test_expect_success '--contains works in a deep repo' '
> + i=1
> + while test $i -lt 40000
> + do
> + echo "commit refs/heads/master
> +committer A U Thor <author@example.com> $((1000000000 + $i * 100)) 
> +0200
> +data <<EOF
> +commit #$i
> +EOF"
> + test $i == 1 && echo "from refs/heads/master^0"
> + i=$(($i + 1))
> + done | git fast-import
> + git checkout master
> + git tag far-far-away HEAD^
> + git tag --contains HEAD >actual &&
> + test_cmp expect actual
> +'
> +
> test_done
> -- 
> 1.8.0.msysgit.0.1.geed93bd.dirty
>
> -- 

-- 
*** Please reply-to-all at all times ***
*** (do not pretend to know who is subscribed and who is not) ***
*** Please avoid top-posting. ***
The msysGit Wiki is here: https://github.com/msysgit/msysgit/wiki - Github accounts are free.

You received this message because you are subscribed to the Google
Groups "msysGit" group.
To post to this group, send email to msysgit@googlegroups.com
To unsubscribe from this group, send email to
msysgit+unsubscribe@googlegroups.com
For more options, and view previous threads, visit this group at
http://groups.google.com/group/msysgit?hl=en_US?hl=en

^ permalink raw reply	[flat|nested] 31+ messages in thread

end of thread, other threads:[~2014-09-23 22:41 UTC | newest]

Thread overview: 31+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-04-16 14:15 [PATCH] git tag --contains : avoid stack overflow Stepan Kasal
2014-04-16 15:46 ` Jeff King
2014-04-17 17:31   ` Johannes Schindelin
2014-04-17 21:32     ` Jeff King
2014-04-17 21:52       ` Johannes Schindelin
2014-04-17 21:58         ` Jeff King
2014-04-23  7:53           ` [PATCH v2] " Stepan Kasal
2014-04-23 14:28             ` Johannes Schindelin
2014-04-23 15:45               ` Stepan Kasal
2014-04-23 19:12             ` Junio C Hamano
2014-04-23 19:16               ` Jeff King
2014-04-23 20:48                 ` Junio C Hamano
2014-04-23 20:55                   ` Jeff King
2014-04-23 21:05                     ` Junio C Hamano
2014-04-24 12:20                       ` Stepan Kasal
2014-04-24 12:24                         ` [PATCH v3] git tag --contains: " Stepan Kasal
2014-04-25  5:54                           ` Jeff King
2014-09-20 18:18                           ` Andreas Schwab
2014-09-23 16:05                             ` Jeff King
2014-09-23 21:48                               ` Andreas Schwab
2014-09-23 22:41                                 ` Junio C Hamano
2014-04-23 19:59               ` [PATCH v2] git tag --contains : " Stepan Kasal
2014-04-23 19:17             ` Jeff King
2014-04-23 21:14               ` Johannes Schindelin
     [not found] <1352568970-4669-1-git-send-email-jeanjacques.lafay@gmail.com>
2012-11-10 20:00 ` [PATCH] " Philip Oakley
2012-11-10 21:13   ` Jean-Jacques Lafay
2012-11-10 21:33     ` Pat Thoyts
2012-11-11 16:46     ` René Scharfe
2012-11-11 16:54       ` Jeff King
2012-11-12 22:27         ` Jean-Jacques Lafay
2012-11-12 23:14           ` Jeff King

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).