All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v8 1/4] cache-tree: Create/update cache-tree on checkout
@ 2014-07-12  4:44 David Turner
  2014-07-12  4:44 ` [PATCH v8 2/4] test-dump-cache-tree: invalid trees are not errors David Turner
                   ` (3 more replies)
  0 siblings, 4 replies; 28+ messages in thread
From: David Turner @ 2014-07-12  4:44 UTC (permalink / raw)
  To: git; +Cc: David Turner

When git checkout checks out a branch, create or update the
cache-tree so that subsequent operations are faster.

update_main_cache_tree learned a new flag, WRITE_TREE_REPAIR.  When
WRITE_TREE_REPAIR is set, portions of the cache-tree which do not
correspond to existing tree objects are invalidated (and portions which
do are marked as valid).  No new tree objects are created.

Signed-off-by: David Turner <dturner@twitter.com>
---
 builtin/checkout.c    |  8 ++++++++
 cache-tree.c          | 12 +++++++++++-
 cache-tree.h          |  1 +
 t/t0090-cache-tree.sh | 19 ++++++++++++++++---
 4 files changed, 36 insertions(+), 4 deletions(-)

diff --git a/builtin/checkout.c b/builtin/checkout.c
index 07cf555..054214f 100644
--- a/builtin/checkout.c
+++ b/builtin/checkout.c
@@ -553,6 +553,14 @@ static int merge_working_tree(const struct checkout_opts *opts,
 		}
 	}
 
+	if (!active_cache_tree)
+		active_cache_tree = cache_tree();
+
+	if (!cache_tree_fully_valid(active_cache_tree))
+		cache_tree_update(active_cache_tree,
+				  (const struct cache_entry * const *)active_cache,
+				  active_nr, WRITE_TREE_SILENT | WRITE_TREE_REPAIR);
+
 	if (write_cache(newfd, active_cache, active_nr) ||
 	    commit_locked_index(lock_file))
 		die(_("unable to write new index file"));
diff --git a/cache-tree.c b/cache-tree.c
index 7fa524a..f951d7d 100644
--- a/cache-tree.c
+++ b/cache-tree.c
@@ -239,9 +239,12 @@ static int update_one(struct cache_tree *it,
 	struct strbuf buffer;
 	int missing_ok = flags & WRITE_TREE_MISSING_OK;
 	int dryrun = flags & WRITE_TREE_DRY_RUN;
+	int repair = flags & WRITE_TREE_REPAIR;
 	int to_invalidate = 0;
 	int i;
 
+	assert(!(dryrun && repair));
+
 	*skip_count = 0;
 
 	if (0 <= it->entry_count && has_sha1_file(it->sha1))
@@ -374,7 +377,14 @@ static int update_one(struct cache_tree *it,
 #endif
 	}
 
-	if (dryrun)
+	if (repair) {
+		unsigned char sha1[20];
+		hash_sha1_file(buffer.buf, buffer.len, tree_type, sha1);
+		if (has_sha1_file(sha1))
+			hashcpy(it->sha1, sha1);
+		else
+			to_invalidate = 1;
+	} else if (dryrun)
 		hash_sha1_file(buffer.buf, buffer.len, tree_type, it->sha1);
 	else if (write_sha1_file(buffer.buf, buffer.len, tree_type, it->sha1)) {
 		strbuf_release(&buffer);
diff --git a/cache-tree.h b/cache-tree.h
index f1923ad..666d18f 100644
--- a/cache-tree.h
+++ b/cache-tree.h
@@ -39,6 +39,7 @@ int update_main_cache_tree(int);
 #define WRITE_TREE_IGNORE_CACHE_TREE 2
 #define WRITE_TREE_DRY_RUN 4
 #define WRITE_TREE_SILENT 8
+#define WRITE_TREE_REPAIR 16
 
 /* error return codes */
 #define WRITE_TREE_UNREADABLE_INDEX (-1)
diff --git a/t/t0090-cache-tree.sh b/t/t0090-cache-tree.sh
index 6c33e28..98fb1ab 100755
--- a/t/t0090-cache-tree.sh
+++ b/t/t0090-cache-tree.sh
@@ -44,14 +44,14 @@ test_expect_success 'read-tree HEAD establishes cache-tree' '
 
 test_expect_success 'git-add invalidates cache-tree' '
 	test_when_finished "git reset --hard; git read-tree HEAD" &&
-	echo "I changed this file" > foo &&
+	echo "I changed this file" >foo &&
 	git add foo &&
 	test_invalid_cache_tree
 '
 
 test_expect_success 'update-index invalidates cache-tree' '
 	test_when_finished "git reset --hard; git read-tree HEAD" &&
-	echo "I changed this file" > foo &&
+	echo "I changed this file" >foo &&
 	git update-index --add foo &&
 	test_invalid_cache_tree
 '
@@ -85,9 +85,22 @@ test_expect_success 'reset --hard without index gives cache-tree' '
 	test_shallow_cache_tree
 '
 
-test_expect_failure 'checkout gives cache-tree' '
+test_expect_success 'checkout gives cache-tree' '
+	git tag current &&
 	git checkout HEAD^ &&
 	test_shallow_cache_tree
 '
 
+test_expect_success 'checkout -b gives cache-tree' '
+	git checkout current &&
+	git checkout -b prev HEAD^ &&
+	test_shallow_cache_tree
+'
+
+test_expect_success 'checkout -B gives cache-tree' '
+	git checkout current &&
+	git checkout -B prev HEAD^ &&
+	test_shallow_cache_tree
+'
+
 test_done
-- 
2.0.0.390.gcb682f8

^ permalink raw reply related	[flat|nested] 28+ messages in thread

* [PATCH v8 2/4] test-dump-cache-tree: invalid trees are not errors
  2014-07-12  4:44 [PATCH v8 1/4] cache-tree: Create/update cache-tree on checkout David Turner
@ 2014-07-12  4:44 ` David Turner
  2014-07-12  4:44 ` [PATCH v8 3/4] cache-tree: subdirectory tests David Turner
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 28+ messages in thread
From: David Turner @ 2014-07-12  4:44 UTC (permalink / raw)
  To: git; +Cc: David Turner

Do not treat known-invalid trees as errors even when their subtree_nr is
incorrect.  Because git already knows that these trees are invalid,
an incorrect subtree_nr will not cause problems.

Add a couple of comments.

Signed-off-by: David Turner <dturner@twitter.com>
---
 test-dump-cache-tree.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/test-dump-cache-tree.c b/test-dump-cache-tree.c
index 47eab97..cbbbd8e 100644
--- a/test-dump-cache-tree.c
+++ b/test-dump-cache-tree.c
@@ -26,16 +26,16 @@ static int dump_cache_tree(struct cache_tree *it,
 		return 0;
 
 	if (it->entry_count < 0) {
+		/* invalid */
 		dump_one(it, pfx, "");
 		dump_one(ref, pfx, "#(ref) ");
-		if (it->subtree_nr != ref->subtree_nr)
-			errs = 1;
 	}
 	else {
 		dump_one(it, pfx, "");
 		if (hashcmp(it->sha1, ref->sha1) ||
 		    ref->entry_count != it->entry_count ||
 		    ref->subtree_nr != it->subtree_nr) {
+			/* claims to be valid but is lying */
 			dump_one(ref, pfx, "#(ref) ");
 			errs = 1;
 		}
-- 
2.0.0.390.gcb682f8

^ permalink raw reply related	[flat|nested] 28+ messages in thread

* [PATCH v8 3/4] cache-tree: subdirectory tests
  2014-07-12  4:44 [PATCH v8 1/4] cache-tree: Create/update cache-tree on checkout David Turner
  2014-07-12  4:44 ` [PATCH v8 2/4] test-dump-cache-tree: invalid trees are not errors David Turner
@ 2014-07-12  4:44 ` David Turner
  2014-07-12  4:44 ` [PATCH v8 4/4] cache-tree: Write updated cache-tree after commit David Turner
  2014-08-31 12:07 ` [PATCH v8 1/4] cache-tree: Create/update cache-tree on checkou John Keeping
  3 siblings, 0 replies; 28+ messages in thread
From: David Turner @ 2014-07-12  4:44 UTC (permalink / raw)
  To: git; +Cc: David Turner

Add tests to confirm that invalidation of subdirectories neither over-
nor under-invalidates.

Signed-off-by: David Turner <dturner@twitter.com>
---
 t/t0090-cache-tree.sh | 26 +++++++++++++++++++++++---
 1 file changed, 23 insertions(+), 3 deletions(-)

diff --git a/t/t0090-cache-tree.sh b/t/t0090-cache-tree.sh
index 98fb1ab..3a3342e 100755
--- a/t/t0090-cache-tree.sh
+++ b/t/t0090-cache-tree.sh
@@ -22,9 +22,10 @@ test_shallow_cache_tree () {
 }
 
 test_invalid_cache_tree () {
-	echo "invalid                                   (0 subtrees)" >expect &&
-	printf "SHA #(ref)  (%d entries, 0 subtrees)\n" $(git ls-files|wc -l) >>expect &&
-	cmp_cache_tree expect
+	printf "invalid                                  %s ()\n" "" "$@" >expect &&
+	test-dump-cache-tree | \
+	sed -n -e "s/[0-9]* subtrees//" -e '/#(ref)/d' -e '/^invalid /p' >actual &&
+	test_cmp expect actual
 }
 
 test_no_cache_tree () {
@@ -49,6 +50,25 @@ test_expect_success 'git-add invalidates cache-tree' '
 	test_invalid_cache_tree
 '
 
+test_expect_success 'git-add in subdir invalidates cache-tree' '
+	test_when_finished "git reset --hard; git read-tree HEAD" &&
+	mkdir dirx &&
+	echo "I changed this file" >dirx/foo &&
+	git add dirx/foo &&
+	test_invalid_cache_tree
+'
+
+test_expect_success 'git-add in subdir does not invalidate sibling cache-tree' '
+	git tag no-children &&
+	test_when_finished "git reset --hard no-children; git read-tree HEAD" &&
+	mkdir dir1 dir2 &&
+	test_commit dir1/a &&
+	test_commit dir2/b &&
+	echo "I changed this file" >dir1/a &&
+	git add dir1/a &&
+	test_invalid_cache_tree dir1/
+'
+
 test_expect_success 'update-index invalidates cache-tree' '
 	test_when_finished "git reset --hard; git read-tree HEAD" &&
 	echo "I changed this file" >foo &&
-- 
2.0.0.390.gcb682f8

^ permalink raw reply related	[flat|nested] 28+ messages in thread

* [PATCH v8 4/4] cache-tree: Write updated cache-tree after commit
  2014-07-12  4:44 [PATCH v8 1/4] cache-tree: Create/update cache-tree on checkout David Turner
  2014-07-12  4:44 ` [PATCH v8 2/4] test-dump-cache-tree: invalid trees are not errors David Turner
  2014-07-12  4:44 ` [PATCH v8 3/4] cache-tree: subdirectory tests David Turner
@ 2014-07-12  4:44 ` David Turner
  2014-07-13  5:09   ` Duy Nguyen
  2014-08-31 12:07 ` [PATCH v8 1/4] cache-tree: Create/update cache-tree on checkou John Keeping
  3 siblings, 1 reply; 28+ messages in thread
From: David Turner @ 2014-07-12  4:44 UTC (permalink / raw)
  To: git; +Cc: David Turner

During the commit process, update the cache-tree. Write this updated
cache-tree so that it's ready for subsequent commands.

Add test code which demonstrates that git commit now writes the cache
tree.  Make all tests test the entire cache-tree, not just the root
level.

Signed-off-by: David Turner <dturner@twitter.com>
---
 builtin/commit.c      |  16 ++++++-
 t/t0090-cache-tree.sh | 117 +++++++++++++++++++++++++++++++++++++++++++-------
 2 files changed, 117 insertions(+), 16 deletions(-)

diff --git a/builtin/commit.c b/builtin/commit.c
index 9cfef6c..d6681c4 100644
--- a/builtin/commit.c
+++ b/builtin/commit.c
@@ -342,6 +342,15 @@ static char *prepare_index(int argc, const char **argv, const char *prefix,
 
 		discard_cache();
 		read_cache_from(index_lock.filename);
+		if (update_main_cache_tree(WRITE_TREE_SILENT) == 0) {
+			fd = open(index_lock.filename, O_WRONLY);
+			if (fd >= 0)
+				if (write_cache(fd, active_cache, active_nr) == 0) {
+					close_lock_file(&index_lock);
+			}
+		}
+		else
+			fprintf(stderr, "FAiled to update main cache tree\n");
 
 		commit_style = COMMIT_NORMAL;
 		return index_lock.filename;
@@ -383,8 +392,12 @@ static char *prepare_index(int argc, const char **argv, const char *prefix,
 	if (!only && !pathspec.nr) {
 		fd = hold_locked_index(&index_lock, 1);
 		refresh_cache_or_die(refresh_flags);
-		if (active_cache_changed) {
+		if (active_cache_changed
+		    || !cache_tree_fully_valid(active_cache_tree)) {
 			update_main_cache_tree(WRITE_TREE_SILENT);
+			active_cache_changed = 1;
+		}
+		if (active_cache_changed) {
 			if (write_cache(fd, active_cache, active_nr) ||
 			    commit_locked_index(&index_lock))
 				die(_("unable to write new_index file"));
@@ -435,6 +448,7 @@ static char *prepare_index(int argc, const char **argv, const char *prefix,
 	fd = hold_locked_index(&index_lock, 1);
 	add_remove_files(&partial);
 	refresh_cache(REFRESH_QUIET);
+	update_main_cache_tree(WRITE_TREE_SILENT);
 	if (write_cache(fd, active_cache, active_nr) ||
 	    close_lock_file(&index_lock))
 		die(_("unable to write new_index file"));
diff --git a/t/t0090-cache-tree.sh b/t/t0090-cache-tree.sh
index 3a3342e..48c4240 100755
--- a/t/t0090-cache-tree.sh
+++ b/t/t0090-cache-tree.sh
@@ -8,7 +8,7 @@ cache-tree extension.
  . ./test-lib.sh
 
 cmp_cache_tree () {
-	test-dump-cache-tree >actual &&
+	test-dump-cache-tree | sed -e '/#(ref)/d' >actual &&
 	sed "s/$_x40/SHA/" <actual >filtered &&
 	test_cmp "$1" filtered
 }
@@ -16,14 +16,38 @@ cmp_cache_tree () {
 # We don't bother with actually checking the SHA1:
 # test-dump-cache-tree already verifies that all existing data is
 # correct.
-test_shallow_cache_tree () {
-	printf "SHA  (%d entries, 0 subtrees)\n" $(git ls-files|wc -l) >expect &&
+generate_expected_cache_tree_rec () {
+	dir="$1${1:+/}" &&
+	parent="$2" &&
+	# ls-files might have foo/bar, foo/bar/baz, and foo/bar/quux
+	# We want to count only foo because it's the only direct child
+	subtrees=$(git ls-files|grep /|cut -d / -f 1|uniq) &&
+	subtree_count=$(echo "$subtrees"|awk '$1 {++c} END {print c}') &&
+	entries=$(git ls-files|wc -l) &&
+	printf "SHA $dir (%d entries, %d subtrees)\n" "$entries" "$subtree_count" &&
+	for subtree in $subtrees
+	do
+		cd "$subtree"
+		generate_expected_cache_tree_rec "$dir$subtree" "$dir" || return 1
+		cd ..
+	done &&
+	dir=$parent
+}
+
+generate_expected_cache_tree () {
+	(
+		generate_expected_cache_tree_rec
+	)
+}
+
+test_cache_tree () {
+	generate_expected_cache_tree >expect &&
 	cmp_cache_tree expect
 }
 
 test_invalid_cache_tree () {
 	printf "invalid                                  %s ()\n" "" "$@" >expect &&
-	test-dump-cache-tree | \
+	test-dump-cache-tree |
 	sed -n -e "s/[0-9]* subtrees//" -e '/#(ref)/d' -e '/^invalid /p' >actual &&
 	test_cmp expect actual
 }
@@ -33,14 +57,14 @@ test_no_cache_tree () {
 	cmp_cache_tree expect
 }
 
-test_expect_failure 'initial commit has cache-tree' '
+test_expect_success 'initial commit has cache-tree' '
 	test_commit foo &&
-	test_shallow_cache_tree
+	test_cache_tree
 '
 
 test_expect_success 'read-tree HEAD establishes cache-tree' '
 	git read-tree HEAD &&
-	test_shallow_cache_tree
+	test_cache_tree
 '
 
 test_expect_success 'git-add invalidates cache-tree' '
@@ -58,6 +82,18 @@ test_expect_success 'git-add in subdir invalidates cache-tree' '
 	test_invalid_cache_tree
 '
 
+cat >before <<\EOF
+SHA  (3 entries, 2 subtrees)
+SHA dir1/ (1 entries, 0 subtrees)
+SHA dir2/ (1 entries, 0 subtrees)
+EOF
+
+cat >expect <<\EOF
+invalid                                   (2 subtrees)
+invalid                                  dir1/ (0 subtrees)
+SHA dir2/ (1 entries, 0 subtrees)
+EOF
+
 test_expect_success 'git-add in subdir does not invalidate sibling cache-tree' '
 	git tag no-children &&
 	test_when_finished "git reset --hard no-children; git read-tree HEAD" &&
@@ -65,8 +101,10 @@ test_expect_success 'git-add in subdir does not invalidate sibling cache-tree' '
 	test_commit dir1/a &&
 	test_commit dir2/b &&
 	echo "I changed this file" >dir1/a &&
+	cmp_cache_tree before &&
+	echo "I changed this file" >dir1/a &&
 	git add dir1/a &&
-	test_invalid_cache_tree dir1/
+	cmp_cache_tree expect
 '
 
 test_expect_success 'update-index invalidates cache-tree' '
@@ -79,7 +117,7 @@ test_expect_success 'update-index invalidates cache-tree' '
 test_expect_success 'write-tree establishes cache-tree' '
 	test-scrap-cache-tree &&
 	git write-tree &&
-	test_shallow_cache_tree
+	test_cache_tree
 '
 
 test_expect_success 'test-scrap-cache-tree works' '
@@ -90,37 +128,86 @@ test_expect_success 'test-scrap-cache-tree works' '
 
 test_expect_success 'second commit has cache-tree' '
 	test_commit bar &&
-	test_shallow_cache_tree
+	test_cache_tree
+'
+
+test_expect_success 'commit --interactive gives cache-tree on partial commit' '
+	cat <<-\EOT >foo.c &&
+	int foo()
+	{
+		return 42;
+	}
+	int bar()
+	{
+		return 42;
+	}
+	EOT
+	git add foo.c &&
+	test_invalid_cache_tree &&
+	git commit -m "add a file" &&
+	test_cache_tree &&
+	cat <<-\EOT >foo.c &&
+	int foo()
+	{
+		return 43;
+	}
+	int bar()
+	{
+		return 44;
+	}
+	EOT
+	(echo p; echo 1; echo; echo s; echo n; echo y; echo q) |
+	git commit --interactive -m foo &&
+	test_cache_tree
+'
+
+test_expect_success 'commit in child dir has cache-tree' '
+	mkdir dir &&
+	>dir/child.t &&
+	git add dir/child.t &&
+	git commit -m dir/child.t &&
+	test_cache_tree
 '
 
 test_expect_success 'reset --hard gives cache-tree' '
 	test-scrap-cache-tree &&
 	git reset --hard &&
-	test_shallow_cache_tree
+	test_cache_tree
 '
 
 test_expect_success 'reset --hard without index gives cache-tree' '
 	rm -f .git/index &&
 	git reset --hard &&
-	test_shallow_cache_tree
+	test_cache_tree
 '
 
 test_expect_success 'checkout gives cache-tree' '
 	git tag current &&
 	git checkout HEAD^ &&
-	test_shallow_cache_tree
+	test_cache_tree
 '
 
 test_expect_success 'checkout -b gives cache-tree' '
 	git checkout current &&
 	git checkout -b prev HEAD^ &&
-	test_shallow_cache_tree
+	test_cache_tree
 '
 
 test_expect_success 'checkout -B gives cache-tree' '
 	git checkout current &&
 	git checkout -B prev HEAD^ &&
-	test_shallow_cache_tree
+	test_cache_tree
+'
+
+test_expect_success 'partial commit gives cache-tree' '
+	git checkout -b partial no-children &&
+	test_commit one &&
+	test_commit two &&
+	echo "some change" >one.t &&
+	git add one.t &&
+	echo "some other change" >two.t &&
+	git commit two.t -m partial &&
+	test_cache_tree
 '
 
 test_done
-- 
2.0.0.390.gcb682f8

^ permalink raw reply related	[flat|nested] 28+ messages in thread

* Re: [PATCH v8 4/4] cache-tree: Write updated cache-tree after commit
  2014-07-12  4:44 ` [PATCH v8 4/4] cache-tree: Write updated cache-tree after commit David Turner
@ 2014-07-13  5:09   ` Duy Nguyen
  2014-07-14 15:54     ` Junio C Hamano
  0 siblings, 1 reply; 28+ messages in thread
From: Duy Nguyen @ 2014-07-13  5:09 UTC (permalink / raw)
  To: David Turner; +Cc: Git Mailing List, David Turner

On Sat, Jul 12, 2014 at 11:44 AM, David Turner <dturner@twopensource.com> wrote:
> @@ -342,6 +342,15 @@ static char *prepare_index(int argc, const char **argv, const char *prefix,
>
>                 discard_cache();
>                 read_cache_from(index_lock.filename);
> +               if (update_main_cache_tree(WRITE_TREE_SILENT) == 0) {
> +                       fd = open(index_lock.filename, O_WRONLY);
> +                       if (fd >= 0)
> +                               if (write_cache(fd, active_cache, active_nr) == 0) {
> +                                       close_lock_file(&index_lock);

If write_cache() returns a negative value, index.lock is probably
corrupted. Should we die() instead of moving on and returning
index_lock.filename to the caller? The caller may move index.lock to
index later on and officially ruin "index".

> +                       }
> +               }
> +               else
> +                       fprintf(stderr, "FAiled to update main cache tree\n");

make the above line something like this for i18n support:

fprintf_ln(stderr, _("Failed to update main cache tree"));

>
>                 commit_style = COMMIT_NORMAL;
>                 return index_lock.filename;
-- 
Duy

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH v8 4/4] cache-tree: Write updated cache-tree after commit
  2014-07-13  5:09   ` Duy Nguyen
@ 2014-07-14 15:54     ` Junio C Hamano
  2014-07-14 17:33       ` Ramsay Jones
  2014-07-14 17:43       ` Junio C Hamano
  0 siblings, 2 replies; 28+ messages in thread
From: Junio C Hamano @ 2014-07-14 15:54 UTC (permalink / raw)
  To: Duy Nguyen; +Cc: David Turner, Git Mailing List, David Turner

Duy Nguyen <pclouds@gmail.com> writes:

> On Sat, Jul 12, 2014 at 11:44 AM, David Turner <dturner@twopensource.com> wrote:
>> @@ -342,6 +342,15 @@ static char *prepare_index(int argc, const char **argv, const char *prefix,
>>
>>                 discard_cache();
>>                 read_cache_from(index_lock.filename);
>> +               if (update_main_cache_tree(WRITE_TREE_SILENT) == 0) {
>> +                       fd = open(index_lock.filename, O_WRONLY);
>> +                       if (fd >= 0)
>> +                               if (write_cache(fd, active_cache, active_nr) == 0) {
>> +                                       close_lock_file(&index_lock);
>
> If write_cache() returns a negative value, index.lock is probably
> corrupted. Should we die() instead of moving on and returning
> index_lock.filename to the caller? The caller may move index.lock to
> index later on and officially ruin "index".

Perhaps true, but worse yet, this will not play nicely together with
your split index series, no?  After taking the lock and writing and
closing, we spawn the interactive while still holding the lock, and
the "open" we see here is because we want to further update the same
under the same lock.  Perhaps write_locked_index() API in the split
index series can notice that the underlying fd in index_lock has
been closed earlier, realize that the call is to re-update the
index under the same lock and open the file again for writing?

Then we can update the above "open() then write_cache()" sequence
with just a call to "write_locked_index()".

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH v8 4/4] cache-tree: Write updated cache-tree after commit
  2014-07-14 15:54     ` Junio C Hamano
@ 2014-07-14 17:33       ` Ramsay Jones
  2014-07-14 17:51         ` Junio C Hamano
  2014-07-14 17:43       ` Junio C Hamano
  1 sibling, 1 reply; 28+ messages in thread
From: Ramsay Jones @ 2014-07-14 17:33 UTC (permalink / raw)
  To: Junio C Hamano, Duy Nguyen; +Cc: David Turner, Git Mailing List, David Turner

On 14/07/14 16:54, Junio C Hamano wrote:
> Duy Nguyen <pclouds@gmail.com> writes:
> 
>> On Sat, Jul 12, 2014 at 11:44 AM, David Turner <dturner@twopensource.com> wrote:
>>> @@ -342,6 +342,15 @@ static char *prepare_index(int argc, const char **argv, const char *prefix,
>>>
>>>                 discard_cache();
>>>                 read_cache_from(index_lock.filename);
>>> +               if (update_main_cache_tree(WRITE_TREE_SILENT) == 0) {
>>> +                       fd = open(index_lock.filename, O_WRONLY);
>>> +                       if (fd >= 0)
>>> +                               if (write_cache(fd, active_cache, active_nr) == 0) {
>>> +                                       close_lock_file(&index_lock);
>>
>> If write_cache() returns a negative value, index.lock is probably
>> corrupted. Should we die() instead of moving on and returning
>> index_lock.filename to the caller? The caller may move index.lock to
>> index later on and officially ruin "index".
> 
> Perhaps true, but worse yet, this will not play nicely together with
> your split index series, no?  After taking the lock and writing and
> closing, we spawn the interactive while still holding the lock, and
> the "open" we see here is because we want to further update the same
> under the same lock.  Perhaps write_locked_index() API in the split
> index series can notice that the underlying fd in index_lock has
> been closed earlier, realize that the call is to re-update the
> index under the same lock and open the file again for writing?

Hmm, I was just about to suggest that there was some negative interplay
between the 'dt/cache-tree-repair' and 'nd/split-index' branches as well.

The pu branch fails the testsuite for me. In particular, t0090-cache-tree.sh
fails like so:

    $ ./t0090-cache-tree.sh -i -v
    ...
    ok 9 - second commit has cache-tree

    expecting success: 
    	cat <<-\EOT >foo.c &&
    	int foo()
    	{
    		return 42;
    	}
    	int bar()
    	{
    		return 42;
    	}
    	EOT
    	git add foo.c &&
    	test_invalid_cache_tree &&
    	git commit -m "add a file" &&
    	test_cache_tree &&
    	cat <<-\EOT >foo.c &&
    	int foo()
    	{
    		return 43;
    	}
    	int bar()
    	{
    		return 44;
    	}
    	EOT
    	(echo p; echo 1; echo; echo s; echo n; echo y; echo q) |
    	git commit --interactive -m foo &&
    	test_cache_tree
    
    [master d1075a6] add a file
     Author: A U Thor <author@example.com>
     1 file changed, 8 insertions(+)
     create mode 100644 foo.c
               staged     unstaged path
      1:    unchanged        +2/-2 foo.c
    
    *** Commands ***
      1: [s]tatus	  2: [u]pdate	  3: [r]evert	  4: [a]dd untracked
      5: [p]atch	  6: [d]iff	  7: [q]uit	  8: [h]elp
    What now>            staged     unstaged path
      1:    unchanged        +2/-2 [f]oo.c
    Patch update>>            staged     unstaged path
    * 1:    unchanged        +2/-2 [f]oo.c
    Patch update>> diff --git a/foo.c b/foo.c
    index 75522e2..3f7f049 100644
    --- a/foo.c
    +++ b/foo.c
    @@ -1,8 +1,8 @@
     int foo()
     {
    -return 42;
    +return 43;
     }
     int bar()
     {
    -return 42;
    +return 44;
     }
    Stage this hunk [y,n,q,a,d,/,s,e,?]? Split into 2 hunks.
    @@ -1,6 +1,6 @@
     int foo()
     {
    -return 42;
    +return 43;
     }
     int bar()
     {
    Stage this hunk [y,n,q,a,d,/,j,J,g,e,?]? @@ -4,5 +4,5 @@
     }
     int bar()
     {
    -return 42;
    +return 44;
     }
    Stage this hunk [y,n,q,a,d,/,K,g,e,?]? 
    *** Commands ***
      1: [s]tatus	  2: [u]pdate	  3: [r]evert	  4: [a]dd untracked
      5: [p]atch	  6: [d]iff	  7: [q]uit	  8: [h]elp
    What now> Bye.
    [master 65d7dde] foo
     Author: A U Thor <author@example.com>
     1 file changed, 1 insertion(+), 1 deletion(-)
    --- expect	2014-07-14 17:10:13.755209229 +0000
    +++ filtered	2014-07-14 17:10:13.763209258 +0000
    @@ -1 +1 @@
    -SHA  (3 entries, 0 subtrees)
    +invalid                                   (0 subtrees)
    not ok 10 - commit --interactive gives cache-tree on partial commit
    #	
    #		cat <<-\EOT >foo.c &&
    #		int foo()
    #		{
    #			return 42;
    #		}
    #		int bar()
    #		{
    #			return 42;
    #		}
    #		EOT
    #		git add foo.c &&
    #		test_invalid_cache_tree &&
    #		git commit -m "add a file" &&
    #		test_cache_tree &&
    #		cat <<-\EOT >foo.c &&
    #		int foo()
    #		{
    #			return 43;
    #		}
    #		int bar()
    #		{
    #			return 44;
    #		}
    #		EOT
    #		(echo p; echo 1; echo; echo s; echo n; echo y; echo q) |
    #		git commit --interactive -m foo &&
    #		test_cache_tree
    #	
    $ 

Note that I haven't even looked at the test failure itself yet.

However, I noticed that commit 002ccda ("cache-tree: write updated
cache-tree after commit", 11-07-2014) passes that test just fine, but
that the merge commit 7608c87e fails. Looking at the details of the
merge resolution, made me think of Duy's split index work.

I probably won't look at this further tonight, so this is just a
heads-up on a possible problem.

HTH

ATB,
Ramsay Jones

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH v8 4/4] cache-tree: Write updated cache-tree after commit
  2014-07-14 15:54     ` Junio C Hamano
  2014-07-14 17:33       ` Ramsay Jones
@ 2014-07-14 17:43       ` Junio C Hamano
  2014-07-14 20:19         ` [PATCH v2] lockfile: allow reopening a closed but still locked file Junio C Hamano
  1 sibling, 1 reply; 28+ messages in thread
From: Junio C Hamano @ 2014-07-14 17:43 UTC (permalink / raw)
  To: Duy Nguyen; +Cc: David Turner, Git Mailing List, David Turner

Junio C Hamano <gitster@pobox.com> writes:

> Duy Nguyen <pclouds@gmail.com> writes:
>
>> On Sat, Jul 12, 2014 at 11:44 AM, David Turner <dturner@twopensource.com> wrote:
>>> @@ -342,6 +342,15 @@ static char *prepare_index(int argc, const char **argv, const char *prefix,
>>>
>>>                 discard_cache();
>>>                 read_cache_from(index_lock.filename);
>>> +               if (update_main_cache_tree(WRITE_TREE_SILENT) == 0) {
>>> +                       fd = open(index_lock.filename, O_WRONLY);
>>> +                       if (fd >= 0)
>>> +                               if (write_cache(fd, active_cache, active_nr) == 0) {
>>> +                                       close_lock_file(&index_lock);
>>
>> If write_cache() returns a negative value, index.lock is probably
>> corrupted. Should we die() instead of moving on and returning
>> index_lock.filename to the caller? The caller may move index.lock to
>> index later on and officially ruin "index".
>
> Perhaps true, but worse yet, this will not play nicely together with
> your split index series, no?  After taking the lock and writing and
> closing, we spawn the interactive while still holding the lock, and
> the "open" we see here is because we want to further update the same
> under the same lock.  Perhaps write_locked_index() API in the split
> index series can notice that the underlying fd in index_lock has
> been closed earlier, realize that the call is to re-update the
> index under the same lock and open the file again for writing?
>
> Then we can update the above "open() then write_cache()" sequence
> with just a call to "write_locked_index()".

Or perhaps introduce reopen_locked_file() and have these rare
callers that want to re-update after they closed the file call it,
in order to avoid mistake?

Perhaps something like this on top of your series?

-- >8 --
Subject: [PATCH] lockfile: allow reopening a closed but still locked file

In some code paths (e.g. giving "add -i" to prepare the contents to
be committed interactively inside "commit -p") where a caller takes
a lock, writes the new content, give chance for others to use it
while still holding the lock, and then releases the lock when all is
done.  As an extension, allow the caller to re-update an already
closed file while still holding the lock (i.e. not yet committed) by
re-opening the file, to be followed by updating the contents and
then by the usual close_lock_file() or commit_lock_file().

This is necessary if we want to add code to rebuild the cache-tree
and write the resulting index out after "add -i" returns the control
to "commit -p", for example.

Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 cache.h    |  7 ++++---
 lockfile.c | 51 +++++++++++++++++++++++++++++++++++++--------------
 2 files changed, 41 insertions(+), 17 deletions(-)

diff --git a/cache.h b/cache.h
index c6b7770..a9344cf 100644
--- a/cache.h
+++ b/cache.h
@@ -567,12 +567,13 @@ extern NORETURN void unable_to_lock_index_die(const char *path, int err);
 extern int hold_lock_file_for_update(struct lock_file *, const char *path, int);
 extern int hold_lock_file_for_append(struct lock_file *, const char *path, int);
 extern int commit_lock_file(struct lock_file *);
-extern void update_index_if_able(struct index_state *, struct lock_file *);
+extern int reopen_lock_file(struct lock_file *);
+extern int close_lock_file(struct lock_file *);
+extern void rollback_lock_file(struct lock_file *);
 
+extern void update_index_if_able(struct index_state *, struct lock_file *);
 extern int hold_locked_index(struct lock_file *, int);
 extern void set_alternate_index_output(const char *);
-extern int close_lock_file(struct lock_file *);
-extern void rollback_lock_file(struct lock_file *);
 extern int delete_ref(const char *, const unsigned char *sha1, int delopt);
 
 /* Environment bits from configuration mechanism */
diff --git a/lockfile.c b/lockfile.c
index b706614..618a4b2 100644
--- a/lockfile.c
+++ b/lockfile.c
@@ -120,21 +120,8 @@ static char *resolve_symlink(char *p, size_t s)
 	return p;
 }
 
-
-static int lock_file(struct lock_file *lk, const char *path, int flags)
+static int lock_file_open(struct lock_file *lk)
 {
-	/*
-	 * subtract 5 from size to make sure there's room for adding
-	 * ".lock" for the lock file name
-	 */
-	static const size_t max_path_len = sizeof(lk->filename) - 5;
-
-	if (strlen(path) >= max_path_len)
-		return -1;
-	strcpy(lk->filename, path);
-	if (!(flags & LOCK_NODEREF))
-		resolve_symlink(lk->filename, max_path_len);
-	strcat(lk->filename, ".lock");
 	lk->fd = open(lk->filename, O_RDWR | O_CREAT | O_EXCL, 0666);
 	if (0 <= lk->fd) {
 		if (!lock_file_list) {
@@ -156,6 +143,23 @@ static int lock_file(struct lock_file *lk, const char *path, int flags)
 	return lk->fd;
 }
 
+static int lock_file(struct lock_file *lk, const char *path, int flags)
+{
+	/*
+	 * subtract 5 from size to make sure there's room for adding
+	 * ".lock" for the lock file name
+	 */
+	static const size_t max_path_len = sizeof(lk->filename) - 5;
+
+	if (strlen(path) >= max_path_len)
+		return -1;
+	strcpy(lk->filename, path);
+	if (!(flags & LOCK_NODEREF))
+		resolve_symlink(lk->filename, max_path_len);
+	strcat(lk->filename, ".lock");
+	return lock_file_open(lk);
+}
+
 static char *unable_to_lock_message(const char *path, int err)
 {
 	struct strbuf buf = STRBUF_INIT;
@@ -221,6 +225,25 @@ int hold_lock_file_for_append(struct lock_file *lk, const char *path, int flags)
 	return fd;
 }
 
+/*
+ * After calling close_lock_file() to flush the contents to the
+ * disk, without releasing the lock, this can be called to reopen
+ * to update the contents of the file.  An expected calling sequence
+ * is
+ *
+ * hold_lock_file(lk), write(lk->fd), close_lock_file(lk),
+ * reopen_lock_file(lk), write(lk->fd), close_lock_file(lk),
+ * commit_lock_file(lk).
+ */
+int reopen_lock_file(struct lock_file *lk)
+{
+	if (0 <= lk->fd)
+		die(_("BUG: attempt to reopen an unclosed lockfile"));
+	if (!lk->filename[0])
+		die(_("BUG: attempt to reopen a committed lockfile"));
+	return lock_file_open(lk);
+}
+
 int close_lock_file(struct lock_file *lk)
 {
 	int fd = lk->fd;
-- 
2.0.1-812-g56968e3

^ permalink raw reply related	[flat|nested] 28+ messages in thread

* Re: [PATCH v8 4/4] cache-tree: Write updated cache-tree after commit
  2014-07-14 17:33       ` Ramsay Jones
@ 2014-07-14 17:51         ` Junio C Hamano
  2014-07-14 18:41           ` Ramsay Jones
  0 siblings, 1 reply; 28+ messages in thread
From: Junio C Hamano @ 2014-07-14 17:51 UTC (permalink / raw)
  To: Ramsay Jones; +Cc: Duy Nguyen, David Turner, Git Mailing List, David Turner

Ramsay Jones <ramsay@ramsay1.demon.co.uk> writes:

> that the merge commit 7608c87e fails. Looking at the details of the
> merge resolution, made me think of Duy's split index work.

Yes, there is a deliberately dropped hunk from dt/cache-tree-repair
in that merge, because the topic relied on being able to say "here
is the file descriptor, write the index to it", which no longer is
available with the split-index topic.

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH v8 4/4] cache-tree: Write updated cache-tree after commit
  2014-07-14 17:51         ` Junio C Hamano
@ 2014-07-14 18:41           ` Ramsay Jones
  2014-07-14 22:16             ` Junio C Hamano
  0 siblings, 1 reply; 28+ messages in thread
From: Ramsay Jones @ 2014-07-14 18:41 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Duy Nguyen, David Turner, Git Mailing List, David Turner

On 14/07/14 18:51, Junio C Hamano wrote:
> Ramsay Jones <ramsay@ramsay1.demon.co.uk> writes:
> 
>> that the merge commit 7608c87e fails. Looking at the details of the
>> merge resolution, made me think of Duy's split index work.
> 
> Yes, there is a deliberately dropped hunk from dt/cache-tree-repair
> in that merge, because the topic relied on being able to say "here
> is the file descriptor, write the index to it", which no longer is
> available with the split-index topic.

Ah, OK. Sounds like everything is under control then.

Thanks.

ATB,
Ramsay Jones

^ permalink raw reply	[flat|nested] 28+ messages in thread

* [PATCH v2] lockfile: allow reopening a closed but still locked file
  2014-07-14 17:43       ` Junio C Hamano
@ 2014-07-14 20:19         ` Junio C Hamano
  0 siblings, 0 replies; 28+ messages in thread
From: Junio C Hamano @ 2014-07-14 20:19 UTC (permalink / raw)
  To: Duy Nguyen; +Cc: David Turner, Git Mailing List, David Turner, Ramsay Jones

In some code paths (e.g. giving "add -i" to prepare the contents to
be committed interactively inside "commit -p") where a caller takes
a lock, writes the new content, give chance for others to use it
while still holding the lock, and then releases the lock when all is
done.  As an extension, allow the caller to re-update an already
closed file while still holding the lock (i.e. not yet committed) by
re-opening the file, to be followed by updating the contents and
then by the usual close_lock_file() or commit_lock_file().

This is necessary if we want to add code to rebuild the cache-tree
and write the resulting index out after "add -i" returns the control
to "commit -p", for example.

Signed-off-by: Junio C Hamano <gitster@pobox.com>
---

 * I was being silly in the first attemt as usual ;-)  There is not
   much to be shared between the reopen and initial open code path
   in that we do not want to give O_EXCL to open(), we know the file
   exists so O_CREAT is not necessary, we know we have added it to
   the unlocked-atexit list and we already know that we have futzed
   with its permission bits.

   With this added on top of the nd/split-index series, the conflict
   resolution for the "interactive" codepath in builtin/commit.c
   with dt/cache-tree-repair topic would become like this:

  @@ -340,6 +340,13 @@ static char *prepare_index(int argc, const char

                  discard_cache();
                  read_cache_from(index_lock.filename);
  +               if (update_main_cache_tree(WRITE_TREE_SILENT) == 0) {
  +                       if (reopen_lock_file(&index_lock) < 0)
  +                               die(_("unable to write index file"));
  +                       if (write_locked_index(&the_index, &index_lock, CLOSE_LOCK))
  +                               die(_("unable to update temporary index"));
  +               } else
  +                       warning(_("Failed to update main cache tree"));

                  commit_style = COMMIT_NORMAL;
                  return index_lock.filename;

 cache.h    |  1 +
 lockfile.c | 10 ++++++++++
 2 files changed, 11 insertions(+)

diff --git a/cache.h b/cache.h
index c6b7770..b780794 100644
--- a/cache.h
+++ b/cache.h
@@ -567,6 +567,7 @@ extern NORETURN void unable_to_lock_index_die(const char *path, int err);
 extern int hold_lock_file_for_update(struct lock_file *, const char *path, int);
 extern int hold_lock_file_for_append(struct lock_file *, const char *path, int);
 extern int commit_lock_file(struct lock_file *);
+extern int reopen_lock_file(struct lock_file *);
 extern void update_index_if_able(struct index_state *, struct lock_file *);
 
 extern int hold_locked_index(struct lock_file *, int);
diff --git a/lockfile.c b/lockfile.c
index b706614..9c12ec5 100644
--- a/lockfile.c
+++ b/lockfile.c
@@ -228,6 +228,16 @@ int close_lock_file(struct lock_file *lk)
 	return close(fd);
 }
 
+int reopen_lock_file(struct lock_file *lk)
+{
+	if (0 <= lk->fd)
+		die(_("BUG: reopen a lockfile that is still open"));
+	if (!lk->filename[0])
+		die(_("BUG: reopen a lockfile that has been committed"));
+	lk->fd = open(lk->filename, O_WRONLY);
+	return lk->fd;
+}
+
 int commit_lock_file(struct lock_file *lk)
 {
 	char result_file[PATH_MAX];
-- 
2.0.1-814-g49d294e

^ permalink raw reply related	[flat|nested] 28+ messages in thread

* Re: [PATCH v8 4/4] cache-tree: Write updated cache-tree after commit
  2014-07-14 18:41           ` Ramsay Jones
@ 2014-07-14 22:16             ` Junio C Hamano
  2014-07-14 22:32               ` David Turner
  2014-07-15  2:15               ` Duy Nguyen
  0 siblings, 2 replies; 28+ messages in thread
From: Junio C Hamano @ 2014-07-14 22:16 UTC (permalink / raw)
  To: Ramsay Jones; +Cc: Duy Nguyen, David Turner, Git Mailing List, David Turner

Ramsay Jones <ramsay@ramsay1.demon.co.uk> writes:

> On 14/07/14 18:51, Junio C Hamano wrote:
>> Ramsay Jones <ramsay@ramsay1.demon.co.uk> writes:
>> 
>>> that the merge commit 7608c87e fails. Looking at the details of the
>>> merge resolution, made me think of Duy's split index work.
>> 
>> Yes, there is a deliberately dropped hunk from dt/cache-tree-repair
>> in that merge, because the topic relied on being able to say "here
>> is the file descriptor, write the index to it", which no longer is
>> available with the split-index topic.
>
> Ah, OK. Sounds like everything is under control then.

Wasn't, but now I think it is ;-)

David, could you please double check the conflict resolution at
882426ea (Merge branch 'dt/cache-tree-repair' into jch, 2014-07-14),
at about the middle between master..pu?  By eyeballing

    git diff 882426ea^ 882426ea

we should see what your series would have done if it were based on
top of the nd/split-index topic.  The most iffy is the first hunk of
change to builtin/commit.c, which is more or less my rewrite of what
you did on top of 'master'.

The change to builtin/checkout.c also seems somewhat iffy in that we
treat the_index.cache_tree (aka "active_cache_tree") as if cache
trees are something we can manipulate independent of a particular
index_state (which has been the rule for a long time), even though
in the world order after nd/split-index topic, cache_tree_update()
can no longer be used on a cache-tree that is not associated to a
particular index_state.  It is not a problem with your series, but
comes from nd/split-index topic, and it might indicate a slight
unevenness of the API (i.e. we may want to either insist that the
public API to muck with a cache-tree outside cache-tree.c must be
accessed via an index-state and never via a bare cache-tree
structure, by insisting that cache_tree_fully_valid() to take a
pointer to an index-state as well; or we may want to go the other
way and allow API users to pass a bare cache-tree without the
index-state when the latter is not absolutely necessary, by changing
cache_tree_update() to take a cache-tree, not an index-state).

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH v8 4/4] cache-tree: Write updated cache-tree after commit
  2014-07-14 22:16             ` Junio C Hamano
@ 2014-07-14 22:32               ` David Turner
  2014-07-15  2:15               ` Duy Nguyen
  1 sibling, 0 replies; 28+ messages in thread
From: David Turner @ 2014-07-14 22:32 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Ramsay Jones, Duy Nguyen, Git Mailing List, David Turner

On Mon, 2014-07-14 at 15:16 -0700, Junio C Hamano wrote:
> Ramsay Jones <ramsay@ramsay1.demon.co.uk> writes:
> 
> > On 14/07/14 18:51, Junio C Hamano wrote:
> >> Ramsay Jones <ramsay@ramsay1.demon.co.uk> writes:
> >> 
> >>> that the merge commit 7608c87e fails. Looking at the details of the
> >>> merge resolution, made me think of Duy's split index work.
> >> 
> >> Yes, there is a deliberately dropped hunk from dt/cache-tree-repair
> >> in that merge, because the topic relied on being able to say "here
> >> is the file descriptor, write the index to it", which no longer is
> >> available with the split-index topic.
> >
> > Ah, OK. Sounds like everything is under control then.
> 
> Wasn't, but now I think it is ;-)
> 
> David, could you please double check the conflict resolution at
> 882426ea (Merge branch 'dt/cache-tree-repair' into jch, 2014-07-14),
> at about the middle between master..pu?  By eyeballing
> 
>     git diff 882426ea^ 882426ea
> 
> we should see what your series would have done if it were based on
> top of the nd/split-index topic.  The most iffy is the first hunk of
> change to builtin/commit.c, which is more or less my rewrite of what
> you did on top of 'master'.

LGTM.  And the tests all pass.

> The change to builtin/checkout.c also seems somewhat iffy in that we
> treat the_index.cache_tree (aka "active_cache_tree") as if cache
> trees are something we can manipulate independent of a particular
> index_state (which has been the rule for a long time), even though
> in the world order after nd/split-index topic, cache_tree_update()
> can no longer be used on a cache-tree that is not associated to a
> particular index_state.  It is not a problem with your series, but
> comes from nd/split-index topic, and it might indicate a slight
> unevenness of the API (i.e. we may want to either insist that the
> public API to muck with a cache-tree outside cache-tree.c must be
> accessed via an index-state and never via a bare cache-tree
> structure, by insisting that cache_tree_fully_valid() to take a
> pointer to an index-state as well; or we may want to go the other
> way and allow API users to pass a bare cache-tree without the
> index-state when the latter is not absolutely necessary, by changing
> cache_tree_update() to take a cache-tree, not an index-state).

I agree that some sort of API updates like would be an improvement. But
this seems to work for now.

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH v8 4/4] cache-tree: Write updated cache-tree after commit
  2014-07-14 22:16             ` Junio C Hamano
  2014-07-14 22:32               ` David Turner
@ 2014-07-15  2:15               ` Duy Nguyen
  2014-07-15  6:38                 ` Junio C Hamano
  1 sibling, 1 reply; 28+ messages in thread
From: Duy Nguyen @ 2014-07-15  2:15 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Ramsay Jones, David Turner, Git Mailing List, David Turner

On Tue, Jul 15, 2014 at 5:16 AM, Junio C Hamano <gitster@pobox.com> wrote:
> Ramsay Jones <ramsay@ramsay1.demon.co.uk> writes:
>
>> On 14/07/14 18:51, Junio C Hamano wrote:
>>> Ramsay Jones <ramsay@ramsay1.demon.co.uk> writes:
>>>
>>>> that the merge commit 7608c87e fails. Looking at the details of the
>>>> merge resolution, made me think of Duy's split index work.
>>>
>>> Yes, there is a deliberately dropped hunk from dt/cache-tree-repair
>>> in that merge, because the topic relied on being able to say "here
>>> is the file descriptor, write the index to it", which no longer is
>>> available with the split-index topic.
>>
>> Ah, OK. Sounds like everything is under control then.
>
> Wasn't, but now I think it is ;-)
>
> David, could you please double check the conflict resolution at
> 882426ea (Merge branch 'dt/cache-tree-repair' into jch, 2014-07-14),
> at about the middle between master..pu?  By eyeballing
>
>     git diff 882426ea^ 882426ea
>
> we should see what your series would have done if it were based on
> top of the nd/split-index topic.  The most iffy is the first hunk of
> change to builtin/commit.c, which is more or less my rewrite of what
> you did on top of 'master'.

It makes me wonder if a cleaner way of rebuilding cache-treei in this
case is from git-add--interactive.perl, or by simply spawn "git
update-index --rebuild-cache-tree" after running
git-add--interactive.perl.
-- 
Duy

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH v8 4/4] cache-tree: Write updated cache-tree after commit
  2014-07-15  2:15               ` Duy Nguyen
@ 2014-07-15  6:38                 ` Junio C Hamano
  2014-07-15 10:23                   ` Duy Nguyen
  0 siblings, 1 reply; 28+ messages in thread
From: Junio C Hamano @ 2014-07-15  6:38 UTC (permalink / raw)
  To: Duy Nguyen; +Cc: Ramsay Jones, David Turner, Git Mailing List, David Turner

On Mon, Jul 14, 2014 at 7:15 PM, Duy Nguyen <pclouds@gmail.com> wrote:
>
> It makes me wonder if a cleaner way of rebuilding cache-treei in this
> case is from git-add--interactive.perl, or by simply spawn "git
> update-index --rebuild-cache-tree" after running
> git-add--interactive.perl.

We could check if the cache-tree has fully been populated by
"add -i" and limit the rebuilding by "git commit -p" main process,
but if "add -i" did not do so, there is no reason why "git commit -p"
should not do so, without relying on the implementation of "add -i"
to do so.

At least to me, what you suggested sounds nothing more than
a cop-out; instead of lifting the limitation of the API by enhancing
it with reopen-lock-file, punting to shift the burden elsewhere. I
am not quite sure why that is cleaner, but perhaps I am missing
something.

In the longer run, it would be plausible that somebody would want
to rewrite "git-add -i" and will make it just a C API call away without
having to spawn a separate process. You cannot punt by saying
"make 'add -i' responsible for it" at that point; you will be doing
what 'add -i' would be doing yourself, no?

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH v8 4/4] cache-tree: Write updated cache-tree after commit
  2014-07-15  6:38                 ` Junio C Hamano
@ 2014-07-15 10:23                   ` Duy Nguyen
  2014-07-15 16:45                     ` Junio C Hamano
  0 siblings, 1 reply; 28+ messages in thread
From: Duy Nguyen @ 2014-07-15 10:23 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Ramsay Jones, David Turner, Git Mailing List, David Turner

On Mon, Jul 14, 2014 at 11:38:06PM -0700, Junio C Hamano wrote:
> On Mon, Jul 14, 2014 at 7:15 PM, Duy Nguyen <pclouds@gmail.com> wrote:
> >
> > It makes me wonder if a cleaner way of rebuilding cache-treei in this
> > case is from git-add--interactive.perl, or by simply spawn "git
> > update-index --rebuild-cache-tree" after running
> > git-add--interactive.perl.
> 
> We could check if the cache-tree has fully been populated by
> "add -i" and limit the rebuilding by "git commit -p" main process,
> but if "add -i" did not do so, there is no reason why "git commit -p"
> should not do so, without relying on the implementation of "add -i"
> to do so.
> 
> At least to me, what you suggested sounds nothing more than
> a cop-out; instead of lifting the limitation of the API by enhancing
> it with reopen-lock-file, punting to shift the burden elsewhere. I
> am not quite sure why that is cleaner, but perhaps I am missing
> something.

Reopen-lock-file to me does not sound like an enhancement, at least in
the index update context. We have always updated the index by writing
to a new file, then rename. Now if the new write_locked_index() blows
up after reopen_lock_file(), we have no way but die() because
index_lock.filename can no longer be trusted.

Doing it in a separate process keeps the tradition. And if the second
write fails, we still have good index_lock.filename. We could also do
the same here with something like this without new process (lightly
tested):

-- 8< --
diff --git a/builtin/commit.c b/builtin/commit.c
index 606c890..c2b4875 100644
--- a/builtin/commit.c
+++ b/builtin/commit.c
@@ -340,6 +340,18 @@ static char *prepare_index(int argc, const char **argv, const char *prefix,
 
 		discard_cache();
 		read_cache_from(index_lock.filename);
+		if (update_main_cache_tree(WRITE_TREE_SILENT) == 0) {
+			static struct lock_file second_index_lock;
+			hold_lock_file_for_update(&second_index_lock,
+						  index_lock.filename,
+						  LOCK_DIE_ON_ERROR);
+			if (write_locked_index(&the_index, &second_index_lock,
+					       COMMIT_LOCK)) {
+				warning(_("Failed to update main cache tree"));
+				rollback_lock_file(&second_index_lock);
+			}
+		} else
+			warning(_("Failed to update main cache tree"));
 
 		commit_style = COMMIT_NORMAL;
 		return index_lock.filename;
-- 8< --

I was worried about the dependency between second_index_lock and
index_lock, but at cleanup, all we do is rollback, which is safe
regardless of dependencies. Just need to make sure we commit
second_index_lock before index_lock.

> In the longer run, it would be plausible that somebody would want
> to rewrite "git-add -i" and will make it just a C API call away without
> having to spawn a separate process. You cannot punt by saying
> "make 'add -i' responsible for it" at that point; you will be doing
> what 'add -i' would be doing yourself, no?

Yes, but at current rewrite rate I'd bet it won't happen in the next
two years at least.
--
Duy

^ permalink raw reply related	[flat|nested] 28+ messages in thread

* Re: [PATCH v8 4/4] cache-tree: Write updated cache-tree after commit
  2014-07-15 10:23                   ` Duy Nguyen
@ 2014-07-15 16:45                     ` Junio C Hamano
  2014-07-16 10:18                       ` Duy Nguyen
  0 siblings, 1 reply; 28+ messages in thread
From: Junio C Hamano @ 2014-07-15 16:45 UTC (permalink / raw)
  To: Duy Nguyen; +Cc: Ramsay Jones, David Turner, Git Mailing List, David Turner

Duy Nguyen <pclouds@gmail.com> writes:

> On Mon, Jul 14, 2014 at 11:38:06PM -0700, Junio C Hamano wrote:
>> On Mon, Jul 14, 2014 at 7:15 PM, Duy Nguyen <pclouds@gmail.com> wrote:
>> >
>> > It makes me wonder if a cleaner way of rebuilding cache-treei in this
>> > case is from git-add--interactive.perl, or by simply spawn "git
>> > update-index --rebuild-cache-tree" after running
>> > git-add--interactive.perl.
>> 
>> We could check if the cache-tree has fully been populated by
>> "add -i" and limit the rebuilding by "git commit -p" main process,
>> but if "add -i" did not do so, there is no reason why "git commit -p"
>> should not do so, without relying on the implementation of "add -i"
>> to do so.
>> 
>> At least to me, what you suggested sounds nothing more than
>> a cop-out; instead of lifting the limitation of the API by enhancing
>> it with reopen-lock-file, punting to shift the burden elsewhere. I
>> am not quite sure why that is cleaner, but perhaps I am missing
>> something.
>
> Reopen-lock-file to me does not sound like an enhancement, at least in
> the index update context. We have always updated the index by writing
> to a new file, then rename.

Time to step back and think, I think.

What is the real point of "writing into *.lock and renaming"?  It
serves two purposes: (1) everybody adheres to that convention---if
we managed to take the lock "index.lock", nobody else will compete
and interfere with us until we rename it to "index". (2) in the
meantime, others can still read the old contents in the original
"index".

With "take lock", "write to a temporary", "commit by rename or
rollback by delete", we can have the usual transactional semantics.
While we are working on it, nobody is allowed to muck with the same
file, because everybody pays attention to *.lock.  People will not
see what we are doing until we release the lock because we are not
writing into the primary file.  And people can see what we did in
its entirety once we are done because we close and rename to commit
our changes atomically.

Think what CLOSE_LOCK adds to that and you would appreciate its
usefulness and at the same time realize its limitation.  By allowing
us to flush what we wrote to the disk without releasing the lock, we
can give others (e.g. subprograms we spawn) controlled access to the
new contents we prepared before we commit the result to the outside
world.  The access is controlled because we are in control when we
tell these others to peek into or update the temporary file "*.lock".

The original implementaiton of CLOSE_LOCK is limited in that you can
do so only once; you take a lock, you do your thing, you close, you
let (one or more) others see it, and you commit (or rollback).  You
cannot do another of your thing once you close with the original
implementation because there was no way to reopen.

What do you gain by your proposal to lock "index.lock" file?  We
know we already have "index.lock", so nobody should be competing on
mucking with its contents with us and we gain nothing by writing
into index.lock.lock and renaming it to index.lock.  We are in total
control of the lifetime of index.lock, when we spawn subprograms on
it to let them write to it, when we ourselves write to it, when we
spawn subprograms on it to let them read from it, all under the lock
we have on the "index", i.e. "index.lock".

The only thing use of another temporary file (that does not have to
be a lock on "index.lock", by the way, because we have total control
over when and who updates the file while we hold the "index.lock")
would give you is that it allows you to make the success of the "do
another of your thing" step optional.  While holding the lock, you
close and let "add -i" work on it, and after it returns, instead of
reopening, you write into yet another "index.lock.lock", expecting
that it might fail and when it fails you can roll it back, leaving
the contents "add -i" left in "index.lock" intact.  If you do not
use the extra temporary file, you start from "index.lock" left by
"add -i", write the updated index into "index.lock" and if you fail
to write, you have to roll back the entire "index"---you lose the
option to use the index left by "add -i" without repopulated
cache-tree.  But in the index update context, I do not think such a
complexity is not necessary.  If something fails, we should fail and
roll back the entire "index".

> Now if the new write_locked_index() blows
> up after reopen_lock_file(), we have no way but die() because
> index_lock.filename can no longer be trusted.

If that is the case, lock.filename[] is getting clobbered by
somebody too early, isn't it?  I think Michael's mh/lockfile topic
(dropped from my tree due to getting stale without rerolling) used a
separate flag to indicate the validity without mucking with the
filename member, and we may want to do something like that as the
right solution to that problem.

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH v8 4/4] cache-tree: Write updated cache-tree after commit
  2014-07-15 16:45                     ` Junio C Hamano
@ 2014-07-16 10:18                       ` Duy Nguyen
  2014-07-16 17:33                         ` Junio C Hamano
  0 siblings, 1 reply; 28+ messages in thread
From: Duy Nguyen @ 2014-07-16 10:18 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Ramsay Jones, David Turner, Git Mailing List, David Turner

On Tue, Jul 15, 2014 at 11:45 PM, Junio C Hamano <gitster@pobox.com> wrote:
> What is the real point of "writing into *.lock and renaming"?  It
> serves two purposes: (1) everybody adheres to that convention---if
> we managed to take the lock "index.lock", nobody else will compete
> and interfere with us until we rename it to "index". (2) in the
> meantime, others can still read the old contents in the original
> "index".
>
> With "take lock", "write to a temporary", "commit by rename or
> rollback by delete", we can have the usual transactional semantics.
> While we are working on it, nobody is allowed to muck with the same
> file, because everybody pays attention to *.lock.  People will not
> see what we are doing until we release the lock because we are not
> writing into the primary file.  And people can see what we did in
> its entirety once we are done because we close and rename to commit
> our changes atomically.

True.

> Think what CLOSE_LOCK adds to that and you would appreciate its
> usefulness and at the same time realize its limitation.  By allowing
> us to flush what we wrote to the disk without releasing the lock, we
> can give others (e.g. subprograms we spawn) controlled access to the
> new contents we prepared before we commit the result to the outside
> world.  The access is controlled because we are in control when we
> tell these others to peek into or update the temporary file "*.lock".
>
> The original implementaiton of CLOSE_LOCK is limited in that you can
> do so only once; you take a lock, you do your thing, you close, you
> let (one or more) others see it, and you commit (or rollback).  You
> cannot do another of your thing once you close with the original
> implementation because there was no way to reopen.

This is probably where our opinions differ. Yes, if you are sure
nobody else is looking at the lock file any more, then you can do
whatever you want. And because this is a .lock file, nobody is
supposed to look at it unless you tell them too (in contrast
$GIT_DIR/index can be read at any time). The format of the index makes
it impossible to just edit one byte and be done with it. You always
write a full new file. By sticking to transaction-style update, you
need no extra code, and you have a back up file as a side effect.

> What do you gain by your proposal to lock "index.lock" file?  We
> know we already have "index.lock", so nobody should be competing on
> mucking with its contents with us and we gain nothing by writing
> into index.lock.lock and renaming it to index.lock.  We are in total
> control of the lifetime of index.lock, when we spawn subprograms on
> it to let them write to it, when we ourselves write to it, when we
> spawn subprograms on it to let them read from it, all under the lock
> we have on the "index", i.e. "index.lock".
>
> The only thing use of another temporary file (that does not have to
> be a lock on "index.lock", by the way, because we have total control
> over when and who updates the file while we hold the "index.lock")
> would give you is that it allows you to make the success of the "do
> another of your thing" step optional.  While holding the lock, you
> close and let "add -i" work on it, and after it returns, instead of
> reopening, you write into yet another "index.lock.lock", expecting
> that it might fail and when it fails you can roll it back, leaving
> the contents "add -i" left in "index.lock" intact.  If you do not
> use the extra temporary file, you start from "index.lock" left by
> "add -i", write the updated index into "index.lock" and if you fail
> to write, you have to roll back the entire "index"---you lose the
> option to use the index left by "add -i" without repopulated
> cache-tree.  But in the index update context, I do not think such a
> complexity is not necessary.  If something fails, we should fail and
> roll back the entire "index".

I probably look at the problem from a wrong angle. To me the result of
"commit -p" is precious. I'm not a big user of "commit -p" myself as I
prefer "add -p" but it's the same: it'd be frustrating if after you
have carefully added your chunks, the program aborts and you have to
start over. And not just a few chunks. Think of reviewing .po files
and approve strings by the means of adding them to the index. Perhaps
because _I_ as a developer see this cache-tree update step optional
and react to it unnecessarily. Ordinary users won't see any
difference. And perhaps a better way to save the result of "commit/add
-p" is some sort of index log, not be over-protective at this
"interactive commit" code block.

I don't feel strongly either way. So your call.
-- 
Duy

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH v8 4/4] cache-tree: Write updated cache-tree after commit
  2014-07-16 10:18                       ` Duy Nguyen
@ 2014-07-16 17:33                         ` Junio C Hamano
  0 siblings, 0 replies; 28+ messages in thread
From: Junio C Hamano @ 2014-07-16 17:33 UTC (permalink / raw)
  To: Duy Nguyen; +Cc: Ramsay Jones, David Turner, Git Mailing List, David Turner

Duy Nguyen <pclouds@gmail.com> writes:

>> ....  If you do not
>> use the extra temporary file, you start from "index.lock" left by
>> "add -i", write the updated index into "index.lock" and if you fail
>> to write, you have to roll back the entire "index"---you lose the
>> option to use the index left by "add -i" without repopulated
>> cache-tree.  But in the index update context, I do not think such a
>> complexity is not necessary.  If something fails, we should fail and
>> roll back the entire "index".
>
> I probably look at the problem from a wrong angle. To me the result of
> "commit -p" is precious. I'm not a big user of "commit -p" myself as I
> prefer "add -p" but it's the same...

Oh, we agree that the result of "commit -p" is precious.

But the point of David's series is to change the definition of the
"precious result" to not just "commit is made as asked", but now
also to include that "the index the user will use for continued work
will have populated cache-tree".  The series thinks both are precious.

As you can probably read from my review responses, I am not sold to
the idea that spending extra cycles to pre-populate cache-tree is
100% good idea, but if we _were_ to accept that it is a good idea,
it logically follows that failing to populate cache-tree is just as
a failure as failing to commit.

In any case, it is unlikely for writing out the updated index with
refreshed cache-tree to fail and blow away the partially built index
(we do not even attempt to reopen/update if we cannot prepare
in-core cache-tree), so I do not think it is such a big deal either
way.

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH v8 1/4] cache-tree: Create/update cache-tree on checkou
  2014-07-12  4:44 [PATCH v8 1/4] cache-tree: Create/update cache-tree on checkout David Turner
                   ` (2 preceding siblings ...)
  2014-07-12  4:44 ` [PATCH v8 4/4] cache-tree: Write updated cache-tree after commit David Turner
@ 2014-08-31 12:07 ` John Keeping
  2014-09-01 20:49   ` David Turner
  2014-09-02 20:52   ` Junio C Hamano
  3 siblings, 2 replies; 28+ messages in thread
From: John Keeping @ 2014-08-31 12:07 UTC (permalink / raw)
  To: David Turner; +Cc: git, David Turner

On Fri, Jul 11, 2014 at 09:44:33PM -0700, David Turner wrote:
> When git checkout checks out a branch, create or update the
> cache-tree so that subsequent operations are faster.
> 
> update_main_cache_tree learned a new flag, WRITE_TREE_REPAIR.  When
> WRITE_TREE_REPAIR is set, portions of the cache-tree which do not
> correspond to existing tree objects are invalidated (and portions which
> do are marked as valid).  No new tree objects are created.
> 
> Signed-off-by: David Turner <dturner@twitter.com>
> ---

This causes an incorrect error message to be printed when switching
branches with staged changes in a subdirectory.  The test case is pretty
simple:

	git init test &&
	cd test &&
	mkdir sub &&
	echo one >sub/one &&
	git add sub/one &&
	git commit -m one &&
	echo two >sub/two &&
	git add sub/two &&
	git checkout -b test

After this commit the output is:

	error: invalid object 040000 0000000000000000000000000000000000000000 for 'bar'
	A       bar/quux
	Switched to branch 'test'

but the "error:" line should not be there.

Everything still works, but I think the error message is a bit scary
considering that this isn't actually an error.

>  builtin/checkout.c    |  8 ++++++++
>  cache-tree.c          | 12 +++++++++++-
>  cache-tree.h          |  1 +
>  t/t0090-cache-tree.sh | 19 ++++++++++++++++---
>  4 files changed, 36 insertions(+), 4 deletions(-)
> 
> diff --git a/builtin/checkout.c b/builtin/checkout.c
> index 07cf555..054214f 100644
> --- a/builtin/checkout.c
> +++ b/builtin/checkout.c
> @@ -553,6 +553,14 @@ static int merge_working_tree(const struct checkout_opts *opts,
>  		}
>  	}
>  
> +	if (!active_cache_tree)
> +		active_cache_tree = cache_tree();
> +
> +	if (!cache_tree_fully_valid(active_cache_tree))
> +		cache_tree_update(active_cache_tree,
> +				  (const struct cache_entry * const *)active_cache,
> +				  active_nr, WRITE_TREE_SILENT | WRITE_TREE_REPAIR);
> +
>  	if (write_cache(newfd, active_cache, active_nr) ||
>  	    commit_locked_index(lock_file))
>  		die(_("unable to write new index file"));
> diff --git a/cache-tree.c b/cache-tree.c
> index 7fa524a..f951d7d 100644
> --- a/cache-tree.c
> +++ b/cache-tree.c
> @@ -239,9 +239,12 @@ static int update_one(struct cache_tree *it,
>  	struct strbuf buffer;
>  	int missing_ok = flags & WRITE_TREE_MISSING_OK;
>  	int dryrun = flags & WRITE_TREE_DRY_RUN;
> +	int repair = flags & WRITE_TREE_REPAIR;
>  	int to_invalidate = 0;
>  	int i;
>  
> +	assert(!(dryrun && repair));
> +
>  	*skip_count = 0;
>  
>  	if (0 <= it->entry_count && has_sha1_file(it->sha1))
> @@ -374,7 +377,14 @@ static int update_one(struct cache_tree *it,
>  #endif
>  	}
>  
> -	if (dryrun)
> +	if (repair) {
> +		unsigned char sha1[20];
> +		hash_sha1_file(buffer.buf, buffer.len, tree_type, sha1);
> +		if (has_sha1_file(sha1))
> +			hashcpy(it->sha1, sha1);
> +		else
> +			to_invalidate = 1;
> +	} else if (dryrun)
>  		hash_sha1_file(buffer.buf, buffer.len, tree_type, it->sha1);
>  	else if (write_sha1_file(buffer.buf, buffer.len, tree_type, it->sha1)) {
>  		strbuf_release(&buffer);
> diff --git a/cache-tree.h b/cache-tree.h
> index f1923ad..666d18f 100644
> --- a/cache-tree.h
> +++ b/cache-tree.h
> @@ -39,6 +39,7 @@ int update_main_cache_tree(int);
>  #define WRITE_TREE_IGNORE_CACHE_TREE 2
>  #define WRITE_TREE_DRY_RUN 4
>  #define WRITE_TREE_SILENT 8
> +#define WRITE_TREE_REPAIR 16
>  
>  /* error return codes */
>  #define WRITE_TREE_UNREADABLE_INDEX (-1)
> diff --git a/t/t0090-cache-tree.sh b/t/t0090-cache-tree.sh
> index 6c33e28..98fb1ab 100755
> --- a/t/t0090-cache-tree.sh
> +++ b/t/t0090-cache-tree.sh
> @@ -44,14 +44,14 @@ test_expect_success 'read-tree HEAD establishes cache-tree' '
>  
>  test_expect_success 'git-add invalidates cache-tree' '
>  	test_when_finished "git reset --hard; git read-tree HEAD" &&
> -	echo "I changed this file" > foo &&
> +	echo "I changed this file" >foo &&
>  	git add foo &&
>  	test_invalid_cache_tree
>  '
>  
>  test_expect_success 'update-index invalidates cache-tree' '
>  	test_when_finished "git reset --hard; git read-tree HEAD" &&
> -	echo "I changed this file" > foo &&
> +	echo "I changed this file" >foo &&
>  	git update-index --add foo &&
>  	test_invalid_cache_tree
>  '
> @@ -85,9 +85,22 @@ test_expect_success 'reset --hard without index gives cache-tree' '
>  	test_shallow_cache_tree
>  '
>  
> -test_expect_failure 'checkout gives cache-tree' '
> +test_expect_success 'checkout gives cache-tree' '
> +	git tag current &&
>  	git checkout HEAD^ &&
>  	test_shallow_cache_tree
>  '
>  
> +test_expect_success 'checkout -b gives cache-tree' '
> +	git checkout current &&
> +	git checkout -b prev HEAD^ &&
> +	test_shallow_cache_tree
> +'
> +
> +test_expect_success 'checkout -B gives cache-tree' '
> +	git checkout current &&
> +	git checkout -B prev HEAD^ &&
> +	test_shallow_cache_tree
> +'
> +
>  test_done
> -- 
> 2.0.0.390.gcb682f8

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH v8 1/4] cache-tree: Create/update cache-tree on checkou
  2014-08-31 12:07 ` [PATCH v8 1/4] cache-tree: Create/update cache-tree on checkou John Keeping
@ 2014-09-01 20:49   ` David Turner
  2014-09-01 22:13     ` John Keeping
  2014-09-02 20:52   ` Junio C Hamano
  1 sibling, 1 reply; 28+ messages in thread
From: David Turner @ 2014-09-01 20:49 UTC (permalink / raw)
  To: John Keeping; +Cc: git, David Turner

On Sun, 2014-08-31 at 13:07 +0100, John Keeping wrote:
> On Fri, Jul 11, 2014 at 09:44:33PM -0700, David Turner wrote:
> > When git checkout checks out a branch, create or update the
> > cache-tree so that subsequent operations are faster.
> > 
> > update_main_cache_tree learned a new flag, WRITE_TREE_REPAIR.  When
> > WRITE_TREE_REPAIR is set, portions of the cache-tree which do not
> > correspond to existing tree objects are invalidated (and portions which
> > do are marked as valid).  No new tree objects are created.
> > 
> > Signed-off-by: David Turner <dturner@twitter.com>
> > ---
> 
> This causes an incorrect error message to be printed when switching
> branches with staged changes in a subdirectory.  The test case is pretty
> simple:
<snip>

I tried to reproduce this problem, but I could not.  I tried to
reproduce against just this patch 1/4, and also against all 4 of the
patches.  Can you reproduce this on Junio's 'next' branch, which
includes this series of patches?

Do you have some sort of unusual configuration that might cause
different results? 

Thanks.

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH v8 1/4] cache-tree: Create/update cache-tree on checkou
  2014-09-01 20:49   ` David Turner
@ 2014-09-01 22:13     ` John Keeping
  0 siblings, 0 replies; 28+ messages in thread
From: John Keeping @ 2014-09-01 22:13 UTC (permalink / raw)
  To: David Turner; +Cc: git, David Turner

On Mon, Sep 01, 2014 at 04:49:28PM -0400, David Turner wrote:
> On Sun, 2014-08-31 at 13:07 +0100, John Keeping wrote:
> > On Fri, Jul 11, 2014 at 09:44:33PM -0700, David Turner wrote:
> > > When git checkout checks out a branch, create or update the
> > > cache-tree so that subsequent operations are faster.
> > > 
> > > update_main_cache_tree learned a new flag, WRITE_TREE_REPAIR.  When
> > > WRITE_TREE_REPAIR is set, portions of the cache-tree which do not
> > > correspond to existing tree objects are invalidated (and portions which
> > > do are marked as valid).  No new tree objects are created.
> > > 
> > > Signed-off-by: David Turner <dturner@twitter.com>
> > > ---
> > 
> > This causes an incorrect error message to be printed when switching
> > branches with staged changes in a subdirectory.  The test case is pretty
> > simple:
> <snip>
> 
> I tried to reproduce this problem, but I could not.  I tried to
> reproduce against just this patch 1/4, and also against all 4 of the
> patches.  Can you reproduce this on Junio's 'next' branch, which
> includes this series of patches?
> 
> Do you have some sort of unusual configuration that might cause
> different results? 

I don't think so.  I wondered if "status.branch=true" in the config
could do it, but there's no difference turning that off.  The rest of
the config looks like this:

-- >8 --
core.excludesfile=~/.gitexcludes
push.default=upstream
color.ui=auto
[snip alias.*]
[snip user.*]
[snip url.*.insteadof]
[snip sendemail.*]
rebase.autosquash=true
mailinfo.scissors=true
format.thread=true
rerere.enabled=true
status.branch=true
pull.ff=true
credential.helper=cache
diff.tool=vimdiff
merge.tool=vimdiff
core.repositoryformatversion=0
core.filemode=true
core.bare=false
core.logallrefupdates=true
-- 8< --

The following script (run from the Git source directory) bisects down
to:

	aecf567cbfb6ab46e82f7f5df36fb6a2dd5bee69 is the first bad commit
	commit aecf567cbfb6ab46e82f7f5df36fb6a2dd5bee69
	Author: David Turner <dturner@twopensource.com>
	Date:   Sat Jul 5 21:06:56 2014 -0700

	    cache-tree: create/update cache-tree on checkout

-- >8 --
#!/bin/sh
git init test &&
(
	cd test &&
	mkdir sub &&
	echo one >sub/one &&
	git add sub/one &&
	git commit -m one &&
	echo two >sub/two &&
	git add sub/two &&
	git branch other
) &&
cat >BISECT.sh <<\EOF &&
#!/bin/sh
make -j4 &&
cd test &&
../bin-wrappers/git checkout other 2>&1 | grep 'error:'
result=$?
git checkout master
if test $result = 0
then
	exit 1
else
	exit 0
fi
EOF
chmod +x BISECT.sh &&
git bisect start origin/next origin/master &&
git bisect run ./BISECT.sh

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH v8 1/4] cache-tree: Create/update cache-tree on checkou
  2014-08-31 12:07 ` [PATCH v8 1/4] cache-tree: Create/update cache-tree on checkou John Keeping
  2014-09-01 20:49   ` David Turner
@ 2014-09-02 20:52   ` Junio C Hamano
  2014-09-02 21:10     ` Junio C Hamano
  1 sibling, 1 reply; 28+ messages in thread
From: Junio C Hamano @ 2014-09-02 20:52 UTC (permalink / raw)
  To: John Keeping; +Cc: David Turner, git, David Turner

John Keeping <john@keeping.me.uk> writes:

> On Fri, Jul 11, 2014 at 09:44:33PM -0700, David Turner wrote:
>> When git checkout checks out a branch, create or update the
>> cache-tree so that subsequent operations are faster.
>> 
>> update_main_cache_tree learned a new flag, WRITE_TREE_REPAIR.  When
>> WRITE_TREE_REPAIR is set, portions of the cache-tree which do not
>> correspond to existing tree objects are invalidated (and portions which
>> do are marked as valid).  No new tree objects are created.
>> 
>> Signed-off-by: David Turner <dturner@twitter.com>
>> ---
>
> This causes an incorrect error message to be printed when switching
> branches with staged changes in a subdirectory.  The test case is pretty
> simple:
>
> 	git init test &&
> 	cd test &&
> 	mkdir sub &&
> 	echo one >sub/one &&
> 	git add sub/one &&
> 	git commit -m one &&
> 	echo two >sub/two &&
> 	git add sub/two &&
> 	git checkout -b test
>
> After this commit the output is:
>
> 	error: invalid object 040000 0000000000000000000000000000000000000000 for 'bar'
> 	A       bar/quux
> 	Switched to branch 'test'
>
> but the "error:" line should not be there.

Yeah, this seems to be broken and I am unhappy that I didn't notice
it myself as I always use a version that is somewhat ahead of 'next'
myself.

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH v8 1/4] cache-tree: Create/update cache-tree on checkou
  2014-09-02 20:52   ` Junio C Hamano
@ 2014-09-02 21:10     ` Junio C Hamano
  2014-09-02 21:24       ` [PATCH] cache-tree: propagate invalidation up when punting Junio C Hamano
  0 siblings, 1 reply; 28+ messages in thread
From: Junio C Hamano @ 2014-09-02 21:10 UTC (permalink / raw)
  To: John Keeping; +Cc: David Turner, git, David Turner

Junio C Hamano <gitster@pobox.com> writes:

> John Keeping <john@keeping.me.uk> writes:
>
>> On Fri, Jul 11, 2014 at 09:44:33PM -0700, David Turner wrote:
>>> When git checkout checks out a branch, create or update the
>>> cache-tree so that subsequent operations are faster.
>>> 
>>> update_main_cache_tree learned a new flag, WRITE_TREE_REPAIR.  When
>>> WRITE_TREE_REPAIR is set, portions of the cache-tree which do not
>>> correspond to existing tree objects are invalidated (and portions which
>>> do are marked as valid).  No new tree objects are created.
>>> 
>>> Signed-off-by: David Turner <dturner@twitter.com>
>>> ---
>>
>> This causes an incorrect error message to be printed when switching
>> branches with staged changes in a subdirectory.  The test case is pretty
>> simple:
>>
>> 	git init test &&
>> 	cd test &&
>> 	mkdir sub &&
>> 	echo one >sub/one &&
>> 	git add sub/one &&
>> 	git commit -m one &&
>> 	echo two >sub/two &&
>> 	git add sub/two &&
>> 	git checkout -b test
>>
>> After this commit the output is:
>>
>> 	error: invalid object 040000 0000000000000000000000000000000000000000 for 'bar'
>> 	A       bar/quux
>> 	Switched to branch 'test'
>>
>> but the "error:" line should not be there.
>
> Yeah, this seems to be broken and I am unhappy that I didn't notice
> it myself as I always use a version that is somewhat ahead of 'next'
> myself.

Perhaps like this, to make sure that we do not throw a garbage
object name into the cache tree when we avoid creating an unwanted
tree object?

All the tests added by the series seems to pass, so I am assuming
that this will not break the "repair" codepath when it should kick
in.

We may want to add your test to t0090 as well.

 cache-tree.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cache-tree.c b/cache-tree.c
index f951d7d..e3baf42 100644
--- a/cache-tree.c
+++ b/cache-tree.c
@@ -398,7 +398,7 @@ static int update_one(struct cache_tree *it,
 		it->entry_count, it->subtree_nr,
 		sha1_to_hex(it->sha1));
 #endif
-	return i;
+	return to_invalidate ? -1 : i;
 }
 
 int cache_tree_update(struct cache_tree *it,

^ permalink raw reply related	[flat|nested] 28+ messages in thread

* [PATCH] cache-tree: propagate invalidation up when punting
  2014-09-02 21:10     ` Junio C Hamano
@ 2014-09-02 21:24       ` Junio C Hamano
  2014-09-02 22:39         ` [PATCH v2] cache-tree: do not try to use an invalidated subtree info to build a tree Junio C Hamano
  0 siblings, 1 reply; 28+ messages in thread
From: Junio C Hamano @ 2014-09-02 21:24 UTC (permalink / raw)
  To: git; +Cc: John Keeping, David Turner, David Turner

We punt from "repair"ing the hash tree during a branch switching if
it involves having to create a new tree object that does not yet
exist in the object store.  "mkdir dir && >dir/file && git add dir"
followed by "git checkout" is one example, when a tree that records
the state of such "dir/" is not in the object store.

However, after discovering that we do not have a tree object that
records the state of "dir/", we failed to propagate the fact up the
callchain to stop the code to attempt populating the level that has
"dir/" as its immediate subdirectory.  This led the caller detect
and report a non-existent error.

Reported-by: John Keeping <john@keeping.me.uk>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 cache-tree.c          | 2 +-
 t/t0090-cache-tree.sh | 8 ++++++++
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/cache-tree.c b/cache-tree.c
index f951d7d..e3baf42 100644
--- a/cache-tree.c
+++ b/cache-tree.c
@@ -398,7 +398,7 @@ static int update_one(struct cache_tree *it,
 		it->entry_count, it->subtree_nr,
 		sha1_to_hex(it->sha1));
 #endif
-	return i;
+	return to_invalidate ? -1 : i;
 }
 
 int cache_tree_update(struct cache_tree *it,
diff --git a/t/t0090-cache-tree.sh b/t/t0090-cache-tree.sh
index 48c4240..f9648a8 100755
--- a/t/t0090-cache-tree.sh
+++ b/t/t0090-cache-tree.sh
@@ -210,4 +210,12 @@ test_expect_success 'partial commit gives cache-tree' '
 	test_cache_tree
 '
 
+test_expect_success 'no phantom error when switching trees' '
+	mkdir newdir &&
+	>newdir/one &&
+	git add newdir/one &&
+	git checkout 2>errors &&
+	! test -s errors
+'
+
 test_done
-- 
2.1.0-391-g57244f3

^ permalink raw reply related	[flat|nested] 28+ messages in thread

* [PATCH v2] cache-tree: do not try to use an invalidated subtree info to build a tree
  2014-09-02 21:24       ` [PATCH] cache-tree: propagate invalidation up when punting Junio C Hamano
@ 2014-09-02 22:39         ` Junio C Hamano
  2014-09-03  2:56           ` David Turner
  2014-09-03 12:02           ` Eric Sunshine
  0 siblings, 2 replies; 28+ messages in thread
From: Junio C Hamano @ 2014-09-02 22:39 UTC (permalink / raw)
  To: git; +Cc: John Keeping, David Turner, David Turner

We punt from repairing the cache-tree during a branch switching if
it involves having to create a new tree object that does not yet
exist in the object store.  "mkdir dir && >dir/file && git add dir"
followed by "git checkout" is one example, when a tree that records
the state of such "dir/" is not in the object store.

However, after discovering that we do not have a tree object that
records the state of "dir/", the caller failed to remember the fact
that it noticed the cache-tree entry it received for "dir/" is
invalidated, it already knows it should not be populating the level
callchain to stop the code to attempt populating the level that has
"dir/" as its immediate subdirectory, and it is not an error at all
for the sublevel cache-tree entry gave it a bogus object name it
shouldn't even look at.

This led the caller detect and report a non-existent error.  The end
result was the same and we avoided stuffing a non-existent tree to
the cache-tree, but we shouldn't have issued an alarming error
message to the user.

Signed-off-by: Junio C Hamano <gitster@pobox.com>
---

 * Second try.  The level that has intent-to-add entries needs to be
   kept invalidated but the level above it needs to treat as if the
   i-t-a entries do not exist and build the whole tree; a directory
   that does not yet have corresponding tree object while repairing
   the cache-tree needs to invalidate itself *and* propagate the
   (in)validity upwards.  They have to be treated differently but
   the first attempt failed to do so.

 cache-tree.c          | 7 ++++++-
 t/t0090-cache-tree.sh | 8 ++++++++
 2 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/cache-tree.c b/cache-tree.c
index f951d7d..57597ac 100644
--- a/cache-tree.c
+++ b/cache-tree.c
@@ -316,6 +316,7 @@ static int update_one(struct cache_tree *it,
 		int pathlen, entlen;
 		const unsigned char *sha1;
 		unsigned mode;
+		int expected_missing = 0;
 
 		path = ce->name;
 		pathlen = ce_namelen(ce);
@@ -332,8 +333,10 @@ static int update_one(struct cache_tree *it,
 			i += sub->count;
 			sha1 = sub->cache_tree->sha1;
 			mode = S_IFDIR;
-			if (sub->cache_tree->entry_count < 0)
+			if (sub->cache_tree->entry_count < 0) {
 				to_invalidate = 1;
+				expected_missing = 1;
+			}
 		}
 		else {
 			sha1 = ce->sha1;
@@ -343,6 +346,8 @@ static int update_one(struct cache_tree *it,
 		}
 		if (mode != S_IFGITLINK && !missing_ok && !has_sha1_file(sha1)) {
 			strbuf_release(&buffer);
+			if (expected_missing)
+				return -1;
 			return error("invalid object %06o %s for '%.*s'",
 				mode, sha1_to_hex(sha1), entlen+baselen, path);
 		}
diff --git a/t/t0090-cache-tree.sh b/t/t0090-cache-tree.sh
index 48c4240..f9648a8 100755
--- a/t/t0090-cache-tree.sh
+++ b/t/t0090-cache-tree.sh
@@ -210,4 +210,12 @@ test_expect_success 'partial commit gives cache-tree' '
 	test_cache_tree
 '
 
+test_expect_success 'no phantom error when switching trees' '
+	mkdir newdir &&
+	>newdir/one &&
+	git add newdir/one &&
+	git checkout 2>errors &&
+	! test -s errors
+'
+
 test_done
-- 
2.1.0-391-g57244f3

^ permalink raw reply related	[flat|nested] 28+ messages in thread

* Re: [PATCH v2] cache-tree: do not try to use an invalidated subtree info to build a tree
  2014-09-02 22:39         ` [PATCH v2] cache-tree: do not try to use an invalidated subtree info to build a tree Junio C Hamano
@ 2014-09-03  2:56           ` David Turner
  2014-09-03 12:02           ` Eric Sunshine
  1 sibling, 0 replies; 28+ messages in thread
From: David Turner @ 2014-09-03  2:56 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git, John Keeping, David Turner

On Tue, 2014-09-02 at 15:39 -0700, Junio C Hamano wrote:
> We punt from repairing the cache-tree during a branch switching if
> it involves having to create a new tree object that does not yet
> exist in the object store.  "mkdir dir && >dir/file && git add dir"
> followed by "git checkout" is one example, when a tree that records
> the state of such "dir/" is not in the object store.
> 
> However, after discovering that we do not have a tree object that
> records the state of "dir/", the caller failed to remember the fact
> that it noticed the cache-tree entry it received for "dir/" is
> invalidated, it already knows it should not be populating the level
> callchain to stop the code to attempt populating the level that has
> "dir/" as its immediate subdirectory, and it is not an error at all
> for the sublevel cache-tree entry gave it a bogus object name it
> shouldn't even look at.
> 
> This led the caller detect and report a non-existent error.  The end
> result was the same and we avoided stuffing a non-existent tree to
> the cache-tree, but we shouldn't have issued an alarming error
> message to the user.
> 
> Signed-off-by: Junio C Hamano <gitster@pobox.com>
> ---
> 
>  * Second try.  The level that has intent-to-add entries needs to be
>    kept invalidated but the level above it needs to treat as if the
>    i-t-a entries do not exist and build the whole tree; a directory
>    that does not yet have corresponding tree object while repairing
>    the cache-tree needs to invalidate itself *and* propagate the
>    (in)validity upwards.  They have to be treated differently but
>    the first attempt failed to do so.
> 
>  cache-tree.c          | 7 ++++++-
>  t/t0090-cache-tree.sh | 8 ++++++++
>  2 files changed, 14 insertions(+), 1 deletion(-)
> 
> diff --git a/cache-tree.c b/cache-tree.c
> index f951d7d..57597ac 100644
> --- a/cache-tree.c
> +++ b/cache-tree.c
> @@ -316,6 +316,7 @@ static int update_one(struct cache_tree *it,
>  		int pathlen, entlen;
>  		const unsigned char *sha1;
>  		unsigned mode;
> +		int expected_missing = 0;
>  
>  		path = ce->name;
>  		pathlen = ce_namelen(ce);
> @@ -332,8 +333,10 @@ static int update_one(struct cache_tree *it,
>  			i += sub->count;
>  			sha1 = sub->cache_tree->sha1;
>  			mode = S_IFDIR;
> -			if (sub->cache_tree->entry_count < 0)
> +			if (sub->cache_tree->entry_count < 0) {
>  				to_invalidate = 1;
> +				expected_missing = 1;
> +			}
>  		}
>  		else {
>  			sha1 = ce->sha1;
> @@ -343,6 +346,8 @@ static int update_one(struct cache_tree *it,
>  		}
>  		if (mode != S_IFGITLINK && !missing_ok && !has_sha1_file(sha1)) {
>  			strbuf_release(&buffer);
> +			if (expected_missing)
> +				return -1;
>  			return error("invalid object %06o %s for '%.*s'",
>  				mode, sha1_to_hex(sha1), entlen+baselen, path);
>  		}
> diff --git a/t/t0090-cache-tree.sh b/t/t0090-cache-tree.sh
> index 48c4240..f9648a8 100755
> --- a/t/t0090-cache-tree.sh
> +++ b/t/t0090-cache-tree.sh
> @@ -210,4 +210,12 @@ test_expect_success 'partial commit gives cache-tree' '
>  	test_cache_tree
>  '
>  
> +test_expect_success 'no phantom error when switching trees' '
> +	mkdir newdir &&
> +	>newdir/one &&
> +	git add newdir/one &&
> +	git checkout 2>errors &&
> +	! test -s errors
> +'
> +
>  test_done

LGTM.

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH v2] cache-tree: do not try to use an invalidated subtree info to build a tree
  2014-09-02 22:39         ` [PATCH v2] cache-tree: do not try to use an invalidated subtree info to build a tree Junio C Hamano
  2014-09-03  2:56           ` David Turner
@ 2014-09-03 12:02           ` Eric Sunshine
  1 sibling, 0 replies; 28+ messages in thread
From: Eric Sunshine @ 2014-09-03 12:02 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Git List, John Keeping, David Turner, David Turner

On Tue, Sep 2, 2014 at 6:39 PM, Junio C Hamano <gitster@pobox.com> wrote:
> We punt from repairing the cache-tree during a branch switching if
> it involves having to create a new tree object that does not yet
> exist in the object store.  "mkdir dir && >dir/file && git add dir"
> followed by "git checkout" is one example, when a tree that records
> the state of such "dir/" is not in the object store.
>
> However, after discovering that we do not have a tree object that
> records the state of "dir/", the caller failed to remember the fact
> that it noticed the cache-tree entry it received for "dir/" is
> invalidated, it already knows it should not be populating the level
> callchain to stop the code to attempt populating the level that has
> "dir/" as its immediate subdirectory, and it is not an error at all
> for the sublevel cache-tree entry gave it a bogus object name it
> shouldn't even look at.
>
> This led the caller detect and report a non-existent error.  The end

s/caller/caller to/

> result was the same and we avoided stuffing a non-existent tree to
> the cache-tree, but we shouldn't have issued an alarming error
> message to the user.
>
> Signed-off-by: Junio C Hamano <gitster@pobox.com>

^ permalink raw reply	[flat|nested] 28+ messages in thread

end of thread, other threads:[~2014-09-03 12:03 UTC | newest]

Thread overview: 28+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-07-12  4:44 [PATCH v8 1/4] cache-tree: Create/update cache-tree on checkout David Turner
2014-07-12  4:44 ` [PATCH v8 2/4] test-dump-cache-tree: invalid trees are not errors David Turner
2014-07-12  4:44 ` [PATCH v8 3/4] cache-tree: subdirectory tests David Turner
2014-07-12  4:44 ` [PATCH v8 4/4] cache-tree: Write updated cache-tree after commit David Turner
2014-07-13  5:09   ` Duy Nguyen
2014-07-14 15:54     ` Junio C Hamano
2014-07-14 17:33       ` Ramsay Jones
2014-07-14 17:51         ` Junio C Hamano
2014-07-14 18:41           ` Ramsay Jones
2014-07-14 22:16             ` Junio C Hamano
2014-07-14 22:32               ` David Turner
2014-07-15  2:15               ` Duy Nguyen
2014-07-15  6:38                 ` Junio C Hamano
2014-07-15 10:23                   ` Duy Nguyen
2014-07-15 16:45                     ` Junio C Hamano
2014-07-16 10:18                       ` Duy Nguyen
2014-07-16 17:33                         ` Junio C Hamano
2014-07-14 17:43       ` Junio C Hamano
2014-07-14 20:19         ` [PATCH v2] lockfile: allow reopening a closed but still locked file Junio C Hamano
2014-08-31 12:07 ` [PATCH v8 1/4] cache-tree: Create/update cache-tree on checkou John Keeping
2014-09-01 20:49   ` David Turner
2014-09-01 22:13     ` John Keeping
2014-09-02 20:52   ` Junio C Hamano
2014-09-02 21:10     ` Junio C Hamano
2014-09-02 21:24       ` [PATCH] cache-tree: propagate invalidation up when punting Junio C Hamano
2014-09-02 22:39         ` [PATCH v2] cache-tree: do not try to use an invalidated subtree info to build a tree Junio C Hamano
2014-09-03  2:56           ` David Turner
2014-09-03 12:02           ` Eric Sunshine

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.