All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v3 1/3] tree-walk: be more specific about corrupt tree errors
@ 2016-09-27 15:23 David Turner
  2016-09-27 15:23 ` [PATCH v3 2/3] fsck: handle bad trees like other errors David Turner
                   ` (3 more replies)
  0 siblings, 4 replies; 7+ messages in thread
From: David Turner @ 2016-09-27 15:23 UTC (permalink / raw)
  To: git, peff; +Cc: David Turner

From: Jeff King <peff@peff.net>

When the tree-walker runs into an error, it just calls
die(), and the message is always "corrupt tree file".
However, we are actually covering several cases here; let's
give the user a hint about what happened.

Let's also avoid using the word "corrupt", which makes it
seem like the data bit-rotted on disk. Our sha1 check would
already have found that. These errors are ones of data that
is malformed in the first place.

Signed-off-by: David Turner <dturner@twosigma.com>
Signed-off-by: Jeff King <peff@peff.net>
---
 t/t1007-hash-object.sh | 21 +++++++++++++++++++--
 tree-walk.c            | 12 +++++++-----
 2 files changed, 26 insertions(+), 7 deletions(-)

diff --git a/t/t1007-hash-object.sh b/t/t1007-hash-object.sh
index acca9ac..0691b88 100755
--- a/t/t1007-hash-object.sh
+++ b/t/t1007-hash-object.sh
@@ -183,9 +183,26 @@ for args in "-w --stdin-paths" "--stdin-paths -w"; do
 	pop_repo
 done
 
-test_expect_success 'corrupt tree' '
+test_expect_success 'too-short tree' '
 	echo abc >malformed-tree &&
-	test_must_fail git hash-object -t tree malformed-tree
+	test_must_fail git hash-object -t tree malformed-tree 2>err &&
+	test_i18ngrep "too-short tree object" err
+'
+
+test_expect_success 'malformed mode in tree' '
+	hex_sha1=$(echo foo | git hash-object --stdin -w) &&
+	bin_sha1=$(echo $hex_sha1 | perl -ne "printf \"\\\\%03o\", ord for /../g") &&
+	printf "9100644 \0$bin_sha1" >tree-with-malformed-mode &&
+	test_must_fail git hash-object -t tree tree-with-malformed-mode 2>err &&
+	test_i18ngrep "malformed mode in tree entry" err
+'
+
+test_expect_success 'empty filename in tree' '
+	hex_sha1=$(echo foo | git hash-object --stdin -w) &&
+	bin_sha1=$(echo $hex_sha1 | perl -ne "printf \"\\\\%03o\", ord for /../g") &&
+	printf "100644 \0$bin_sha1" >tree-with-empty-filename &&
+	test_must_fail git hash-object -t tree tree-with-empty-filename 2>err &&
+	test_i18ngrep "empty filename in tree entry" err
 '
 
 test_expect_success 'corrupt commit' '
diff --git a/tree-walk.c b/tree-walk.c
index ce27842..24f9a0f 100644
--- a/tree-walk.c
+++ b/tree-walk.c
@@ -27,12 +27,14 @@ static void decode_tree_entry(struct tree_desc *desc, const char *buf, unsigned
 	const char *path;
 	unsigned int mode, len;
 
-	if (size < 24 || buf[size - 21])
-		die("corrupt tree file");
+	if (size < 23 || buf[size - 21])
+		die(_("too-short tree object"));
 
 	path = get_mode(buf, &mode);
-	if (!path || !*path)
-		die("corrupt tree file");
+	if (!path)
+		die(_("malformed mode in tree entry for tree"));
+	if (!*path)
+		die(_("empty filename in tree entry for tree"));
 	len = strlen(path) + 1;
 
 	/* Initialize the descriptor entry */
@@ -81,7 +83,7 @@ void update_tree_entry(struct tree_desc *desc)
 	unsigned long len = end - (const unsigned char *)buf;
 
 	if (size < len)
-		die("corrupt tree file");
+		die(_("too-short tree file"));
 	buf = end;
 	size -= len;
 	desc->buffer = buf;
-- 
2.8.0.rc4.22.g8ae061a


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH v3 2/3] fsck: handle bad trees like other errors
  2016-09-27 15:23 [PATCH v3 1/3] tree-walk: be more specific about corrupt tree errors David Turner
@ 2016-09-27 15:23 ` David Turner
  2016-09-27 19:03   ` Jeff King
  2016-09-27 15:23 ` [PATCH v3 3/3] add David Turner's Two Sigma address David Turner
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 7+ messages in thread
From: David Turner @ 2016-09-27 15:23 UTC (permalink / raw)
  To: git, peff; +Cc: David Turner

Instead of dying when fsck hits a malformed tree object, log the error
like any other and continue.  Now fsck can tell the user which tree is
bad, too.

Signed-off-by: David Turner <dturner@twosigma.com>
---
 fsck.c          | 18 ++++++++-----
 t/t1450-fsck.sh | 16 +++++++++--
 tree-walk.c     | 83 +++++++++++++++++++++++++++++++++++++++++++++++++--------
 tree-walk.h     |  8 ++++++
 4 files changed, 106 insertions(+), 19 deletions(-)

diff --git a/fsck.c b/fsck.c
index c9cf3de..4a3069e 100644
--- a/fsck.c
+++ b/fsck.c
@@ -347,8 +347,9 @@ static int fsck_walk_tree(struct tree *tree, void *data, struct fsck_options *op
 		return -1;
 
 	name = get_object_name(options, &tree->object);
-	init_tree_desc(&desc, tree->buffer, tree->size);
-	while (tree_entry(&desc, &entry)) {
+	if (init_tree_desc_gently(&desc, tree->buffer, tree->size))
+		return -1;
+	while (tree_entry_gently(&desc, &entry)) {
 		struct object *obj;
 		int result;
 
@@ -520,7 +521,7 @@ static int verify_ordered(unsigned mode1, const char *name1, unsigned mode2, con
 
 static int fsck_tree(struct tree *item, struct fsck_options *options)
 {
-	int retval;
+	int retval = 0;
 	int has_null_sha1 = 0;
 	int has_full_path = 0;
 	int has_empty_name = 0;
@@ -535,7 +536,10 @@ static int fsck_tree(struct tree *item, struct fsck_options *options)
 	unsigned o_mode;
 	const char *o_name;
 
-	init_tree_desc(&desc, item->buffer, item->size);
+	if (init_tree_desc_gently(&desc, item->buffer, item->size)) {
+		retval += report(options, &item->object, FSCK_MSG_BAD_TREE, "cannot be parsed as a tree");
+		return retval;
+	}
 
 	o_mode = 0;
 	o_name = NULL;
@@ -556,7 +560,10 @@ static int fsck_tree(struct tree *item, struct fsck_options *options)
 			       is_hfs_dotgit(name) ||
 			       is_ntfs_dotgit(name));
 		has_zero_pad |= *(char *)desc.buffer == '0';
-		update_tree_entry(&desc);
+		if (update_tree_entry_gently(&desc)) {
+			retval += report(options, &item->object, FSCK_MSG_BAD_TREE, "cannot be parsed as a tree");
+			break;
+		}
 
 		switch (mode) {
 		/*
@@ -597,7 +604,6 @@ static int fsck_tree(struct tree *item, struct fsck_options *options)
 		o_name = name;
 	}
 
-	retval = 0;
 	if (has_null_sha1)
 		retval += report(options, &item->object, FSCK_MSG_NULL_SHA1, "contains entries pointing to null sha1");
 	if (has_full_path)
diff --git a/t/t1450-fsck.sh b/t/t1450-fsck.sh
index 8f52da2..0fcd38a 100755
--- a/t/t1450-fsck.sh
+++ b/t/t1450-fsck.sh
@@ -188,8 +188,7 @@ test_expect_success 'commit with NUL in header' '
 	grep "error in commit $new.*unterminated header: NUL at offset" out
 '
 
-test_expect_success 'malformatted tree object' '
-	test_when_finished "git update-ref -d refs/tags/wrong" &&
+test_expect_success 'tree object with duplicate entries' '
 	test_when_finished "remove_object \$T" &&
 	T=$(
 		GIT_INDEX_FILE=test-index &&
@@ -208,6 +207,19 @@ test_expect_success 'malformatted tree object' '
 	grep "error in tree .*contains duplicate file entries" out
 '
 
+test_expect_success 'unparseable tree object' '
+	test_when_finished "git update-ref -d refs/heads/wrong" &&
+	test_when_finished "remove_object \$tree_sha1" &&
+	test_when_finished "remove_object \$commit_sha1" &&
+	tree_sha1=$(printf "100644 \0twenty-bytes-of-junk" | git hash-object -t tree --stdin -w --literally) &&
+	commit_sha1=$(git commit-tree $tree_sha1) &&
+	git update-ref refs/heads/wrong $commit_sha1 &&
+	test_must_fail git fsck 2>out &&
+	test_i18ngrep "error: empty filename in tree entry" out &&
+	test_i18ngrep "$tree_sha1" out &&
+	! test_i18ngrep "fatal: empty filename in tree entry" out
+'
+
 test_expect_success 'tag pointing to nonexistent' '
 	cat >invalid-tag <<-\EOF &&
 	object ffffffffffffffffffffffffffffffffffffffff
diff --git a/tree-walk.c b/tree-walk.c
index 24f9a0f..828f435 100644
--- a/tree-walk.c
+++ b/tree-walk.c
@@ -22,33 +22,60 @@ static const char *get_mode(const char *str, unsigned int *modep)
 	return str;
 }
 
-static void decode_tree_entry(struct tree_desc *desc, const char *buf, unsigned long size)
+static int decode_tree_entry(struct tree_desc *desc, const char *buf, unsigned long size, struct strbuf *err)
 {
 	const char *path;
 	unsigned int mode, len;
 
-	if (size < 23 || buf[size - 21])
-		die(_("too-short tree object"));
+	if (size < 23 || buf[size - 21]) {
+		strbuf_addstr(err, _("too-short tree object"));
+		return -1;
+	}
 
 	path = get_mode(buf, &mode);
-	if (!path)
-		die(_("malformed mode in tree entry for tree"));
-	if (!*path)
-		die(_("empty filename in tree entry for tree"));
+	if (!path) {
+		strbuf_addstr(err, _("malformed mode in tree entry"));
+		return -1;
+	}
+	if (!*path) {
+		strbuf_addstr(err, _("empty filename in tree entry"));
+		return -1;
+	}
 	len = strlen(path) + 1;
 
 	/* Initialize the descriptor entry */
 	desc->entry.path = path;
 	desc->entry.mode = canon_mode(mode);
 	desc->entry.oid  = (const struct object_id *)(path + len);
+
+	return 0;
 }
 
-void init_tree_desc(struct tree_desc *desc, const void *buffer, unsigned long size)
+static int init_tree_desc_internal(struct tree_desc *desc, const void *buffer, unsigned long size, struct strbuf *err)
 {
 	desc->buffer = buffer;
 	desc->size = size;
 	if (size)
-		decode_tree_entry(desc, buffer, size);
+		return decode_tree_entry(desc, buffer, size, err);
+	return 0;
+}
+
+void init_tree_desc(struct tree_desc *desc, const void *buffer, unsigned long size)
+{
+	struct strbuf err = STRBUF_INIT;
+	if (init_tree_desc_internal(desc, buffer, size, &err))
+		die("%s", err.buf);
+	strbuf_release(&err);
+}
+
+int init_tree_desc_gently(struct tree_desc *desc, const void *buffer, unsigned long size)
+{
+	struct strbuf err = STRBUF_INIT;
+	int result = init_tree_desc_internal(desc, buffer, size, &err);
+	if (result)
+		error("%s", err.buf);
+	strbuf_release(&err);
+	return result;
 }
 
 void *fill_tree_descriptor(struct tree_desc *desc, const unsigned char *sha1)
@@ -75,7 +102,7 @@ static void entry_extract(struct tree_desc *t, struct name_entry *a)
 	*a = t->entry;
 }
 
-void update_tree_entry(struct tree_desc *desc)
+static int update_tree_entry_internal(struct tree_desc *desc, struct strbuf *err)
 {
 	const void *buf = desc->buffer;
 	const unsigned char *end = desc->entry.oid->hash + 20;
@@ -89,7 +116,30 @@ void update_tree_entry(struct tree_desc *desc)
 	desc->buffer = buf;
 	desc->size = size;
 	if (size)
-		decode_tree_entry(desc, buf, size);
+		return decode_tree_entry(desc, buf, size, err);
+	return 0;
+}
+
+void update_tree_entry(struct tree_desc *desc)
+{
+	struct strbuf err = STRBUF_INIT;
+	if (update_tree_entry_internal(desc, &err))
+		die("%s", err.buf);
+	strbuf_release(&err);
+}
+
+int update_tree_entry_gently(struct tree_desc *desc)
+{
+	struct strbuf err = STRBUF_INIT;
+	if (update_tree_entry_internal(desc, &err)) {
+		error("%s", err.buf);
+		strbuf_release(&err);
+		/* Stop processing this tree after error */
+		desc->size = 0;
+		return -1;
+	}
+	strbuf_release(&err);
+	return 0;
 }
 
 int tree_entry(struct tree_desc *desc, struct name_entry *entry)
@@ -102,6 +152,17 @@ int tree_entry(struct tree_desc *desc, struct name_entry *entry)
 	return 1;
 }
 
+int tree_entry_gently(struct tree_desc *desc, struct name_entry *entry)
+{
+	if (!desc->size)
+		return 0;
+
+	*entry = desc->entry;
+	if (update_tree_entry_gently(desc))
+		return 0;
+	return 1;
+}
+
 void setup_traverse_info(struct traverse_info *info, const char *base)
 {
 	int pathlen = strlen(base);
diff --git a/tree-walk.h b/tree-walk.h
index 97a7d69..68bb78b 100644
--- a/tree-walk.h
+++ b/tree-walk.h
@@ -25,14 +25,22 @@ static inline int tree_entry_len(const struct name_entry *ne)
 	return (const char *)ne->oid - ne->path - 1;
 }
 
+/*
+ * The _gently versions of these functions warn and return false on a
+ * corrupt tree entry rather than dying,
+ */
+
 void update_tree_entry(struct tree_desc *);
+int update_tree_entry_gently(struct tree_desc *);
 void init_tree_desc(struct tree_desc *desc, const void *buf, unsigned long size);
+int init_tree_desc_gently(struct tree_desc *desc, const void *buf, unsigned long size);
 
 /*
  * Helper function that does both tree_entry_extract() and update_tree_entry()
  * and returns true for success
  */
 int tree_entry(struct tree_desc *, struct name_entry *);
+int tree_entry_gently(struct tree_desc *, struct name_entry *);
 
 void *fill_tree_descriptor(struct tree_desc *desc, const unsigned char *sha1);
 
-- 
2.8.0.rc4.22.g8ae061a


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH v3 3/3] add David Turner's Two Sigma address
  2016-09-27 15:23 [PATCH v3 1/3] tree-walk: be more specific about corrupt tree errors David Turner
  2016-09-27 15:23 ` [PATCH v3 2/3] fsck: handle bad trees like other errors David Turner
@ 2016-09-27 15:23 ` David Turner
  2016-09-27 17:03   ` Junio C Hamano
  2016-09-27 16:55 ` [PATCH v3 1/3] tree-walk: be more specific about corrupt tree errors Junio C Hamano
  2016-09-27 18:59 ` Jeff King
  3 siblings, 1 reply; 7+ messages in thread
From: David Turner @ 2016-09-27 15:23 UTC (permalink / raw)
  To: git, peff; +Cc: David Turner

From: David Turner <novalis@novalis.org>

Signed-off-by: David Turner <novalis@novalis.org>
---
 .mailmap | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.mailmap b/.mailmap
index 9441a54..9cc33e9 100644
--- a/.mailmap
+++ b/.mailmap
@@ -48,6 +48,7 @@ David Kågedal <davidk@lysator.liu.se>
 David Reiss <dreiss@facebook.com> <dreiss@dreiss-vmware.(none)>
 David S. Miller <davem@davemloft.net>
 David Turner <novalis@novalis.org> <dturner@twopensource.com>
+David Turner <novalis@novalis.org> <dturner@twosigma.com>
 Deskin Miller <deskinm@umich.edu>
 Dirk Süsserott <newsletter@dirk.my1.cc>
 Eric Blake <eblake@redhat.com> <ebb9@byu.net>
-- 
2.8.0.rc4.22.g8ae061a


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH v3 1/3] tree-walk: be more specific about corrupt tree errors
  2016-09-27 15:23 [PATCH v3 1/3] tree-walk: be more specific about corrupt tree errors David Turner
  2016-09-27 15:23 ` [PATCH v3 2/3] fsck: handle bad trees like other errors David Turner
  2016-09-27 15:23 ` [PATCH v3 3/3] add David Turner's Two Sigma address David Turner
@ 2016-09-27 16:55 ` Junio C Hamano
  2016-09-27 18:59 ` Jeff King
  3 siblings, 0 replies; 7+ messages in thread
From: Junio C Hamano @ 2016-09-27 16:55 UTC (permalink / raw)
  To: David Turner; +Cc: git, peff

David Turner <dturner@twosigma.com> writes:

> From: Jeff King <peff@peff.net>
>
> When the tree-walker runs into an error, it just calls
> die(), and the message is always "corrupt tree file".
> However, we are actually covering several cases here; let's
> give the user a hint about what happened.
>
> Let's also avoid using the word "corrupt", which makes it
> seem like the data bit-rotted on disk. Our sha1 check would
> already have found that. These errors are ones of data that
> is malformed in the first place.
>
> Signed-off-by: David Turner <dturner@twosigma.com>
> Signed-off-by: Jeff King <peff@peff.net>
> ---
>  t/t1007-hash-object.sh | 21 +++++++++++++++++++--
>  tree-walk.c            | 12 +++++++-----
>  2 files changed, 26 insertions(+), 7 deletions(-)

Nice that we now prepare the test data ourselves without shipping as
part of the source.


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH v3 3/3] add David Turner's Two Sigma address
  2016-09-27 15:23 ` [PATCH v3 3/3] add David Turner's Two Sigma address David Turner
@ 2016-09-27 17:03   ` Junio C Hamano
  0 siblings, 0 replies; 7+ messages in thread
From: Junio C Hamano @ 2016-09-27 17:03 UTC (permalink / raw)
  To: David Turner; +Cc: git, peff, David Turner

David Turner <dturner@twosigma.com> writes:

> From: David Turner <novalis@novalis.org>
>
> Signed-off-by: David Turner <novalis@novalis.org>
> ---
>  .mailmap | 1 +
>  1 file changed, 1 insertion(+)

Thanks. Queued separately in order to merge to master much earlier
than the tree-fsck topic.

>
> diff --git a/.mailmap b/.mailmap
> index 9441a54..9cc33e9 100644
> --- a/.mailmap
> +++ b/.mailmap
> @@ -48,6 +48,7 @@ David Kågedal <davidk@lysator.liu.se>
>  David Reiss <dreiss@facebook.com> <dreiss@dreiss-vmware.(none)>
>  David S. Miller <davem@davemloft.net>
>  David Turner <novalis@novalis.org> <dturner@twopensource.com>
> +David Turner <novalis@novalis.org> <dturner@twosigma.com>
>  Deskin Miller <deskinm@umich.edu>
>  Dirk Süsserott <newsletter@dirk.my1.cc>
>  Eric Blake <eblake@redhat.com> <ebb9@byu.net>

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH v3 1/3] tree-walk: be more specific about corrupt tree errors
  2016-09-27 15:23 [PATCH v3 1/3] tree-walk: be more specific about corrupt tree errors David Turner
                   ` (2 preceding siblings ...)
  2016-09-27 16:55 ` [PATCH v3 1/3] tree-walk: be more specific about corrupt tree errors Junio C Hamano
@ 2016-09-27 18:59 ` Jeff King
  3 siblings, 0 replies; 7+ messages in thread
From: Jeff King @ 2016-09-27 18:59 UTC (permalink / raw)
  To: David Turner; +Cc: git

On Tue, Sep 27, 2016 at 11:23:24AM -0400, David Turner wrote:

> +test_expect_success 'malformed mode in tree' '
> +	hex_sha1=$(echo foo | git hash-object --stdin -w) &&
> +	bin_sha1=$(echo $hex_sha1 | perl -ne "printf \"\\\\%03o\", ord for /../g") &&

Sorry, the perl snippet I posted earlier was totally braindead. It gives
you the octal for the raw bytes, but we really just want to convert the
hex to octal (we could also print the raw bytes from the hex, but I
didn't want to take chances on shells that can't handle NULs in
environment variables).

I also find it helps to define a helper function outside of the test
block to avoid quoting hell. So something like:

  hex2oct () {
	perl -ne 'printf "\\%03o", hex for /../g'
  }

  test_expect_success ... '
	bin_sha1=$(echo $hex_sha1 | hex2oct)
  '

-Peff

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH v3 2/3] fsck: handle bad trees like other errors
  2016-09-27 15:23 ` [PATCH v3 2/3] fsck: handle bad trees like other errors David Turner
@ 2016-09-27 19:03   ` Jeff King
  0 siblings, 0 replies; 7+ messages in thread
From: Jeff King @ 2016-09-27 19:03 UTC (permalink / raw)
  To: David Turner; +Cc: git

On Tue, Sep 27, 2016 at 11:23:25AM -0400, David Turner wrote:

> +test_expect_success 'unparseable tree object' '
> +	test_when_finished "git update-ref -d refs/heads/wrong" &&
> +	test_when_finished "remove_object \$tree_sha1" &&
> +	test_when_finished "remove_object \$commit_sha1" &&
> +	tree_sha1=$(printf "100644 \0twenty-bytes-of-junk" | git hash-object -t tree --stdin -w --literally) &&
> +	commit_sha1=$(git commit-tree $tree_sha1) &&
> +	git update-ref refs/heads/wrong $commit_sha1 &&
> +	test_must_fail git fsck 2>out &&
> +	test_i18ngrep "error: empty filename in tree entry" out &&
> +	test_i18ngrep "$tree_sha1" out &&
> +	! test_i18ngrep "fatal: empty filename in tree entry" out
> +'

Unfortunately this last one needs to be spelled as:

  test_i18ngrep ! "fatal: empty filename in tree entry" out

because the function always pretends to match when GETTEXT_POISON is
set.

Other than the minor test fixups, this all looks good to me.

-Peff

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2016-09-27 19:03 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-09-27 15:23 [PATCH v3 1/3] tree-walk: be more specific about corrupt tree errors David Turner
2016-09-27 15:23 ` [PATCH v3 2/3] fsck: handle bad trees like other errors David Turner
2016-09-27 19:03   ` Jeff King
2016-09-27 15:23 ` [PATCH v3 3/3] add David Turner's Two Sigma address David Turner
2016-09-27 17:03   ` Junio C Hamano
2016-09-27 16:55 ` [PATCH v3 1/3] tree-walk: be more specific about corrupt tree errors Junio C Hamano
2016-09-27 18:59 ` Jeff King

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.