All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v4 1/2] tree-walk: be more specific about corrupt tree errors
@ 2016-09-27 20:59 David Turner
  2016-09-27 20:59 ` [PATCH v4 2/2] fsck: handle bad trees like other errors David Turner
  2016-09-27 21:45 ` [PATCH v4 1/2] tree-walk: be more specific about corrupt tree errors Junio C Hamano
  0 siblings, 2 replies; 3+ messages in thread
From: David Turner @ 2016-09-27 20:59 UTC (permalink / raw)
  To: git, peff; +Cc: David Turner

From: Jeff King <peff@peff.net>

When the tree-walker runs into an error, it just calls
die(), and the message is always "corrupt tree file".
However, we are actually covering several cases here; let's
give the user a hint about what happened.

Let's also avoid using the word "corrupt", which makes it
seem like the data bit-rotted on disk. Our sha1 check would
already have found that. These errors are ones of data that
is malformed in the first place.

Signed-off-by: David Turner <dturner@twosigma.com>
Signed-off-by: Jeff King <peff@peff.net>
---
 t/t1007-hash-object.sh | 25 +++++++++++++++++++++++--
 tree-walk.c            | 12 +++++++-----
 2 files changed, 30 insertions(+), 7 deletions(-)

diff --git a/t/t1007-hash-object.sh b/t/t1007-hash-object.sh
index acca9ac..c5245c5 100755
--- a/t/t1007-hash-object.sh
+++ b/t/t1007-hash-object.sh
@@ -183,9 +183,30 @@ for args in "-w --stdin-paths" "--stdin-paths -w"; do
 	pop_repo
 done
 
-test_expect_success 'corrupt tree' '
+test_expect_success 'too-short tree' '
 	echo abc >malformed-tree &&
-	test_must_fail git hash-object -t tree malformed-tree
+	test_must_fail git hash-object -t tree malformed-tree 2>err &&
+	test_i18ngrep "too-short tree object" err
+'
+
+hex2oct() {
+    perl -ne 'printf "\\%03o", hex for /../g'
+}
+
+test_expect_success 'malformed mode in tree' '
+	hex_sha1=$(echo foo | git hash-object --stdin -w) &&
+	bin_sha1=$(echo $hex_sha1 | hex2oct) &&
+	printf "9100644 \0$bin_sha1" >tree-with-malformed-mode &&
+	test_must_fail git hash-object -t tree tree-with-malformed-mode 2>err &&
+	test_i18ngrep "malformed mode in tree entry" err
+'
+
+test_expect_success 'empty filename in tree' '
+	hex_sha1=$(echo foo | git hash-object --stdin -w) &&
+	bin_sha1=$(echo $hex_sha1 | hex2oct) &&
+	printf "100644 \0$bin_sha1" >tree-with-empty-filename &&
+	test_must_fail git hash-object -t tree tree-with-empty-filename 2>err &&
+	test_i18ngrep "empty filename in tree entry" err
 '
 
 test_expect_success 'corrupt commit' '
diff --git a/tree-walk.c b/tree-walk.c
index ce27842..24f9a0f 100644
--- a/tree-walk.c
+++ b/tree-walk.c
@@ -27,12 +27,14 @@ static void decode_tree_entry(struct tree_desc *desc, const char *buf, unsigned
 	const char *path;
 	unsigned int mode, len;
 
-	if (size < 24 || buf[size - 21])
-		die("corrupt tree file");
+	if (size < 23 || buf[size - 21])
+		die(_("too-short tree object"));
 
 	path = get_mode(buf, &mode);
-	if (!path || !*path)
-		die("corrupt tree file");
+	if (!path)
+		die(_("malformed mode in tree entry for tree"));
+	if (!*path)
+		die(_("empty filename in tree entry for tree"));
 	len = strlen(path) + 1;
 
 	/* Initialize the descriptor entry */
@@ -81,7 +83,7 @@ void update_tree_entry(struct tree_desc *desc)
 	unsigned long len = end - (const unsigned char *)buf;
 
 	if (size < len)
-		die("corrupt tree file");
+		die(_("too-short tree file"));
 	buf = end;
 	size -= len;
 	desc->buffer = buf;
-- 
2.8.0.rc4.22.g8ae061a


^ permalink raw reply related	[flat|nested] 3+ messages in thread

* [PATCH v4 2/2] fsck: handle bad trees like other errors
  2016-09-27 20:59 [PATCH v4 1/2] tree-walk: be more specific about corrupt tree errors David Turner
@ 2016-09-27 20:59 ` David Turner
  2016-09-27 21:45 ` [PATCH v4 1/2] tree-walk: be more specific about corrupt tree errors Junio C Hamano
  1 sibling, 0 replies; 3+ messages in thread
From: David Turner @ 2016-09-27 20:59 UTC (permalink / raw)
  To: git, peff; +Cc: David Turner

Instead of dying when fsck hits a malformed tree object, log the error
like any other and continue.  Now fsck can tell the user which tree is
bad, too.

Signed-off-by: David Turner <dturner@twosigma.com>
---
 fsck.c          | 18 ++++++++-----
 t/t1450-fsck.sh | 16 +++++++++--
 tree-walk.c     | 83 +++++++++++++++++++++++++++++++++++++++++++++++++--------
 tree-walk.h     |  8 ++++++
 4 files changed, 106 insertions(+), 19 deletions(-)

diff --git a/fsck.c b/fsck.c
index c9cf3de..4a3069e 100644
--- a/fsck.c
+++ b/fsck.c
@@ -347,8 +347,9 @@ static int fsck_walk_tree(struct tree *tree, void *data, struct fsck_options *op
 		return -1;
 
 	name = get_object_name(options, &tree->object);
-	init_tree_desc(&desc, tree->buffer, tree->size);
-	while (tree_entry(&desc, &entry)) {
+	if (init_tree_desc_gently(&desc, tree->buffer, tree->size))
+		return -1;
+	while (tree_entry_gently(&desc, &entry)) {
 		struct object *obj;
 		int result;
 
@@ -520,7 +521,7 @@ static int verify_ordered(unsigned mode1, const char *name1, unsigned mode2, con
 
 static int fsck_tree(struct tree *item, struct fsck_options *options)
 {
-	int retval;
+	int retval = 0;
 	int has_null_sha1 = 0;
 	int has_full_path = 0;
 	int has_empty_name = 0;
@@ -535,7 +536,10 @@ static int fsck_tree(struct tree *item, struct fsck_options *options)
 	unsigned o_mode;
 	const char *o_name;
 
-	init_tree_desc(&desc, item->buffer, item->size);
+	if (init_tree_desc_gently(&desc, item->buffer, item->size)) {
+		retval += report(options, &item->object, FSCK_MSG_BAD_TREE, "cannot be parsed as a tree");
+		return retval;
+	}
 
 	o_mode = 0;
 	o_name = NULL;
@@ -556,7 +560,10 @@ static int fsck_tree(struct tree *item, struct fsck_options *options)
 			       is_hfs_dotgit(name) ||
 			       is_ntfs_dotgit(name));
 		has_zero_pad |= *(char *)desc.buffer == '0';
-		update_tree_entry(&desc);
+		if (update_tree_entry_gently(&desc)) {
+			retval += report(options, &item->object, FSCK_MSG_BAD_TREE, "cannot be parsed as a tree");
+			break;
+		}
 
 		switch (mode) {
 		/*
@@ -597,7 +604,6 @@ static int fsck_tree(struct tree *item, struct fsck_options *options)
 		o_name = name;
 	}
 
-	retval = 0;
 	if (has_null_sha1)
 		retval += report(options, &item->object, FSCK_MSG_NULL_SHA1, "contains entries pointing to null sha1");
 	if (has_full_path)
diff --git a/t/t1450-fsck.sh b/t/t1450-fsck.sh
index 8f52da2..ee7d473 100755
--- a/t/t1450-fsck.sh
+++ b/t/t1450-fsck.sh
@@ -188,8 +188,7 @@ test_expect_success 'commit with NUL in header' '
 	grep "error in commit $new.*unterminated header: NUL at offset" out
 '
 
-test_expect_success 'malformatted tree object' '
-	test_when_finished "git update-ref -d refs/tags/wrong" &&
+test_expect_success 'tree object with duplicate entries' '
 	test_when_finished "remove_object \$T" &&
 	T=$(
 		GIT_INDEX_FILE=test-index &&
@@ -208,6 +207,19 @@ test_expect_success 'malformatted tree object' '
 	grep "error in tree .*contains duplicate file entries" out
 '
 
+test_expect_success 'unparseable tree object' '
+	test_when_finished "git update-ref -d refs/heads/wrong" &&
+	test_when_finished "remove_object \$tree_sha1" &&
+	test_when_finished "remove_object \$commit_sha1" &&
+	tree_sha1=$(printf "100644 \0twenty-bytes-of-junk" | git hash-object -t tree --stdin -w --literally) &&
+	commit_sha1=$(git commit-tree $tree_sha1) &&
+	git update-ref refs/heads/wrong $commit_sha1 &&
+	test_must_fail git fsck 2>out &&
+	test_i18ngrep "error: empty filename in tree entry" out &&
+	test_i18ngrep "$tree_sha1" out &&
+	test_i18ngrep ! "fatal: empty filename in tree entry" out
+'
+
 test_expect_success 'tag pointing to nonexistent' '
 	cat >invalid-tag <<-\EOF &&
 	object ffffffffffffffffffffffffffffffffffffffff
diff --git a/tree-walk.c b/tree-walk.c
index 24f9a0f..828f435 100644
--- a/tree-walk.c
+++ b/tree-walk.c
@@ -22,33 +22,60 @@ static const char *get_mode(const char *str, unsigned int *modep)
 	return str;
 }
 
-static void decode_tree_entry(struct tree_desc *desc, const char *buf, unsigned long size)
+static int decode_tree_entry(struct tree_desc *desc, const char *buf, unsigned long size, struct strbuf *err)
 {
 	const char *path;
 	unsigned int mode, len;
 
-	if (size < 23 || buf[size - 21])
-		die(_("too-short tree object"));
+	if (size < 23 || buf[size - 21]) {
+		strbuf_addstr(err, _("too-short tree object"));
+		return -1;
+	}
 
 	path = get_mode(buf, &mode);
-	if (!path)
-		die(_("malformed mode in tree entry for tree"));
-	if (!*path)
-		die(_("empty filename in tree entry for tree"));
+	if (!path) {
+		strbuf_addstr(err, _("malformed mode in tree entry"));
+		return -1;
+	}
+	if (!*path) {
+		strbuf_addstr(err, _("empty filename in tree entry"));
+		return -1;
+	}
 	len = strlen(path) + 1;
 
 	/* Initialize the descriptor entry */
 	desc->entry.path = path;
 	desc->entry.mode = canon_mode(mode);
 	desc->entry.oid  = (const struct object_id *)(path + len);
+
+	return 0;
 }
 
-void init_tree_desc(struct tree_desc *desc, const void *buffer, unsigned long size)
+static int init_tree_desc_internal(struct tree_desc *desc, const void *buffer, unsigned long size, struct strbuf *err)
 {
 	desc->buffer = buffer;
 	desc->size = size;
 	if (size)
-		decode_tree_entry(desc, buffer, size);
+		return decode_tree_entry(desc, buffer, size, err);
+	return 0;
+}
+
+void init_tree_desc(struct tree_desc *desc, const void *buffer, unsigned long size)
+{
+	struct strbuf err = STRBUF_INIT;
+	if (init_tree_desc_internal(desc, buffer, size, &err))
+		die("%s", err.buf);
+	strbuf_release(&err);
+}
+
+int init_tree_desc_gently(struct tree_desc *desc, const void *buffer, unsigned long size)
+{
+	struct strbuf err = STRBUF_INIT;
+	int result = init_tree_desc_internal(desc, buffer, size, &err);
+	if (result)
+		error("%s", err.buf);
+	strbuf_release(&err);
+	return result;
 }
 
 void *fill_tree_descriptor(struct tree_desc *desc, const unsigned char *sha1)
@@ -75,7 +102,7 @@ static void entry_extract(struct tree_desc *t, struct name_entry *a)
 	*a = t->entry;
 }
 
-void update_tree_entry(struct tree_desc *desc)
+static int update_tree_entry_internal(struct tree_desc *desc, struct strbuf *err)
 {
 	const void *buf = desc->buffer;
 	const unsigned char *end = desc->entry.oid->hash + 20;
@@ -89,7 +116,30 @@ void update_tree_entry(struct tree_desc *desc)
 	desc->buffer = buf;
 	desc->size = size;
 	if (size)
-		decode_tree_entry(desc, buf, size);
+		return decode_tree_entry(desc, buf, size, err);
+	return 0;
+}
+
+void update_tree_entry(struct tree_desc *desc)
+{
+	struct strbuf err = STRBUF_INIT;
+	if (update_tree_entry_internal(desc, &err))
+		die("%s", err.buf);
+	strbuf_release(&err);
+}
+
+int update_tree_entry_gently(struct tree_desc *desc)
+{
+	struct strbuf err = STRBUF_INIT;
+	if (update_tree_entry_internal(desc, &err)) {
+		error("%s", err.buf);
+		strbuf_release(&err);
+		/* Stop processing this tree after error */
+		desc->size = 0;
+		return -1;
+	}
+	strbuf_release(&err);
+	return 0;
 }
 
 int tree_entry(struct tree_desc *desc, struct name_entry *entry)
@@ -102,6 +152,17 @@ int tree_entry(struct tree_desc *desc, struct name_entry *entry)
 	return 1;
 }
 
+int tree_entry_gently(struct tree_desc *desc, struct name_entry *entry)
+{
+	if (!desc->size)
+		return 0;
+
+	*entry = desc->entry;
+	if (update_tree_entry_gently(desc))
+		return 0;
+	return 1;
+}
+
 void setup_traverse_info(struct traverse_info *info, const char *base)
 {
 	int pathlen = strlen(base);
diff --git a/tree-walk.h b/tree-walk.h
index 97a7d69..68bb78b 100644
--- a/tree-walk.h
+++ b/tree-walk.h
@@ -25,14 +25,22 @@ static inline int tree_entry_len(const struct name_entry *ne)
 	return (const char *)ne->oid - ne->path - 1;
 }
 
+/*
+ * The _gently versions of these functions warn and return false on a
+ * corrupt tree entry rather than dying,
+ */
+
 void update_tree_entry(struct tree_desc *);
+int update_tree_entry_gently(struct tree_desc *);
 void init_tree_desc(struct tree_desc *desc, const void *buf, unsigned long size);
+int init_tree_desc_gently(struct tree_desc *desc, const void *buf, unsigned long size);
 
 /*
  * Helper function that does both tree_entry_extract() and update_tree_entry()
  * and returns true for success
  */
 int tree_entry(struct tree_desc *, struct name_entry *);
+int tree_entry_gently(struct tree_desc *, struct name_entry *);
 
 void *fill_tree_descriptor(struct tree_desc *desc, const unsigned char *sha1);
 
-- 
2.8.0.rc4.22.g8ae061a


^ permalink raw reply related	[flat|nested] 3+ messages in thread

* Re: [PATCH v4 1/2] tree-walk: be more specific about corrupt tree errors
  2016-09-27 20:59 [PATCH v4 1/2] tree-walk: be more specific about corrupt tree errors David Turner
  2016-09-27 20:59 ` [PATCH v4 2/2] fsck: handle bad trees like other errors David Turner
@ 2016-09-27 21:45 ` Junio C Hamano
  1 sibling, 0 replies; 3+ messages in thread
From: Junio C Hamano @ 2016-09-27 21:45 UTC (permalink / raw)
  To: David Turner; +Cc: git, peff

Thanks, will queue both.

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2016-09-27 21:45 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-09-27 20:59 [PATCH v4 1/2] tree-walk: be more specific about corrupt tree errors David Turner
2016-09-27 20:59 ` [PATCH v4 2/2] fsck: handle bad trees like other errors David Turner
2016-09-27 21:45 ` [PATCH v4 1/2] tree-walk: be more specific about corrupt tree errors Junio C Hamano

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.