All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v7 0/4] cat-file: teach cat-file a '--literally' option
@ 2015-04-04  5:41 karthik nayak
  2015-04-04  5:42 ` [PATCH v7 1/4] sha1_file.c: support reading from a loose object of unknown type Karthik Nayak
                   ` (3 more replies)
  0 siblings, 4 replies; 19+ messages in thread
From: karthik nayak @ 2015-04-04  5:41 UTC (permalink / raw)
  To: Git List; +Cc: Junio C Hamano, Eric Sunshine

Changes made :

* Fixed the problem with packed types, which failed a few tests.
* Misspelled email.

Thanks for the suggestions and comments on version 6.

^ permalink raw reply	[flat|nested] 19+ messages in thread

* [PATCH v7 1/4] sha1_file.c: support reading from a loose object of unknown type
  2015-04-04  5:41 [PATCH v7 0/4] cat-file: teach cat-file a '--literally' option karthik nayak
@ 2015-04-04  5:42 ` Karthik Nayak
  2015-04-04 19:34   ` Junio C Hamano
  2015-04-05 18:28   ` Karthik Nayak
  2015-04-04  5:44 ` [PATCH v7 2/4] cat-file: teach cat-file a '--literally' option Karthik Nayak
                   ` (2 subsequent siblings)
  3 siblings, 2 replies; 19+ messages in thread
From: Karthik Nayak @ 2015-04-04  5:42 UTC (permalink / raw)
  To: git; +Cc: gitster, sunshine, Karthik Nayak

Update sha1_loose_object_info() to optionally allow it to read
from a loose object file of unknown/bogus type; as the function
usually returns the type of the object it read in the form of enum
for known types, add an optional "typename" field to receive the
name of the type in textual form and a flag to indicate the reading
of a loose object file of unknown/bogus type.

Add parse_sha1_header_extended() which acts as a wrapper around
parse_sha1_header() allowing more information to be obtained.

Add unpack_sha1_header_to_strbuf() to unpack sha1 headers of
unknown/corrupt objects which have a unknown sha1 header size to
a strbuf structure. This was written by Junio C Hamano but tested
by me.

Helped-by: Junio C Hamano <gitster@pobox.com>
Helped-by: Eric Sunshine <sunshine@sunshineco.com>
Signed-off-by: Karthik Nayak <karthik.188@gmail.com>
---
 cache.h     |   2 ++
 sha1_file.c | 111 ++++++++++++++++++++++++++++++++++++++++++++++++------------
 2 files changed, 91 insertions(+), 22 deletions(-)

diff --git a/cache.h b/cache.h
index c47323e..ea6faf0 100644
--- a/cache.h
+++ b/cache.h
@@ -881,6 +881,7 @@ extern int is_ntfs_dotgit(const char *name);
 
 /* object replacement */
 #define LOOKUP_REPLACE_OBJECT 1
+#define LOOKUP_LITERALLY 2
 extern void *read_sha1_file_extended(const unsigned char *sha1, enum object_type *type, unsigned long *size, unsigned flag);
 static inline void *read_sha1_file(const unsigned char *sha1, enum object_type *type, unsigned long *size)
 {
@@ -1349,6 +1350,7 @@ struct object_info {
 	unsigned long *sizep;
 	unsigned long *disk_sizep;
 	unsigned char *delta_base_sha1;
+	struct strbuf *typename;
 
 	/* Response */
 	enum {
diff --git a/sha1_file.c b/sha1_file.c
index 980ce6b..5a1b904 100644
--- a/sha1_file.c
+++ b/sha1_file.c
@@ -1564,6 +1564,34 @@ int unpack_sha1_header(git_zstream *stream, unsigned char *map, unsigned long ma
 	return git_inflate(stream, 0);
 }
 
+static int unpack_sha1_header_to_strbuf(git_zstream *stream, unsigned char *map,
+					unsigned long mapsize,
+					struct strbuf *header)
+{
+	unsigned char buffer[32], *cp;
+	unsigned long bufsiz = sizeof(buffer);
+	int status;
+
+	status = unpack_sha1_header(stream, map, mapsize, buffer, bufsiz);
+
+	if (status) {
+		strbuf_add(header, buffer, stream->next_out - buffer);
+		return status;
+	}
+
+	do {
+		status = git_inflate(stream, 0);
+		strbuf_add(header, buffer, stream->next_out - buffer);
+		for (cp = buffer; cp < stream->next_out; cp++)
+			if (!*cp)
+				/* Found the NUL at the end of the header */
+				return 0;
+		stream->next_out = buffer;
+		stream->avail_out = bufsiz;
+	} while (status != Z_STREAM_END);
+	return -1;
+}
+
 static void *unpack_sha1_rest(git_zstream *stream, void *buffer, unsigned long size, const unsigned char *sha1)
 {
 	int bytes = strlen(buffer) + 1;
@@ -1614,27 +1642,24 @@ static void *unpack_sha1_rest(git_zstream *stream, void *buffer, unsigned long s
  * too permissive for what we want to check. So do an anal
  * object header parse by hand.
  */
-int parse_sha1_header(const char *hdr, unsigned long *sizep)
+int parse_sha1_header_extended(const char *hdr, struct object_info *oi,
+			       unsigned int flags)
 {
-	char type[10];
-	int i;
+	struct strbuf typename = STRBUF_INIT;
 	unsigned long size;
+	int type;
 
 	/*
 	 * The type can be at most ten bytes (including the
 	 * terminating '\0' that we add), and is followed by
 	 * a space.
 	 */
-	i = 0;
 	for (;;) {
 		char c = *hdr++;
 		if (c == ' ')
 			break;
-		type[i++] = c;
-		if (i >= sizeof(type))
-			return -1;
+		strbuf_addch(&typename, c);
 	}
-	type[i] = 0;
 
 	/*
 	 * The length must follow immediately, and be in canonical
@@ -1652,12 +1677,39 @@ int parse_sha1_header(const char *hdr, unsigned long *sizep)
 			size = size * 10 + c;
 		}
 	}
-	*sizep = size;
 
+	type = type_from_string_gently(typename.buf, typename.len, 1);
+	if (oi->sizep)
+		*oi->sizep = size;
+	if (oi->typename)
+		strbuf_addbuf(oi->typename, &typename);
+	strbuf_release(&typename);
+
+	/*
+	 * Set type to 0 if its an unknown object and
+	 * we're obtaining the type using '--literally'
+	 * option.
+	 */
+	if ((flags & LOOKUP_LITERALLY) && (type == -1))
+		type = 0;
+	else if (type == -1)
+		die("invalid object type");
+	if (oi->typep)
+		*oi->typep = type;
 	/*
 	 * The length must be followed by a zero byte
 	 */
-	return *hdr ? -1 : type_from_string(type);
+	return *hdr ? -1 : type;
+}
+
+int parse_sha1_header(const char *hdr, unsigned long *sizep)
+{
+	struct object_info oi;
+
+	oi.sizep = sizep;
+	oi.typename = NULL;
+	oi.typep = NULL;
+	return parse_sha1_header_extended(hdr, &oi, LOOKUP_REPLACE_OBJECT);
 }
 
 static void *unpack_sha1_file(void *map, unsigned long mapsize, enum object_type *type, unsigned long *size, const unsigned char *sha1)
@@ -2522,13 +2574,15 @@ struct packed_git *find_sha1_pack(const unsigned char *sha1,
 }
 
 static int sha1_loose_object_info(const unsigned char *sha1,
-				  struct object_info *oi)
+				  struct object_info *oi,
+				  int flags)
 {
-	int status;
-	unsigned long mapsize, size;
+	int status = 0;
+	unsigned long mapsize;
 	void *map;
 	git_zstream stream;
 	char hdr[32];
+	struct strbuf hdrbuf = STRBUF_INIT;
 
 	if (oi->delta_base_sha1)
 		hashclr(oi->delta_base_sha1);
@@ -2555,17 +2609,26 @@ static int sha1_loose_object_info(const unsigned char *sha1,
 		return -1;
 	if (oi->disk_sizep)
 		*oi->disk_sizep = mapsize;
-	if (unpack_sha1_header(&stream, map, mapsize, hdr, sizeof(hdr)) < 0)
-		status = error("unable to unpack %s header",
-			       sha1_to_hex(sha1));
-	else if ((status = parse_sha1_header(hdr, &size)) < 0)
-		status = error("unable to parse %s header", sha1_to_hex(sha1));
-	else if (oi->sizep)
-		*oi->sizep = size;
+	if ((flags & LOOKUP_LITERALLY)) {
+		if (unpack_sha1_header_to_strbuf(&stream, map, mapsize, &hdrbuf) < 0)
+			status = error("unable to unpack %s header with --literally",
+				       sha1_to_hex(sha1));
+		else if ((status = parse_sha1_header_extended(hdrbuf.buf, oi, flags)) < 0)
+			status = error("unable to parse %s header with --literally",
+				       sha1_to_hex(sha1));
+	} else {
+		if (unpack_sha1_header(&stream, map, mapsize, hdr, sizeof(hdr)) < 0)
+			status = error("unable to unpack %s header",
+				       sha1_to_hex(sha1));
+		else if ((status = parse_sha1_header_extended(hdr, oi, flags)) < 0)
+			status = error("unable to parse %s header", sha1_to_hex(sha1));
+	}
 	git_inflate_end(&stream);
 	munmap(map, mapsize);
-	if (oi->typep)
+	if (status && oi->typep)
 		*oi->typep = status;
+	if (hdrbuf.buf)
+		strbuf_release(&hdrbuf);
 	return 0;
 }
 
@@ -2586,13 +2649,15 @@ int sha1_object_info_extended(const unsigned char *sha1, struct object_info *oi,
 			*(oi->disk_sizep) = 0;
 		if (oi->delta_base_sha1)
 			hashclr(oi->delta_base_sha1);
+		if (oi->typename)
+			strbuf_addstr(oi->typename, typename(co->type));
 		oi->whence = OI_CACHED;
 		return 0;
 	}
 
 	if (!find_pack_entry(real, &e)) {
 		/* Most likely it's a loose object. */
-		if (!sha1_loose_object_info(real, oi)) {
+		if (!sha1_loose_object_info(real, oi, flags)) {
 			oi->whence = OI_LOOSE;
 			return 0;
 		}
@@ -2616,6 +2681,8 @@ int sha1_object_info_extended(const unsigned char *sha1, struct object_info *oi,
 		oi->u.packed.is_delta = (rtype == OBJ_REF_DELTA ||
 					 rtype == OBJ_OFS_DELTA);
 	}
+	if (oi->typename)
+		strbuf_addstr(oi->typename, typename(*oi->typep));
 
 	return 0;
 }
-- 
2.4.0.rc1.249.g9f2ee54

^ permalink raw reply related	[flat|nested] 19+ messages in thread

* [PATCH v7 2/4] cat-file: teach cat-file a '--literally' option
  2015-04-04  5:41 [PATCH v7 0/4] cat-file: teach cat-file a '--literally' option karthik nayak
  2015-04-04  5:42 ` [PATCH v7 1/4] sha1_file.c: support reading from a loose object of unknown type Karthik Nayak
@ 2015-04-04  5:44 ` Karthik Nayak
  2015-04-07 20:49   ` Eric Sunshine
  2015-04-04  5:44 ` [PATCH v7 3/4] cat-file: add documentation for " Karthik Nayak
  2015-04-04  5:44 ` [PATCH v7 4/4] t1006: add tests for git cat-file --literally Karthik Nayak
  3 siblings, 1 reply; 19+ messages in thread
From: Karthik Nayak @ 2015-04-04  5:44 UTC (permalink / raw)
  To: git; +Cc: gitster, sunshine, Karthik Nayak

Currently 'git cat-file' throws an error while trying to
print the type or size of a broken/corrupt object which is
created using 'git hash-object --literally'. This is
because these objects are usually of unknown types.

Teach git cat-file a '--literally' option where it prints
the type or size of a broken/corrupt object without throwing
an error.

Modify '-t' and '-s' options to call sha1_object_info_extended()
directly to support the '--literally' option.

Helped-by: Junio C Hamano <gitster@pobox.com>
Helped-by: Eric Sunshine <sunshine@sunshineco.com>
Signed-off-by: Karthik Nayak <karthik.188@gmail.com>
---
 builtin/cat-file.c | 38 +++++++++++++++++++++++++++++---------
 1 file changed, 29 insertions(+), 9 deletions(-)

diff --git a/builtin/cat-file.c b/builtin/cat-file.c
index df99df4..91ceae0 100644
--- a/builtin/cat-file.c
+++ b/builtin/cat-file.c
@@ -9,13 +9,20 @@
 #include "userdiff.h"
 #include "streaming.h"
 
-static int cat_one_file(int opt, const char *exp_type, const char *obj_name)
+static int cat_one_file(int opt, const char *exp_type, const char *obj_name,
+			int literally)
 {
 	unsigned char sha1[20];
 	enum object_type type;
 	char *buf;
 	unsigned long size;
 	struct object_context obj_context;
+	struct object_info oi = {NULL};
+	struct strbuf sb = STRBUF_INIT;
+	unsigned flags = LOOKUP_REPLACE_OBJECT;
+
+	if (literally)
+		flags |= LOOKUP_LITERALLY;
 
 	if (get_sha1_with_context(obj_name, 0, sha1, &obj_context))
 		die("Not a valid object name %s", obj_name);
@@ -23,16 +30,24 @@ static int cat_one_file(int opt, const char *exp_type, const char *obj_name)
 	buf = NULL;
 	switch (opt) {
 	case 't':
-		type = sha1_object_info(sha1, NULL);
-		if (type > 0) {
-			printf("%s\n", typename(type));
+		oi.typep = &type;
+		oi.typename = &sb;
+		if (sha1_object_info_extended(sha1, &oi, flags) < 0)
+			die("git cat-file: could not get object info");
+		if (type >= 0 && sb.len) {
+			printf("%s\n", sb.buf);
+			strbuf_release(&sb);
 			return 0;
 		}
 		break;
 
 	case 's':
-		type = sha1_object_info(sha1, &size);
-		if (type > 0) {
+		oi.typep = &type;
+		oi.typename = &sb;
+		oi.sizep = &size;
+		if (sha1_object_info_extended(sha1, &oi, flags) < 0)
+			die("git cat-file: could not get object info");
+		if (type >= 0 && sb.len) {
 			printf("%lu\n", size);
 			return 0;
 		}
@@ -323,7 +338,7 @@ static int batch_objects(struct batch_options *opt)
 }
 
 static const char * const cat_file_usage[] = {
-	N_("git cat-file (-t | -s | -e | -p | <type> | --textconv) <object>"),
+	N_("git cat-file (-t [--literally]|-s [--literally]|-e|-p|<type>|--textconv) <object>"),
 	N_("git cat-file (--batch | --batch-check) < <list-of-objects>"),
 	NULL
 };
@@ -359,6 +374,7 @@ int cmd_cat_file(int argc, const char **argv, const char *prefix)
 	int opt = 0;
 	const char *exp_type = NULL, *obj_name = NULL;
 	struct batch_options batch = {0};
+	int literally = 0;
 
 	const struct option options[] = {
 		OPT_GROUP(N_("<type> can be one of: blob, tree, commit, tag")),
@@ -369,6 +385,8 @@ int cmd_cat_file(int argc, const char **argv, const char *prefix)
 		OPT_SET_INT('p', NULL, &opt, N_("pretty-print object's content"), 'p'),
 		OPT_SET_INT(0, "textconv", &opt,
 			    N_("for blob objects, run textconv on object's content"), 'c'),
+		OPT_BOOL( 0, "literally", &literally,
+			  N_("get information about corrupt objects for debugging Git")),
 		{ OPTION_CALLBACK, 0, "batch", &batch, "format",
 			N_("show info and content of objects fed from the standard input"),
 			PARSE_OPT_OPTARG, batch_option_callback },
@@ -380,7 +398,7 @@ int cmd_cat_file(int argc, const char **argv, const char *prefix)
 
 	git_config(git_cat_file_config, NULL);
 
-	if (argc != 3 && argc != 2)
+	if (argc < 2 || argc > 4)
 		usage_with_options(cat_file_usage, options);
 
 	argc = parse_options(argc, argv, prefix, options, cat_file_usage, 0);
@@ -405,5 +423,7 @@ int cmd_cat_file(int argc, const char **argv, const char *prefix)
 	if (batch.enabled)
 		return batch_objects(&batch);
 
-	return cat_one_file(opt, exp_type, obj_name);
+	if (literally && opt != 't' && opt != 's')
+		die("git cat-file --literally: use with -s or -t");
+	return cat_one_file(opt, exp_type, obj_name, literally);
 }
-- 
2.4.0.rc1.249.g9f2ee54

^ permalink raw reply related	[flat|nested] 19+ messages in thread

* [PATCH v7 3/4] cat-file: add documentation for '--literally' option.
  2015-04-04  5:41 [PATCH v7 0/4] cat-file: teach cat-file a '--literally' option karthik nayak
  2015-04-04  5:42 ` [PATCH v7 1/4] sha1_file.c: support reading from a loose object of unknown type Karthik Nayak
  2015-04-04  5:44 ` [PATCH v7 2/4] cat-file: teach cat-file a '--literally' option Karthik Nayak
@ 2015-04-04  5:44 ` Karthik Nayak
  2015-04-07 20:49   ` Eric Sunshine
  2015-04-04  5:44 ` [PATCH v7 4/4] t1006: add tests for git cat-file --literally Karthik Nayak
  3 siblings, 1 reply; 19+ messages in thread
From: Karthik Nayak @ 2015-04-04  5:44 UTC (permalink / raw)
  To: git; +Cc: gitster, sunshine, Karthik Nayak

Signed-off-by: Karthik Nayak <karthik.188@gmail.com>
---
 Documentation/git-cat-file.txt | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/Documentation/git-cat-file.txt b/Documentation/git-cat-file.txt
index f6a16f4..8bac7bd 100644
--- a/Documentation/git-cat-file.txt
+++ b/Documentation/git-cat-file.txt
@@ -9,7 +9,7 @@ git-cat-file - Provide content or type and size information for repository objec
 SYNOPSIS
 --------
 [verse]
-'git cat-file' (-t | -s | -e | -p | <type> | --textconv ) <object>
+'git cat-file' (-t [--literally]| -s [--literally]| -e | -p | <type> | --textconv ) <object>
 'git cat-file' (--batch | --batch-check) < <list-of-objects>
 
 DESCRIPTION
@@ -69,6 +69,10 @@ OPTIONS
 	not be combined with any other options or arguments.  See the
 	section `BATCH OUTPUT` below for details.
 
+--literally::
+	Print information of broken/corrupt objects of unknown type without
+	throwing an error. To be used combined with '-s' or '-t' option.
+
 OUTPUT
 ------
 If '-t' is specified, one of the <type>.
-- 
2.4.0.rc1.249.g9f2ee54

^ permalink raw reply related	[flat|nested] 19+ messages in thread

* [PATCH v7 4/4] t1006: add tests for git cat-file --literally
  2015-04-04  5:41 [PATCH v7 0/4] cat-file: teach cat-file a '--literally' option karthik nayak
                   ` (2 preceding siblings ...)
  2015-04-04  5:44 ` [PATCH v7 3/4] cat-file: add documentation for " Karthik Nayak
@ 2015-04-04  5:44 ` Karthik Nayak
  2015-04-07 20:49   ` Eric Sunshine
  3 siblings, 1 reply; 19+ messages in thread
From: Karthik Nayak @ 2015-04-04  5:44 UTC (permalink / raw)
  To: git; +Cc: gitster, sunshine, Karthik Nayak

Signed-off-by: Karthik Nayak <karthik.188@gmail.com>
---
 t/t1006-cat-file.sh | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)

diff --git a/t/t1006-cat-file.sh b/t/t1006-cat-file.sh
index ab36b1e..5b74044 100755
--- a/t/t1006-cat-file.sh
+++ b/t/t1006-cat-file.sh
@@ -47,6 +47,18 @@ $content"
 	test_cmp expect actual
     '
 
+    test_expect_success "Type of $type is correct using --literally" '
+	echo $type >expect &&
+	git cat-file -t --literally $sha1 >actual &&
+	test_cmp expect actual
+    '
+
+    test_expect_success "Size of $type is correct using --literally" '
+	echo $size >expect &&
+	git cat-file -s --literally $sha1 >actual &&
+	test_cmp expect actual
+    '
+
     test -z "$content" ||
     test_expect_success "Content of $type is correct" '
 	maybe_remove_timestamp "$content" $no_ts >expect &&
@@ -296,4 +308,19 @@ test_expect_success '%(deltabase) reports packed delta bases' '
 	}
 '
 
+bogus_type="bogus"
+bogus_sha1=$(git hash-object -t $bogus_type --literally -w --stdin </dev/null)
+
+test_expect_success "Type of broken object is correct" '
+	echo $bogus_type >expect &&
+	git cat-file -t --literally $bogus_sha1 >actual &&
+	test_cmp expect actual
+'
+
+test_expect_success "Size of broken object is correct" '
+	echo "0" >expect &&
+	git cat-file -s --literally $bogus_sha1 >actual &&
+	test_cmp expect actual
+'
+
 test_done
-- 
2.4.0.rc1.249.g9f2ee54

^ permalink raw reply related	[flat|nested] 19+ messages in thread

* Re: [PATCH v7 1/4] sha1_file.c: support reading from a loose object of unknown type
  2015-04-04  5:42 ` [PATCH v7 1/4] sha1_file.c: support reading from a loose object of unknown type Karthik Nayak
@ 2015-04-04 19:34   ` Junio C Hamano
  2015-04-04 19:53     ` karthik nayak
  2015-04-05 18:28   ` Karthik Nayak
  1 sibling, 1 reply; 19+ messages in thread
From: Junio C Hamano @ 2015-04-04 19:34 UTC (permalink / raw)
  To: Karthik Nayak; +Cc: git, sunshine

Karthik Nayak <karthik.188@gmail.com> writes:

> @@ -2586,13 +2649,15 @@ int sha1_object_info_extended(const unsigned char *sha1, struct object_info *oi,
>  			*(oi->disk_sizep) = 0;
>  		if (oi->delta_base_sha1)
>  			hashclr(oi->delta_base_sha1);
> +		if (oi->typename)
> +			strbuf_addstr(oi->typename, typename(co->type));
>  		oi->whence = OI_CACHED;
>  		return 0;
>  	}

Just before the pre-context of this hunk, there is this bit:

	if (oi->typep)
		*(oi->typep) = co->type;

which tells me that the callers of this function is allowed to pass
a NULL in oi->typep when they are not interested in the type of the
object.

>  	if (!find_pack_entry(real, &e)) {
>  		/* Most likely it's a loose object. */
> -		if (!sha1_loose_object_info(real, oi)) {
> +		if (!sha1_loose_object_info(real, oi, flags)) {
>  			oi->whence = OI_LOOSE;
>  			return 0;
>  		}
> @@ -2616,6 +2681,8 @@ int sha1_object_info_extended(const unsigned char *sha1, struct object_info *oi,
>  		oi->u.packed.is_delta = (rtype == OBJ_REF_DELTA ||
>  					 rtype == OBJ_OFS_DELTA);
>  	}
> +	if (oi->typename)
> +		strbuf_addstr(oi->typename, typename(*oi->typep));

So, it makes me wonder what guarantee we have that this does not
dereference a NULL here.

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH v7 1/4] sha1_file.c: support reading from a loose object of unknown type
  2015-04-04 19:34   ` Junio C Hamano
@ 2015-04-04 19:53     ` karthik nayak
  2015-04-05  7:46       ` Junio C Hamano
  0 siblings, 1 reply; 19+ messages in thread
From: karthik nayak @ 2015-04-04 19:53 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git, sunshine


On 04/05/2015 01:04 AM, Junio C Hamano wrote:
> Karthik Nayak <karthik.188@gmail.com> writes:
>
> > @@ -2586,13 +2649,15 @@ int sha1_object_info_extended(const unsigned char *sha1, struct object_info *oi,
> >               *(oi->disk_sizep) = 0;
> >           if (oi->delta_base_sha1)
> >               hashclr(oi->delta_base_sha1);
> > +        if (oi->typename)
> > +            strbuf_addstr(oi->typename, typename(co->type));
> >           oi->whence = OI_CACHED;
> >           return 0;
> >       }
>
> Just before the pre-context of this hunk, there is this bit:
>
>     if (oi->typep)
>         *(oi->typep) = co->type;
>
> which tells me that the callers of this function is allowed to pass
> a NULL in oi->typep when they are not interested in the type of the
> object.
>
> >       if (!find_pack_entry(real, &e)) {
> >           /* Most likely it's a loose object. */
> > -        if (!sha1_loose_object_info(real, oi)) {
> > +        if (!sha1_loose_object_info(real, oi, flags)) {
> >               oi->whence = OI_LOOSE;
> >               return 0;
> >           }
> > @@ -2616,6 +2681,8 @@ int sha1_object_info_extended(const unsigned char *sha1, struct object_info *oi,
> >           oi->u.packed.is_delta = (rtype == OBJ_REF_DELTA ||
> >                        rtype == OBJ_OFS_DELTA);
> >       }
> > +    if (oi->typename)
> > +        strbuf_addstr(oi->typename, typename(*oi->typep));
>
> So, it makes me wonder what guarantee we have that this does not
> dereference a NULL here.
>
As per my code, oi->typename is only pointing to something when oi->typep
is ( As oi->typename is currently only used in cat-file.c).
But what you're saying also is true, there is no other guarantee, as a user may
set oi->typename to point to a struct strbuf and leave out oi->typep.

  if (oi->typename && oi->typep)
          strbuf_addstr(oi->typename, typename(*oi->typep));

This should suffice. Do you want me to re-roll this?

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH v7 1/4] sha1_file.c: support reading from a loose object of unknown type
  2015-04-04 19:53     ` karthik nayak
@ 2015-04-05  7:46       ` Junio C Hamano
  2015-04-05  7:52         ` karthik nayak
  0 siblings, 1 reply; 19+ messages in thread
From: Junio C Hamano @ 2015-04-05  7:46 UTC (permalink / raw)
  To: karthik nayak; +Cc: git, sunshine

karthik nayak <karthik.188@gmail.com> writes:

>> So, it makes me wonder what guarantee we have that this does not
>> dereference a NULL here.
>>
> As per my code, oi->typename is only pointing to something when oi->typep
> is ( As oi->typename is currently only used in cat-file.c).
> But what you're saying also is true, there is no other guarantee, as a user may
> set oi->typename to point to a struct strbuf and leave out oi->typep.
>
>  if (oi->typename && oi->typep)
>          strbuf_addstr(oi->typename, typename(*oi->typep));
>
> This should suffice. Do you want me to re-roll this?

I'd rather avoid the thinking along the lines of "at this moment,
there happens to be only one caller that asks for typename and the
caller also sets typep, so we will be safe as long as we make sure
the caller passed typep before giving him typename back".

Somebody else may write new code that wants to learn the typename,
forgets to set typep, calls into this codepath, and ends up
scratching his head wondering why the typename string is returned to
him.  Surely the code may not crash at the new code you wrote, but
you are not helping him.

If it semantically does not make sense to ask for the typename
without asking for the type code, then we can and should make that
as a new calling convention _all_ callers must follow.

In other words, I think it is better to have

	if (oi->typename && !oi->typep)
		die("BUG");

somewhere near the beginning of the callchain that takes oi; that
will ensure all callers understand the rule.

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH v7 1/4] sha1_file.c: support reading from a loose object of unknown type
  2015-04-05  7:46       ` Junio C Hamano
@ 2015-04-05  7:52         ` karthik nayak
  2015-04-05 19:57           ` Junio C Hamano
  0 siblings, 1 reply; 19+ messages in thread
From: karthik nayak @ 2015-04-05  7:52 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git, sunshine



On 04/05/2015 01:16 PM, Junio C Hamano wrote:
> karthik nayak <karthik.188@gmail.com> writes:
>
>>> So, it makes me wonder what guarantee we have that this does not
>>> dereference a NULL here.
>>>
>> As per my code, oi->typename is only pointing to something when oi->typep
>> is ( As oi->typename is currently only used in cat-file.c).
>> But what you're saying also is true, there is no other guarantee, as a user may
>> set oi->typename to point to a struct strbuf and leave out oi->typep.
>>
>>   if (oi->typename && oi->typep)
>>           strbuf_addstr(oi->typename, typename(*oi->typep));
>>
>> This should suffice. Do you want me to re-roll this?
>
> I'd rather avoid the thinking along the lines of "at this moment,
> there happens to be only one caller that asks for typename and the
> caller also sets typep, so we will be safe as long as we make sure
> the caller passed typep before giving him typename back".
>
> Somebody else may write new code that wants to learn the typename,
> forgets to set typep, calls into this codepath, and ends up
> scratching his head wondering why the typename string is returned to
> him.  Surely the code may not crash at the new code you wrote, but
> you are not helping him.


Yes! I do agree with you, If you read my previous reply, I did mention 
what you said about not having a guarantee that typep and typename are 
both set and another user might have a bug with this.

>
> If it semantically does not make sense to ask for the typename
> without asking for the type code, then we can and should make that
> as a new calling convention _all_ callers must follow.
>
> In other words, I think it is better to have
>
> 	if (oi->typename && !oi->typep)
> 		die("BUG");
>
> somewhere near the beginning of the callchain that takes oi; that
> will ensure all callers understand the rule.
>

Yes! this is a better approach as it will enforce that typep must be set 
when typename is set.

^ permalink raw reply	[flat|nested] 19+ messages in thread

* [PATCH v7 1/4] sha1_file.c: support reading from a loose object of unknown type
  2015-04-04  5:42 ` [PATCH v7 1/4] sha1_file.c: support reading from a loose object of unknown type Karthik Nayak
  2015-04-04 19:34   ` Junio C Hamano
@ 2015-04-05 18:28   ` Karthik Nayak
  2015-04-07 20:46     ` Eric Sunshine
  1 sibling, 1 reply; 19+ messages in thread
From: Karthik Nayak @ 2015-04-05 18:28 UTC (permalink / raw)
  To: git; +Cc: gitster, sunshine, Karthik Nayak

Update sha1_loose_object_info() to optionally allow it to read
from a loose object file of unknown/bogus type; as the function
usually returns the type of the object it read in the form of enum
for known types, add an optional "typename" field to receive the
name of the type in textual form and a flag to indicate the reading
of a loose object file of unknown/bogus type.

Add parse_sha1_header_extended() which acts as a wrapper around
parse_sha1_header() allowing more information to be obtained.

Add unpack_sha1_header_to_strbuf() to unpack sha1 headers of
unknown/corrupt objects which have a unknown sha1 header size to
a strbuf structure. This was written by Junio C Hamano but tested
by me.

Helped-by: Junio C Hamano <gitster@pobox.com>
Helped-by: Eric Sunshine <sunshine@sunshineco.com>
Signed-off-by: Karthik Nayak <karthik.188@gmail.com>
---
 cache.h     |   2 ++
 sha1_file.c | 114 ++++++++++++++++++++++++++++++++++++++++++++++++------------
 2 files changed, 94 insertions(+), 22 deletions(-)

diff --git a/cache.h b/cache.h
index c47323e..ea6faf0 100644
--- a/cache.h
+++ b/cache.h
@@ -881,6 +881,7 @@ extern int is_ntfs_dotgit(const char *name);
 
 /* object replacement */
 #define LOOKUP_REPLACE_OBJECT 1
+#define LOOKUP_LITERALLY 2
 extern void *read_sha1_file_extended(const unsigned char *sha1, enum object_type *type, unsigned long *size, unsigned flag);
 static inline void *read_sha1_file(const unsigned char *sha1, enum object_type *type, unsigned long *size)
 {
@@ -1349,6 +1350,7 @@ struct object_info {
 	unsigned long *sizep;
 	unsigned long *disk_sizep;
 	unsigned char *delta_base_sha1;
+	struct strbuf *typename;
 
 	/* Response */
 	enum {
diff --git a/sha1_file.c b/sha1_file.c
index 980ce6b..ac8fffc 100644
--- a/sha1_file.c
+++ b/sha1_file.c
@@ -1564,6 +1564,34 @@ int unpack_sha1_header(git_zstream *stream, unsigned char *map, unsigned long ma
 	return git_inflate(stream, 0);
 }
 
+static int unpack_sha1_header_to_strbuf(git_zstream *stream, unsigned char *map,
+					unsigned long mapsize,
+					struct strbuf *header)
+{
+	unsigned char buffer[32], *cp;
+	unsigned long bufsiz = sizeof(buffer);
+	int status;
+
+	status = unpack_sha1_header(stream, map, mapsize, buffer, bufsiz);
+
+	if (status) {
+		strbuf_add(header, buffer, stream->next_out - buffer);
+		return status;
+	}
+
+	do {
+		status = git_inflate(stream, 0);
+		strbuf_add(header, buffer, stream->next_out - buffer);
+		for (cp = buffer; cp < stream->next_out; cp++)
+			if (!*cp)
+				/* Found the NUL at the end of the header */
+				return 0;
+		stream->next_out = buffer;
+		stream->avail_out = bufsiz;
+	} while (status != Z_STREAM_END);
+	return -1;
+}
+
 static void *unpack_sha1_rest(git_zstream *stream, void *buffer, unsigned long size, const unsigned char *sha1)
 {
 	int bytes = strlen(buffer) + 1;
@@ -1614,27 +1642,24 @@ static void *unpack_sha1_rest(git_zstream *stream, void *buffer, unsigned long s
  * too permissive for what we want to check. So do an anal
  * object header parse by hand.
  */
-int parse_sha1_header(const char *hdr, unsigned long *sizep)
+int parse_sha1_header_extended(const char *hdr, struct object_info *oi,
+			       unsigned int flags)
 {
-	char type[10];
-	int i;
+	struct strbuf typename = STRBUF_INIT;
 	unsigned long size;
+	int type;
 
 	/*
 	 * The type can be at most ten bytes (including the
 	 * terminating '\0' that we add), and is followed by
 	 * a space.
 	 */
-	i = 0;
 	for (;;) {
 		char c = *hdr++;
 		if (c == ' ')
 			break;
-		type[i++] = c;
-		if (i >= sizeof(type))
-			return -1;
+		strbuf_addch(&typename, c);
 	}
-	type[i] = 0;
 
 	/*
 	 * The length must follow immediately, and be in canonical
@@ -1652,12 +1677,39 @@ int parse_sha1_header(const char *hdr, unsigned long *sizep)
 			size = size * 10 + c;
 		}
 	}
-	*sizep = size;
+
+	type = type_from_string_gently(typename.buf, typename.len, 1);
+	if (oi->sizep)
+		*oi->sizep = size;
+	if (oi->typename)
+		strbuf_addbuf(oi->typename, &typename);
+	strbuf_release(&typename);
 
 	/*
+	 * Set type to 0 if its an unknown object and
+	 * we're obtaining the type using '--literally'
+	 * option.
+	 */
+	if ((flags & LOOKUP_LITERALLY) && (type == -1))
+		type = 0;
+	else if (type == -1)
+		die("invalid object type");
+	if (oi->typep)
+		*oi->typep = type;
+	/*
 	 * The length must be followed by a zero byte
 	 */
-	return *hdr ? -1 : type_from_string(type);
+	return *hdr ? -1 : type;
+}
+
+int parse_sha1_header(const char *hdr, unsigned long *sizep)
+{
+	struct object_info oi;
+
+	oi.sizep = sizep;
+	oi.typename = NULL;
+	oi.typep = NULL;
+	return parse_sha1_header_extended(hdr, &oi, LOOKUP_REPLACE_OBJECT);
 }
 
 static void *unpack_sha1_file(void *map, unsigned long mapsize, enum object_type *type, unsigned long *size, const unsigned char *sha1)
@@ -2522,13 +2574,15 @@ struct packed_git *find_sha1_pack(const unsigned char *sha1,
 }
 
 static int sha1_loose_object_info(const unsigned char *sha1,
-				  struct object_info *oi)
+				  struct object_info *oi,
+				  int flags)
 {
-	int status;
-	unsigned long mapsize, size;
+	int status = 0;
+	unsigned long mapsize;
 	void *map;
 	git_zstream stream;
 	char hdr[32];
+	struct strbuf hdrbuf = STRBUF_INIT;
 
 	if (oi->delta_base_sha1)
 		hashclr(oi->delta_base_sha1);
@@ -2555,17 +2609,26 @@ static int sha1_loose_object_info(const unsigned char *sha1,
 		return -1;
 	if (oi->disk_sizep)
 		*oi->disk_sizep = mapsize;
-	if (unpack_sha1_header(&stream, map, mapsize, hdr, sizeof(hdr)) < 0)
-		status = error("unable to unpack %s header",
-			       sha1_to_hex(sha1));
-	else if ((status = parse_sha1_header(hdr, &size)) < 0)
-		status = error("unable to parse %s header", sha1_to_hex(sha1));
-	else if (oi->sizep)
-		*oi->sizep = size;
+	if ((flags & LOOKUP_LITERALLY)) {
+		if (unpack_sha1_header_to_strbuf(&stream, map, mapsize, &hdrbuf) < 0)
+			status = error("unable to unpack %s header with --literally",
+				       sha1_to_hex(sha1));
+		else if ((status = parse_sha1_header_extended(hdrbuf.buf, oi, flags)) < 0)
+			status = error("unable to parse %s header with --literally",
+				       sha1_to_hex(sha1));
+	} else {
+		if (unpack_sha1_header(&stream, map, mapsize, hdr, sizeof(hdr)) < 0)
+			status = error("unable to unpack %s header",
+				       sha1_to_hex(sha1));
+		else if ((status = parse_sha1_header_extended(hdr, oi, flags)) < 0)
+			status = error("unable to parse %s header", sha1_to_hex(sha1));
+	}
 	git_inflate_end(&stream);
 	munmap(map, mapsize);
-	if (oi->typep)
+	if (status && oi->typep)
 		*oi->typep = status;
+	if (hdrbuf.buf)
+		strbuf_release(&hdrbuf);
 	return 0;
 }
 
@@ -2576,6 +2639,9 @@ int sha1_object_info_extended(const unsigned char *sha1, struct object_info *oi,
 	int rtype;
 	const unsigned char *real = lookup_replace_object_extended(sha1, flags);
 
+	if (oi->typename && !oi->typep)
+		die("object_info: cannot use typename without typep");
+
 	co = find_cached_object(real);
 	if (co) {
 		if (oi->typep)
@@ -2586,13 +2652,15 @@ int sha1_object_info_extended(const unsigned char *sha1, struct object_info *oi,
 			*(oi->disk_sizep) = 0;
 		if (oi->delta_base_sha1)
 			hashclr(oi->delta_base_sha1);
+		if (oi->typename)
+			strbuf_addstr(oi->typename, typename(co->type));
 		oi->whence = OI_CACHED;
 		return 0;
 	}
 
 	if (!find_pack_entry(real, &e)) {
 		/* Most likely it's a loose object. */
-		if (!sha1_loose_object_info(real, oi)) {
+		if (!sha1_loose_object_info(real, oi, flags)) {
 			oi->whence = OI_LOOSE;
 			return 0;
 		}
@@ -2616,6 +2684,8 @@ int sha1_object_info_extended(const unsigned char *sha1, struct object_info *oi,
 		oi->u.packed.is_delta = (rtype == OBJ_REF_DELTA ||
 					 rtype == OBJ_OFS_DELTA);
 	}
+	if (oi->typename)
+		strbuf_addstr(oi->typename, typename(*oi->typep));
 
 	return 0;
 }
-- 
2.4.0.rc1.249.g9f2ee54

^ permalink raw reply related	[flat|nested] 19+ messages in thread

* Re: [PATCH v7 1/4] sha1_file.c: support reading from a loose object of unknown type
  2015-04-05  7:52         ` karthik nayak
@ 2015-04-05 19:57           ` Junio C Hamano
  2015-04-07 10:34             ` karthik nayak
  0 siblings, 1 reply; 19+ messages in thread
From: Junio C Hamano @ 2015-04-05 19:57 UTC (permalink / raw)
  To: karthik nayak; +Cc: git, sunshine

karthik nayak <karthik.188@gmail.com> writes:

> On 04/05/2015 01:16 PM, Junio C Hamano wrote:
>
>> If it semantically does not make sense to ask for the typename
>> without asking for the type code, then we can and should make that
>> as a new calling convention _all_ callers must follow.
>>
>> In other words, I think it is better to have
>>
>> 	if (oi->typename && !oi->typep)
>> 		die("BUG");
>>
>> somewhere near the beginning of the callchain that takes oi; that
>> will ensure all callers understand the rule.
>
> Yes! this is a better approach as it will enforce that typep must be
> set when typename is set.

Not so fast ;-)

The key phrase in what I wrote above is "If it does not make sense",
and I am not yet convinced if that is the case or not.  If we are to
change the calling convention for the callers, the reason why it
does not make sense to ask only for typename but not typep must be
explained in the log message.

And if it turns out that the answer to that question is "it is valid
to ask only for typename", then it would be wrong to force everybody
who wants to learn the stringified typename to supply typep.  And in
such a case it might be better to do something like this instead (on
top of your patch I am responding to):

 sha1_file.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/sha1_file.c b/sha1_file.c
index f4055dd..26fbb7c 100644
--- a/sha1_file.c
+++ b/sha1_file.c
@@ -2639,6 +2639,7 @@ int sha1_object_info_extended(const unsigned char *sha1, struct object_info *oi,
 	struct cached_object *co;
 	struct pack_entry e;
 	int rtype;
+	enum object_type real_type;
 	const unsigned char *real = lookup_replace_object_extended(sha1, flags);
 
 	co = find_cached_object(real);
@@ -2670,9 +2671,18 @@ int sha1_object_info_extended(const unsigned char *sha1, struct object_info *oi,
 			return -1;
 	}
 
+	/*
+	 * packed_object_info() does not follow the delta chain to
+	 * find out the real type, unless it is given oi->typep.
+	 */
+	if (oi->typename && !oi->typep)
+		oi->typep = &real_type;
+
 	rtype = packed_object_info(e.p, e.offset, oi);
 	if (rtype < 0) {
 		mark_bad_packed_object(e.p, real);
+		if (oi->typep == &real_type)
+			oi->typep = NULL;
 		return sha1_object_info_extended(real, oi, 0);
 	} else if (in_delta_base_cache(e.p, e.offset)) {
 		oi->whence = OI_DBCACHED;
@@ -2686,6 +2696,8 @@ int sha1_object_info_extended(const unsigned char *sha1, struct object_info *oi,
 	if (oi->typename)
 		strbuf_addstr(oi->typename, typename(*oi->typep));
 
+	if (oi->typep == &real_type)
+		oi->typep = NULL;
 	return 0;
 }
 

^ permalink raw reply related	[flat|nested] 19+ messages in thread

* Re: [PATCH v7 1/4] sha1_file.c: support reading from a loose object of unknown type
  2015-04-05 19:57           ` Junio C Hamano
@ 2015-04-07 10:34             ` karthik nayak
  0 siblings, 0 replies; 19+ messages in thread
From: karthik nayak @ 2015-04-07 10:34 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git, sunshine


On 04/06/2015 01:27 AM, Junio C Hamano wrote:
> karthik nayak <karthik.188@gmail.com> writes:
>
> > On 04/05/2015 01:16 PM, Junio C Hamano wrote:
> >
> >> If it semantically does not make sense to ask for the typename
> >> without asking for the type code, then we can and should make that
> >> as a new calling convention _all_ callers must follow.
> >>
> >> In other words, I think it is better to have
> >>
> >>     if (oi->typename && !oi->typep)
> >>         die("BUG");
> >>
> >> somewhere near the beginning of the callchain that takes oi; that
> >> will ensure all callers understand the rule.
> >
> > Yes! this is a better approach as it will enforce that typep must be
> > set when typename is set.
>
> Not so fast ;-)
>
> The key phrase in what I wrote above is "If it does not make sense",
> and I am not yet convinced if that is the case or not.  If we are to
> change the calling convention for the callers, the reason why it
> does not make sense to ask only for typename but not typep must be
> explained in the log message.
>
> And if it turns out that the answer to that question is "it is valid
> to ask only for typename", then it would be wrong to force everybody
> who wants to learn the stringified typename to supply typep.  And in
> such a case it might be better to do something like this instead (on
> top of your patch I am responding to):
>
>   sha1_file.c | 12 ++++++++++++
>   1 file changed, 12 insertions(+)
>
> diff --git a/sha1_file.c b/sha1_file.c
> index f4055dd..26fbb7c 100644
> --- a/sha1_file.c
> +++ b/sha1_file.c
> @@ -2639,6 +2639,7 @@ int sha1_object_info_extended(const unsigned char *sha1, struct object_info *oi,
>       struct cached_object *co;
>       struct pack_entry e;
>       int rtype;
> +    enum object_type real_type;
>       const unsigned char *real = lookup_replace_object_extended(sha1, flags);
>
>       co = find_cached_object(real);
> @@ -2670,9 +2671,18 @@ int sha1_object_info_extended(const unsigned char *sha1, struct object_info *oi,
>               return -1;
>       }
>
> +    /*
> +     * packed_object_info() does not follow the delta chain to
> +     * find out the real type, unless it is given oi->typep.
> +     */
> +    if (oi->typename && !oi->typep)
> +        oi->typep = &real_type;
> +
>       rtype = packed_object_info(e.p, e.offset, oi);
>       if (rtype < 0) {
>           mark_bad_packed_object(e.p, real);
> +        if (oi->typep == &real_type)
> +            oi->typep = NULL;
>           return sha1_object_info_extended(real, oi, 0);
>       } else if (in_delta_base_cache(e.p, e.offset)) {
>           oi->whence = OI_DBCACHED;
> @@ -2686,6 +2696,8 @@ int sha1_object_info_extended(const unsigned char *sha1, struct object_info *oi,
>       if (oi->typename)
>           strbuf_addstr(oi->typename, typename(*oi->typep));
>
> +    if (oi->typep == &real_type)
> +        oi->typep = NULL;
>       return 0;
>   }
>
>
Haha, If there is anything I love about code revisions its how you are subjected to
so many different ways of thinking. I didn't think of what you said.

We could do this and support typename without typep being used. This could probably
even eliminate the typep used by cat-file while getting the type of an object.

Will go through this again.

Thanks a lot.

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH v7 1/4] sha1_file.c: support reading from a loose object of unknown type
  2015-04-05 18:28   ` Karthik Nayak
@ 2015-04-07 20:46     ` Eric Sunshine
  0 siblings, 0 replies; 19+ messages in thread
From: Eric Sunshine @ 2015-04-07 20:46 UTC (permalink / raw)
  To: Karthik Nayak; +Cc: Git List, Junio C Hamano

On Sun, Apr 5, 2015 at 2:28 PM, Karthik Nayak <karthik.188@gmail.com> wrote:
> Update sha1_loose_object_info() to optionally allow it to read
> from a loose object file of unknown/bogus type; as the function
> usually returns the type of the object it read in the form of enum
> for known types, add an optional "typename" field to receive the
> name of the type in textual form and a flag to indicate the reading
> of a loose object file of unknown/bogus type.
>
> Add parse_sha1_header_extended() which acts as a wrapper around
> parse_sha1_header() allowing more information to be obtained.
>
> Add unpack_sha1_header_to_strbuf() to unpack sha1 headers of
> unknown/corrupt objects which have a unknown sha1 header size to
> a strbuf structure. This was written by Junio C Hamano but tested
> by me.
>
> Helped-by: Junio C Hamano <gitster@pobox.com>
> Helped-by: Eric Sunshine <sunshine@sunshineco.com>
> Signed-off-by: Karthik Nayak <karthik.188@gmail.com>
> ---
> diff --git a/sha1_file.c b/sha1_file.c
> index 980ce6b..ac8fffc 100644
> --- a/sha1_file.c
> +++ b/sha1_file.c
> @@ -1614,27 +1642,24 @@ static void *unpack_sha1_rest(git_zstream *stream, void *buffer, unsigned long s
>   * too permissive for what we want to check. So do an anal
>   * object header parse by hand.
>   */
> -int parse_sha1_header(const char *hdr, unsigned long *sizep)
> +int parse_sha1_header_extended(const char *hdr, struct object_info *oi,
> +                              unsigned int flags)
>  {
> -       char type[10];
> -       int i;
> +       struct strbuf typename = STRBUF_INIT;
>         unsigned long size;
> +       int type;
>
>         /*
>          * The type can be at most ten bytes (including the
>          * terminating '\0' that we add), and is followed by
>          * a space.
>          */
> -       i = 0;
>         for (;;) {
>                 char c = *hdr++;
>                 if (c == ' ')
>                         break;
> -               type[i++] = c;
> -               if (i >= sizeof(type))
> -                       return -1;
> +               strbuf_addch(&typename, c);
>         }
> -       type[i] = 0;
>
>         /*
>          * The length must follow immediately, and be in canonical
> @@ -1652,12 +1677,39 @@ int parse_sha1_header(const char *hdr, unsigned long *sizep)
>                         size = size * 10 + c;
>                 }
>         }
> -       *sizep = size;
> +
> +       type = type_from_string_gently(typename.buf, typename.len, 1);
> +       if (oi->sizep)
> +               *oi->sizep = size;
> +       if (oi->typename)
> +               strbuf_addbuf(oi->typename, &typename);
> +       strbuf_release(&typename);
>
>         /*
> +        * Set type to 0 if its an unknown object and
> +        * we're obtaining the type using '--literally'
> +        * option.
> +        */
> +       if ((flags & LOOKUP_LITERALLY) && (type == -1))
> +               type = 0;
> +       else if (type == -1)
> +               die("invalid object type");
> +       if (oi->typep)
> +               *oi->typep = type;

This unnecessary intermixing of 'type'/'typename' and 'size'
processing makes the code more confusing than it ought to be. Why not
do all the processing related to 'type'/'typename' before the
processing of 'size'?

> +       /*
>          * The length must be followed by a zero byte
>          */
> -       return *hdr ? -1 : type_from_string(type);
> +       return *hdr ? -1 : type;
> +}
> +
> +int parse_sha1_header(const char *hdr, unsigned long *sizep)
> +{
> +       struct object_info oi;
> +
> +       oi.sizep = sizep;
> +       oi.typename = NULL;
> +       oi.typep = NULL;
> +       return parse_sha1_header_extended(hdr, &oi, LOOKUP_REPLACE_OBJECT);
>  }
>
>  static void *unpack_sha1_file(void *map, unsigned long mapsize, enum object_type *type, unsigned long *size, const unsigned char *sha1)

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH v7 2/4] cat-file: teach cat-file a '--literally' option
  2015-04-04  5:44 ` [PATCH v7 2/4] cat-file: teach cat-file a '--literally' option Karthik Nayak
@ 2015-04-07 20:49   ` Eric Sunshine
  0 siblings, 0 replies; 19+ messages in thread
From: Eric Sunshine @ 2015-04-07 20:49 UTC (permalink / raw)
  To: Karthik Nayak; +Cc: Git List, Junio C Hamano

On Sat, Apr 4, 2015 at 1:44 AM, Karthik Nayak <karthik.188@gmail.com> wrote:
> Currently 'git cat-file' throws an error while trying to
> print the type or size of a broken/corrupt object which is
> created using 'git hash-object --literally'. This is
> because these objects are usually of unknown types.

This focus of this explanation is off-the-mark. The fact that such
objects can be created by 'hash-object --literally' is tangental to
the real purpose of the new 'cat-file --literally' option, which is
that it can help with diagnosing broken/corrupt objects encountered
in-the-wild.

Even mentioning 'hash-object --literally' here may be misleading and
confusing since its purpose it to intentionally create broken objects
for stress-testing git itself. I'd probably drop the reference
altogether, but if you insist upon mentioning 'hash-object
--literally', perhaps make it a very minor parenthetical comment at
the end of the commit message saying that 'cat-file --literally' was
inspired by its hash-object counterpart, or some such.

More below.

> Teach git cat-file a '--literally' option where it prints
> the type or size of a broken/corrupt object without throwing
> an error.
>
> Modify '-t' and '-s' options to call sha1_object_info_extended()
> directly to support the '--literally' option.
>
> Helped-by: Junio C Hamano <gitster@pobox.com>
> Helped-by: Eric Sunshine <sunshine@sunshineco.com>
> Signed-off-by: Karthik Nayak <karthik.188@gmail.com>
> ---
> diff --git a/builtin/cat-file.c b/builtin/cat-file.c
> index df99df4..91ceae0 100644
> --- a/builtin/cat-file.c
> +++ b/builtin/cat-file.c
> @@ -9,13 +9,20 @@
>  #include "userdiff.h"
>  #include "streaming.h"
>
> -static int cat_one_file(int opt, const char *exp_type, const char *obj_name)
> +static int cat_one_file(int opt, const char *exp_type, const char *obj_name,
> +                       int literally)
>  {
>         unsigned char sha1[20];
>         enum object_type type;
>         char *buf;
>         unsigned long size;
>         struct object_context obj_context;
> +       struct object_info oi = {NULL};
> +       struct strbuf sb = STRBUF_INIT;
> +       unsigned flags = LOOKUP_REPLACE_OBJECT;
> +
> +       if (literally)
> +               flags |= LOOKUP_LITERALLY;
>
>         if (get_sha1_with_context(obj_name, 0, sha1, &obj_context))
>                 die("Not a valid object name %s", obj_name);
> @@ -23,16 +30,24 @@ static int cat_one_file(int opt, const char *exp_type, const char *obj_name)
>         buf = NULL;
>         switch (opt) {
>         case 't':
> -               type = sha1_object_info(sha1, NULL);
> -               if (type > 0) {
> -                       printf("%s\n", typename(type));
> +               oi.typep = &type;
> +               oi.typename = &sb;

These two lines are common to the -t and -s cases. Would it make sense
to instead move them to just after 'oi' and 'sb' are declared? However
(see below)...

> +               if (sha1_object_info_extended(sha1, &oi, flags) < 0)
> +                       die("git cat-file: could not get object info");
> +               if (type >= 0 && sb.len) {
> +                       printf("%s\n", sb.buf);
> +                       strbuf_release(&sb);

Here you release the strbuf...

>                         return 0;
>                 }
>                 break;
>
>         case 's':
> -               type = sha1_object_info(sha1, &size);
> -               if (type > 0) {
> +               oi.typep = &type;
> +               oi.typename = &sb;

Why do you need to collect 'typename' for the -s case?
sha1_object_info_extended() promises that 'type' will be zero in the
--literally case for unknown types, so checking 'sb.len' in the
conditional below doesn't buy you anything, does it?

In fact, it's not even clear why you need to collect 'type' in the -s
case? The return value of sha1_object_info_extended() already tells
you whether or not the 'size' was retrieved successfully (--literally
or not).

> +               oi.sizep = &size;
> +               if (sha1_object_info_extended(sha1, &oi, flags) < 0)
> +                       die("git cat-file: could not get object info");
> +               if (type >= 0 && sb.len) {
>                         printf("%lu\n", size);

But here you do not release the strbuf.

>                         return 0;
>                 }
> @@ -369,6 +385,8 @@ int cmd_cat_file(int argc, const char **argv, const char *prefix)
>                 OPT_SET_INT('p', NULL, &opt, N_("pretty-print object's content"), 'p'),
>                 OPT_SET_INT(0, "textconv", &opt,
>                             N_("for blob objects, run textconv on object's content"), 'c'),
> +               OPT_BOOL( 0, "literally", &literally,
> +                         N_("get information about corrupt objects for debugging Git")),

This option neither "gets information" nor is it for debugging Git.
Rather, it's useful for diagnosing broken/corrupt objects in
combination with other options. Perhaps rephrase something like this:

    "allow -s and -t to work with broken/corrupt objects"

>                 { OPTION_CALLBACK, 0, "batch", &batch, "format",
>                         N_("show info and content of objects fed from the standard input"),
>                         PARSE_OPT_OPTARG, batch_option_callback },

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH v7 3/4] cat-file: add documentation for '--literally' option.
  2015-04-04  5:44 ` [PATCH v7 3/4] cat-file: add documentation for " Karthik Nayak
@ 2015-04-07 20:49   ` Eric Sunshine
  0 siblings, 0 replies; 19+ messages in thread
From: Eric Sunshine @ 2015-04-07 20:49 UTC (permalink / raw)
  To: Karthik Nayak; +Cc: Git List, Junio C Hamano

On Sat, Apr 4, 2015 at 1:44 AM, Karthik Nayak <karthik.188@gmail.com> wrote:
> Signed-off-by: Karthik Nayak <karthik.188@gmail.com>
> ---
> diff --git a/Documentation/git-cat-file.txt b/Documentation/git-cat-file.txt
> index f6a16f4..8bac7bd 100644
> --- a/Documentation/git-cat-file.txt
> +++ b/Documentation/git-cat-file.txt
> @@ -9,7 +9,7 @@ git-cat-file - Provide content or type and size information for repository objec
>  SYNOPSIS
>  --------
>  [verse]
> -'git cat-file' (-t | -s | -e | -p | <type> | --textconv ) <object>
> +'git cat-file' (-t [--literally]| -s [--literally]| -e | -p | <type> | --textconv ) <object>
>  'git cat-file' (--batch | --batch-check) < <list-of-objects>
>
>  DESCRIPTION
> @@ -69,6 +69,10 @@ OPTIONS
>         not be combined with any other options or arguments.  See the
>         section `BATCH OUTPUT` below for details.
>
> +--literally::
> +       Print information of broken/corrupt objects of unknown type without
> +       throwing an error. To be used combined with '-s' or '-t' option.

The way this is worded, it sounds as if the --literally option itself
is printing some sort of additional information beyond what -s or -t
outputs. In fact, it merely allows -s and -t to work with
broken/corrupt objects. Perhaps rephrase something like this?

    Allow -s or -t to query broken or corrupt objects of
    unknown type.

>  OUTPUT
>  ------
>  If '-t' is specified, one of the <type>.
> --
> 2.4.0.rc1.249.g9f2ee54
>

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH v7 4/4] t1006: add tests for git cat-file --literally
  2015-04-04  5:44 ` [PATCH v7 4/4] t1006: add tests for git cat-file --literally Karthik Nayak
@ 2015-04-07 20:49   ` Eric Sunshine
  2015-04-08 17:42     ` karthik nayak
  0 siblings, 1 reply; 19+ messages in thread
From: Eric Sunshine @ 2015-04-07 20:49 UTC (permalink / raw)
  To: Karthik Nayak; +Cc: Git List, Junio C Hamano

On Sat, Apr 4, 2015 at 1:44 AM, Karthik Nayak <karthik.188@gmail.com> wrote:
> Signed-off-by: Karthik Nayak <karthik.188@gmail.com>
> ---
> diff --git a/t/t1006-cat-file.sh b/t/t1006-cat-file.sh
> index ab36b1e..5b74044 100755
> --- a/t/t1006-cat-file.sh
> +++ b/t/t1006-cat-file.sh
> @@ -47,6 +47,18 @@ $content"
>         test_cmp expect actual
>      '
>
> +    test_expect_success "Type of $type is correct using --literally" '
> +       echo $type >expect &&
> +       git cat-file -t --literally $sha1 >actual &&
> +       test_cmp expect actual
> +    '
> +
> +    test_expect_success "Size of $type is correct using --literally" '
> +       echo $size >expect &&
> +       git cat-file -s --literally $sha1 >actual &&
> +       test_cmp expect actual
> +    '
> +
>      test -z "$content" ||
>      test_expect_success "Content of $type is correct" '
>         maybe_remove_timestamp "$content" $no_ts >expect &&
> @@ -296,4 +308,19 @@ test_expect_success '%(deltabase) reports packed delta bases' '
>         }
>  '
>
> +bogus_type="bogus"
> +bogus_sha1=$(git hash-object -t $bogus_type --literally -w --stdin </dev/null)
> +
> +test_expect_success "Type of broken object is correct" '
> +       echo $bogus_type >expect &&
> +       git cat-file -t --literally $bogus_sha1 >actual &&
> +       test_cmp expect actual
> +'
> +
> +test_expect_success "Size of broken object is correct" '
> +       echo "0" >expect &&

Zero is such a common fallback value when things go wrong that it may
not be the best choice for this test. Consequently, it might be better
to create the bogus object with non-zero length. Take a look at how
'hello_length' and 'hello_sha1' are computed elsewhere in this script
for inspiration.

> +       git cat-file -s --literally $bogus_sha1 >actual &&
> +       test_cmp expect actual
> +'
> +
>  test_done
> --
> 2.4.0.rc1.249.g9f2ee54

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH v7 4/4] t1006: add tests for git cat-file --literally
  2015-04-07 20:49   ` Eric Sunshine
@ 2015-04-08 17:42     ` karthik nayak
  2015-04-08 20:34       ` Eric Sunshine
  0 siblings, 1 reply; 19+ messages in thread
From: karthik nayak @ 2015-04-08 17:42 UTC (permalink / raw)
  To: Eric Sunshine; +Cc: Git List, Junio C Hamano


On 04/08/2015 02:19 AM, Eric Sunshine wrote:
> On Sat, Apr 4, 2015 at 1:44 AM, Karthik Nayak <karthik.188@gmail.com> wrote:
> > Signed-off-by: Karthik Nayak <karthik.188@gmail.com>
> > ---
> > diff --git a/t/t1006-cat-file.sh b/t/t1006-cat-file.sh
> > index ab36b1e..5b74044 100755
> > --- a/t/t1006-cat-file.sh
> > +++ b/t/t1006-cat-file.sh
> > @@ -47,6 +47,18 @@ $content"
> >          test_cmp expect actual
> >       '
> >
> > +    test_expect_success "Type of $type is correct using --literally" '
> > +       echo $type >expect &&
> > +       git cat-file -t --literally $sha1 >actual &&
> > +       test_cmp expect actual
> > +    '
> > +
> > +    test_expect_success "Size of $type is correct using --literally" '
> > +       echo $size >expect &&
> > +       git cat-file -s --literally $sha1 >actual &&
> > +       test_cmp expect actual
> > +    '
> > +
> >       test -z "$content" ||
> >       test_expect_success "Content of $type is correct" '
> >          maybe_remove_timestamp "$content" $no_ts >expect &&
> > @@ -296,4 +308,19 @@ test_expect_success '%(deltabase) reports packed delta bases' '
> >          }
> >   '
> >
> > +bogus_type="bogus"
> > +bogus_sha1=$(git hash-object -t $bogus_type --literally -w --stdin </dev/null)
> > +
> > +test_expect_success "Type of broken object is correct" '
> > +       echo $bogus_type >expect &&
> > +       git cat-file -t --literally $bogus_sha1 >actual &&
> > +       test_cmp expect actual
> > +'
> > +
> > +test_expect_success "Size of broken object is correct" '
> > +       echo "0" >expect &&
>
> Zero is such a common fallback value when things go wrong that it may
> not be the best choice for this test. Consequently, it might be better
> to create the bogus object with non-zero length. Take a look at how
> 'hello_length' and 'hello_sha1' are computed elsewhere in this script
> for inspiration.
The first part of this patch contains tests which make use of 'hello_length'
adn 'hello_sha1', but I get what you're saying, will look into it. Thanks.
>
> > +       git cat-file -s --literally $bogus_sha1 >actual &&
> > +       test_cmp expect actual
> > +'
> > +
> >   test_done
> > --
> > 2.4.0.rc1.249.g9f2ee54

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH v7 4/4] t1006: add tests for git cat-file --literally
  2015-04-08 17:42     ` karthik nayak
@ 2015-04-08 20:34       ` Eric Sunshine
  2015-04-09  3:24         ` Karthik Nayak
  0 siblings, 1 reply; 19+ messages in thread
From: Eric Sunshine @ 2015-04-08 20:34 UTC (permalink / raw)
  To: karthik nayak; +Cc: Git List, Junio C Hamano

On Wed, Apr 8, 2015 at 1:42 PM, karthik nayak <karthik.188@gmail.com> wrote:
> On 04/08/2015 02:19 AM, Eric Sunshine wrote:
>> On Sat, Apr 4, 2015 at 1:44 AM, Karthik Nayak <karthik.188@gmail.com>
>> wrote:
>> > Signed-off-by: Karthik Nayak <karthik.188@gmail.com>
>> > ---
>> > diff --git a/t/t1006-cat-file.sh b/t/t1006-cat-file.sh
>> > index ab36b1e..5b74044 100755
>> > --- a/t/t1006-cat-file.sh
>> > +++ b/t/t1006-cat-file.sh
>> > @@ -296,4 +308,19 @@ test_expect_success '%(deltabase) reports packed
>> > delta bases' '
>> >          }
>> >   '
>> >
>> > +bogus_type="bogus"
>> > +bogus_sha1=$(git hash-object -t $bogus_type --literally -w --stdin
>> > </dev/null)
>> > +
>> > +test_expect_success "Type of broken object is correct" '
>> > +       echo $bogus_type >expect &&
>> > +       git cat-file -t --literally $bogus_sha1 >actual &&
>> > +       test_cmp expect actual
>> > +'
>> > +
>> > +test_expect_success "Size of broken object is correct" '
>> > +       echo "0" >expect &&
>>
>> Zero is such a common fallback value when things go wrong that it may
>> not be the best choice for this test. Consequently, it might be better
>> to create the bogus object with non-zero length. Take a look at how
>> 'hello_length' and 'hello_sha1' are computed elsewhere in this script
>> for inspiration.
>
> The first part of this patch contains tests which make use of 'hello_length'
> adn 'hello_sha1', but I get what you're saying, will look into it. Thanks.

Just to be sure we're on the same page, I wasn't suggesting re-using
'hello_size' and 'hello_sha1', but merely to use that as an example of
how to avoid hard-coding the length of your non-zero-length bogus
object. So, something like this, perhaps:

    bogus_content='bogus'
    bogus_size=$(strlen "$bogus_content")
    bogus_sha1=$(echo_without_newline "$bogus_content" |
        git hash-object -t $bogus_type --literally -w --stdin)
    ...
    test_expect_success "Size of broken object is correct" '
        echo $bogus_size >expect &&
        ...

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH v7 4/4] t1006: add tests for git cat-file --literally
  2015-04-08 20:34       ` Eric Sunshine
@ 2015-04-09  3:24         ` Karthik Nayak
  0 siblings, 0 replies; 19+ messages in thread
From: Karthik Nayak @ 2015-04-09  3:24 UTC (permalink / raw)
  To: Eric Sunshine; +Cc: Git List, Junio C Hamano



On April 9, 2015 2:04:08 AM GMT+05:30, Eric Sunshine <sunshine@sunshineco.com> wrote:
>On Wed, Apr 8, 2015 at 1:42 PM, karthik nayak <karthik.188@gmail.com>
>wrote:
>> On 04/08/2015 02:19 AM, Eric Sunshine wrote:
>>> On Sat, Apr 4, 2015 at 1:44 AM, Karthik Nayak
><karthik.188@gmail.com>
>>> wrote:
>>> > Signed-off-by: Karthik Nayak <karthik.188@gmail.com>
>>> > ---
>>> > diff --git a/t/t1006-cat-file.sh b/t/t1006-cat-file.sh
>>> > index ab36b1e..5b74044 100755
>>> > --- a/t/t1006-cat-file.sh
>>> > +++ b/t/t1006-cat-file.sh
>>> > @@ -296,4 +308,19 @@ test_expect_success '%(deltabase) reports
>packed
>>> > delta bases' '
>>> >          }
>>> >   '
>>> >
>>> > +bogus_type="bogus"
>>> > +bogus_sha1=$(git hash-object -t $bogus_type --literally -w
>--stdin
>>> > </dev/null)
>>> > +
>>> > +test_expect_success "Type of broken object is correct" '
>>> > +       echo $bogus_type >expect &&
>>> > +       git cat-file -t --literally $bogus_sha1 >actual &&
>>> > +       test_cmp expect actual
>>> > +'
>>> > +
>>> > +test_expect_success "Size of broken object is correct" '
>>> > +       echo "0" >expect &&
>>>
>>> Zero is such a common fallback value when things go wrong that it
>may
>>> not be the best choice for this test. Consequently, it might be
>better
>>> to create the bogus object with non-zero length. Take a look at how
>>> 'hello_length' and 'hello_sha1' are computed elsewhere in this
>script
>>> for inspiration.
>>
>> The first part of this patch contains tests which make use of
>'hello_length'
>> adn 'hello_sha1', but I get what you're saying, will look into it.
>Thanks.
>
>Just to be sure we're on the same page, I wasn't suggesting re-using
>'hello_size' and 'hello_sha1', but merely to use that as an example of
>how to avoid hard-coding the length of your non-zero-length bogus
>object. So, something like this, perhaps:
>
>    bogus_content='bogus'
>    bogus_size=$(strlen "$bogus_content")
>    bogus_sha1=$(echo_without_newline "$bogus_content" |
>        git hash-object -t $bogus_type --literally -w --stdin)
>    ...
>    test_expect_success "Size of broken object is correct" '
>        echo $bogus_size >expect &&
>        ...
Yes, I had done exactly what you suggested here. Thanks 

^ permalink raw reply	[flat|nested] 19+ messages in thread

end of thread, other threads:[~2015-04-09  3:24 UTC | newest]

Thread overview: 19+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-04-04  5:41 [PATCH v7 0/4] cat-file: teach cat-file a '--literally' option karthik nayak
2015-04-04  5:42 ` [PATCH v7 1/4] sha1_file.c: support reading from a loose object of unknown type Karthik Nayak
2015-04-04 19:34   ` Junio C Hamano
2015-04-04 19:53     ` karthik nayak
2015-04-05  7:46       ` Junio C Hamano
2015-04-05  7:52         ` karthik nayak
2015-04-05 19:57           ` Junio C Hamano
2015-04-07 10:34             ` karthik nayak
2015-04-05 18:28   ` Karthik Nayak
2015-04-07 20:46     ` Eric Sunshine
2015-04-04  5:44 ` [PATCH v7 2/4] cat-file: teach cat-file a '--literally' option Karthik Nayak
2015-04-07 20:49   ` Eric Sunshine
2015-04-04  5:44 ` [PATCH v7 3/4] cat-file: add documentation for " Karthik Nayak
2015-04-07 20:49   ` Eric Sunshine
2015-04-04  5:44 ` [PATCH v7 4/4] t1006: add tests for git cat-file --literally Karthik Nayak
2015-04-07 20:49   ` Eric Sunshine
2015-04-08 17:42     ` karthik nayak
2015-04-08 20:34       ` Eric Sunshine
2015-04-09  3:24         ` Karthik Nayak

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.