All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v8 0/4] cat-file: teach cat-file a '--literally' option
@ 2015-04-15 16:55 karthik nayak
  2015-04-15 16:59 ` [PATCH v8 1/4] sha1_file.c: support reading from a loose object of unknown type Karthik Nayak
                   ` (3 more replies)
  0 siblings, 4 replies; 44+ messages in thread
From: karthik nayak @ 2015-04-15 16:55 UTC (permalink / raw)
  To: Git List; +Cc: Junio C Hamano, Eric Sunshine

Version 7 can be found here :
http://thread.gmane.org/gmane.comp.version-control.git/266761/match=patch+v7+0+4

Changes In this version :

sha1_file.c
* sha1_object_info_extended() can now accept object_info *oi with 
oi->typename without needing a oi->typep.
* Changes in parse_sha1_header_extended() to make the code more organized.

cat-file.c
* Rephrasing of the '--literally' option.
* cat_one_file() only uses oi.typename for case 't' and oi.sizep for 
case 's'.

t1006
* Changed the size of bogus type from 0 to strlen(bogus_content).

Documentation
* Rephrasing of the explanation for '--literally' option.

Suggestion's given by Eric Sunshine and Junio Hamano.

^ permalink raw reply	[flat|nested] 44+ messages in thread

* [PATCH v8 1/4] sha1_file.c: support reading from a loose object of unknown type
  2015-04-15 16:55 [PATCH v8 0/4] cat-file: teach cat-file a '--literally' option karthik nayak
@ 2015-04-15 16:59 ` Karthik Nayak
  2015-04-15 20:21   ` Junio C Hamano
  2015-04-17 23:31   ` Eric Sunshine
  2015-04-15 16:59 ` [PATCH v8 2/4] cat-file: teach cat-file a '--literally' option Karthik Nayak
                   ` (2 subsequent siblings)
  3 siblings, 2 replies; 44+ messages in thread
From: Karthik Nayak @ 2015-04-15 16:59 UTC (permalink / raw)
  To: git; +Cc: gitster, sunshine, Karthik Nayak

Update sha1_loose_object_info() to optionally allow it to read
from a loose object file of unknown/bogus type; as the function
usually returns the type of the object it read in the form of enum
for known types, add an optional "typename" field to receive the
name of the type in textual form and a flag to indicate the reading
of a loose object file of unknown/bogus type.

Add parse_sha1_header_extended() which acts as a wrapper around
parse_sha1_header() allowing more information to be obtained.

Add unpack_sha1_header_to_strbuf() to unpack sha1 headers of
unknown/corrupt objects which have a unknown sha1 header size to
a strbuf structure. This was written by Junio C Hamano but tested
by me.

Helped-by: Junio C Hamano <gitster@pobox.com>
Helped-by: Eric Sunshine <sunshine@sunshineco.com>
Signed-off-by: Karthik Nayak <karthik.188@gmail.com>
---
 cache.h     |   2 +
 sha1_file.c | 126 +++++++++++++++++++++++++++++++++++++++++++++++++-----------
 2 files changed, 105 insertions(+), 23 deletions(-)

diff --git a/cache.h b/cache.h
index c47323e..ea6faf0 100644
--- a/cache.h
+++ b/cache.h
@@ -881,6 +881,7 @@ extern int is_ntfs_dotgit(const char *name);
 
 /* object replacement */
 #define LOOKUP_REPLACE_OBJECT 1
+#define LOOKUP_LITERALLY 2
 extern void *read_sha1_file_extended(const unsigned char *sha1, enum object_type *type, unsigned long *size, unsigned flag);
 static inline void *read_sha1_file(const unsigned char *sha1, enum object_type *type, unsigned long *size)
 {
@@ -1349,6 +1350,7 @@ struct object_info {
 	unsigned long *sizep;
 	unsigned long *disk_sizep;
 	unsigned char *delta_base_sha1;
+	struct strbuf *typename;
 
 	/* Response */
 	enum {
diff --git a/sha1_file.c b/sha1_file.c
index 980ce6b..267399d 100644
--- a/sha1_file.c
+++ b/sha1_file.c
@@ -1564,6 +1564,34 @@ int unpack_sha1_header(git_zstream *stream, unsigned char *map, unsigned long ma
 	return git_inflate(stream, 0);
 }
 
+static int unpack_sha1_header_to_strbuf(git_zstream *stream, unsigned char *map,
+					unsigned long mapsize,
+					struct strbuf *header)
+{
+	unsigned char buffer[32], *cp;
+	unsigned long bufsiz = sizeof(buffer);
+	int status;
+
+	status = unpack_sha1_header(stream, map, mapsize, buffer, bufsiz);
+
+	if (status) {
+		strbuf_add(header, buffer, stream->next_out - buffer);
+		return status;
+	}
+
+	do {
+		status = git_inflate(stream, 0);
+		strbuf_add(header, buffer, stream->next_out - buffer);
+		for (cp = buffer; cp < stream->next_out; cp++)
+			if (!*cp)
+				/* Found the NUL at the end of the header */
+				return 0;
+		stream->next_out = buffer;
+		stream->avail_out = bufsiz;
+	} while (status != Z_STREAM_END);
+	return -1;
+}
+
 static void *unpack_sha1_rest(git_zstream *stream, void *buffer, unsigned long size, const unsigned char *sha1)
 {
 	int bytes = strlen(buffer) + 1;
@@ -1614,27 +1642,40 @@ static void *unpack_sha1_rest(git_zstream *stream, void *buffer, unsigned long s
  * too permissive for what we want to check. So do an anal
  * object header parse by hand.
  */
-int parse_sha1_header(const char *hdr, unsigned long *sizep)
+int parse_sha1_header_extended(const char *hdr, struct object_info *oi,
+			       unsigned int flags)
 {
-	char type[10];
-	int i;
+	struct strbuf typename = STRBUF_INIT;
 	unsigned long size;
+	int type;
 
 	/*
 	 * The type can be at most ten bytes (including the
 	 * terminating '\0' that we add), and is followed by
 	 * a space.
 	 */
-	i = 0;
 	for (;;) {
 		char c = *hdr++;
 		if (c == ' ')
 			break;
-		type[i++] = c;
-		if (i >= sizeof(type))
-			return -1;
+		strbuf_addch(&typename, c);
 	}
-	type[i] = 0;
+
+	type = type_from_string_gently(typename.buf, typename.len, 1);
+	if (oi->typename)
+		strbuf_addbuf(oi->typename, &typename);
+	strbuf_release(&typename);
+	/*
+	 * Set type to 0 if its an unknown object and
+	 * we're obtaining the type using '--literally'
+	 * option.
+	 */
+	if ((flags & LOOKUP_LITERALLY) && (type == -1))
+		type = 0;
+	else if (type == -1)
+		die("invalid object type");
+	if (oi->typep)
+		*oi->typep = type;
 
 	/*
 	 * The length must follow immediately, and be in canonical
@@ -1652,12 +1693,24 @@ int parse_sha1_header(const char *hdr, unsigned long *sizep)
 			size = size * 10 + c;
 		}
 	}
-	*sizep = size;
+
+	if (oi->sizep)
+		*oi->sizep = size;
 
 	/*
 	 * The length must be followed by a zero byte
 	 */
-	return *hdr ? -1 : type_from_string(type);
+	return *hdr ? -1 : type;
+}
+
+int parse_sha1_header(const char *hdr, unsigned long *sizep)
+{
+	struct object_info oi;
+
+	oi.sizep = sizep;
+	oi.typename = NULL;
+	oi.typep = NULL;
+	return parse_sha1_header_extended(hdr, &oi, LOOKUP_REPLACE_OBJECT);
 }
 
 static void *unpack_sha1_file(void *map, unsigned long mapsize, enum object_type *type, unsigned long *size, const unsigned char *sha1)
@@ -2522,13 +2575,15 @@ struct packed_git *find_sha1_pack(const unsigned char *sha1,
 }
 
 static int sha1_loose_object_info(const unsigned char *sha1,
-				  struct object_info *oi)
+				  struct object_info *oi,
+				  int flags)
 {
-	int status;
-	unsigned long mapsize, size;
+	int status = 0;
+	unsigned long mapsize;
 	void *map;
 	git_zstream stream;
 	char hdr[32];
+	struct strbuf hdrbuf = STRBUF_INIT;
 
 	if (oi->delta_base_sha1)
 		hashclr(oi->delta_base_sha1);
@@ -2541,7 +2596,7 @@ static int sha1_loose_object_info(const unsigned char *sha1,
 	 * return value implicitly indicates whether the
 	 * object even exists.
 	 */
-	if (!oi->typep && !oi->sizep) {
+	if (!oi->typep && !oi->typename && !oi->sizep) {
 		struct stat st;
 		if (stat_sha1_file(sha1, &st) < 0)
 			return -1;
@@ -2555,17 +2610,26 @@ static int sha1_loose_object_info(const unsigned char *sha1,
 		return -1;
 	if (oi->disk_sizep)
 		*oi->disk_sizep = mapsize;
-	if (unpack_sha1_header(&stream, map, mapsize, hdr, sizeof(hdr)) < 0)
-		status = error("unable to unpack %s header",
-			       sha1_to_hex(sha1));
-	else if ((status = parse_sha1_header(hdr, &size)) < 0)
-		status = error("unable to parse %s header", sha1_to_hex(sha1));
-	else if (oi->sizep)
-		*oi->sizep = size;
+	if ((flags & LOOKUP_LITERALLY)) {
+		if (unpack_sha1_header_to_strbuf(&stream, map, mapsize, &hdrbuf) < 0)
+			status = error("unable to unpack %s header with --literally",
+				       sha1_to_hex(sha1));
+		else if ((status = parse_sha1_header_extended(hdrbuf.buf, oi, flags)) < 0)
+			status = error("unable to parse %s header with --literally",
+				       sha1_to_hex(sha1));
+	} else {
+		if (unpack_sha1_header(&stream, map, mapsize, hdr, sizeof(hdr)) < 0)
+			status = error("unable to unpack %s header",
+				       sha1_to_hex(sha1));
+		else if ((status = parse_sha1_header_extended(hdr, oi, flags)) < 0)
+			status = error("unable to parse %s header", sha1_to_hex(sha1));
+	}
 	git_inflate_end(&stream);
 	munmap(map, mapsize);
-	if (oi->typep)
+	if (status && oi->typep)
 		*oi->typep = status;
+	if (hdrbuf.buf)
+		strbuf_release(&hdrbuf);
 	return 0;
 }
 
@@ -2574,6 +2638,7 @@ int sha1_object_info_extended(const unsigned char *sha1, struct object_info *oi,
 	struct cached_object *co;
 	struct pack_entry e;
 	int rtype;
+	enum object_type real_type;
 	const unsigned char *real = lookup_replace_object_extended(sha1, flags);
 
 	co = find_cached_object(real);
@@ -2586,13 +2651,15 @@ int sha1_object_info_extended(const unsigned char *sha1, struct object_info *oi,
 			*(oi->disk_sizep) = 0;
 		if (oi->delta_base_sha1)
 			hashclr(oi->delta_base_sha1);
+		if (oi->typename)
+			strbuf_addstr(oi->typename, typename(co->type));
 		oi->whence = OI_CACHED;
 		return 0;
 	}
 
 	if (!find_pack_entry(real, &e)) {
 		/* Most likely it's a loose object. */
-		if (!sha1_loose_object_info(real, oi)) {
+		if (!sha1_loose_object_info(real, oi, flags)) {
 			oi->whence = OI_LOOSE;
 			return 0;
 		}
@@ -2603,9 +2670,18 @@ int sha1_object_info_extended(const unsigned char *sha1, struct object_info *oi,
 			return -1;
 	}
 
+	/*
+	 * packed_object_info() does not follow the delta chain to
+	 * find out the real type, unless it is given oi->typep.
+	 */
+	if (oi->typename && !oi->typep)
+		oi->typep = &real_type;
+
 	rtype = packed_object_info(e.p, e.offset, oi);
 	if (rtype < 0) {
 		mark_bad_packed_object(e.p, real);
+		if (oi->typep == &real_type)
+			oi->typep = NULL;
 		return sha1_object_info_extended(real, oi, 0);
 	} else if (in_delta_base_cache(e.p, e.offset)) {
 		oi->whence = OI_DBCACHED;
@@ -2616,6 +2692,10 @@ int sha1_object_info_extended(const unsigned char *sha1, struct object_info *oi,
 		oi->u.packed.is_delta = (rtype == OBJ_REF_DELTA ||
 					 rtype == OBJ_OFS_DELTA);
 	}
+	if (oi->typename)
+		strbuf_addstr(oi->typename, typename(*oi->typep));
+	if (oi->typep == &real_type)
+		oi->typep = NULL;
 
 	return 0;
 }
-- 
2.4.0.rc1.249.gb598846

^ permalink raw reply related	[flat|nested] 44+ messages in thread

* [PATCH v8 2/4] cat-file: teach cat-file a '--literally' option
  2015-04-15 16:55 [PATCH v8 0/4] cat-file: teach cat-file a '--literally' option karthik nayak
  2015-04-15 16:59 ` [PATCH v8 1/4] sha1_file.c: support reading from a loose object of unknown type Karthik Nayak
@ 2015-04-15 16:59 ` Karthik Nayak
  2015-04-15 20:20   ` Junio C Hamano
  2015-04-19  0:28   ` Charles Bailey
  2015-04-15 17:00 ` [PATCH v8 3/4] cat-file: add documentation for " Karthik Nayak
  2015-04-15 17:00 ` [PATCH v8 4/4] t1006: add tests for git cat-file --literally Karthik Nayak
  3 siblings, 2 replies; 44+ messages in thread
From: Karthik Nayak @ 2015-04-15 16:59 UTC (permalink / raw)
  To: git; +Cc: gitster, sunshine, Karthik Nayak

Currently 'git cat-file' throws an error while trying to
print the type or size of a broken/corrupt object. This is
because these objects are usually of unknown types.

Teach git cat-file a '--literally' option where it prints
the type or size of a broken/corrupt object without throwing
an error.

Modify '-t' and '-s' options to call sha1_object_info_extended()
directly to support the '--literally' option.

Helped-by: Junio C Hamano <gitster@pobox.com>
Helped-by: Eric Sunshine <sunshine@sunshineco.com>
Signed-off-by: Karthik Nayak <karthik.188@gmail.com>
---
 builtin/cat-file.c | 40 +++++++++++++++++++++++++++-------------
 1 file changed, 27 insertions(+), 13 deletions(-)

diff --git a/builtin/cat-file.c b/builtin/cat-file.c
index df99df4..1ec7190 100644
--- a/builtin/cat-file.c
+++ b/builtin/cat-file.c
@@ -9,13 +9,20 @@
 #include "userdiff.h"
 #include "streaming.h"
 
-static int cat_one_file(int opt, const char *exp_type, const char *obj_name)
+static int cat_one_file(int opt, const char *exp_type, const char *obj_name,
+			int literally)
 {
 	unsigned char sha1[20];
 	enum object_type type;
 	char *buf;
 	unsigned long size;
 	struct object_context obj_context;
+	struct object_info oi = {NULL};
+	struct strbuf sb = STRBUF_INIT;
+	unsigned flags = LOOKUP_REPLACE_OBJECT;
+
+	if (literally)
+		flags |= LOOKUP_LITERALLY;
 
 	if (get_sha1_with_context(obj_name, 0, sha1, &obj_context))
 		die("Not a valid object name %s", obj_name);
@@ -23,20 +30,22 @@ static int cat_one_file(int opt, const char *exp_type, const char *obj_name)
 	buf = NULL;
 	switch (opt) {
 	case 't':
-		type = sha1_object_info(sha1, NULL);
-		if (type > 0) {
-			printf("%s\n", typename(type));
+		oi.typename = &sb;
+		if (sha1_object_info_extended(sha1, &oi, flags) < 0)
+			die("git cat-file: could not get object info");
+		if (sb.len) {
+			printf("%s\n", sb.buf);
+			strbuf_release(&sb);
 			return 0;
 		}
 		break;
 
 	case 's':
-		type = sha1_object_info(sha1, &size);
-		if (type > 0) {
-			printf("%lu\n", size);
-			return 0;
-		}
-		break;
+		oi.sizep = &size;
+		if ((type = sha1_object_info_extended(sha1, &oi, flags)) < 0)
+			die("git cat-file: could not get object info");
+		printf("%lu\n", size);
+		return 0;
 
 	case 'e':
 		return !has_sha1_file(sha1);
@@ -323,7 +332,7 @@ static int batch_objects(struct batch_options *opt)
 }
 
 static const char * const cat_file_usage[] = {
-	N_("git cat-file (-t | -s | -e | -p | <type> | --textconv) <object>"),
+	N_("git cat-file (-t [--literally]|-s [--literally]|-e|-p|<type>|--textconv) <object>"),
 	N_("git cat-file (--batch | --batch-check) < <list-of-objects>"),
 	NULL
 };
@@ -359,6 +368,7 @@ int cmd_cat_file(int argc, const char **argv, const char *prefix)
 	int opt = 0;
 	const char *exp_type = NULL, *obj_name = NULL;
 	struct batch_options batch = {0};
+	int literally = 0;
 
 	const struct option options[] = {
 		OPT_GROUP(N_("<type> can be one of: blob, tree, commit, tag")),
@@ -369,6 +379,8 @@ int cmd_cat_file(int argc, const char **argv, const char *prefix)
 		OPT_SET_INT('p', NULL, &opt, N_("pretty-print object's content"), 'p'),
 		OPT_SET_INT(0, "textconv", &opt,
 			    N_("for blob objects, run textconv on object's content"), 'c'),
+		OPT_BOOL( 0, "literally", &literally,
+			  N_("allow -s and -t to work with broken/corrupt objects")),
 		{ OPTION_CALLBACK, 0, "batch", &batch, "format",
 			N_("show info and content of objects fed from the standard input"),
 			PARSE_OPT_OPTARG, batch_option_callback },
@@ -380,7 +392,7 @@ int cmd_cat_file(int argc, const char **argv, const char *prefix)
 
 	git_config(git_cat_file_config, NULL);
 
-	if (argc != 3 && argc != 2)
+	if (argc < 2 || argc > 4)
 		usage_with_options(cat_file_usage, options);
 
 	argc = parse_options(argc, argv, prefix, options, cat_file_usage, 0);
@@ -405,5 +417,7 @@ int cmd_cat_file(int argc, const char **argv, const char *prefix)
 	if (batch.enabled)
 		return batch_objects(&batch);
 
-	return cat_one_file(opt, exp_type, obj_name);
+	if (literally && opt != 't' && opt != 's')
+		die("git cat-file --literally: use with -s or -t");
+	return cat_one_file(opt, exp_type, obj_name, literally);
 }
-- 
2.4.0.rc1.249.gb598846

^ permalink raw reply related	[flat|nested] 44+ messages in thread

* [PATCH v8 3/4] cat-file: add documentation for '--literally' option.
  2015-04-15 16:55 [PATCH v8 0/4] cat-file: teach cat-file a '--literally' option karthik nayak
  2015-04-15 16:59 ` [PATCH v8 1/4] sha1_file.c: support reading from a loose object of unknown type Karthik Nayak
  2015-04-15 16:59 ` [PATCH v8 2/4] cat-file: teach cat-file a '--literally' option Karthik Nayak
@ 2015-04-15 17:00 ` Karthik Nayak
  2015-04-15 17:00 ` [PATCH v8 4/4] t1006: add tests for git cat-file --literally Karthik Nayak
  3 siblings, 0 replies; 44+ messages in thread
From: Karthik Nayak @ 2015-04-15 17:00 UTC (permalink / raw)
  To: git; +Cc: gitster, sunshine, Karthik Nayak

Signed-off-by: Karthik Nayak <karthik.188@gmail.com>
---
 Documentation/git-cat-file.txt | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/Documentation/git-cat-file.txt b/Documentation/git-cat-file.txt
index f6a16f4..226a9c4 100644
--- a/Documentation/git-cat-file.txt
+++ b/Documentation/git-cat-file.txt
@@ -9,7 +9,7 @@ git-cat-file - Provide content or type and size information for repository objec
 SYNOPSIS
 --------
 [verse]
-'git cat-file' (-t | -s | -e | -p | <type> | --textconv ) <object>
+'git cat-file' (-t [--literally]| -s [--literally]| -e | -p | <type> | --textconv ) <object>
 'git cat-file' (--batch | --batch-check) < <list-of-objects>
 
 DESCRIPTION
@@ -69,6 +69,9 @@ OPTIONS
 	not be combined with any other options or arguments.  See the
 	section `BATCH OUTPUT` below for details.
 
+--literally::
+	Allow -s or -t to query broken/corrupt objects of unknown type.
+
 OUTPUT
 ------
 If '-t' is specified, one of the <type>.
-- 
2.4.0.rc1.249.gb598846

^ permalink raw reply related	[flat|nested] 44+ messages in thread

* [PATCH v8 4/4] t1006: add tests for git cat-file --literally
  2015-04-15 16:55 [PATCH v8 0/4] cat-file: teach cat-file a '--literally' option karthik nayak
                   ` (2 preceding siblings ...)
  2015-04-15 17:00 ` [PATCH v8 3/4] cat-file: add documentation for " Karthik Nayak
@ 2015-04-15 17:00 ` Karthik Nayak
  2015-04-18  0:00   ` Eric Sunshine
  3 siblings, 1 reply; 44+ messages in thread
From: Karthik Nayak @ 2015-04-15 17:00 UTC (permalink / raw)
  To: git; +Cc: gitster, sunshine, Karthik Nayak

Signed-off-by: Karthik Nayak <karthik.188@gmail.com>
---
 t/t1006-cat-file.sh | 29 +++++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)

diff --git a/t/t1006-cat-file.sh b/t/t1006-cat-file.sh
index ab36b1e..61fab78 100755
--- a/t/t1006-cat-file.sh
+++ b/t/t1006-cat-file.sh
@@ -47,6 +47,18 @@ $content"
 	test_cmp expect actual
     '
 
+    test_expect_success "Type of $type is correct using --literally" '
+	echo $type >expect &&
+	git cat-file -t --literally $sha1 >actual &&
+	test_cmp expect actual
+    '
+
+    test_expect_success "Size of $type is correct using --literally" '
+	echo $size >expect &&
+	git cat-file -s --literally $sha1 >actual &&
+	test_cmp expect actual
+    '
+
     test -z "$content" ||
     test_expect_success "Content of $type is correct" '
 	maybe_remove_timestamp "$content" $no_ts >expect &&
@@ -296,4 +308,21 @@ test_expect_success '%(deltabase) reports packed delta bases' '
 	}
 '
 
+bogus_type="bogus"
+bogus_content="bogus"
+bogus_size=$(strlen $bogus_content)
+bogus_sha1=$(printf $bogus_content | git hash-object -t $bogus_type --literally -w --stdin)
+
+test_expect_success "Type of broken object is correct" '
+	echo $bogus_type >expect &&
+	git cat-file -t --literally $bogus_sha1 >actual &&
+	test_cmp expect actual
+'
+
+test_expect_success "Size of broken object is correct" '
+    echo $bogus_size >expect &&
+	git cat-file -s --literally $bogus_sha1 >actual &&
+	test_cmp expect actual
+'
+
 test_done
-- 
2.4.0.rc1.249.gb598846

^ permalink raw reply related	[flat|nested] 44+ messages in thread

* Re: [PATCH v8 2/4] cat-file: teach cat-file a '--literally' option
  2015-04-15 16:59 ` [PATCH v8 2/4] cat-file: teach cat-file a '--literally' option Karthik Nayak
@ 2015-04-15 20:20   ` Junio C Hamano
  2015-04-15 20:52     ` Junio C Hamano
  2015-04-19  0:28   ` Charles Bailey
  1 sibling, 1 reply; 44+ messages in thread
From: Junio C Hamano @ 2015-04-15 20:20 UTC (permalink / raw)
  To: Karthik Nayak; +Cc: git, sunshine

Karthik Nayak <karthik.188@gmail.com> writes:

>  	case 's':
> -		type = sha1_object_info(sha1, &size);
> -		if (type > 0) {
> -			printf("%lu\n", size);
> -			return 0;
> -		}
> -		break;
> +		oi.sizep = &size;
> +		if ((type = sha1_object_info_extended(sha1, &oi, flags)) < 0)

Do you still need to assign to type here?

> +			die("git cat-file: could not get object info");
> +		printf("%lu\n", size);
> +		return 0;

> @@ -323,7 +332,7 @@ static int batch_objects(struct batch_options *opt)
>  }
>  
>  static const char * const cat_file_usage[] = {
> -	N_("git cat-file (-t | -s | -e | -p | <type> | --textconv) <object>"),
> +	N_("git cat-file (-t [--literally]|-s [--literally]|-e|-p|<type>|--textconv) <object>"),
>  	N_("git cat-file (--batch | --batch-check) < <list-of-objects>"),
>  	NULL
>  };
> @@ -359,6 +368,7 @@ int cmd_cat_file(int argc, const char **argv, const char *prefix)
>  	int opt = 0;
>  	const char *exp_type = NULL, *obj_name = NULL;
>  	struct batch_options batch = {0};
> +	int literally = 0;
>  
>  	const struct option options[] = {
>  		OPT_GROUP(N_("<type> can be one of: blob, tree, commit, tag")),
> @@ -369,6 +379,8 @@ int cmd_cat_file(int argc, const char **argv, const char *prefix)
>  		OPT_SET_INT('p', NULL, &opt, N_("pretty-print object's content"), 'p'),
>  		OPT_SET_INT(0, "textconv", &opt,
>  			    N_("for blob objects, run textconv on object's content"), 'c'),
> +		OPT_BOOL( 0, "literally", &literally,
> +			  N_("allow -s and -t to work with broken/corrupt objects")),
>  		{ OPTION_CALLBACK, 0, "batch", &batch, "format",
>  			N_("show info and content of objects fed from the standard input"),
>  			PARSE_OPT_OPTARG, batch_option_callback },
> @@ -380,7 +392,7 @@ int cmd_cat_file(int argc, const char **argv, const char *prefix)
>  
>  	git_config(git_cat_file_config, NULL);
>  
> -	if (argc != 3 && argc != 2)
> +	if (argc < 2 || argc > 4)
>  		usage_with_options(cat_file_usage, options);

Hmm, why this change?

By allowing 4 args blindly like this, you will let something like
these to pass:

    $ git cat-file --textconv -t HEAD
    commit
    $ git cat-file -p -t HEAD
    commit
    $ git cat-file -s -t HEAD
    commit
    $ git cat-file -t -s HEAD
    879

It is fine if you are declaring that the last one wins when these
mutually-exclusive options are given. "git cat-file -e -s -t HEAD"
should also say "commit" if that is what you are doing, but instead
the code with this patch complains, which is bad.

It also is fine and is more in line with the spirit of the original
code if you keep the rule tight and only one of these mutually
exclusive options is allowed.

In either case, this check needs to be rethought.

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH v8 1/4] sha1_file.c: support reading from a loose object of unknown type
  2015-04-15 16:59 ` [PATCH v8 1/4] sha1_file.c: support reading from a loose object of unknown type Karthik Nayak
@ 2015-04-15 20:21   ` Junio C Hamano
  2015-04-15 22:18     ` Jeff King
  2015-04-17 23:31   ` Eric Sunshine
  1 sibling, 1 reply; 44+ messages in thread
From: Junio C Hamano @ 2015-04-15 20:21 UTC (permalink / raw)
  To: Karthik Nayak; +Cc: git, sunshine, Jeff King

Karthik Nayak <karthik.188@gmail.com> writes:

> Update sha1_loose_object_info() to optionally allow it to read
> from a loose object file of unknown/bogus type; as the function
> usually returns the type of the object it read in the form of enum
> for known types, add an optional "typename" field to receive the
> name of the type in textual form and a flag to indicate the reading
> of a loose object file of unknown/bogus type.
>
> Add parse_sha1_header_extended() which acts as a wrapper around
> parse_sha1_header() allowing more information to be obtained.
>
> Add unpack_sha1_header_to_strbuf() to unpack sha1 headers of
> unknown/corrupt objects which have a unknown sha1 header size to
> a strbuf structure. This was written by Junio C Hamano but tested
> by me.
>
> Helped-by: Junio C Hamano <gitster@pobox.com>
> Helped-by: Eric Sunshine <sunshine@sunshineco.com>
> Signed-off-by: Karthik Nayak <karthik.188@gmail.com>
> ---

I see that you made the type parsing to happen earlier than the
previous round (which used to do the size first and then type).

Not a problem, though.  Just something I noticed...

> @@ -1614,27 +1642,40 @@ static void *unpack_sha1_rest(git_zstream *stream, void *buffer, unsigned long s
>   * too permissive for what we want to check. So do an anal
>   * object header parse by hand.
>   */
> -int parse_sha1_header(const char *hdr, unsigned long *sizep)
> +int parse_sha1_header_extended(const char *hdr, struct object_info *oi,
> +			       unsigned int flags)
>  {
> -	char type[10];
> -	int i;
> +	struct strbuf typename = STRBUF_INIT;
>  	unsigned long size;
> +	int type;
>  
>  	/*
>  	 * The type can be at most ten bytes (including the
>  	 * terminating '\0' that we add), and is followed by
>  	 * a space.
>  	 */
> -	i = 0;
>  	for (;;) {
>  		char c = *hdr++;
>  		if (c == ' ')
>  			break;
> -		type[i++] = c;
> -		if (i >= sizeof(type))
> -			return -1;
> +		strbuf_addch(&typename, c);
>  	}
> -	type[i] = 0;

This _might_ have some performance impact in that strbuf_addch()
involves strbuf_grow(*, 1), which does "does it overflow to
increment sb->len by one?"; I would say it should be unmeasurable
because the function is expected to be used only on loose objects
and you shouldn't have very many of them without packing in your
repository in the first place.

I guess Peff's c1822d4f (strbuf: add an optimized 1-character
strbuf_grow, 2015-04-04) may want to teach strbuf_addch() to use his
new strbuf_grow_ch(), and once that happens the performance worry
would disappear without this code to be changed at all.

> @@ -2541,7 +2596,7 @@ static int sha1_loose_object_info(const unsigned char *sha1,
>  	 * return value implicitly indicates whether the
>  	 * object even exists.
>  	 */
> -	if (!oi->typep && !oi->sizep) {
> +	if (!oi->typep && !oi->typename && !oi->sizep) {

You didn't have this check in the earlier round, and this new one is
correct, I think.  Good eyes to spot this potential problem.

Thanks.

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH v8 2/4] cat-file: teach cat-file a '--literally' option
  2015-04-15 20:20   ` Junio C Hamano
@ 2015-04-15 20:52     ` Junio C Hamano
  2015-04-16  7:26       ` karthik nayak
  0 siblings, 1 reply; 44+ messages in thread
From: Junio C Hamano @ 2015-04-15 20:52 UTC (permalink / raw)
  To: Karthik Nayak; +Cc: git, sunshine

Junio C Hamano <gitster@pobox.com> writes:

> Karthik Nayak <karthik.188@gmail.com> writes:
> ...
>> -	if (argc != 3 && argc != 2)
>> +	if (argc < 2 || argc > 4)
>>  		usage_with_options(cat_file_usage, options);
>
> Hmm, why this change?
>
> By allowing 4 args blindly like this, you will let something like
> these to pass:
>
>     $ git cat-file --textconv -t HEAD
>     commit
>     $ git cat-file -p -t HEAD
>     commit
>     $ git cat-file -s -t HEAD
>     commit
>     $ git cat-file -t -s HEAD
>     879
>
> It is fine if you are declaring that the last one wins when these
> mutually-exclusive options are given. "git cat-file -e -s -t HEAD"
> should also say "commit" if that is what you are doing, but instead
> the code with this patch complains, which is bad.
>
> It also is fine and is more in line with the spirit of the original
> code if you keep the rule tight and only one of these mutually
> exclusive options is allowed.
>
> In either case, this check needs to be rethought.

I wonder if we want to do something like this as a preliminary step
before your [PATCH 2/4].  Everything after the parse_options() call
seems to protect against leftover argc depending on what they need
already, so the only thing existing "we take only 2 or 3 args" check
is doing is to protect against giving more than one command mode
options, I think.  And OPT_CMDMODE() should do a much better job at
that that kind of thing, by giving a more informative error message
e.g.

    $ git cat-file -p -e HEAD
    error: switch 'e': incompatible with -p

This is a tangent, but while we are in the vicinity, we may want to
rethink the help message we attach to the '-e' option.  Technically
the current message is _not_ wrong per-se, but it misses the point.
The primary thing the option does is to check the (e)xistence of the
named object, and the fact that it does so silently is merely a
detail of the operation.  The current help text omits the more
important part of what the option is.

 builtin/cat-file.c | 13 +++++--------
 1 file changed, 5 insertions(+), 8 deletions(-)

diff --git a/builtin/cat-file.c b/builtin/cat-file.c
index 1ec7190..534991d 100644
--- a/builtin/cat-file.c
+++ b/builtin/cat-file.c
@@ -372,12 +372,12 @@ int cmd_cat_file(int argc, const char **argv, const char *prefix)
 
 	const struct option options[] = {
 		OPT_GROUP(N_("<type> can be one of: blob, tree, commit, tag")),
-		OPT_SET_INT('t', NULL, &opt, N_("show object type"), 't'),
-		OPT_SET_INT('s', NULL, &opt, N_("show object size"), 's'),
-		OPT_SET_INT('e', NULL, &opt,
+		OPT_CMDMODE('t', NULL, &opt, N_("show object type"), 't'),
+		OPT_CMDMODE('s', NULL, &opt, N_("show object size"), 's'),
+		OPT_CMDMODE('e', NULL, &opt,
 			    N_("exit with zero when there's no error"), 'e'),
-		OPT_SET_INT('p', NULL, &opt, N_("pretty-print object's content"), 'p'),
-		OPT_SET_INT(0, "textconv", &opt,
+		OPT_CMDMODE('p', NULL, &opt, N_("pretty-print object's content"), 'p'),
+		OPT_CMDMODE(0, "textconv", &opt,
 			    N_("for blob objects, run textconv on object's content"), 'c'),
 		OPT_BOOL( 0, "literally", &literally,
 			  N_("allow -s and -t to work with broken/corrupt objects")),
@@ -392,9 +392,6 @@ int cmd_cat_file(int argc, const char **argv, const char *prefix)
 
 	git_config(git_cat_file_config, NULL);
 
-	if (argc < 2 || argc > 4)
-		usage_with_options(cat_file_usage, options);
-
 	argc = parse_options(argc, argv, prefix, options, cat_file_usage, 0);
 
 	if (opt) {

^ permalink raw reply related	[flat|nested] 44+ messages in thread

* Re: [PATCH v8 1/4] sha1_file.c: support reading from a loose object of unknown type
  2015-04-15 20:21   ` Junio C Hamano
@ 2015-04-15 22:18     ` Jeff King
  2015-04-17 14:23       ` Jeff King
  0 siblings, 1 reply; 44+ messages in thread
From: Jeff King @ 2015-04-15 22:18 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Karthik Nayak, git, sunshine

On Wed, Apr 15, 2015 at 01:21:15PM -0700, Junio C Hamano wrote:

> > -		type[i++] = c;
> > -		if (i >= sizeof(type))
> > -			return -1;
> > +		strbuf_addch(&typename, c);
> >  	}
> > -	type[i] = 0;
> 
> This _might_ have some performance impact in that strbuf_addch()
> involves strbuf_grow(*, 1), which does "does it overflow to
> increment sb->len by one?"; I would say it should be unmeasurable
> because the function is expected to be used only on loose objects
> and you shouldn't have very many of them without packing in your
> repository in the first place.
> 
> I guess Peff's c1822d4f (strbuf: add an optimized 1-character
> strbuf_grow, 2015-04-04) may want to teach strbuf_addch() to use his
> new strbuf_grow_ch(), and once that happens the performance worry
> would disappear without this code to be changed at all.

I haven't re-rolled that series yet, but the discussion there showed
that strbuf_grow_ch() is unnecessary; call-sites can just check:

  if (!strbuf_avail(sb))
	strbuf_grow(sb, 1);

to get the fast inline check. Since we go to the trouble to inline
strbuf_addch, we should probably teach it the same trick. It would be
nice to identify a place with a tight strbuf_addch() loop that could
demonstrate the speed increase, though.

-Peff

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH v8 2/4] cat-file: teach cat-file a '--literally' option
  2015-04-15 20:52     ` Junio C Hamano
@ 2015-04-16  7:26       ` karthik nayak
  2015-04-16 13:35         ` Junio C Hamano
  0 siblings, 1 reply; 44+ messages in thread
From: karthik nayak @ 2015-04-16  7:26 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git, sunshine



On 04/16/2015 02:22 AM, Junio C Hamano wrote:
> Junio C Hamano <gitster@pobox.com> writes:
>
>> Karthik Nayak <karthik.188@gmail.com> writes:
>> ...
>>> -	if (argc != 3 && argc != 2)
>>> +	if (argc < 2 || argc > 4)
>>>   		usage_with_options(cat_file_usage, options);
>>
>> Hmm, why this change?
>>
>> By allowing 4 args blindly like this, you will let something like
>> these to pass:
>>
>>      $ git cat-file --textconv -t HEAD
>>      commit
>>      $ git cat-file -p -t HEAD
>>      commit
>>      $ git cat-file -s -t HEAD
>>      commit
>>      $ git cat-file -t -s HEAD
>>      879
>>
>> It is fine if you are declaring that the last one wins when these
>> mutually-exclusive options are given. "git cat-file -e -s -t HEAD"
>> should also say "commit" if that is what you are doing, but instead
>> the code with this patch complains, which is bad.
>>
>> It also is fine and is more in line with the spirit of the original
>> code if you keep the rule tight and only one of these mutually
>> exclusive options is allowed.
>>
>> In either case, this check needs to be rethought.
>
> I wonder if we want to do something like this as a preliminary step
> before your [PATCH 2/4].  Everything after the parse_options() call
> seems to protect against leftover argc depending on what they need
> already, so the only thing existing "we take only 2 or 3 args" check
> is doing is to protect against giving more than one command mode
> options, I think.  And OPT_CMDMODE() should do a much better job at
> that that kind of thing, by giving a more informative error message
> e.g.
>
>      $ git cat-file -p -e HEAD
>      error: switch 'e': incompatible with -p
>
> This is a tangent, but while we are in the vicinity, we may want to
> rethink the help message we attach to the '-e' option.  Technically
> the current message is _not_ wrong per-se, but it misses the point.
> The primary thing the option does is to check the (e)xistence of the
> named object, and the fact that it does so silently is merely a
> detail of the operation.  The current help text omits the more
> important part of what the option is.

Would you rather check '-e' and go on to check '-p' or do you merely 
just want a different message.
As far as I see whenever any option other than a '-e' is given it 
indirectly has to check for the existence of the object.

eg:
$ git cat-file -t asdfghjkl
fatal: Not a valid object name asdfghjkl

$ git cat-file -e asdfghjkl
fatal: Not a valid object name asdfghjkl

So when a user is giving the '-e' option he just expects a silent output 
if the object exists, hence we rather have the option '-e' behave as a 
mutually exclusive option and output the error message like.

$ git cat-file -p -e HEAD
error: switch 'e': incompatible with -p

what do you think?
>
>   builtin/cat-file.c | 13 +++++--------
>   1 file changed, 5 insertions(+), 8 deletions(-)
>
> diff --git a/builtin/cat-file.c b/builtin/cat-file.c
> index 1ec7190..534991d 100644
> --- a/builtin/cat-file.c
> +++ b/builtin/cat-file.c
> @@ -372,12 +372,12 @@ int cmd_cat_file(int argc, const char **argv, const char *prefix)
>
>   	const struct option options[] = {
>   		OPT_GROUP(N_("<type> can be one of: blob, tree, commit, tag")),
> -		OPT_SET_INT('t', NULL, &opt, N_("show object type"), 't'),
> -		OPT_SET_INT('s', NULL, &opt, N_("show object size"), 's'),
> -		OPT_SET_INT('e', NULL, &opt,
> +		OPT_CMDMODE('t', NULL, &opt, N_("show object type"), 't'),
> +		OPT_CMDMODE('s', NULL, &opt, N_("show object size"), 's'),
> +		OPT_CMDMODE('e', NULL, &opt,
>   			    N_("exit with zero when there's no error"), 'e'),
> -		OPT_SET_INT('p', NULL, &opt, N_("pretty-print object's content"), 'p'),
> -		OPT_SET_INT(0, "textconv", &opt,
> +		OPT_CMDMODE('p', NULL, &opt, N_("pretty-print object's content"), 'p'),
> +		OPT_CMDMODE(0, "textconv", &opt,
>   			    N_("for blob objects, run textconv on object's content"), 'c'),
>   		OPT_BOOL( 0, "literally", &literally,
>   			  N_("allow -s and -t to work with broken/corrupt objects")),
> @@ -392,9 +392,6 @@ int cmd_cat_file(int argc, const char **argv, const char *prefix)
>
>   	git_config(git_cat_file_config, NULL);
>
> -	if (argc < 2 || argc > 4)
> -		usage_with_options(cat_file_usage, options);
> -
>   	argc = parse_options(argc, argv, prefix, options, cat_file_usage, 0);
>
>   	if (opt) {
>
Thanks! Didn't know about the OPT_CMDMODE option.

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH v8 2/4] cat-file: teach cat-file a '--literally' option
  2015-04-16  7:26       ` karthik nayak
@ 2015-04-16 13:35         ` Junio C Hamano
  2015-04-17  2:10           ` Karthik Nayak
  0 siblings, 1 reply; 44+ messages in thread
From: Junio C Hamano @ 2015-04-16 13:35 UTC (permalink / raw)
  To: karthik nayak; +Cc: git, sunshine

karthik nayak <karthik.188@gmail.com> writes:

> On 04/16/2015 02:22 AM, Junio C Hamano wrote:
>
>> This is a tangent, but while we are in the vicinity, we may want to
>> rethink the help message we attach to the '-e' option.  Technically
>> the current message is _not_ wrong per-se, but it misses the point.
>> The primary thing the option does is to check the (e)xistence of the
>> named object, and the fact that it does so silently is merely a
>> detail of the operation.  The current help text omits the more
>> important part of what the option is.
>
> Would you rather check '-e' and go on to check '-p' or do you merely
> just want a different message.

I meant just a different message.  The point of -e is to see if the
thing exists.  It is good to mention _how_ the result is reported
back to the user (i.e. via the exit code, not via an output to the
standard output "exists" vs "missing", for example), but that is
secondary.  Telling how it reports is meaningless without telling
what it reports in the first place.

> ... when a user is giving the '-e' option he just expects a silent
> output if the object exists, hence we rather have the option '-e'
> behave as a mutually exclusive option...

Yes, and that is in line with the switch to OPT_CMDMODE.

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH v8 2/4] cat-file: teach cat-file a '--literally' option
  2015-04-16 13:35         ` Junio C Hamano
@ 2015-04-17  2:10           ` Karthik Nayak
  2015-04-17  2:14             ` Junio C Hamano
  0 siblings, 1 reply; 44+ messages in thread
From: Karthik Nayak @ 2015-04-17  2:10 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git, sunshine



On April 16, 2015 7:05:04 PM GMT+05:30, Junio C Hamano <gitster@pobox.com> wrote:
>karthik nayak <karthik.188@gmail.com> writes:
>
>> On 04/16/2015 02:22 AM, Junio C Hamano wrote:
>>
>>> This is a tangent, but while we are in the vicinity, we may want to
>>> rethink the help message we attach to the '-e' option.  Technically
>>> the current message is _not_ wrong per-se, but it misses the point.
>>> The primary thing the option does is to check the (e)xistence of the
>>> named object, and the fact that it does so silently is merely a
>>> detail of the operation.  The current help text omits the more
>>> important part of what the option is.
>>
>> Would you rather check '-e' and go on to check '-p' or do you merely
>> just want a different message.
>
>I meant just a different message.  The point of -e is to see if the
>thing exists.  It is good to mention _how_ the result is reported
>back to the user (i.e. via the exit code, not via an output to the
>standard output "exists" vs "missing", for example), but that is
>secondary.  Telling how it reports is meaningless without telling
>what it reports in the first place.

I see what you mean. But I think it's beyond the scope of this patch series. If no one does, I'll work on it

>
>> ... when a user is giving the '-e' option he just expects a silent
>> output if the object exists, hence we rather have the option '-e'
>> behave as a mutually exclusive option...
>
>Yes, and that is in line with the switch to OPT_CMDMODE.

-- 
Sent from my Android device with K-9 Mail. Please excuse my brevity.

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH v8 2/4] cat-file: teach cat-file a '--literally' option
  2015-04-17  2:10           ` Karthik Nayak
@ 2015-04-17  2:14             ` Junio C Hamano
  0 siblings, 0 replies; 44+ messages in thread
From: Junio C Hamano @ 2015-04-17  2:14 UTC (permalink / raw)
  To: Karthik Nayak; +Cc: Git Mailing List, Eric Sunshine

On Thu, Apr 16, 2015 at 7:10 PM, Karthik Nayak <karthik.188@gmail.com> wrote:
>
> On April 16, 2015 7:05:04 PM GMT+05:30, Junio C Hamano <gitster@pobox.com> wrote:
>>
>>I meant just a different message.  The point of -e is to see if the
>>thing exists.  It is good to mention _how_ the result is reported
>>back to the user (i.e. via the exit code, not via an output to the
>>standard output "exists" vs "missing", for example), but that is
>>secondary.  Telling how it reports is meaningless without telling
>>what it reports in the first place.
>
> I see what you mean. But I think it's beyond the scope of this patch series.

It is perfectly fine to leave it as-is; that is why I said "This is a tangent".

Thanks.

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH v8 1/4] sha1_file.c: support reading from a loose object of unknown type
  2015-04-15 22:18     ` Jeff King
@ 2015-04-17 14:23       ` Jeff King
  2015-04-17 16:21         ` Junio C Hamano
  2015-04-17 18:45         ` karthik nayak
  0 siblings, 2 replies; 44+ messages in thread
From: Jeff King @ 2015-04-17 14:23 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Karthik Nayak, git, sunshine

On Wed, Apr 15, 2015 at 06:18:24PM -0400, Jeff King wrote:

> > This _might_ have some performance impact in that strbuf_addch()
> > involves strbuf_grow(*, 1), which does "does it overflow to
> > increment sb->len by one?"; I would say it should be unmeasurable
> > because the function is expected to be used only on loose objects
> > and you shouldn't have very many of them without packing in your
> > repository in the first place.
> > 
> > I guess Peff's c1822d4f (strbuf: add an optimized 1-character
> > strbuf_grow, 2015-04-04) may want to teach strbuf_addch() to use his
> > new strbuf_grow_ch(), and once that happens the performance worry
> > would disappear without this code to be changed at all.
> 
> I haven't re-rolled that series yet, but the discussion there showed
> that strbuf_grow_ch() is unnecessary; call-sites can just check:
> 
>   if (!strbuf_avail(sb))
> 	strbuf_grow(sb, 1);
> 
> to get the fast inline check. Since we go to the trouble to inline
> strbuf_addch, we should probably teach it the same trick. It would be
> nice to identify a place with a tight strbuf_addch() loop that could
> demonstrate the speed increase, though.

Just for reference, I did teach strbuf_addch this trick. And the config
code is the tight loop to test it with. Results are here:

  http://article.gmane.org/gmane.comp.version-control.git/267266

In this code path, we are typically only seeing short strings (the
original code used a 10-byte static buffer). So I doubt it makes all
that much difference.

If there _is_ a performance implication to worry about here, I think it
would be that we are doing an extra malloc/free. I'm not sure I
understand why we are copying it at all. The original code copied from
the hdr into type[10] so that we could NUL-terminate it, which was
required for type_from_string().

But now we use type_from_string_gently, which can accept a length[1]. So
we could just count the bytes to the first space and pass the original
buffer along with that length, no?

-Peff

[1] Not related to your patch, but it looks like type_from_string_gently
    is overly lax. It does a strncmp() with the length of the candidate
    name, but does not check that we consumed all of the matching name.
    So "tr" would match "tree", "comm" would match "commit", and so
    forth.

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH v8 1/4] sha1_file.c: support reading from a loose object of unknown type
  2015-04-17 14:23       ` Jeff King
@ 2015-04-17 16:21         ` Junio C Hamano
  2015-04-17 20:51           ` Jeff King
  2015-04-17 18:45         ` karthik nayak
  1 sibling, 1 reply; 44+ messages in thread
From: Junio C Hamano @ 2015-04-17 16:21 UTC (permalink / raw)
  To: Jeff King; +Cc: Karthik Nayak, git, sunshine

Jeff King <peff@peff.net> writes:

> If there _is_ a performance implication to worry about here, I think it
> would be that we are doing an extra malloc/free.

Thanks for reminding me; yes, that also worried me.

> I'm not sure I
> understand why we are copying it at all. The original code copied from
> the hdr into type[10] so that we could NUL-terminate it, which was
> required for type_from_string().

Sounds like a good plan.

>
> But now we use type_from_string_gently, which can accept a length[1]. So
> we could just count the bytes to the first space and pass the original
> buffer along with that length, no?
>
> -Peff
>
> [1] Not related to your patch, but it looks like type_from_string_gently
>     is overly lax. It does a strncmp() with the length of the candidate
>     name, but does not check that we consumed all of the matching name.
>     So "tr" would match "tree", "comm" would match "commit", and so
>     forth.

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH v8 1/4] sha1_file.c: support reading from a loose object of unknown type
  2015-04-17 14:23       ` Jeff King
  2015-04-17 16:21         ` Junio C Hamano
@ 2015-04-17 18:45         ` karthik nayak
  2015-04-17 18:49           ` Jeff King
  2015-04-17 19:23           ` Junio C Hamano
  1 sibling, 2 replies; 44+ messages in thread
From: karthik nayak @ 2015-04-17 18:45 UTC (permalink / raw)
  To: Jeff King; +Cc: git, sunshine, Junio C Hamano



On 04/17/2015 07:53 PM, Jeff King wrote:
> On Wed, Apr 15, 2015 at 06:18:24PM -0400, Jeff King wrote:
>
> >> This _might_ have some performance impact in that strbuf_addch()
> >> involves strbuf_grow(*, 1), which does "does it overflow to
> >> increment sb->len by one?"; I would say it should be unmeasurable
> >> because the function is expected to be used only on loose objects
> >> and you shouldn't have very many of them without packing in your
> >> repository in the first place.
> >>
> >> I guess Peff's c1822d4f (strbuf: add an optimized 1-character
> >> strbuf_grow, 2015-04-04) may want to teach strbuf_addch() to use his
> >> new strbuf_grow_ch(), and once that happens the performance worry
> >> would disappear without this code to be changed at all.
> >
> > I haven't re-rolled that series yet, but the discussion there showed
> > that strbuf_grow_ch() is unnecessary; call-sites can just check:
> >
> >    if (!strbuf_avail(sb))
> >     strbuf_grow(sb, 1);
> >
> > to get the fast inline check. Since we go to the trouble to inline
> > strbuf_addch, we should probably teach it the same trick. It would be
> > nice to identify a place with a tight strbuf_addch() loop that could
> > demonstrate the speed increase, though.
>
> Just for reference, I did teach strbuf_addch this trick. And the config
> code is the tight loop to test it with. Results are here:
>
>    http://article.gmane.org/gmane.comp.version-control.git/267266
>
> In this code path, we are typically only seeing short strings (the
> original code used a 10-byte static buffer). So I doubt it makes all
> that much difference.
>
> If there _is_ a performance implication to worry about here, I think it
> would be that we are doing an extra malloc/free. I'm not sure I
> understand why we are copying it at all. The original code copied from
> the hdr into type[10] so that we could NUL-terminate it, which was
> required for type_from_string().
>
> But now we use type_from_string_gently, which can accept a length[1]. So
> we could just count the bytes to the first space and pass the original
> buffer along with that length, no?

Yes, we could, that would eliminate  "struct strbuf typename =
STRBUF_INIT".

Something like this perhaps :

@@ -1614,27 +1642,40 @@ static void *unpack_sha1_rest(git_zstream
*stream, void *buffer, unsigned long s
    * too permissive for what we want to check. So do an anal
    * object header parse by hand.
    */
-int parse_sha1_header(const char *hdr, unsigned long *sizep)
+static int parse_sha1_header_extended(const char *hdr, struct
object_info *oi,
+                              unsigned int flags)
   {
-       char type[10];
-       int i;
+       const char *buf = hdr;
          unsigned long size;
+       int type, len = 0;

          /*
-        * The type can be at most ten bytes (including the
-        * terminating '\0' that we add), and is followed by
+        * The type can be of any size but is followed by
           * a space.
           */
-       i = 0;
          for (;;) {
                  char c = *hdr++;
                  if (c == ' ')
                          break;
-               type[i++] = c;
-               if (i >= sizeof(type))
-                       return -1;
+               len++;
          }
-       type[i] = 0;
+
+       type = type_from_string_gently(buf, len, 1);
+       if (oi->typename) {
+               strbuf_add(oi->typename, buf, len);
+               strbuf_addch(oi->typename, '\0');
+       }
+       /*
+        * Set type to 0 if its an unknown object and
+        * we're obtaining the type using '--literally'
+        * option.
+        */
+       if ((flags & LOOKUP_LITERALLY) && (type == -1))
+               type = 0;
+       else if (type == -1)
+               die("invalid object type");
+       if (oi->typep)
+               *oi->typep = type;

          /*
           * The length must follow immediately, and be in canonical

>
> -Peff
>
> [1] Not related to your patch, but it looks like type_from_string_gently
>      is overly lax. It does a strncmp() with the length of the candidate
>      name, but does not check that we consumed all of the matching name.
>      So "tr" would match "tree", "comm" would match "commit", and so
>      forth.
>
Thanks for the patch re-roll on strbuf_addch() and the patch on
type_from_string_gently().

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH v8 1/4] sha1_file.c: support reading from a loose object of unknown type
  2015-04-17 18:45         ` karthik nayak
@ 2015-04-17 18:49           ` Jeff King
  2015-04-18  8:31             ` karthik nayak
  2015-04-17 19:23           ` Junio C Hamano
  1 sibling, 1 reply; 44+ messages in thread
From: Jeff King @ 2015-04-17 18:49 UTC (permalink / raw)
  To: karthik nayak; +Cc: git, sunshine, Junio C Hamano

On Sat, Apr 18, 2015 at 12:15:28AM +0530, karthik nayak wrote:

> >But now we use type_from_string_gently, which can accept a length[1]. So
> >we could just count the bytes to the first space and pass the original
> >buffer along with that length, no?
> 
> Yes, we could, that would eliminate  "struct strbuf typename =
> STRBUF_INIT".
> 
> Something like this perhaps :

Yeah, this is exactly what I had in mind.

>   {
> -       char type[10];
> -       int i;
> +       const char *buf = hdr;
>          unsigned long size;
> +       int type, len = 0;

Maybe switch the names of "buf" and "len" to "type_buf" and "type_len"
to make their purpose more clear?

-Peff

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH v8 1/4] sha1_file.c: support reading from a loose object of unknown type
  2015-04-17 18:45         ` karthik nayak
  2015-04-17 18:49           ` Jeff King
@ 2015-04-17 19:23           ` Junio C Hamano
  2015-04-18  8:32             ` karthik nayak
  1 sibling, 1 reply; 44+ messages in thread
From: Junio C Hamano @ 2015-04-17 19:23 UTC (permalink / raw)
  To: karthik nayak; +Cc: Jeff King, git, sunshine

karthik nayak <karthik.188@gmail.com> writes:

> +       type = type_from_string_gently(buf, len, 1);
> +       if (oi->typename) {
> +               strbuf_add(oi->typename, buf, len);
> +               strbuf_addch(oi->typename, '\0');

add() has setlen() at the end so you do not have to NUL terminate it
yourself.  Doing addch() is actively wrong, as oi->typename->len now
counts the terminating NUL as part of the string, no?

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH v8 1/4] sha1_file.c: support reading from a loose object of unknown type
  2015-04-17 16:21         ` Junio C Hamano
@ 2015-04-17 20:51           ` Jeff King
  2015-04-17 21:10             ` Junio C Hamano
  0 siblings, 1 reply; 44+ messages in thread
From: Jeff King @ 2015-04-17 20:51 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Karthik Nayak, git, sunshine

On Fri, Apr 17, 2015 at 09:21:31AM -0700, Junio C Hamano wrote:

> Jeff King <peff@peff.net> writes:
> 
> > If there _is_ a performance implication to worry about here, I think it
> > would be that we are doing an extra malloc/free.
> 
> Thanks for reminding me; yes, that also worried me.

As an aside, I worried about the extra allocation for reading the header
in the first place. But it looks like we only do this on the --literally
code path (and otherwise use the normal unpack_sha1_header).  Still, I
wonder if we could make this work automagically.  That is, speculatively
unpack the first N bytes, assuming we hit the end-of-header. If not,
then go to a strbuf as the slow path. Then it would be fine to cover all
cases; the normal ones would be fast, and only ridiculous things would
incur the extra allocation.

-Peff

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH v8 1/4] sha1_file.c: support reading from a loose object of unknown type
  2015-04-17 20:51           ` Jeff King
@ 2015-04-17 21:10             ` Junio C Hamano
  2015-04-20 18:43               ` karthik nayak
  0 siblings, 1 reply; 44+ messages in thread
From: Junio C Hamano @ 2015-04-17 21:10 UTC (permalink / raw)
  To: Jeff King; +Cc: Karthik Nayak, git, sunshine

Jeff King <peff@peff.net> writes:

> On Fri, Apr 17, 2015 at 09:21:31AM -0700, Junio C Hamano wrote:
>
>> Jeff King <peff@peff.net> writes:
>> 
>> > If there _is_ a performance implication to worry about here, I think it
>> > would be that we are doing an extra malloc/free.
>> 
>> Thanks for reminding me; yes, that also worried me.
>
> As an aside, I worried about the extra allocation for reading the header
> in the first place. But it looks like we only do this on the --literally
> code path (and otherwise use the normal unpack_sha1_header).  Still, I
> wonder if we could make this work automagically.  That is, speculatively
> unpack the first N bytes, assuming we hit the end-of-header. If not,
> then go to a strbuf as the slow path. Then it would be fine to cover all
> cases; the normal ones would be fast, and only ridiculous things would
> incur the extra allocation.

Yes, that was what I was hoping to see eventually ;-)

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH v8 1/4] sha1_file.c: support reading from a loose object of unknown type
  2015-04-15 16:59 ` [PATCH v8 1/4] sha1_file.c: support reading from a loose object of unknown type Karthik Nayak
  2015-04-15 20:21   ` Junio C Hamano
@ 2015-04-17 23:31   ` Eric Sunshine
  2015-04-18  9:03     ` karthik nayak
  1 sibling, 1 reply; 44+ messages in thread
From: Eric Sunshine @ 2015-04-17 23:31 UTC (permalink / raw)
  To: Karthik Nayak; +Cc: Git List, Junio C Hamano

On Wed, Apr 15, 2015 at 12:59 PM, Karthik Nayak <karthik.188@gmail.com> wrote:
> Update sha1_loose_object_info() to optionally allow it to read
> from a loose object file of unknown/bogus type; as the function
> usually returns the type of the object it read in the form of enum
> for known types, add an optional "typename" field to receive the
> name of the type in textual form and a flag to indicate the reading
> of a loose object file of unknown/bogus type.
> [...]
> ---
> diff --git a/sha1_file.c b/sha1_file.c
> index 980ce6b..267399d 100644
> --- a/sha1_file.c
> +++ b/sha1_file.c
> @@ -2522,13 +2575,15 @@ struct packed_git *find_sha1_pack(const unsigned char *sha1,
>  }
>
>  static int sha1_loose_object_info(const unsigned char *sha1,
> -                                 struct object_info *oi)
> +                                 struct object_info *oi,
> +                                 int flags)
>  {
> -       int status;
> -       unsigned long mapsize, size;
> +       int status = 0;
> +       unsigned long mapsize;
>         void *map;
>         git_zstream stream;
>         char hdr[32];
> +       struct strbuf hdrbuf = STRBUF_INIT;
>
>         if (oi->delta_base_sha1)
>                 hashclr(oi->delta_base_sha1);
> @@ -2555,17 +2610,26 @@ static int sha1_loose_object_info(const unsigned char *sha1,
>                 return -1;
>         if (oi->disk_sizep)
>                 *oi->disk_sizep = mapsize;
> -       if (unpack_sha1_header(&stream, map, mapsize, hdr, sizeof(hdr)) < 0)
> -               status = error("unable to unpack %s header",
> -                              sha1_to_hex(sha1));
> -       else if ((status = parse_sha1_header(hdr, &size)) < 0)
> -               status = error("unable to parse %s header", sha1_to_hex(sha1));
> -       else if (oi->sizep)
> -               *oi->sizep = size;
> +       if ((flags & LOOKUP_LITERALLY)) {
> +               if (unpack_sha1_header_to_strbuf(&stream, map, mapsize, &hdrbuf) < 0)
> +                       status = error("unable to unpack %s header with --literally",
> +                                      sha1_to_hex(sha1));
> +               else if ((status = parse_sha1_header_extended(hdrbuf.buf, oi, flags)) < 0)
> +                       status = error("unable to parse %s header with --literally",
> +                                      sha1_to_hex(sha1));
> +       } else {
> +               if (unpack_sha1_header(&stream, map, mapsize, hdr, sizeof(hdr)) < 0)
> +                       status = error("unable to unpack %s header",
> +                                      sha1_to_hex(sha1));
> +               else if ((status = parse_sha1_header_extended(hdr, oi, flags)) < 0)
> +                       status = error("unable to parse %s header", sha1_to_hex(sha1));
> +       }
>         git_inflate_end(&stream);
>         munmap(map, mapsize);
> -       if (oi->typep)
> +       if (status && oi->typep)
>                 *oi->typep = status;
> +       if (hdrbuf.buf)
> +               strbuf_release(&hdrbuf);

Why is strbuf_release() protected by a conditional rather than being
called unconditionally? Am I missing something obvious?

>         return 0;
>  }
>

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH v8 4/4] t1006: add tests for git cat-file --literally
  2015-04-15 17:00 ` [PATCH v8 4/4] t1006: add tests for git cat-file --literally Karthik Nayak
@ 2015-04-18  0:00   ` Eric Sunshine
  2015-04-18  5:22     ` karthik nayak
  0 siblings, 1 reply; 44+ messages in thread
From: Eric Sunshine @ 2015-04-18  0:00 UTC (permalink / raw)
  To: Karthik Nayak; +Cc: Git List, Junio C Hamano

On Wed, Apr 15, 2015 at 1:00 PM, Karthik Nayak <karthik.188@gmail.com> wrote:
> Signed-off-by: Karthik Nayak <karthik.188@gmail.com>
> ---
> diff --git a/t/t1006-cat-file.sh b/t/t1006-cat-file.sh
> index ab36b1e..61fab78 100755
> --- a/t/t1006-cat-file.sh
> +++ b/t/t1006-cat-file.sh
> @@ -296,4 +308,21 @@ test_expect_success '%(deltabase) reports packed delta bases' '
>         }
>  '
>
> +bogus_type="bogus"
> +bogus_content="bogus"
> +bogus_size=$(strlen $bogus_content)

If someone ever changes the value of 'bogus_content' so it contains
whitespace, then the result of strlen() will be incorrect as you've
invoked it. You should quote its argument (as other callers in this
script do) to safeguard against such an issue.

    bogus_size=$(strlen "$bogus_content")

> +bogus_sha1=$(printf $bogus_content | git hash-object -t $bogus_type --literally -w --stdin)

Ditto regarding quoting of 'bogus_content'.

Also, if someone ever modifies 'bogus_content' so that it contains a
literal '%' (such as "%s"), then your printf() invocation will
misbehave. Either call it like this:

    $(printf '%s' "$bogus_content" | ...)

or, better yet, call echo_without_newline() as other similar code
elsewhere in this script does, and as suggested earlier[1].

[1]: http://article.gmane.org/gmane.comp.version-control.git/266972/

> +test_expect_success "Type of broken object is correct" '
> +       echo $bogus_type >expect &&
> +       git cat-file -t --literally $bogus_sha1 >actual &&
> +       test_cmp expect actual
> +'
> +
> +test_expect_success "Size of broken object is correct" '
> +    echo $bogus_size >expect &&

Bad indentation. Use tab rather than spaces.

> +       git cat-file -s --literally $bogus_sha1 >actual &&
> +       test_cmp expect actual
> +'
> +
>  test_done
> --
> 2.4.0.rc1.249.gb598846

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH v8 4/4] t1006: add tests for git cat-file --literally
  2015-04-18  0:00   ` Eric Sunshine
@ 2015-04-18  5:22     ` karthik nayak
  0 siblings, 0 replies; 44+ messages in thread
From: karthik nayak @ 2015-04-18  5:22 UTC (permalink / raw)
  To: Eric Sunshine; +Cc: Git List, Junio C Hamano


On 04/18/2015 05:30 AM, Eric Sunshine wrote:
> On Wed, Apr 15, 2015 at 1:00 PM, Karthik Nayak <karthik.188@gmail.com> wrote:
> > Signed-off-by: Karthik Nayak <karthik.188@gmail.com>
> > ---
> > diff --git a/t/t1006-cat-file.sh b/t/t1006-cat-file.sh
> > index ab36b1e..61fab78 100755
> > --- a/t/t1006-cat-file.sh
> > +++ b/t/t1006-cat-file.sh
> > @@ -296,4 +308,21 @@ test_expect_success '%(deltabase) reports packed delta bases' '
> >          }
> >   '
> >
> > +bogus_type="bogus"
> > +bogus_content="bogus"
> > +bogus_size=$(strlen $bogus_content)
>
> If someone ever changes the value of 'bogus_content' so it contains
> whitespace, then the result of strlen() will be incorrect as you've
> invoked it. You should quote its argument (as other callers in this
> script do) to safeguard against such an issue.
>
>      bogus_size=$(strlen "$bogus_content")
>
> > +bogus_sha1=$(printf $bogus_content | git hash-object -t $bogus_type --literally -w --stdin)
>
> Ditto regarding quoting of 'bogus_content'.
>
> Also, if someone ever modifies 'bogus_content' so that it contains a
> literal '%' (such as "%s"), then your printf() invocation will
> misbehave. Either call it like this:
>
>      $(printf '%s' "$bogus_content" | ...)
>
> or, better yet, call echo_without_newline() as other similar code
> elsewhere in this script does, and as suggested earlier[1].
>
> [1]: http://article.gmane.org/gmane.comp.version-control.git/266972/
>
> > +test_expect_success "Type of broken object is correct" '
> > +       echo $bogus_type >expect &&
> > +       git cat-file -t --literally $bogus_sha1 >actual &&
> > +       test_cmp expect actual
> > +'
> > +
> > +test_expect_success "Size of broken object is correct" '
> > +    echo $bogus_size >expect &&
>
> Bad indentation. Use tab rather than spaces.
>
> > +       git cat-file -s --literally $bogus_sha1 >actual &&
> > +       test_cmp expect actual
> > +'
> > +
> >   test_done
> > --
> > 2.4.0.rc1.249.gb598846
Thanks Eric for the changes. I did "echo -n" at the beginning but that
gave me a warning and asked me to use printf instead. I'll use
echo_without_newline, Thanks.

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH v8 1/4] sha1_file.c: support reading from a loose object of unknown type
  2015-04-17 18:49           ` Jeff King
@ 2015-04-18  8:31             ` karthik nayak
  0 siblings, 0 replies; 44+ messages in thread
From: karthik nayak @ 2015-04-18  8:31 UTC (permalink / raw)
  To: Jeff King; +Cc: git, sunshine, Junio C Hamano



On 04/18/2015 12:19 AM, Jeff King wrote:
> On Sat, Apr 18, 2015 at 12:15:28AM +0530, karthik nayak wrote:
>
>>> But now we use type_from_string_gently, which can accept a length[1]. So
>>> we could just count the bytes to the first space and pass the original
>>> buffer along with that length, no?
>>
>> Yes, we could, that would eliminate  "struct strbuf typename =
>> STRBUF_INIT".
>>
>> Something like this perhaps :
>
> Yeah, this is exactly what I had in mind.
>
>>    {
>> -       char type[10];
>> -       int i;
>> +       const char *buf = hdr;
>>           unsigned long size;
>> +       int type, len = 0;
>
> Maybe switch the names of "buf" and "len" to "type_buf" and "type_len"
> to make their purpose more clear?
>
> -Peff
>

Yes, will change the names.

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH v8 1/4] sha1_file.c: support reading from a loose object of unknown type
  2015-04-17 19:23           ` Junio C Hamano
@ 2015-04-18  8:32             ` karthik nayak
  0 siblings, 0 replies; 44+ messages in thread
From: karthik nayak @ 2015-04-18  8:32 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Jeff King, git, sunshine



On 04/18/2015 12:53 AM, Junio C Hamano wrote:
> karthik nayak <karthik.188@gmail.com> writes:
>
>> +       type = type_from_string_gently(buf, len, 1);
>> +       if (oi->typename) {
>> +               strbuf_add(oi->typename, buf, len);
>> +               strbuf_addch(oi->typename, '\0');
>
> add() has setlen() at the end so you do not have to NUL terminate it
> yourself.  Doing addch() is actively wrong, as oi->typename->len now
> counts the terminating NUL as part of the string, no?
>

Yes. was speculative of that. thanks for clearing it up.

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH v8 1/4] sha1_file.c: support reading from a loose object of unknown type
  2015-04-17 23:31   ` Eric Sunshine
@ 2015-04-18  9:03     ` karthik nayak
  0 siblings, 0 replies; 44+ messages in thread
From: karthik nayak @ 2015-04-18  9:03 UTC (permalink / raw)
  To: Eric Sunshine; +Cc: Git List, Junio C Hamano



On 04/18/2015 05:01 AM, Eric Sunshine wrote:
> On Wed, Apr 15, 2015 at 12:59 PM, Karthik Nayak <karthik.188@gmail.com> wrote:
>> Update sha1_loose_object_info() to optionally allow it to read
>> from a loose object file of unknown/bogus type; as the function
>> usually returns the type of the object it read in the form of enum
>> for known types, add an optional "typename" field to receive the
>> name of the type in textual form and a flag to indicate the reading
>> of a loose object file of unknown/bogus type.
>> [...]
>> ---
>> diff --git a/sha1_file.c b/sha1_file.c
>> index 980ce6b..267399d 100644
>> --- a/sha1_file.c
>> +++ b/sha1_file.c
>> @@ -2522,13 +2575,15 @@ struct packed_git *find_sha1_pack(const unsigned char *sha1,
>>   }
>>
>>   static int sha1_loose_object_info(const unsigned char *sha1,
>> -                                 struct object_info *oi)
>> +                                 struct object_info *oi,
>> +                                 int flags)
>>   {
>> -       int status;
>> -       unsigned long mapsize, size;
>> +       int status = 0;
>> +       unsigned long mapsize;
>>          void *map;
>>          git_zstream stream;
>>          char hdr[32];
>> +       struct strbuf hdrbuf = STRBUF_INIT;
>>
>>          if (oi->delta_base_sha1)
>>                  hashclr(oi->delta_base_sha1);
>> @@ -2555,17 +2610,26 @@ static int sha1_loose_object_info(const unsigned char *sha1,
>>                  return -1;
>>          if (oi->disk_sizep)
>>                  *oi->disk_sizep = mapsize;
>> -       if (unpack_sha1_header(&stream, map, mapsize, hdr, sizeof(hdr)) < 0)
>> -               status = error("unable to unpack %s header",
>> -                              sha1_to_hex(sha1));
>> -       else if ((status = parse_sha1_header(hdr, &size)) < 0)
>> -               status = error("unable to parse %s header", sha1_to_hex(sha1));
>> -       else if (oi->sizep)
>> -               *oi->sizep = size;
>> +       if ((flags & LOOKUP_LITERALLY)) {
>> +               if (unpack_sha1_header_to_strbuf(&stream, map, mapsize, &hdrbuf) < 0)
>> +                       status = error("unable to unpack %s header with --literally",
>> +                                      sha1_to_hex(sha1));
>> +               else if ((status = parse_sha1_header_extended(hdrbuf.buf, oi, flags)) < 0)
>> +                       status = error("unable to parse %s header with --literally",
>> +                                      sha1_to_hex(sha1));
>> +       } else {
>> +               if (unpack_sha1_header(&stream, map, mapsize, hdr, sizeof(hdr)) < 0)
>> +                       status = error("unable to unpack %s header",
>> +                                      sha1_to_hex(sha1));
>> +               else if ((status = parse_sha1_header_extended(hdr, oi, flags)) < 0)
>> +                       status = error("unable to parse %s header", sha1_to_hex(sha1));
>> +       }
>>          git_inflate_end(&stream);
>>          munmap(map, mapsize);
>> -       if (oi->typep)
>> +       if (status && oi->typep)
>>                  *oi->typep = status;
>> +       if (hdrbuf.buf)
>> +               strbuf_release(&hdrbuf);
>
> Why is strbuf_release() protected by a conditional rather than being
> called unconditionally? Am I missing something obvious?
No, you're right.
>
>>          return 0;
>>   }
>>

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH v8 2/4] cat-file: teach cat-file a '--literally' option
  2015-04-15 16:59 ` [PATCH v8 2/4] cat-file: teach cat-file a '--literally' option Karthik Nayak
  2015-04-15 20:20   ` Junio C Hamano
@ 2015-04-19  0:28   ` Charles Bailey
  2015-04-20  5:30     ` Junio C Hamano
  1 sibling, 1 reply; 44+ messages in thread
From: Charles Bailey @ 2015-04-19  0:28 UTC (permalink / raw)
  To: Karthik Nayak; +Cc: git, gitster, sunshine

On Wed, Apr 15, 2015 at 10:29:34PM +0530, Karthik Nayak wrote:
> Currently 'git cat-file' throws an error while trying to
> print the type or size of a broken/corrupt object. This is
> because these objects are usually of unknown types.
> 
> Teach git cat-file a '--literally' option where it prints
> the type or size of a broken/corrupt object without throwing
> an error.

I'm sorry to come in with such a fundamental question at such a late
revision of this patch series, but am I the only person to wonder about
the choice of option name?

To me, cat-file already output objects "literally" (without -p) as
opposed to show. From the description, it feels more like it should be
"--unchecked" or perhaps something better that I haven't thought of?

The option isn't a true opposite of hash-object's --literally because
that also allows the creation of known types with invalid contents (e.g.
corrupt trees) whereas cat-file is quite happy to show the _contents_ of
such corrupt objects even without --literally.

Charles.

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH v8 2/4] cat-file: teach cat-file a '--literally' option
  2015-04-19  0:28   ` Charles Bailey
@ 2015-04-20  5:30     ` Junio C Hamano
  2015-04-20  7:44       ` Charles Bailey
  0 siblings, 1 reply; 44+ messages in thread
From: Junio C Hamano @ 2015-04-20  5:30 UTC (permalink / raw)
  To: Charles Bailey; +Cc: Karthik Nayak, git, sunshine

Charles Bailey <charles@hashpling.org> writes:

> I'm sorry to come in with such a fundamental question at such a late
> revision of this patch series, but am I the only person to wonder about
> the choice of option name?
>
> To me, cat-file already output objects "literally" (without -p) as
> opposed to show. From the description, it feels more like it should be
> "--unchecked" or perhaps something better that I haven't thought of?

Yeah, it was conceived as a way to grok what hash-object --literally
would create, but the operation by "cat-file --literally" is not
about showing the contents literally without interpreting (the
general "cat-file <type> <objectname>" does the literal output
already).  So it was my fault to suggest that name, but I do not
think of a better alternative.

> The option isn't a true opposite of hash-object's --literally because
> that also allows the creation of known types with invalid contents (e.g.
> corrupt trees) whereas cat-file is quite happy to show the _contents_ of
> such corrupt objects even without --literally.

Not really.  If you create an object with corrupt type string (e.g. "BLOB"
instead of "blob"), cat-file would not be happy.

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH v8 2/4] cat-file: teach cat-file a '--literally' option
  2015-04-20  5:30     ` Junio C Hamano
@ 2015-04-20  7:44       ` Charles Bailey
  2015-04-20  8:57         ` Karthik Nayak
  0 siblings, 1 reply; 44+ messages in thread
From: Charles Bailey @ 2015-04-20  7:44 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Karthik Nayak, git, sunshine

> On 20 Apr 2015, at 06:30, Junio C Hamano <gitster@pobox.com> wrote:
> Charles Bailey <charles@hashpling.org> writes:
> 
>> The option isn't a true opposite of hash-object's --literally because
>> that also allows the creation of known types with invalid contents (e.g.
>> corrupt trees) whereas cat-file is quite happy to show the _contents_ of
>> such corrupt objects even without --literally.
> 
> Not really.  If you create an object with corrupt type string (e.g. "BLOB"
> instead of "blob"), cat-file would not be happy.

Sorry, the emphasis should have been on "complete" of "complete
opposite".  There are some types of bad objects that can be created only
with hash-object --literally (malformed tag or tree), for which cat-file
works with fine and there are other types (pun unintended - BLOB,
wobble, etc.) for which --literally/--unchecked is required with
cat-file.

So I meant that cat-file's --literally is only a partial "opposite" or
analogue of hash-object's.

--allow-invalid-types? --force (in the sense of "suppress some possible
errors")? It's not a big thing but I'm aware that if we can find a better
name then now would be the best moment. If not, then --literally it is.

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH v8 2/4] cat-file: teach cat-file a '--literally' option
  2015-04-20  7:44       ` Charles Bailey
@ 2015-04-20  8:57         ` Karthik Nayak
  2015-04-20  9:19           ` Charles Bailey
  0 siblings, 1 reply; 44+ messages in thread
From: Karthik Nayak @ 2015-04-20  8:57 UTC (permalink / raw)
  To: Charles Bailey, Junio C Hamano; +Cc: git, sunshine



On April 20, 2015 1:14:34 PM GMT+05:30, Charles Bailey <charles@hashpling.org> wrote:
>> On 20 Apr 2015, at 06:30, Junio C Hamano <gitster@pobox.com> wrote:
>> Charles Bailey <charles@hashpling.org> writes:
>> 
>>> The option isn't a true opposite of hash-object's --literally
>because
>>> that also allows the creation of known types with invalid contents
>(e.g.
>>> corrupt trees) whereas cat-file is quite happy to show the
>_contents_ of
>>> such corrupt objects even without --literally.
>> 
>> Not really.  If you create an object with corrupt type string (e.g.
>"BLOB"
>> instead of "blob"), cat-file would not be happy.
>
>Sorry, the emphasis should have been on "complete" of "complete
>opposite".  There are some types of bad objects that can be created
>only
>with hash-object --literally (malformed tag or tree), for which
>cat-file
>works with fine and there are other types (pun unintended - BLOB,
Sorry, but I didn't get you, broken objects created using hash-object --literally do not work with cat-file without the --literally option.
>wobble, etc.) for which --literally/--unchecked is required with
>cat-file.
>
>So I meant that cat-file's --literally is only a partial "opposite" or
>analogue of hash-object's.
>
>--allow-invalid-types? --force (in the sense of "suppress some possible
>errors")? It's not a big thing but I'm aware that if we can find a
>better
>name then now would be the best moment. If not, then --literally it is.

-- 
Sent from my Android device with K-9 Mail. Please excuse my brevity.

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH v8 2/4] cat-file: teach cat-file a '--literally' option
  2015-04-20  8:57         ` Karthik Nayak
@ 2015-04-20  9:19           ` Charles Bailey
  2015-04-20 15:52             ` karthik nayak
  0 siblings, 1 reply; 44+ messages in thread
From: Charles Bailey @ 2015-04-20  9:19 UTC (permalink / raw)
  To: Karthik Nayak; +Cc: Junio C Hamano, git, sunshine

On Mon, Apr 20, 2015 at 02:27:44PM +0530, Karthik Nayak wrote:
> Sorry, but I didn't get you, broken objects created using hash-object --literally do not work with cat-file without the --literally option.

Perhaps an example would help:

I cannot create a bad tree without --literally:

$ echo total garbage | ./git hash-object -t tree --stdin -w
fatal: corrupt tree file
$ echo total garbage | ./git hash-object -t tree --stdin -w --literally
fa2905d47028d00baec739f6d73540bb2a75c6f7

but I can use cat-file without --literally to query the contents and
information about the object as it stands.

$ ./git cat-file tree fa2905d47028d00baec739f6d73540bb2a75c6f7
total garbage
$ ./git cat-file -t fa2905d47028d00baec739f6d73540bb2a75c6f7
tree
$ ./git cat-file -s fa2905d47028d00baec739f6d73540bb2a75c6f7
14

As far as I could tell - and please correct me if I've misunderstood,
cat-file's literally is about dealing with unrecognized types whereas
hash-object's --literally is about both creating objects with bad types
and invalid objects of "recognized" types. This latter scenario is where
the option name "literally" makes the most sense.

Charles.

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH v8 2/4] cat-file: teach cat-file a '--literally' option
  2015-04-20  9:19           ` Charles Bailey
@ 2015-04-20 15:52             ` karthik nayak
  2015-04-21 10:16               ` Charles Bailey
  0 siblings, 1 reply; 44+ messages in thread
From: karthik nayak @ 2015-04-20 15:52 UTC (permalink / raw)
  To: Charles Bailey; +Cc: Junio C Hamano, git, sunshine



On 04/20/2015 02:49 PM, Charles Bailey wrote:
> On Mon, Apr 20, 2015 at 02:27:44PM +0530, Karthik Nayak wrote:
> > Sorry, but I didn't get you, broken objects created using hash-object --literally do not work with cat-file without the --literally option.
>
> Perhaps an example would help:
>
> I cannot create a bad tree without --literally:
>
> $ echo total garbage | ./git hash-object -t tree --stdin -w
> fatal: corrupt tree file
> $ echo total garbage | ./git hash-object -t tree --stdin -w --literally
> fa2905d47028d00baec739f6d73540bb2a75c6f7
>
> but I can use cat-file without --literally to query the contents and
> information about the object as it stands.
>
> $ ./git cat-file tree fa2905d47028d00baec739f6d73540bb2a75c6f7
> total garbage
> $ ./git cat-file -t fa2905d47028d00baec739f6d73540bb2a75c6f7
> tree
> $ ./git cat-file -s fa2905d47028d00baec739f6d73540bb2a75c6f7
> 14
>
> As far as I could tell - and please correct me if I've misunderstood,
> cat-file's literally is about dealing with unrecognized types whereas
> hash-object's --literally is about both creating objects with bad types
> and invalid objects of "recognized" types. This latter scenario is where
> the option name "literally" makes the most sense.
Yes. What you're saying is correct, but it also makes sense as we're asking
"cat-file" to give us information about the object irrespective of the type of the
object, hence asking it to literally print the information. Also it stays as a compliment
to "hash-object --literally", which is already existing.
>
> Charles.
>

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH v8 1/4] sha1_file.c: support reading from a loose object of unknown type
  2015-04-17 21:10             ` Junio C Hamano
@ 2015-04-20 18:43               ` karthik nayak
  2015-04-20 18:51                 ` Jeff King
  0 siblings, 1 reply; 44+ messages in thread
From: karthik nayak @ 2015-04-20 18:43 UTC (permalink / raw)
  To: Junio C Hamano, Jeff King; +Cc: git, sunshine



On 04/18/2015 02:40 AM, Junio C Hamano wrote:
> Jeff King <peff@peff.net> writes:
>
>> On Fri, Apr 17, 2015 at 09:21:31AM -0700, Junio C Hamano wrote:
>>
>>> Jeff King <peff@peff.net> writes:
>>>
>>>> If there _is_ a performance implication to worry about here, I think it
>>>> would be that we are doing an extra malloc/free.
>>>
>>> Thanks for reminding me; yes, that also worried me.
>>
>> As an aside, I worried about the extra allocation for reading the header
>> in the first place. But it looks like we only do this on the --literally
>> code path (and otherwise use the normal unpack_sha1_header).  Still, I
>> wonder if we could make this work automagically.  That is, speculatively
>> unpack the first N bytes, assuming we hit the end-of-header. If not,
>> then go to a strbuf as the slow path. Then it would be fine to cover all
>> cases; the normal ones would be fast, and only ridiculous things would
>> incur the extra allocation.
>
> Yes, that was what I was hoping to see eventually ;-)
>

Sorry for the delay, So after reading what Jeff said I tried to 
implement it, this might not be a final version of the change, more of a 
RFC version. What do you'll think?

@@ -1564,6 +1564,36 @@ int unpack_sha1_header(git_zstream *stream, 
unsigned char *map, unsigned long ma
         return git_inflate(stream, 0);
  }

+static int unpack_sha1_header_to_strbuf(git_zstream *stream, unsigned 
char *map,
+                                       unsigned long mapsize, void *buffer,
+                                       unsigned long bufsiz, struct 
strbuf *header)
+{
+       unsigned char *cp;
+       int status;
+       int i = 0;
+
+       status = unpack_sha1_header(stream, map, mapsize, buffer, bufsiz);
+
+       for (cp = buffer; cp < stream->next_out; cp++)
+               if (!*cp) {
+                       /* Found the NUL at the end of the header */
+                       return 0;
+               }
+
+       strbuf_add(header, buffer, stream->next_out - (unsigned char 
*)buffer);
+       do {
+               status = git_inflate(stream, 0);
+               strbuf_add(header, buffer, stream->next_out - (unsigned 
char *)buffer);
+               for (cp = buffer; cp < stream->next_out; cp++)
+                       if (!*cp)
+                               /* Found the NUL at the end of the header */
+                               return 0;
+               stream->next_out = buffer;
+               stream->avail_out = bufsiz;
+       } while (status != Z_STREAM_END);
+       return -1;
+}
+


@@ -2555,17 +2610,24 @@ static int sha1_loose_object_info(const unsigned 
char *sha1,
                 return -1;
         if (oi->disk_sizep)
                 *oi->disk_sizep = mapsize;
-       if (unpack_sha1_header(&stream, map, mapsize, hdr, sizeof(hdr)) < 0)
+       if ((flags & LOOKUP_LITERALLY)) {
+               if (unpack_sha1_header_to_strbuf(&stream, map, mapsize, 
hdr, sizeof(hdr), &hdrbuf) < 0)
+                       status = error("unable to unpack %s header with 
--literally",
+                                      sha1_to_hex(sha1));
+       } else if (unpack_sha1_header(&stream, map, mapsize, hdr, 
sizeof(hdr)) < 0)
                 status = error("unable to unpack %s header",
                                sha1_to_hex(sha1));
-       else if ((status = parse_sha1_header(hdr, &size)) < 0)
+       if (hdrbuf.len) {
+               if ((status = parse_sha1_header_extended(hdrbuf.buf, oi, 
flags)) < 0)
+                       status = error("unable to parse %s header with 
--literally",
+                                      sha1_to_hex(sha1));
+       } else if ((status = parse_sha1_header_extended(hdr, oi, flags)) 
< 0)
                 status = error("unable to parse %s header", 
sha1_to_hex(sha1));


and

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH v8 1/4] sha1_file.c: support reading from a loose object of unknown type
  2015-04-20 18:43               ` karthik nayak
@ 2015-04-20 18:51                 ` Jeff King
  2015-04-21 11:26                   ` karthik nayak
  0 siblings, 1 reply; 44+ messages in thread
From: Jeff King @ 2015-04-20 18:51 UTC (permalink / raw)
  To: karthik nayak; +Cc: Junio C Hamano, git, sunshine

On Tue, Apr 21, 2015 at 12:13:30AM +0530, karthik nayak wrote:

> +static int unpack_sha1_header_to_strbuf(git_zstream *stream, unsigned char
> *map,
> +                                       unsigned long mapsize, void *buffer,
> +                                       unsigned long bufsiz, struct strbuf
> *header)
> +{
> +       unsigned char *cp;
> +       int status;
> +       int i = 0;
> +
> +       status = unpack_sha1_header(stream, map, mapsize, buffer, bufsiz);

I wonder if we would feel comfortable just running this NUL-check as
part of unpack_sha1_header (i.e., in all code paths). It _shouldn't_
trigger in normal use, but I wonder if there would be any downsides
(e.g., maliciously crafted objects getting us to allocate memory or
something; I think it is fairly easy to convince git to allocate memory,
though).

> +       for (cp = buffer; cp < stream->next_out; cp++)
> +               if (!*cp) {
> +                       /* Found the NUL at the end of the header */
> +                       return 0;
> +               }

I think we can spell this as:

  if (memchr(buffer, '\0', stream->next_out - buffer))
	return 0;

which is shorter and possibly more efficient.

In theory we could also just start trying to parse the type/size header,
and notice there when we don't find the NUL. That's probably not worth
doing, though. The parsing is separated from the unpacking here, so it
would require combining those two operations in a single function. And
the extra NUL search here is likely not very expensive.

-Peff

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH v8 2/4] cat-file: teach cat-file a '--literally' option
  2015-04-20 15:52             ` karthik nayak
@ 2015-04-21 10:16               ` Charles Bailey
  2015-04-21 19:40                 ` Eric Sunshine
  0 siblings, 1 reply; 44+ messages in thread
From: Charles Bailey @ 2015-04-21 10:16 UTC (permalink / raw)
  To: karthik nayak; +Cc: Junio C Hamano, git, sunshine

On Mon, Apr 20, 2015 at 09:22:47PM +0530, karthik nayak wrote:
> 
> 
> On 04/20/2015 02:49 PM, Charles Bailey wrote:
> >As far as I could tell - and please correct me if I've misunderstood,
> >cat-file's literally is about dealing with unrecognized types whereas
> >hash-object's --literally is about both creating objects with bad types
> >and invalid objects of "recognized" types. This latter scenario is where
> >the option name "literally" makes the most sense.
> Yes. What you're saying is correct, but it also makes sense as we're asking
> "cat-file" to give us information about the object irrespective of the type of the
> object, hence asking it to literally print the information. Also it stays as a compliment
> to "hash-object --literally", which is already existing.

OK, I think you've hit the main point which I was trying to make.

To me, "literally" means "without transformation" or "exactly as
written/recorded/transmitted" (which -t/-s do anyway) and doesn't really
encompass the "irrespective of type" meaning that it has been given here.

In any case, I've made my point so I won't labour it any further. I
think that --no-validation or --allow-any-type might be more accurate
but if everyone else is happy enough with --literally then I'm happy to
live with that too.

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH v8 1/4] sha1_file.c: support reading from a loose object of unknown type
  2015-04-20 18:51                 ` Jeff King
@ 2015-04-21 11:26                   ` karthik nayak
  2015-04-21 14:24                     ` Jeff King
  0 siblings, 1 reply; 44+ messages in thread
From: karthik nayak @ 2015-04-21 11:26 UTC (permalink / raw)
  To: Jeff King; +Cc: Junio C Hamano, git, sunshine



On 04/21/2015 12:21 AM, Jeff King wrote:
> On Tue, Apr 21, 2015 at 12:13:30AM +0530, karthik nayak wrote:
>
>> +static int unpack_sha1_header_to_strbuf(git_zstream *stream, unsigned char
>> *map,
>> +                                       unsigned long mapsize, void *buffer,
>> +                                       unsigned long bufsiz, struct strbuf
>> *header)
>> +{
>> +       unsigned char *cp;
>> +       int status;
>> +       int i = 0;
>> +
>> +       status = unpack_sha1_header(stream, map, mapsize, buffer, bufsiz);
>
> I wonder if we would feel comfortable just running this NUL-check as
> part of unpack_sha1_header (i.e., in all code paths). It _shouldn't_
> trigger in normal use, but I wonder if there would be any downsides
> (e.g., maliciously crafted objects getting us to allocate memory or
> something; I think it is fairly easy to convince git to allocate memory,
> though).
>
But why would we want it to be a part of unpack_sha1_header?

>> +       for (cp = buffer; cp < stream->next_out; cp++)
>> +               if (!*cp) {
>> +                       /* Found the NUL at the end of the header */
>> +                       return 0;
>> +               }
>
> I think we can spell this as:
>
>    if (memchr(buffer, '\0', stream->next_out - buffer))
> 	return 0;
>
> which is shorter and possibly more efficient.
Noted. Thanks :)
>
> In theory we could also just start trying to parse the type/size header,
> and notice there when we don't find the NUL. That's probably not worth
> doing, though. The parsing is separated from the unpacking here, so it
> would require combining those two operations in a single function. And
> the extra NUL search here is likely not very expensive.
>
Yes, even I though about doing that, but wasn't keen on combining those 
two functions, they're meant to do two different things.
> -Peff
>

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH v8 1/4] sha1_file.c: support reading from a loose object of unknown type
  2015-04-21 11:26                   ` karthik nayak
@ 2015-04-21 14:24                     ` Jeff King
  0 siblings, 0 replies; 44+ messages in thread
From: Jeff King @ 2015-04-21 14:24 UTC (permalink / raw)
  To: karthik nayak; +Cc: Junio C Hamano, git, sunshine

On Tue, Apr 21, 2015 at 04:56:08PM +0530, karthik nayak wrote:

> >>+       status = unpack_sha1_header(stream, map, mapsize, buffer, bufsiz);
> >
> >I wonder if we would feel comfortable just running this NUL-check as
> >part of unpack_sha1_header (i.e., in all code paths). It _shouldn't_
> >trigger in normal use, but I wonder if there would be any downsides
> >(e.g., maliciously crafted objects getting us to allocate memory or
> >something; I think it is fairly easy to convince git to allocate memory,
> >though).
> >
> But why would we want it to be a part of unpack_sha1_header?

Just to reduce the number of functions and the complexity of the caller.
But I guess it doesn't help that much if the caller would then need to
speculatively pass in a strbuf. So it's probably not a good idea.

-Peff

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH v8 2/4] cat-file: teach cat-file a '--literally' option
  2015-04-21 10:16               ` Charles Bailey
@ 2015-04-21 19:40                 ` Eric Sunshine
  2015-04-21 20:36                   ` Junio C Hamano
  0 siblings, 1 reply; 44+ messages in thread
From: Eric Sunshine @ 2015-04-21 19:40 UTC (permalink / raw)
  To: Charles Bailey; +Cc: karthik nayak, Junio C Hamano, git

On Tue, Apr 21, 2015 at 6:16 AM, Charles Bailey <charles@hashpling.org> wrote:
> On Mon, Apr 20, 2015 at 09:22:47PM +0530, karthik nayak wrote:
>> On 04/20/2015 02:49 PM, Charles Bailey wrote:
>> >As far as I could tell - and please correct me if I've misunderstood,
>> >cat-file's literally is about dealing with unrecognized types whereas
>> >hash-object's --literally is about both creating objects with bad types
>> >and invalid objects of "recognized" types. This latter scenario is where
>> >the option name "literally" makes the most sense.
>> Yes. What you're saying is correct, but it also makes sense as we're asking
>> "cat-file" to give us information about the object irrespective of the type of the
>> object, hence asking it to literally print the information. Also it stays as a compliment
>> to "hash-object --literally", which is already existing.
>
> OK, I think you've hit the main point which I was trying to make.
>
> To me, "literally" means "without transformation" or "exactly as
> written/recorded/transmitted" (which -t/-s do anyway) and doesn't really
> encompass the "irrespective of type" meaning that it has been given here.
>
> In any case, I've made my point so I won't labour it any further. I
> think that --no-validation or --allow-any-type might be more accurate
> but if everyone else is happy enough with --literally then I'm happy to
> live with that too.

It's easy to be blinded into thinking that cat-file's new option
should be named --literally since it was inspired by the --literally
option of hash-object, but indeed it may not be the best choice.

In addition to your above suggestions (and --unchecked which you
proposed earlier), if we take inspiration from existing Git options,
perhaps one of the following (or something derived from them) would be
better?

    --force
    --ignore-errors
    --no-check
    --unsafe
    --no-strict
    --aggressive

Or, some pure made-up bike-shedding?

    --try-harder
    --allow-bogus-type
    --ignore-bogus-type
    --loose
    --gently

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH v8 2/4] cat-file: teach cat-file a '--literally' option
  2015-04-21 19:40                 ` Eric Sunshine
@ 2015-04-21 20:36                   ` Junio C Hamano
  2015-04-25 11:22                     ` karthik nayak
  0 siblings, 1 reply; 44+ messages in thread
From: Junio C Hamano @ 2015-04-21 20:36 UTC (permalink / raw)
  To: Eric Sunshine; +Cc: Charles Bailey, karthik nayak, git

Eric Sunshine <sunshine@sunshineco.com> writes:

> It's easy to be blinded into thinking that cat-file's new option
> should be named --literally since it was inspired by the --literally
> option of hash-object, but indeed it may not be the best choice.

Yeah, I wouldn't even say "inspired".  It was envisioned as a
counter-part debugging aid, nothing more.

Is there any other way to make cat-file looser other than accepting
an unknown type name from the future?  If so, then perhaps it may
make sense to give it a generic name that implys that we would
trigger such additional looseness in the future.  But as the
inventor of it as a debugging aid, I would say a name that spells
out the specific way this option is being loose, e.g.

>     --allow-bogus-type

or with s/bogus/unknown/, best describes what it currently does.

Thanks.

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH v8 2/4] cat-file: teach cat-file a '--literally' option
  2015-04-21 20:36                   ` Junio C Hamano
@ 2015-04-25 11:22                     ` karthik nayak
  2015-04-25 17:04                       ` Junio C Hamano
  0 siblings, 1 reply; 44+ messages in thread
From: karthik nayak @ 2015-04-25 11:22 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Charles Bailey, git, Eric Sunshine


On 04/22/2015 02:06 AM, Junio C Hamano wrote:
> Eric Sunshine <sunshine@sunshineco.com> writes:
>
> > It's easy to be blinded into thinking that cat-file's new option
> > should be named --literally since it was inspired by the --literally
> > option of hash-object, but indeed it may not be the best choice.
>
> Yeah, I wouldn't even say "inspired".  It was envisioned as a
> counter-part debugging aid, nothing more.
>
> Is there any other way to make cat-file looser other than accepting
> an unknown type name from the future?  If so, then perhaps it may
> make sense to give it a generic name that implys that we would
> trigger such additional looseness in the future.  But as the
> inventor of it as a debugging aid, I would say a name that spells
> out the specific way this option is being loose, e.g.
>
> >      --allow-bogus-type
>
> or with s/bogus/unknown/, best describes what it currently does.
Yes this gives the best description, but its large, while we could use something
like --no-strict instead. Is the size worth the trade-off for a better description?
>
> Thanks.
>
>
>

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH v8 2/4] cat-file: teach cat-file a '--literally' option
  2015-04-25 11:22                     ` karthik nayak
@ 2015-04-25 17:04                       ` Junio C Hamano
  2015-04-27 11:57                         ` karthik nayak
  0 siblings, 1 reply; 44+ messages in thread
From: Junio C Hamano @ 2015-04-25 17:04 UTC (permalink / raw)
  To: karthik nayak; +Cc: Charles Bailey, git, Eric Sunshine

karthik nayak <karthik.188@gmail.com> writes:

>> Is there any other way to make cat-file looser other than accepting
>> an unknown type name from the future?  If not, then perhaps it may
>> make sense to give it a generic name that implies that we would
>> trigger such additional looseness in the future.  But as the
>> inventor of it as a debugging aid, I would say a name that spells
>> out the specific way this option is being loose, e.g.
>>
>> >      --allow-bogus-type
>>
>> or with s/bogus/unknown/, best describes what it currently does.
>
> Yes this gives the best description, but its large, while we could use something
> like --no-strict instead.

We could, if you answered my first question with "no".

By naming this "--no-strict", the first bug report you will receive
may be that "cat-file --no-strict" should parse a zlib deflate that
begins with "blob 1234\n\0" (notice that there are two SPs instead
of the usual one, and length is followed by LF that should not be
there before the NUL) but it does not.

As your option name "--no-strict" signals that you will make the
best effort to parse such nonsense, that would be a valid bug
report, against which you would need to update the code to make it
work.  But is it worth the effort to make such a thing work?  I
dunno.

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH v8 2/4] cat-file: teach cat-file a '--literally' option
  2015-04-25 17:04                       ` Junio C Hamano
@ 2015-04-27 11:57                         ` karthik nayak
  2015-04-27 18:38                           ` Eric Sunshine
  0 siblings, 1 reply; 44+ messages in thread
From: karthik nayak @ 2015-04-27 11:57 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Charles Bailey, git, Eric Sunshine


On 04/25/2015 10:34 PM, Junio C Hamano wrote:
> karthik nayak <karthik.188@gmail.com> writes:
>
> >> Is there any other way to make cat-file looser other than accepting
> >> an unknown type name from the future?  If not, then perhaps it may
> >> make sense to give it a generic name that implies that we would
> >> trigger such additional looseness in the future.  But as the
> >> inventor of it as a debugging aid, I would say a name that spells
> >> out the specific way this option is being loose, e.g.
> >>
> >>>       --allow-bogus-type
> >>
> >> or with s/bogus/unknown/, best describes what it currently does.
> >
> > Yes this gives the best description, but its large, while we could use something
> > like --no-strict instead.
>
> We could, if you answered my first question with "no".
>
> By naming this "--no-strict", the first bug report you will receive
> may be that "cat-file --no-strict" should parse a zlib deflate that
> begins with "blob 1234\n\0" (notice that there are two SPs instead
> of the usual one, and length is followed by LF that should not be
> there before the NUL) but it does not.
>
> As your option name "--no-strict" signals that you will make the
> best effort to parse such nonsense, that would be a valid bug
> report, against which you would need to update the code to make it
> work.  But is it worth the effort to make such a thing work?  I
> dunno.
>
Nice point, I don't see the need to parse such objects at the moment.
That rules out "--no-strict" and everything similar.

I still find "--allow-unkown-type" a bit too big, what about something like

* --any-type
* --arbitrary-type

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH v8 2/4] cat-file: teach cat-file a '--literally' option
  2015-04-27 11:57                         ` karthik nayak
@ 2015-04-27 18:38                           ` Eric Sunshine
  2015-04-28 12:03                             ` karthik nayak
  0 siblings, 1 reply; 44+ messages in thread
From: Eric Sunshine @ 2015-04-27 18:38 UTC (permalink / raw)
  To: karthik nayak; +Cc: Junio C Hamano, Charles Bailey, git

On Mon, Apr 27, 2015 at 7:57 AM, karthik nayak <karthik.188@gmail.com> wrote:
> On 04/25/2015 10:34 PM, Junio C Hamano wrote:
>> karthik nayak <karthik.188@gmail.com> writes:
>> > Yes this gives the best description, but its large, while we could use
>> > something like --no-strict instead.
>>
>> We could, if you answered my first question with "no".
>>
>> By naming this "--no-strict", the first bug report you will receive
>> may be that "cat-file --no-strict" should parse a zlib deflate that
>> begins with "blob 1234\n\0" (notice that there are two SPs instead
>> of the usual one, and length is followed by LF that should not be
>> there before the NUL) but it does not.
>>
>> As your option name "--no-strict" signals that you will make the
>> best effort to parse such nonsense, that would be a valid bug
>> report, against which you would need to update the code to make it
>> work.  But is it worth the effort to make such a thing work?  I
>> dunno.
>>
> Nice point, I don't see the need to parse such objects at the moment.
> That rules out "--no-strict" and everything similar.
>
> I still find "--allow-unkown-type" a bit too big, what about something like
>
> * --any-type
> * --arbitrary-type

As a diagnostic aid when encountering a (hopefully rare) broken or
corrupt object, this option is not likely to be used often. The
"allow" in --allow-unknown-type conveys the intended purpose more
accurately than either of the shorter names suggested above; and
considering how infrequently it is likely to be used, the extra six
characters should not be a significant burden.

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH v8 2/4] cat-file: teach cat-file a '--literally' option
  2015-04-27 18:38                           ` Eric Sunshine
@ 2015-04-28 12:03                             ` karthik nayak
  0 siblings, 0 replies; 44+ messages in thread
From: karthik nayak @ 2015-04-28 12:03 UTC (permalink / raw)
  To: Eric Sunshine; +Cc: Junio C Hamano, Charles Bailey, git


On 04/28/2015 12:08 AM, Eric Sunshine wrote:
> On Mon, Apr 27, 2015 at 7:57 AM, karthik nayak <karthik.188@gmail.com> wrote:
> > On 04/25/2015 10:34 PM, Junio C Hamano wrote:
> >> karthik nayak <karthik.188@gmail.com> writes:
> >>> Yes this gives the best description, but its large, while we could use
> >>> something like --no-strict instead.
> >>
> >> We could, if you answered my first question with "no".
> >>
> >> By naming this "--no-strict", the first bug report you will receive
> >> may be that "cat-file --no-strict" should parse a zlib deflate that
> >> begins with "blob 1234\n\0" (notice that there are two SPs instead
> >> of the usual one, and length is followed by LF that should not be
> >> there before the NUL) but it does not.
> >>
> >> As your option name "--no-strict" signals that you will make the
> >> best effort to parse such nonsense, that would be a valid bug
> >> report, against which you would need to update the code to make it
> >> work.  But is it worth the effort to make such a thing work?  I
> >> dunno.
> >>
> > Nice point, I don't see the need to parse such objects at the moment.
> > That rules out "--no-strict" and everything similar.
> >
> > I still find "--allow-unkown-type" a bit too big, what about something like
> >
> > * --any-type
> > * --arbitrary-type
>
> As a diagnostic aid when encountering a (hopefully rare) broken or
> corrupt object, this option is not likely to be used often. The
> "allow" in --allow-unknown-type conveys the intended purpose more
> accurately than either of the shorter names suggested above; and
> considering how infrequently it is likely to be used, the extra six
> characters should not be a significant burden.
>
You do have a point, thanks for putting it out. Will stick to "--allow-unkown-type".

^ permalink raw reply	[flat|nested] 44+ messages in thread

end of thread, other threads:[~2015-04-28 12:03 UTC | newest]

Thread overview: 44+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-04-15 16:55 [PATCH v8 0/4] cat-file: teach cat-file a '--literally' option karthik nayak
2015-04-15 16:59 ` [PATCH v8 1/4] sha1_file.c: support reading from a loose object of unknown type Karthik Nayak
2015-04-15 20:21   ` Junio C Hamano
2015-04-15 22:18     ` Jeff King
2015-04-17 14:23       ` Jeff King
2015-04-17 16:21         ` Junio C Hamano
2015-04-17 20:51           ` Jeff King
2015-04-17 21:10             ` Junio C Hamano
2015-04-20 18:43               ` karthik nayak
2015-04-20 18:51                 ` Jeff King
2015-04-21 11:26                   ` karthik nayak
2015-04-21 14:24                     ` Jeff King
2015-04-17 18:45         ` karthik nayak
2015-04-17 18:49           ` Jeff King
2015-04-18  8:31             ` karthik nayak
2015-04-17 19:23           ` Junio C Hamano
2015-04-18  8:32             ` karthik nayak
2015-04-17 23:31   ` Eric Sunshine
2015-04-18  9:03     ` karthik nayak
2015-04-15 16:59 ` [PATCH v8 2/4] cat-file: teach cat-file a '--literally' option Karthik Nayak
2015-04-15 20:20   ` Junio C Hamano
2015-04-15 20:52     ` Junio C Hamano
2015-04-16  7:26       ` karthik nayak
2015-04-16 13:35         ` Junio C Hamano
2015-04-17  2:10           ` Karthik Nayak
2015-04-17  2:14             ` Junio C Hamano
2015-04-19  0:28   ` Charles Bailey
2015-04-20  5:30     ` Junio C Hamano
2015-04-20  7:44       ` Charles Bailey
2015-04-20  8:57         ` Karthik Nayak
2015-04-20  9:19           ` Charles Bailey
2015-04-20 15:52             ` karthik nayak
2015-04-21 10:16               ` Charles Bailey
2015-04-21 19:40                 ` Eric Sunshine
2015-04-21 20:36                   ` Junio C Hamano
2015-04-25 11:22                     ` karthik nayak
2015-04-25 17:04                       ` Junio C Hamano
2015-04-27 11:57                         ` karthik nayak
2015-04-27 18:38                           ` Eric Sunshine
2015-04-28 12:03                             ` karthik nayak
2015-04-15 17:00 ` [PATCH v8 3/4] cat-file: add documentation for " Karthik Nayak
2015-04-15 17:00 ` [PATCH v8 4/4] t1006: add tests for git cat-file --literally Karthik Nayak
2015-04-18  0:00   ` Eric Sunshine
2015-04-18  5:22     ` karthik nayak

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.