All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v4 0/2] cat-file: add a '--literally' option
@ 2015-03-17  5:13 karthik nayak
  2015-03-17  5:16 ` [PATCH v4 1/2] cat-file: teach cat-file " Karthik Nayak
  2015-03-17  5:16 ` [PATCH v4 2/2] sha1_file: refactor sha1_file.c to support 'cat-file --literally' Karthik Nayak
  0 siblings, 2 replies; 7+ messages in thread
From: karthik nayak @ 2015-03-17  5:13 UTC (permalink / raw)
  To: Git List

Based on Junios and Erics suggestion I have made various
changes over the previous iteration of the patch[1].

Changes in this version :
* Add a object_info::typename to hold all the typenames.
* Add a wrapper around parse_sha1_header() to get type and
   size of broken/corrupt objects without throwing an error
   whenever the type is unknown.
* Also add an option for 'cat-file -s --literally'.

Thanks to Junio and Eric for their suggestions and guidance.

[1]http://article.gmane.org/gmane.comp.version-control.git/264853

^ permalink raw reply	[flat|nested] 7+ messages in thread

* [PATCH v4 1/2] cat-file: teach cat-file a '--literally' option
  2015-03-17  5:13 [PATCH v4 0/2] cat-file: add a '--literally' option karthik nayak
@ 2015-03-17  5:16 ` Karthik Nayak
  2015-03-17  6:51   ` Eric Sunshine
  2015-03-17  5:16 ` [PATCH v4 2/2] sha1_file: refactor sha1_file.c to support 'cat-file --literally' Karthik Nayak
  1 sibling, 1 reply; 7+ messages in thread
From: Karthik Nayak @ 2015-03-17  5:16 UTC (permalink / raw)
  To: git; +Cc: Karthik Nayak

Currently 'git cat-file' throws an error while trying to
print the type or size of a broken/corrupt object which is
created using 'git hash-object --literally'. This is
because these objects are usually of unknown types.

Teach git cat-file a '--literally' option where it prints
the type or size of a broken/corrupt object without throwing
an error.

Modify '-t' and '-s' options to call sha1_object_info_extended()
directly to support the '--literally' option.

Add a 'LOOKUP_LITERALLY' flag to notify sha1_object_info_extended()
whenever the '--literally' is being used.

Add object_info::typename to support 'cat-file --literally' and
store the typename of objects.

Helped-by: Junio C Hamano <gitster@pobox.com>
Helped-by: Eric Sunshine <sunshine@sunshineco.com>
Signed-off-by: Karthik Nayak <karthik.188@gmail.com>
---
 builtin/cat-file.c | 43 +++++++++++++++++++++++++++++++++----------
 cache.h            |  2 ++
 2 files changed, 35 insertions(+), 10 deletions(-)

diff --git a/builtin/cat-file.c b/builtin/cat-file.c
index df99df4..1625246 100644
--- a/builtin/cat-file.c
+++ b/builtin/cat-file.c
@@ -9,13 +9,20 @@
 #include "userdiff.h"
 #include "streaming.h"
 
-static int cat_one_file(int opt, const char *exp_type, const char *obj_name)
+static int cat_one_file(int opt, const char *exp_type, const char *obj_name,
+			int literally)
 {
 	unsigned char sha1[20];
 	enum object_type type;
 	char *buf;
 	unsigned long size;
 	struct object_context obj_context;
+	struct object_info oi = {NULL};
+	struct strbuf sb = STRBUF_INIT;
+	unsigned flags = LOOKUP_REPLACE_OBJECT;
+
+	if (literally)
+		flags |= LOOKUP_LITERALLY;
 
 	if (get_sha1_with_context(obj_name, 0, sha1, &obj_context))
 		die("Not a valid object name %s", obj_name);
@@ -23,16 +30,24 @@ static int cat_one_file(int opt, const char *exp_type, const char *obj_name)
 	buf = NULL;
 	switch (opt) {
 	case 't':
-		type = sha1_object_info(sha1, NULL);
-		if (type > 0) {
-			printf("%s\n", typename(type));
+		oi.typep = &type;
+		oi.typename = &sb;
+		if (sha1_object_info_extended(sha1, &oi, flags) < 0)
+			die("git cat-file: could not get object info");
+		if (sb.len) {
+			printf("%s\n", sb.buf);
+			strbuf_release(&sb);
 			return 0;
 		}
 		break;
 
 	case 's':
-		type = sha1_object_info(sha1, &size);
-		if (type > 0) {
+		oi.typep = &type;
+		oi.typename = &sb;
+		oi.sizep = &size;
+		if (sha1_object_info_extended(sha1, &oi, flags) < 0)
+			die("git cat-file: could not get object info");
+		if (sb.len) {
 			printf("%lu\n", size);
 			return 0;
 		}
@@ -323,8 +338,8 @@ static int batch_objects(struct batch_options *opt)
 }
 
 static const char * const cat_file_usage[] = {
-	N_("git cat-file (-t | -s | -e | -p | <type> | --textconv) <object>"),
-	N_("git cat-file (--batch | --batch-check) < <list-of-objects>"),
+	N_("git cat-file (-t [--literally]|-s [--literally]|-e|-p|<type>|--textconv) <object>"),
+	N_("git cat-file (--batch | --batch-check) <list-of-objects>"),
 	NULL
 };
 
@@ -359,6 +374,7 @@ int cmd_cat_file(int argc, const char **argv, const char *prefix)
 	int opt = 0;
 	const char *exp_type = NULL, *obj_name = NULL;
 	struct batch_options batch = {0};
+	int literally = 0;
 
 	const struct option options[] = {
 		OPT_GROUP(N_("<type> can be one of: blob, tree, commit, tag")),
@@ -369,6 +385,8 @@ int cmd_cat_file(int argc, const char **argv, const char *prefix)
 		OPT_SET_INT('p', NULL, &opt, N_("pretty-print object's content"), 'p'),
 		OPT_SET_INT(0, "textconv", &opt,
 			    N_("for blob objects, run textconv on object's content"), 'c'),
+		OPT_BOOL( 0, "literally", &literally,
+			  N_("get information about corrupt objects for debugging Git")),
 		{ OPTION_CALLBACK, 0, "batch", &batch, "format",
 			N_("show info and content of objects fed from the standard input"),
 			PARSE_OPT_OPTARG, batch_option_callback },
@@ -380,7 +398,7 @@ int cmd_cat_file(int argc, const char **argv, const char *prefix)
 
 	git_config(git_cat_file_config, NULL);
 
-	if (argc != 3 && argc != 2)
+	if (argc < 2 || argc > 4)
 		usage_with_options(cat_file_usage, options);
 
 	argc = parse_options(argc, argv, prefix, options, cat_file_usage, 0);
@@ -405,5 +423,10 @@ int cmd_cat_file(int argc, const char **argv, const char *prefix)
 	if (batch.enabled)
 		return batch_objects(&batch);
 
-	return cat_one_file(opt, exp_type, obj_name);
+	if (literally && (opt == 't' || opt == 's'))
+		return cat_one_file(opt, exp_type, obj_name, literally);
+	else if (literally)
+		die("git cat-file --literally: use with -s or -t");
+
+	return cat_one_file(opt, exp_type, obj_name, literally);
 }
diff --git a/cache.h b/cache.h
index 761c570..7c151ee 100644
--- a/cache.h
+++ b/cache.h
@@ -830,6 +830,7 @@ extern int is_ntfs_dotgit(const char *name);
 
 /* object replacement */
 #define LOOKUP_REPLACE_OBJECT 1
+#define LOOKUP_LITERALLY 2
 extern void *read_sha1_file_extended(const unsigned char *sha1, enum object_type *type, unsigned long *size, unsigned flag);
 static inline void *read_sha1_file(const unsigned char *sha1, enum object_type *type, unsigned long *size)
 {
@@ -1296,6 +1297,7 @@ struct object_info {
 	unsigned long *sizep;
 	unsigned long *disk_sizep;
 	unsigned char *delta_base_sha1;
+	struct strbuf *typename;
 
 	/* Response */
 	enum {
-- 
2.3.1.307.gf3db8a5

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH v4 2/2] sha1_file: refactor sha1_file.c to support 'cat-file --literally'
  2015-03-17  5:13 [PATCH v4 0/2] cat-file: add a '--literally' option karthik nayak
  2015-03-17  5:16 ` [PATCH v4 1/2] cat-file: teach cat-file " Karthik Nayak
@ 2015-03-17  5:16 ` Karthik Nayak
  2015-03-17 20:10   ` Junio C Hamano
  1 sibling, 1 reply; 7+ messages in thread
From: Karthik Nayak @ 2015-03-17  5:16 UTC (permalink / raw)
  To: git; +Cc: Karthik Nayak

Modify sha1_loose_object_info() to support 'cat-file --literally'
by accepting flags and also make changes to copy the type to
object_info::typename.

Add parse_sha1_header_extended() which acts as a wrapper around
parse_sha1_header() allowing for more information to be obtained
based on the given flags.

Add unpack_sha1_header_literally() to unpack sha1 headers of
unknown/corrupt objects which have a unknown sha1 header size.
This was written by Junio C Hamano but tested by me.

Helped-by: Junio C Hamano <gitster@pobox.com>
Helped-by: Eric Sunshine <sunshine@sunshineco.com>
Signed-off-by: Karthik Nayak <karthik.188@gmail.com>
---
 sha1_file.c | 121 ++++++++++++++++++++++++++++++++++++++++++++++++------------
 1 file changed, 97 insertions(+), 24 deletions(-)

diff --git a/sha1_file.c b/sha1_file.c
index 69a60ec..e31e9e2 100644
--- a/sha1_file.c
+++ b/sha1_file.c
@@ -1564,6 +1564,36 @@ int unpack_sha1_header(git_zstream *stream, unsigned char *map, unsigned long ma
 	return git_inflate(stream, 0);
 }
 
+static int unpack_sha1_header_literally(git_zstream *stream, unsigned char *map,
+					unsigned long mapsize,
+					struct strbuf *header)
+{
+	unsigned char buffer[32], *cp;
+	unsigned long bufsiz = sizeof(buffer);
+	int status;
+
+	/* Get the data stream */
+	memset(stream, 0, sizeof(*stream));
+	stream->next_in = map;
+	stream->avail_in = mapsize;
+	stream->next_out = buffer;
+	stream->avail_out = bufsiz;
+
+	git_inflate_init(stream);
+
+	do {
+		status = git_inflate(stream, 0);
+		strbuf_add(header, buffer, stream->next_out - buffer);
+		for (cp = buffer; cp < stream->next_out; cp++)
+			if (!*cp)
+				/* Found the NUL at the end of the header */
+				return 0;
+		stream->next_out = buffer;
+		stream->avail_out = bufsiz;
+	} while (status == Z_OK);
+	return -1;
+}
+
 static void *unpack_sha1_rest(git_zstream *stream, void *buffer, unsigned long size, const unsigned char *sha1)
 {
 	int bytes = strlen(buffer) + 1;
@@ -1609,32 +1639,24 @@ static void *unpack_sha1_rest(git_zstream *stream, void *buffer, unsigned long s
 	return NULL;
 }
 
-/*
- * We used to just use "sscanf()", but that's actually way
- * too permissive for what we want to check. So do an anal
- * object header parse by hand.
- */
-int parse_sha1_header(const char *hdr, unsigned long *sizep)
+int parse_sha1_header_extended(const char *hdr, struct object_info *oi,
+			       int flags)
 {
-	char type[10];
-	int i;
+	struct strbuf typename = STRBUF_INIT;
 	unsigned long size;
+	int type;
 
 	/*
 	 * The type can be at most ten bytes (including the
 	 * terminating '\0' that we add), and is followed by
 	 * a space.
 	 */
-	i = 0;
 	for (;;) {
 		char c = *hdr++;
 		if (c == ' ')
 			break;
-		type[i++] = c;
-		if (i >= sizeof(type))
-			return -1;
+		strbuf_addch(&typename, c);
 	}
-	type[i] = 0;
 
 	/*
 	 * The length must follow immediately, and be in canonical
@@ -1652,12 +1674,45 @@ int parse_sha1_header(const char *hdr, unsigned long *sizep)
 			size = size * 10 + c;
 		}
 	}
-	*sizep = size;
+
+	type = type_from_string_gently(typename.buf, -1, 1);
+	if (oi->sizep)
+		*oi->sizep = size;
+	if (oi->typename)
+		strbuf_addstr(oi->typename, typename.buf);
+	if (oi->typep)
+		*oi->typep = type;
+	strbuf_release(&typename);
+
+	/*
+	 * Set type to 0 if its an unknown object and
+	 * we're obtaining the type using '--literally'
+	 * option.
+	 */
+	if ((flags & LOOKUP_LITERALLY) && (type == -1))
+		type = 0;
+	else if (type == -1)
+		die("invalid object type");
 
 	/*
 	 * The length must be followed by a zero byte
 	 */
-	return *hdr ? -1 : type_from_string(type);
+	return *hdr ? -1 : type;
+}
+
+/*
+ * We used to just use "sscanf()", but that's actually way
+ * too permissive for what we want to check. So do an anal
+ * object header parse by hand. Calls the extended function.
+ */
+int parse_sha1_header(const char *hdr, unsigned long *sizep)
+{
+	struct object_info oi;
+
+	oi.sizep = sizep;
+	oi.typename = NULL;
+	oi.typep = NULL;
+	return parse_sha1_header_extended(hdr, &oi, LOOKUP_REPLACE_OBJECT);
 }
 
 static void *unpack_sha1_file(void *map, unsigned long mapsize, enum object_type *type, unsigned long *size, const unsigned char *sha1)
@@ -2524,13 +2579,15 @@ struct packed_git *find_sha1_pack(const unsigned char *sha1,
 }
 
 static int sha1_loose_object_info(const unsigned char *sha1,
-				  struct object_info *oi)
+				  struct object_info *oi,
+				  int flags)
 {
-	int status;
+	int status = 0;
 	unsigned long mapsize, size;
 	void *map;
 	git_zstream stream;
 	char hdr[32];
+	struct strbuf hdrbuf = STRBUF_INIT;
 
 	if (oi->delta_base_sha1)
 		hashclr(oi->delta_base_sha1);
@@ -2557,17 +2614,29 @@ static int sha1_loose_object_info(const unsigned char *sha1,
 		return -1;
 	if (oi->disk_sizep)
 		*oi->disk_sizep = mapsize;
-	if (unpack_sha1_header(&stream, map, mapsize, hdr, sizeof(hdr)) < 0)
-		status = error("unable to unpack %s header",
-			       sha1_to_hex(sha1));
-	else if ((status = parse_sha1_header(hdr, &size)) < 0)
-		status = error("unable to parse %s header", sha1_to_hex(sha1));
-	else if (oi->sizep)
+	if ((flags & LOOKUP_LITERALLY)) {
+		if (unpack_sha1_header_literally(&stream, map, mapsize, &hdrbuf) < 0)
+			status = error("unable to unpack %s header with --literally",
+				       sha1_to_hex(sha1));
+		else if ((status = parse_sha1_header_extended(hdrbuf.buf, oi, flags)) < 0)
+			status = error("unable to parse %s header", sha1_to_hex(sha1));
+	} else {
+		if (unpack_sha1_header(&stream, map, mapsize, hdr, sizeof(hdr)) < 0)
+			status = error("unable to unpack %s header",
+				       sha1_to_hex(sha1));
+		else if ((status = parse_sha1_header(hdr, &size)) < 0)
+			status = error("unable to parse %s header", sha1_to_hex(sha1));
+	}
+	if (oi->sizep)
 		*oi->sizep = size;
 	git_inflate_end(&stream);
 	munmap(map, mapsize);
 	if (oi->typep)
 		*oi->typep = status;
+	if (oi->typename && !(oi->typename->len))
+		strbuf_addstr(oi->typename, typename(status));
+	if (hdrbuf.buf)
+		strbuf_release(&hdrbuf);
 	return 0;
 }
 
@@ -2588,13 +2657,15 @@ int sha1_object_info_extended(const unsigned char *sha1, struct object_info *oi,
 			*(oi->disk_sizep) = 0;
 		if (oi->delta_base_sha1)
 			hashclr(oi->delta_base_sha1);
+		if (oi->typename)
+			strbuf_addstr(oi->typename, typename(co->type));
 		oi->whence = OI_CACHED;
 		return 0;
 	}
 
 	if (!find_pack_entry(real, &e)) {
 		/* Most likely it's a loose object. */
-		if (!sha1_loose_object_info(real, oi)) {
+		if (!sha1_loose_object_info(real, oi, flags)) {
 			oi->whence = OI_LOOSE;
 			return 0;
 		}
@@ -2618,6 +2689,8 @@ int sha1_object_info_extended(const unsigned char *sha1, struct object_info *oi,
 		oi->u.packed.is_delta = (rtype == OBJ_REF_DELTA ||
 					 rtype == OBJ_OFS_DELTA);
 	}
+	if (oi->typename)
+		strbuf_addstr(oi->typename, typename(rtype));
 
 	return 0;
 }
-- 
2.3.1.307.gf3db8a5

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH v4 1/2] cat-file: teach cat-file a '--literally' option
  2015-03-17  5:16 ` [PATCH v4 1/2] cat-file: teach cat-file " Karthik Nayak
@ 2015-03-17  6:51   ` Eric Sunshine
  2015-03-17 13:59     ` Karthik Nayak
  0 siblings, 1 reply; 7+ messages in thread
From: Eric Sunshine @ 2015-03-17  6:51 UTC (permalink / raw)
  To: Karthik Nayak; +Cc: Git List

On Tue, Mar 17, 2015 at 1:16 AM, Karthik Nayak <karthik.188@gmail.com> wrote:
> diff --git a/builtin/cat-file.c b/builtin/cat-file.c
> index df99df4..1625246 100644
> --- a/builtin/cat-file.c
> +++ b/builtin/cat-file.c
> @@ -323,8 +338,8 @@ static int batch_objects(struct batch_options *opt)
>  }

I don't presently have time for a proper read-through, however, this
popped out while quickly running my eyes over the changes.

>  static const char * const cat_file_usage[] = {
> -       N_("git cat-file (-t | -s | -e | -p | <type> | --textconv) <object>"),
> -       N_("git cat-file (--batch | --batch-check) < <list-of-objects>"),
> +       N_("git cat-file (-t [--literally]|-s [--literally]|-e|-p|<type>|--textconv) <object>"),
> +       N_("git cat-file (--batch | --batch-check) <list-of-objects>"),

The '<' in the second line before <list-of-objects> is intentional. It
means that <list-of-objects> is provided via stdin. Therefore, it is
incorrect to remove it.

>         NULL
>  };
>

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH v4 1/2] cat-file: teach cat-file a '--literally' option
  2015-03-17  6:51   ` Eric Sunshine
@ 2015-03-17 13:59     ` Karthik Nayak
  0 siblings, 0 replies; 7+ messages in thread
From: Karthik Nayak @ 2015-03-17 13:59 UTC (permalink / raw)
  To: Eric Sunshine; +Cc: Git List



On March 17, 2015 12:21:33 PM GMT+05:30, Eric Sunshine <sunshine@sunshineco.com> wrote:
>On Tue, Mar 17, 2015 at 1:16 AM, Karthik Nayak <karthik.188@gmail.com>
>wrote:
>> diff --git a/builtin/cat-file.c b/builtin/cat-file.c
>> index df99df4..1625246 100644
>> --- a/builtin/cat-file.c
>> +++ b/builtin/cat-file.c
>> @@ -323,8 +338,8 @@ static int batch_objects(struct batch_options
>*opt)
>>  }
>
>I don't presently have time for a proper read-through, however, this
>popped out while quickly running my eyes over the changes.
>
>>  static const char * const cat_file_usage[] = {
>> -       N_("git cat-file (-t | -s | -e | -p | <type> | --textconv)
><object>"),
>> -       N_("git cat-file (--batch | --batch-check) <
><list-of-objects>"),
>> +       N_("git cat-file (-t [--literally]|-s
>[--literally]|-e|-p|<type>|--textconv) <object>"),
>> +       N_("git cat-file (--batch | --batch-check)
><list-of-objects>"),
>
>The '<' in the second line before <list-of-objects> is intentional. It
>means that <list-of-objects> is provided via stdin. Therefore, it is
>incorrect to remove it.
I see. It seemed out of place. Makes sense, probably should have asked. 

Thanks.
>
>>         NULL
>>  };
>>

-- 
Sent from my Android device with K-9 Mail. Please excuse my brevity.

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH v4 2/2] sha1_file: refactor sha1_file.c to support 'cat-file --literally'
  2015-03-17  5:16 ` [PATCH v4 2/2] sha1_file: refactor sha1_file.c to support 'cat-file --literally' Karthik Nayak
@ 2015-03-17 20:10   ` Junio C Hamano
  2015-03-18 19:35     ` karthik nayak
  0 siblings, 1 reply; 7+ messages in thread
From: Junio C Hamano @ 2015-03-17 20:10 UTC (permalink / raw)
  To: Karthik Nayak; +Cc: git

Karthik Nayak <karthik.188@gmail.com> writes:

> Subject: Re: [PATCH v4 2/2] sha1_file: refactor sha1_file.c to support 'cat-file --literally'

> Modify sha1_loose_object_info() to support 'cat-file --literally'
> by accepting flags and also make changes to copy the type to
> object_info::typename.

It would be more descriptive to mention the central point on the
title regarding what it takes to "support cat-file --literally".

For example:

  sha1_file.c: support reading from a loose object of a bogus type

  Update sha1_loose_object_info() to optionally allow it to read
  from a loose object file of unknown/bogus type; as the function
  usually returns the type of the object it read in the form of enum
  for known types, add an optional "typename" field to receive the
  name of the type in textual form.

By the way, I think your 1/2 would not even compile unless you have
this change; the patches in these two patch series must be swapped,
no?

> diff --git a/sha1_file.c b/sha1_file.c
> index 69a60ec..e31e9e2 100644
> --- a/sha1_file.c
> +++ b/sha1_file.c
> @@ -1564,6 +1564,36 @@ int unpack_sha1_header(git_zstream *stream, unsigned char *map, unsigned long ma
>  	return git_inflate(stream, 0);
>  }
>  
> +static int unpack_sha1_header_literally(git_zstream *stream, unsigned char *map,
> +					unsigned long mapsize,
> +					struct strbuf *header)
> +{
> +	unsigned char buffer[32], *cp;
> +	unsigned long bufsiz = sizeof(buffer);
> +	int status;
> +
> +	/* Get the data stream */
> +	memset(stream, 0, sizeof(*stream));
> +	stream->next_in = map;
> +	stream->avail_in = mapsize;
> +	stream->next_out = buffer;
> +	stream->avail_out = bufsiz;
> +
> +	git_inflate_init(stream);
> +
> +	do {
> +		status = git_inflate(stream, 0);
> +		strbuf_add(header, buffer, stream->next_out - buffer);
> +		for (cp = buffer; cp < stream->next_out; cp++)
> +			if (!*cp)
> +				/* Found the NUL at the end of the header */
> +				return 0;
> +		stream->next_out = buffer;
> +		stream->avail_out = bufsiz;
> +	} while (status == Z_OK);
> +	return -1;
> +}
> +

There is nothing "literal" about this function.

The only difference between the original and this one is that this
uses a strbuf, instead of a buffer of a fixed size allocated by the
caller, to return the early part of the inflated data.

I wonder if it incurs too much overhead to the existing callers if
you reimplementated unpack_sha1_header() as a thin wrapper around
this function, something like

	int unpack_sha1_header(git_zstream *stream,
				unsigned char *map, unsigned long mapsize,
                                void *buffer, unsigned long bufsiz)
	{
                int status;
		struct strbuf header = STRBUF_INIT;

                status = unpack_sha1_header_to_strbuf(stream, map, mapsize, &header);
		if (bufsiz < header.len)
                	die("object header too long");
		memcpy(buffer, header.buf, header.len);
                return status;
	}
    			  
Note that I am *not* suggesting to do this blindly.  If there is no
downside from performance point of view, however, the above would be
a way to avoid code duplication.

Another way to avoid code duplication may be to implement
unpack_sha1_header_to_strbuf() in terms of unpack_sha1_header(),
perhaps like this:

	static int unpack_sha1_header_to_strbuf(...)
        {
		unsigned char buffer[32], *cp;
                unsigned long bufsiz = sizeof(buffer);
        	int status = unpack_sha1_header(stream, map, mapsize, buffer, bufsiz);

		if (status)
                	return status;
		do {
			strbuf_add(header, buffer, stream->next_out - buffer);
			for (cp = buffer; cp < stream->next_out; cp++)
				if (!*cp)
					/* Found the NUL at the end of the header */
					return 0;
			stream->next_out = buffer;
			stream->avail_out = bufsiz;
		} while (status == Z_OK);
		return -1;
	}

which may be more in line with how we read data from loose objects.

> +int parse_sha1_header_extended(const char *hdr, struct object_info *oi,
> +			       int flags)

Unless we are taking advantage of the fact that MSB is special in
signed integral types, I would prefer to see us use an unsigned type
to store these bits in a variable of an integral type.  That would
let the readers assume that they have fewer tricky things possibly
going on in the code (also see the footnote to $gmane/263751).

> @@ -1652,12 +1674,45 @@ int parse_sha1_header(const char *hdr, unsigned long *sizep)
>  			size = size * 10 + c;
>  		}
>  	}
> -	*sizep = size;
> +
> +	type = type_from_string_gently(typename.buf, -1, 1);

Doesn't the code know how long the typename is at this point?

> +	if (oi->sizep)
> +		*oi->sizep = size;
> +	if (oi->typename)
> +		strbuf_addstr(oi->typename, typename.buf);

Likewise.  Perhaps strbuf_addbuf()?

> +	if (oi->typep)
> +		*oi->typep = type;

Do you want to store -1 to *oi->typep when the caller asked to do
the "literally" thing?  Shouldn't it match what is returned from
this function?

> +	return *hdr ? -1 : type;
> +}
> +
> +/*
> + * We used to just use "sscanf()", but that's actually way
> + * too permissive for what we want to check. So do an anal
> + * object header parse by hand. Calls the extended function.
> + */

The comment "let's do better than sscanf() by parsing ourselves"
applies to the implementation of _extended() function, not this one,
no?  It is clear to everybody that it "Calls the extended function",
so why mention it?

> +int parse_sha1_header(const char *hdr, unsigned long *sizep)
> +{
> +	struct object_info oi;
> +
> +	oi.sizep = sizep;
> +	oi.typename = NULL;
> +	oi.typep = NULL;
> +	return parse_sha1_header_extended(hdr, &oi, LOOKUP_REPLACE_OBJECT);
>  }

> @@ -2524,13 +2579,15 @@ struct packed_git *find_sha1_pack(const unsigned char *sha1,
>  }
>  
>  static int sha1_loose_object_info(const unsigned char *sha1,
> -				  struct object_info *oi)
> +				  struct object_info *oi,
> +				  int flags)
>  {
> -	int status;
> +	int status = 0;
>  	unsigned long mapsize, size;
>  	void *map;
>  	git_zstream stream;
>  	char hdr[32];
> +	struct strbuf hdrbuf = STRBUF_INIT;
>  
>  	if (oi->delta_base_sha1)
>  		hashclr(oi->delta_base_sha1);
> @@ -2557,17 +2614,29 @@ static int sha1_loose_object_info(const unsigned char *sha1,
>  		return -1;
>  	if (oi->disk_sizep)
>  		*oi->disk_sizep = mapsize;
> -	if (unpack_sha1_header(&stream, map, mapsize, hdr, sizeof(hdr)) < 0)
> -		status = error("unable to unpack %s header",
> -			       sha1_to_hex(sha1));
> -	else if ((status = parse_sha1_header(hdr, &size)) < 0)
> -		status = error("unable to parse %s header", sha1_to_hex(sha1));
> -	else if (oi->sizep)
> +	if ((flags & LOOKUP_LITERALLY)) {
> +		if (unpack_sha1_header_literally(&stream, map, mapsize, &hdrbuf) < 0)
> +			status = error("unable to unpack %s header with --literally",
> +				       sha1_to_hex(sha1));
> +		else if ((status = parse_sha1_header_extended(hdrbuf.buf, oi, flags)) < 0)
> +			status = error("unable to parse %s header", sha1_to_hex(sha1));
> +	} else {
> +		if (unpack_sha1_header(&stream, map, mapsize, hdr, sizeof(hdr)) < 0)
> +			status = error("unable to unpack %s header",
> +				       sha1_to_hex(sha1));
> +		else if ((status = parse_sha1_header(hdr, &size)) < 0)
> +			status = error("unable to parse %s header", sha1_to_hex(sha1));
> +	}
> +	if (oi->sizep)
>  		*oi->sizep = size;

Have you considered calling parse_sha1_header_extended() for both
literally and normal cases?  Then you wouldn't have to do any of the
"if (oi->blah) *oi->blah = value", no?

>  	git_inflate_end(&stream);
>  	munmap(map, mapsize);
>  	if (oi->typep)
>  		*oi->typep = status;

Likewise.

> +	if (oi->typename && !(oi->typename->len))
> +		strbuf_addstr(oi->typename, typename(status));

Likewise.

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH v4 2/2] sha1_file: refactor sha1_file.c to support 'cat-file --literally'
  2015-03-17 20:10   ` Junio C Hamano
@ 2015-03-18 19:35     ` karthik nayak
  0 siblings, 0 replies; 7+ messages in thread
From: karthik nayak @ 2015-03-18 19:35 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git



On 03/18/2015 01:40 AM, Junio C Hamano wrote:
> Karthik Nayak <karthik.188@gmail.com> writes:
>
>> Subject: Re: [PATCH v4 2/2] sha1_file: refactor sha1_file.c to support 'cat-file --literally'
>
>> Modify sha1_loose_object_info() to support 'cat-file --literally'
>> by accepting flags and also make changes to copy the type to
>> object_info::typename.
>
> It would be more descriptive to mention the central point on the
> title regarding what it takes to "support cat-file --literally".
>
> For example:
>
>    sha1_file.c: support reading from a loose object of a bogus type
>
>    Update sha1_loose_object_info() to optionally allow it to read
>    from a loose object file of unknown/bogus type; as the function
>    usually returns the type of the object it read in the form of enum
>    for known types, add an optional "typename" field to receive the
>    name of the type in textual form.
>
> By the way, I think your 1/2 would not even compile unless you have
> this change; the patches in these two patch series must be swapped,
> no?
>
Noted. Yes it wouldn't. I thought both would be applied together and 
didn't give it much thought, but yeah, I should pay more attention to that.
>> diff --git a/sha1_file.c b/sha1_file.c
>> index 69a60ec..e31e9e2 100644
>> --- a/sha1_file.c
>> +++ b/sha1_file.c
>> @@ -1564,6 +1564,36 @@ int unpack_sha1_header(git_zstream *stream, unsigned char *map, unsigned long ma
>>   	return git_inflate(stream, 0);
>>   }
>>
>> +static int unpack_sha1_header_literally(git_zstream *stream, unsigned char *map,
>> +					unsigned long mapsize,
>> +					struct strbuf *header)
>> +{
>> +	unsigned char buffer[32], *cp;
>> +	unsigned long bufsiz = sizeof(buffer);
>> +	int status;
>> +
>> +	/* Get the data stream */
>> +	memset(stream, 0, sizeof(*stream));
>> +	stream->next_in = map;
>> +	stream->avail_in = mapsize;
>> +	stream->next_out = buffer;
>> +	stream->avail_out = bufsiz;
>> +
>> +	git_inflate_init(stream);
>> +
>> +	do {
>> +		status = git_inflate(stream, 0);
>> +		strbuf_add(header, buffer, stream->next_out - buffer);
>> +		for (cp = buffer; cp < stream->next_out; cp++)
>> +			if (!*cp)
>> +				/* Found the NUL at the end of the header */
>> +				return 0;
>> +		stream->next_out = buffer;
>> +		stream->avail_out = bufsiz;
>> +	} while (status == Z_OK);
>> +	return -1;
>> +}
>> +
>
> There is nothing "literal" about this function.
>
> The only difference between the original and this one is that this
> uses a strbuf, instead of a buffer of a fixed size allocated by the
> caller, to return the early part of the inflated data.
>
> I wonder if it incurs too much overhead to the existing callers if
> you reimplementated unpack_sha1_header() as a thin wrapper around
> this function, something like
>
> 	int unpack_sha1_header(git_zstream *stream,
> 				unsigned char *map, unsigned long mapsize,
>                                  void *buffer, unsigned long bufsiz)
> 	{
>                  int status;
> 		struct strbuf header = STRBUF_INIT;
>
>                  status = unpack_sha1_header_to_strbuf(stream, map, mapsize, &header);
> 		if (bufsiz < header.len)
>                  	die("object header too long");
> 		memcpy(buffer, header.buf, header.len);
>                  return status;
> 	}
>      			
> Note that I am *not* suggesting to do this blindly.  If there is no
> downside from performance point of view, however, the above would be
> a way to avoid code duplication.
>
> Another way to avoid code duplication may be to implement
> unpack_sha1_header_to_strbuf() in terms of unpack_sha1_header(),
> perhaps like this:
>
> 	static int unpack_sha1_header_to_strbuf(...)
>          {
> 		unsigned char buffer[32], *cp;
>                  unsigned long bufsiz = sizeof(buffer);
>          	int status = unpack_sha1_header(stream, map, mapsize, buffer, bufsiz);
>
> 		if (status)
>                  	return status;
> 		do {
> 			strbuf_add(header, buffer, stream->next_out - buffer);
> 			for (cp = buffer; cp < stream->next_out; cp++)
> 				if (!*cp)
> 					/* Found the NUL at the end of the header */
> 					return 0;
> 			stream->next_out = buffer;
> 			stream->avail_out = bufsiz;
> 		} while (status == Z_OK);
> 		return -1;
> 	}
>
> which may be more in line with how we read data from loose objects.

Agreed there is code duplication with unpack_sha1_header() and 
unpack_sha1_header_extended(). But I thought there would be a 
performance trade off, Any way I could test this?
Also the second suggestion you have given seems to be more practical, As 
there is no performance overhead, if called by existing code.
>
>> +int parse_sha1_header_extended(const char *hdr, struct object_info *oi,
>> +			       int flags)
>
> Unless we are taking advantage of the fact that MSB is special in
> signed integral types, I would prefer to see us use an unsigned type
> to store these bits in a variable of an integral type.  That would
> let the readers assume that they have fewer tricky things possibly
> going on in the code (also see the footnote to $gmane/263751).

Thanks for the link. Will change to unsigned.
>
>> @@ -1652,12 +1674,45 @@ int parse_sha1_header(const char *hdr, unsigned long *sizep)
>>   			size = size * 10 + c;
>>   		}
>>   	}
>> -	*sizep = size;
>> +
>> +	type = type_from_string_gently(typename.buf, -1, 1);
>
> Doesn't the code know how long the typename is at this point?
Yes, will change.
>
>> +	if (oi->sizep)
>> +		*oi->sizep = size;
>> +	if (oi->typename)
>> +		strbuf_addstr(oi->typename, typename.buf);
>
> Likewise.  Perhaps strbuf_addbuf()?
That also calls strbuf_add(). But does improve readability
>
>> +	if (oi->typep)
>> +		*oi->typep = type;
>
> Do you want to store -1 to *oi->typep when the caller asked to do
> the "literally" thing?  Shouldn't it match what is returned from
> this function?
Yes it should. Will make changes.
>
>> +	return *hdr ? -1 : type;
>> +}
>> +
>> +/*
>> + * We used to just use "sscanf()", but that's actually way
>> + * too permissive for what we want to check. So do an anal
>> + * object header parse by hand. Calls the extended function.
>> + */
>
> The comment "let's do better than sscanf() by parsing ourselves"
> applies to the implementation of _extended() function, not this one,
> no?  It is clear to everybody that it "Calls the extended function",
> so why mention it?
>
>> +int parse_sha1_header(const char *hdr, unsigned long *sizep)
>> +{
>> +	struct object_info oi;
>> +
>> +	oi.sizep = sizep;
>> +	oi.typename = NULL;
>> +	oi.typep = NULL;
>> +	return parse_sha1_header_extended(hdr, &oi, LOOKUP_REPLACE_OBJECT);
>>   }
>
>> @@ -2524,13 +2579,15 @@ struct packed_git *find_sha1_pack(const unsigned char *sha1,
>>   }
>>
>>   static int sha1_loose_object_info(const unsigned char *sha1,
>> -				  struct object_info *oi)
>> +				  struct object_info *oi,
>> +				  int flags)
>>   {
>> -	int status;
>> +	int status = 0;
>>   	unsigned long mapsize, size;
>>   	void *map;
>>   	git_zstream stream;
>>   	char hdr[32];
>> +	struct strbuf hdrbuf = STRBUF_INIT;
>>
>>   	if (oi->delta_base_sha1)
>>   		hashclr(oi->delta_base_sha1);
>> @@ -2557,17 +2614,29 @@ static int sha1_loose_object_info(const unsigned char *sha1,
>>   		return -1;
>>   	if (oi->disk_sizep)
>>   		*oi->disk_sizep = mapsize;
>> -	if (unpack_sha1_header(&stream, map, mapsize, hdr, sizeof(hdr)) < 0)
>> -		status = error("unable to unpack %s header",
>> -			       sha1_to_hex(sha1));
>> -	else if ((status = parse_sha1_header(hdr, &size)) < 0)
>> -		status = error("unable to parse %s header", sha1_to_hex(sha1));
>> -	else if (oi->sizep)
>> +	if ((flags & LOOKUP_LITERALLY)) {
>> +		if (unpack_sha1_header_literally(&stream, map, mapsize, &hdrbuf) < 0)
>> +			status = error("unable to unpack %s header with --literally",
>> +				       sha1_to_hex(sha1));
>> +		else if ((status = parse_sha1_header_extended(hdrbuf.buf, oi, flags)) < 0)
>> +			status = error("unable to parse %s header", sha1_to_hex(sha1));
>> +	} else {
>> +		if (unpack_sha1_header(&stream, map, mapsize, hdr, sizeof(hdr)) < 0)
>> +			status = error("unable to unpack %s header",
>> +				       sha1_to_hex(sha1));
>> +		else if ((status = parse_sha1_header(hdr, &size)) < 0)
>> +			status = error("unable to parse %s header", sha1_to_hex(sha1));
>> +	}
>> +	if (oi->sizep)
>>   		*oi->sizep = size;
>
> Have you considered calling parse_sha1_header_extended() for both
> literally and normal cases?  Then you wouldn't have to do any of the
> "if (oi->blah) *oi->blah = value", no?
>
>>   	git_inflate_end(&stream);
>>   	munmap(map, mapsize);
>>   	if (oi->typep)
>>   		*oi->typep = status;
>
> Likewise.
>
>> +	if (oi->typename && !(oi->typename->len))
>> +		strbuf_addstr(oi->typename, typename(status));
>
> Likewise.
>

No. I have not considered that. will look into this.
Thanks for the suggestions.

Regards
-Karthik

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2015-03-18 19:35 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-03-17  5:13 [PATCH v4 0/2] cat-file: add a '--literally' option karthik nayak
2015-03-17  5:16 ` [PATCH v4 1/2] cat-file: teach cat-file " Karthik Nayak
2015-03-17  6:51   ` Eric Sunshine
2015-03-17 13:59     ` Karthik Nayak
2015-03-17  5:16 ` [PATCH v4 2/2] sha1_file: refactor sha1_file.c to support 'cat-file --literally' Karthik Nayak
2015-03-17 20:10   ` Junio C Hamano
2015-03-18 19:35     ` karthik nayak

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.