All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/2] cat-file --literally
@ 2015-02-25 11:06 karthik nayak
  2015-02-25 11:07 ` [PATCH 1/2] sha1_file: Add sha1_object_type_literally and export it Karthik Nayak
  2015-02-25 11:08 ` [PATCH 2/2] cat-file: add --literally option Karthik Nayak
  0 siblings, 2 replies; 11+ messages in thread
From: karthik nayak @ 2015-02-25 11:06 UTC (permalink / raw)
  To: Git List

These changes introduce a "--literally" option to "cat-file" to
allow us to read the type of any loose object, mostly the ones
created using the "--literally" option of "hash-object".

For now it works with the "-t" option as :

     git cat-object -t --literally <object_sha>

Can be made to work with "-e" and "-s" option in the future.

The first patch adds "sha1_object_type_literally()" which is
called by "cat-object" to print the type.
The second patch adds the "--literally" option to "cat-file".

These patches are in response for the patches made here :
http://thread.gmane.org/gmane.comp.version-control.git/243560/focus=243628

^ permalink raw reply	[flat|nested] 11+ messages in thread

* [PATCH 1/2] sha1_file: Add sha1_object_type_literally and export it.
  2015-02-25 11:06 [PATCH 0/2] cat-file --literally karthik nayak
@ 2015-02-25 11:07 ` Karthik Nayak
  2015-02-25 18:22   ` David Turner
                     ` (2 more replies)
  2015-02-25 11:08 ` [PATCH 2/2] cat-file: add --literally option Karthik Nayak
  1 sibling, 3 replies; 11+ messages in thread
From: Karthik Nayak @ 2015-02-25 11:07 UTC (permalink / raw)
  To: git; +Cc: Karthik Nayak

sha1_object_type_literally takes a sha value and
gives the type of the given loose object, used by
git cat-file -t --literally.

Signed-off-by: Karthik Nayak <karthik.188@gmail.com>
---
 cache.h     |  1 +
 sha1_file.c | 27 +++++++++++++++++++++++++++
 2 files changed, 28 insertions(+)

diff --git a/cache.h b/cache.h
index 4d02efc..48086b9 100644
--- a/cache.h
+++ b/cache.h
@@ -872,6 +872,7 @@ extern int git_open_noatime(const char *name);
 extern void *map_sha1_file(const unsigned char *sha1, unsigned long *size);
 extern int unpack_sha1_header(git_zstream *stream, unsigned char *map, unsigned long mapsize, void *buffer, unsigned long bufsiz);
 extern int parse_sha1_header(const char *hdr, unsigned long *sizep);
+extern int sha1_object_type_literally(const unsigned char *sha1, char *type);
 
 /* global flag to enable extra checks when accessing packed objects */
 extern int do_check_packed_object_crc;
diff --git a/sha1_file.c b/sha1_file.c
index 69a60ec..34b4810 100644
--- a/sha1_file.c
+++ b/sha1_file.c
@@ -2635,6 +2635,33 @@ int sha1_object_info(const unsigned char *sha1, unsigned long *sizep)
 	return type;
 }
 
+int sha1_object_type_literally(const unsigned char *sha1, char *type)
+{
+	int status = 0;
+	unsigned long mapsize;
+	void *map;
+	git_zstream stream;
+	char hdr[32];
+	int i;
+
+	map = map_sha1_file(sha1, &mapsize);
+	if (!map)
+		return -1;
+	if (unpack_sha1_header(&stream, map, mapsize, hdr, sizeof(hdr)) < 0)
+		status = error("unable to unpack %s header",
+			       sha1_to_hex(sha1));
+
+	for (i = 0; i < 32; i++) {
+		if (hdr[i] == ' ') {
+			type[i] = '\0';
+			break;
+		}
+		type[i] = hdr[i];
+	}
+
+	return status;
+}
+
 static void *read_packed_sha1(const unsigned char *sha1,
 			      enum object_type *type, unsigned long *size)
 {
-- 
2.3.1.129.g11acff1.dirty

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH 2/2] cat-file: add --literally option.
  2015-02-25 11:06 [PATCH 0/2] cat-file --literally karthik nayak
  2015-02-25 11:07 ` [PATCH 1/2] sha1_file: Add sha1_object_type_literally and export it Karthik Nayak
@ 2015-02-25 11:08 ` Karthik Nayak
  2015-02-25 22:14   ` Eric Sunshine
  1 sibling, 1 reply; 11+ messages in thread
From: Karthik Nayak @ 2015-02-25 11:08 UTC (permalink / raw)
  To: git; +Cc: Karthik Nayak

Add --literally option which when used with -t option
gives the type of the object given, irrelevant of the type
and its contents.

Signed-off-by: Karthik Nayak <karthik.188@gmail.com>
---
 builtin/cat-file.c | 35 +++++++++++++++++++++++++++++++++--
 1 file changed, 33 insertions(+), 2 deletions(-)

diff --git a/builtin/cat-file.c b/builtin/cat-file.c
index df99df4..1db94fe 100644
--- a/builtin/cat-file.c
+++ b/builtin/cat-file.c
@@ -107,6 +107,29 @@ static int cat_one_file(int opt, const char *exp_type, const char *obj_name)
 	return 0;
 }
 
+static int cat_one_file_literally(int opt, const char *exp_type, const char *obj_name)
+{
+	unsigned char sha1[20];
+	unsigned char type[32];
+	struct object_context obj_context;
+	struct object_info oi = {NULL};
+	int retval = 0;
+
+	if (get_sha1_with_context(obj_name, 0, sha1, &obj_context))
+		die("Not a valid object name %s", obj_name);
+
+	retval = has_sha1_file(sha1);
+	if (!retval)
+		return retval;
+
+	if(sha1_object_type_literally(sha1, type))
+		die("git cat-file -t --literally %s: invalid object", obj_name);
+
+	printf("%s\n", type);
+
+	return retval;
+}
+
 struct expand_data {
 	unsigned char sha1[20];
 	enum object_type type;
@@ -324,7 +347,7 @@ static int batch_objects(struct batch_options *opt)
 
 static const char * const cat_file_usage[] = {
 	N_("git cat-file (-t | -s | -e | -p | <type> | --textconv) <object>"),
-	N_("git cat-file (--batch | --batch-check) < <list-of-objects>"),
+	N_("git cat-file (-t|-s|-e|-p|<type>|--textconv|-t --literally) <object>"),
 	NULL
 };
 
@@ -359,6 +382,7 @@ int cmd_cat_file(int argc, const char **argv, const char *prefix)
 	int opt = 0;
 	const char *exp_type = NULL, *obj_name = NULL;
 	struct batch_options batch = {0};
+	int literally = 0;
 
 	const struct option options[] = {
 		OPT_GROUP(N_("<type> can be one of: blob, tree, commit, tag")),
@@ -369,6 +393,8 @@ int cmd_cat_file(int argc, const char **argv, const char *prefix)
 		OPT_SET_INT('p', NULL, &opt, N_("pretty-print object's content"), 'p'),
 		OPT_SET_INT(0, "textconv", &opt,
 			    N_("for blob objects, run textconv on object's content"), 'c'),
+		OPT_BOOL( 0, "literally", &literally,
+			  N_("show the type of the gicen loose object, use for debugging")),
 		{ OPTION_CALLBACK, 0, "batch", &batch, "format",
 			N_("show info and content of objects fed from the standard input"),
 			PARSE_OPT_OPTARG, batch_option_callback },
@@ -380,7 +406,7 @@ int cmd_cat_file(int argc, const char **argv, const char *prefix)
 
 	git_config(git_cat_file_config, NULL);
 
-	if (argc != 3 && argc != 2)
+	if (argc != 3 && argc != 2 && (!literally && argc != 4))
 		usage_with_options(cat_file_usage, options);
 
 	argc = parse_options(argc, argv, prefix, options, cat_file_usage, 0);
@@ -405,5 +431,10 @@ int cmd_cat_file(int argc, const char **argv, const char *prefix)
 	if (batch.enabled)
 		return batch_objects(&batch);
 
+	if (literally && opt == 't')
+		return cat_one_file_literally(opt, exp_type, obj_name);
+	else if (literally)
+		usage_with_options(cat_file_usage, options);
+
 	return cat_one_file(opt, exp_type, obj_name);
 }
-- 
2.3.1.129.g11acff1.dirty

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* Re: [PATCH 1/2] sha1_file: Add sha1_object_type_literally and export it.
  2015-02-25 11:07 ` [PATCH 1/2] sha1_file: Add sha1_object_type_literally and export it Karthik Nayak
@ 2015-02-25 18:22   ` David Turner
  2015-02-25 19:59     ` karthik nayak
  2015-02-25 21:32   ` Junio C Hamano
  2015-02-25 21:55   ` Eric Sunshine
  2 siblings, 1 reply; 11+ messages in thread
From: David Turner @ 2015-02-25 18:22 UTC (permalink / raw)
  To: Karthik Nayak; +Cc: git

On Wed, 2015-02-25 at 16:37 +0530, Karthik Nayak wrote:
> +	unsigned long mapsize;
> ...
> +	map = map_sha1_file(sha1, &mapsize);

I know this is a pre-existing issue, but I'm not sure "unsigned long" is
the right type here.  Shouldn't it be a size_t?  

> +	if (!map)
> +		return -1;
> +	if (unpack_sha1_header(&stream, map, mapsize, hdr, sizeof(hdr)) < 0)
> +		status = error("unable to unpack %s header",
> +			       sha1_to_hex(sha1));
> +
> +	for (i = 0; i < 32; i++) {

This number should probably be a constant.

> +		if (hdr[i] == ' ') {
> +			type[i] = '\0';
> +			break;
> +		}
> +		type[i] = hdr[i];
> +	}

type might end up without a trailing \0 here in the case where hdr has
no space in it.  Is this possible?

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 1/2] sha1_file: Add sha1_object_type_literally and export it.
  2015-02-25 18:22   ` David Turner
@ 2015-02-25 19:59     ` karthik nayak
  2015-02-25 20:15       ` David Turner
  0 siblings, 1 reply; 11+ messages in thread
From: karthik nayak @ 2015-02-25 19:59 UTC (permalink / raw)
  To: David Turner; +Cc: git



On 02/25/2015 11:52 PM, David Turner wrote:
> On Wed, 2015-02-25 at 16:37 +0530, Karthik Nayak wrote:
>> +	unsigned long mapsize;
>> ...
>> +	map = map_sha1_file(sha1, &mapsize);
>
> I know this is a pre-existing issue, but I'm not sure "unsigned long" is
> the right type here.  Shouldn't it be a size_t?
I got the type from the function definition of map_sha1_file(), which is 
"void *map_sha1_file(const unsigned char *sha1, unsigned long *size)"
>
>> +	if (!map)
>> +		return -1;
>> +	if (unpack_sha1_header(&stream, map, mapsize, hdr, sizeof(hdr)) < 0)
>> +		status = error("unable to unpack %s header",
>> +			       sha1_to_hex(sha1));
>> +
>> +	for (i = 0; i < 32; i++) {
>
> This number should probably be a constant.
Do you want me to define it as a preprocessor directive?
>
>> +		if (hdr[i] == ' ') {
>> +			type[i] = '\0';
>> +			break;
>> +		}
>> +		type[i] = hdr[i];
>> +	}
>
> type might end up without a trailing \0 here in the case where hdr has
> no space in it.  Is this possible?
What's possible is when the object type name is greater than 32bytes 
"hdr" will not be able to hold the whole type, its a tradeoff, I guess I 
should put a null terminator at the end of "hdr". What do you suggest?
>
>

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 1/2] sha1_file: Add sha1_object_type_literally and export it.
  2015-02-25 19:59     ` karthik nayak
@ 2015-02-25 20:15       ` David Turner
  0 siblings, 0 replies; 11+ messages in thread
From: David Turner @ 2015-02-25 20:15 UTC (permalink / raw)
  To: karthik nayak; +Cc: git

On Thu, 2015-02-26 at 01:29 +0530, karthik nayak wrote:
> 
> On 02/25/2015 11:52 PM, David Turner wrote:
> > On Wed, 2015-02-25 at 16:37 +0530, Karthik Nayak wrote:
> >> +	unsigned long mapsize;
> >> ...
> >> +	map = map_sha1_file(sha1, &mapsize);
> >
> > I know this is a pre-existing issue, but I'm not sure "unsigned long" is
> > the right type here.  Shouldn't it be a size_t?
> I got the type from the function definition of map_sha1_file(), which is 
> "void *map_sha1_file(const unsigned char *sha1, unsigned long *size)"

Yep. That's why I describe it as a pre-existing issue - it's not your
fault that the type is funny.  But it might be worth cleaning up while
you're in here.  Or it might not -- maybe I'm totally off base and the
type is correct.

> >
> >> +	if (!map)
> >> +		return -1;
> >> +	if (unpack_sha1_header(&stream, map, mapsize, hdr, sizeof(hdr)) < 0)
> >> +		status = error("unable to unpack %s header",
> >> +			       sha1_to_hex(sha1));
> >> +
> >> +	for (i = 0; i < 32; i++) {
> >
> > This number should probably be a constant.
> Do you want me to define it as a preprocessor directive?

Sounds good to me.

> >> +		if (hdr[i] == ' ') {
> >> +			type[i] = '\0';
> >> +			break;
> >> +		}
> >> +		type[i] = hdr[i];
> >> +	}
> >
> > type might end up without a trailing \0 here in the case where hdr has
> > no space in it.  Is this possible?
> What's possible is when the object type name is greater than 32bytes 
> "hdr" will not be able to hold the whole type, its a tradeoff, I guess I 
> should put a null terminator at the end of "hdr". What do you suggest?

Is it an error for the object type name to be > 32 bytes?  If there is
no max length, then maybe you should use the strbuf API for this.
Silently truncating is probably the wrong thing to do, and leaving it
unterminated is dangerous unless callers know how to handle that case.
If it is an error, then you should return an error code.

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 1/2] sha1_file: Add sha1_object_type_literally and export it.
  2015-02-25 11:07 ` [PATCH 1/2] sha1_file: Add sha1_object_type_literally and export it Karthik Nayak
  2015-02-25 18:22   ` David Turner
@ 2015-02-25 21:32   ` Junio C Hamano
  2015-02-25 22:44     ` Junio C Hamano
  2015-02-25 21:55   ` Eric Sunshine
  2 siblings, 1 reply; 11+ messages in thread
From: Junio C Hamano @ 2015-02-25 21:32 UTC (permalink / raw)
  To: Karthik Nayak; +Cc: git

Karthik Nayak <karthik.188@gmail.com> writes:

> +int sha1_object_type_literally(const unsigned char *sha1, char *type)
> +{
> +	int status = 0;
> +	unsigned long mapsize;
> +	void *map;
> +	git_zstream stream;
> +	char hdr[32];
> +	int i;
> +
> +	map = map_sha1_file(sha1, &mapsize);
> +	if (!map)
> +		return -1;

I am not sure if it is a good idea to introduce a function with the
above signature and the semantics in the first place.  It is OK to
assume that objects with funny typenames only appear as loose
objects, but it is not OK for "cat-file --literally" to fail to work
on objects that are of kosher types.

What should "git cat-file --literally -t HEAD~2" do?  I think it
should say "commit" as long as my current history is at least 3
commits deep, even when my repository is fully packed.  But I
suspect the above code does not do that.  

Looking at how we collect information on normal objects, it may make
more sense to model this after sha1_loose_object_info(), with a
tweak to struct object_info datatype, and integrate it into
sha1_object_info_extended() may make more sense, perhaps along the
lines of the attached patch.

The new helper would mimick what sha1_loose_object_info() is doing,
in that it may be used to learn on-disk size, object size, typename
string (returned in oi->typename strbuf that is optional).  There is
no sensible value to stuff in oi->typep if the incoming object name
refers to the experimental invalid object, so perhaps you will store
OBJ_NONE or something there and the "cat-file --literally" would use
the oi->typename to learn the name of the "type".


 cache.h     | 1 +
 sha1_file.c | 9 +++++++++
 2 files changed, 10 insertions(+)

diff --git a/cache.h b/cache.h
index 4d02efc..72b7cfa 100644
--- a/cache.h
+++ b/cache.h
@@ -1296,6 +1296,7 @@ struct object_info {
 	unsigned long *sizep;
 	unsigned long *disk_sizep;
 	unsigned char *delta_base_sha1;
+	struct strbuf *typename;
 
 	/* Response */
 	enum {
diff --git a/sha1_file.c b/sha1_file.c
index 69a60ec..2c0e83a 100644
--- a/sha1_file.c
+++ b/sha1_file.c
@@ -2568,6 +2568,8 @@ static int sha1_loose_object_info(const unsigned char *sha1,
 	munmap(map, mapsize);
 	if (oi->typep)
 		*oi->typep = status;
+	if (oi->typename && 0 < status && typename(status))
+		strbuf_add(oi->typename, typename(status));
 	return 0;
 }
 
@@ -2593,6 +2595,13 @@ int sha1_object_info_extended(const unsigned char *sha1, struct object_info *oi,
 	}
 
 	if (!find_pack_entry(real, &e)) {
+		/* Have we been asked to check possibly invalid ones? */
+		if ((flags & LOOKUP_LITERALLY) &&
+		    !sha1_object_info_literally(real, oi)) {
+			oi->whence = OI_LOOSE;
+			return 0;
+		}
+
 		/* Most likely it's a loose object. */
 		if (!sha1_loose_object_info(real, oi)) {
 			oi->whence = OI_LOOSE;

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* Re: [PATCH 1/2] sha1_file: Add sha1_object_type_literally and export it.
  2015-02-25 11:07 ` [PATCH 1/2] sha1_file: Add sha1_object_type_literally and export it Karthik Nayak
  2015-02-25 18:22   ` David Turner
  2015-02-25 21:32   ` Junio C Hamano
@ 2015-02-25 21:55   ` Eric Sunshine
  2015-02-26 15:07     ` Karthik Nayak
  2 siblings, 1 reply; 11+ messages in thread
From: Eric Sunshine @ 2015-02-25 21:55 UTC (permalink / raw)
  To: Karthik Nayak; +Cc: Git List

I had written a longer review but was interrupted for a several hours,
and upon returning found that David and Junio covered many of the same
issues or overrode comments I was making, so the below review is pared
down quite a bit. Junio's proposed approach negates all of the below
review comments, but they may still be meaningful if kept in mind for
future submissions.

On Wed, Feb 25, 2015 at 6:07 AM, Karthik Nayak <karthik.188@gmail.com> wrote:
> sha1_file: Add sha1_object_type_literally and export it.

Style: downcase "Add"; drop terminating period.

> sha1_object_type_literally takes a sha value and
> gives the type of the given loose object, used by
> git cat-file -t --literally.
>
> Signed-off-by: Karthik Nayak <karthik.188@gmail.com>
> ---
> --- a/sha1_file.c
> +++ b/sha1_file.c
> @@ -2635,6 +2635,33 @@ int sha1_object_info(const unsigned char *sha1, unsigned long *sizep)
>         return type;
>  }
>
> +int sha1_object_type_literally(const unsigned char *sha1, char *type)

This functionality is very specific to the --literally option you're
adding to cat-file, so it would make more sense to make it private to
builtin/cat-file.c rather than publishing it globally.

Also, this is an unsafe contract. The caller does not know how many
bytes to allocate for 'type', and this new function may write past the
end of the buffer. It is more common to also pass in the size of the
'type' buffer and ensure that you do not write beyond that. Or, if
this is intended for wider consumption, pass in a strbuf instead.

> +{
> +       int status = 0;
> +       unsigned long mapsize;
> +       void *map;
> +       git_zstream stream;
> +       char hdr[32];
> +       int i;
> +
> +       map = map_sha1_file(sha1, &mapsize);
> +       if (!map)
> +               return -1;
> +       if (unpack_sha1_header(&stream, map, mapsize, hdr, sizeof(hdr)) < 0)
> +               status = error("unable to unpack %s header",
> +                              sha1_to_hex(sha1));

Since 'hdr' unpacking failed, shouldn't you be returning at this point
rather than continuing to the 'hdr' processing loop?

> +       for (i = 0; i < 32; i++) {
> +               if (hdr[i] == ' ') {
> +                       type[i] = '\0';
> +                       break;
> +               }
> +               type[i] = hdr[i];
> +       }

David already mentioned that this loop is suspect. Perhaps take a look
at, sha1_file.c:parse_sha1_header() for an example of cleaner logic.

> +
> +       return status;
> +}
> +
>  static void *read_packed_sha1(const unsigned char *sha1,
>                               enum object_type *type, unsigned long *size)
>  {
> --
> 2.3.1.129.g11acff1.dirty

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 2/2] cat-file: add --literally option.
  2015-02-25 11:08 ` [PATCH 2/2] cat-file: add --literally option Karthik Nayak
@ 2015-02-25 22:14   ` Eric Sunshine
  0 siblings, 0 replies; 11+ messages in thread
From: Eric Sunshine @ 2015-02-25 22:14 UTC (permalink / raw)
  To: Karthik Nayak; +Cc: Git List

On Wed, Feb 25, 2015 at 6:08 AM, Karthik Nayak <karthik.188@gmail.com> wrote:
> cat-file: add --literally option.

Style: drop terminating period.

> Add --literally option which when used with -t option
> gives the type of the object given, irrelevant of the type
> and its contents.

Confusing: "gives the type [...] irrelevant of the type"?

> Signed-off-by: Karthik Nayak <karthik.188@gmail.com>
> ---
> diff --git a/builtin/cat-file.c b/builtin/cat-file.c
> index df99df4..1db94fe 100644
> --- a/builtin/cat-file.c
> +++ b/builtin/cat-file.c
> @@ -107,6 +107,29 @@ static int cat_one_file(int opt, const char *exp_type, const char *obj_name)
>         return 0;
>  }
>
> +static int cat_one_file_literally(int opt, const char *exp_type, const char *obj_name)

Unused arguments: 'opt' and 'exp_type'

This function duplicates enough functionality of cat_one_file() that
it almost seems that it could be folded into the 't' case in that
function, but for the sake of review, let's assume that you'll keep it
separate.

> +{
> +       unsigned char sha1[20];
> +       unsigned char type[32];
> +       struct object_context obj_context;
> +       struct object_info oi = {NULL};

Unused variable: 'oi'

> +       int retval = 0;
> +
> +       if (get_sha1_with_context(obj_name, 0, sha1, &obj_context))

You never take advantage of the extra context in 'obj_context', so
perhaps get_sha1() would be preferable?

> +               die("Not a valid object name %s", obj_name);
> +
> +       retval = has_sha1_file(sha1);
> +       if (!retval)
> +               return retval;

Shouldn't this case be emitting some sort of diagnostic rather than
exiting silently?

> +       if(sha1_object_type_literally(sha1, type))

Style: space after 'if'

> +               die("git cat-file -t --literally %s: invalid object", obj_name);

Inconsistent error message style with the die() just above this one.
(cat_one_file() has its own inconsistencies, but new code need not
follow suit.)

> +
> +       printf("%s\n", type);
> +
> +       return retval;
> +}
> +
>  struct expand_data {
>         unsigned char sha1[20];
>         enum object_type type;
> @@ -324,7 +347,7 @@ static int batch_objects(struct batch_options *opt)
>
>  static const char * const cat_file_usage[] = {
>         N_("git cat-file (-t | -s | -e | -p | <type> | --textconv) <object>"),
> -       N_("git cat-file (--batch | --batch-check) < <list-of-objects>"),
> +       N_("git cat-file (-t|-s|-e|-p|<type>|--textconv|-t --literally) <object>"),

This is strange. According to this diff, you've removed the --batch
and --batch-check options; and you're showing the -t, -s, -e, etc.
options twice.

>         NULL
>  };
>
> @@ -359,6 +382,7 @@ int cmd_cat_file(int argc, const char **argv, const char *prefix)
>         int opt = 0;
>         const char *exp_type = NULL, *obj_name = NULL;
>         struct batch_options batch = {0};
> +       int literally = 0;

Mental note: 'literally' is initialized to false (0).

>         const struct option options[] = {
>                 OPT_GROUP(N_("<type> can be one of: blob, tree, commit, tag")),
> @@ -369,6 +393,8 @@ int cmd_cat_file(int argc, const char **argv, const char *prefix)
>                 OPT_SET_INT('p', NULL, &opt, N_("pretty-print object's content"), 'p'),
>                 OPT_SET_INT(0, "textconv", &opt,
>                             N_("for blob objects, run textconv on object's content"), 'c'),
> +               OPT_BOOL( 0, "literally", &literally,
> +                         N_("show the type of the gicen loose object, use for debugging")),

s/gicen/given/

>                 { OPTION_CALLBACK, 0, "batch", &batch, "format",
>                         N_("show info and content of objects fed from the standard input"),
>                         PARSE_OPT_OPTARG, batch_option_callback },
> @@ -380,7 +406,7 @@ int cmd_cat_file(int argc, const char **argv, const char *prefix)
>
>         git_config(git_cat_file_config, NULL);
>
> -       if (argc != 3 && argc != 2)
> +       if (argc != 3 && argc != 2 && (!literally && argc != 4))

This can't be correct. At this point, 'literally' is unconditionally
false (0). It's value won't (potentially) change until the options are
processed, which doesn't happen until parse_options() is invoked
_after_ this conditional.

>                 usage_with_options(cat_file_usage, options);
>
>         argc = parse_options(argc, argv, prefix, options, cat_file_usage, 0);
> @@ -405,5 +431,10 @@ int cmd_cat_file(int argc, const char **argv, const char *prefix)
>         if (batch.enabled)
>                 return batch_objects(&batch);
>
> +       if (literally && opt == 't')
> +               return cat_one_file_literally(opt, exp_type, obj_name);
> +       else if (literally)
> +               usage_with_options(cat_file_usage, options);
> +
>         return cat_one_file(opt, exp_type, obj_name);
>  }
> --
> 2.3.1.129.g11acff1.dirty

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 1/2] sha1_file: Add sha1_object_type_literally and export it.
  2015-02-25 21:32   ` Junio C Hamano
@ 2015-02-25 22:44     ` Junio C Hamano
  0 siblings, 0 replies; 11+ messages in thread
From: Junio C Hamano @ 2015-02-25 22:44 UTC (permalink / raw)
  To: Karthik Nayak; +Cc: git

Junio C Hamano <gitster@pobox.com> writes:

> Looking at how we collect information on normal objects, it may make
> more sense to model this after sha1_loose_object_info(), with a
> tweak to struct object_info datatype, and integrate it into
> sha1_object_info_extended() may make more sense, perhaps along the
> lines of the attached patch.
>
> The new helper would mimick what sha1_loose_object_info() is doing,
> in that it may be used to learn on-disk size, object size, typename
> string (returned in oi->typename strbuf that is optional).  There is
> no sensible value to stuff in oi->typep if the incoming object name
> refers to the experimental invalid object, so perhaps you will store
> OBJ_NONE or something there and the "cat-file --literally" would use
> the oi->typename to learn the name of the "type".

You may be able to even reuse most of the sha1_loose_object_info()
by doing something like this illustration (read: incomplete) patch:

 * add an optional typename pointer to object_info request structure
   for the caller to ask sha1_object_info() to fill.

 * unpack_sha1_header() takes advantage of the fact that the object
   header of a usual object of known type would fit within 32 bytes,
   and that otherwise the object is invalid anyway.  A literal
   reader cannot afford to rely on these assumptions, so introduce a
   reader that can read into a strbuf, and use it instead from
   sha1_loose_object_info() when the caller wants to deal with
   invalid object with a possibly overlong header.

 * teach sha1_object_info_extended() pass the "flags" parameter from
   the caller down the callchain to sha1_loose_object_info().


 cache.h     |  3 ++-
 sha1_file.c | 66 +++++++++++++++++++++++++++++++++++++++++++++++++++++++------
 2 files changed, 62 insertions(+), 7 deletions(-)

diff --git a/cache.h b/cache.h
index 4d02efc..34ede34 100644
--- a/cache.h
+++ b/cache.h
@@ -828,8 +828,8 @@ char *strip_path_suffix(const char *path, const char *suffix);
 int daemon_avoid_alias(const char *path);
 extern int is_ntfs_dotgit(const char *name);
 
-/* object replacement */
 #define LOOKUP_REPLACE_OBJECT 1
+#define LOOKUP_LITERALLY      2
 extern void *read_sha1_file_extended(const unsigned char *sha1, enum object_type *type, unsigned long *size, unsigned flag);
 static inline void *read_sha1_file(const unsigned char *sha1, enum object_type *type, unsigned long *size)
 {
@@ -1296,6 +1296,7 @@ struct object_info {
 	unsigned long *sizep;
 	unsigned long *disk_sizep;
 	unsigned char *delta_base_sha1;
+	struct strbuf *typename;
 
 	/* Response */
 	enum {
diff --git a/sha1_file.c b/sha1_file.c
index 69a60ec..0f6783e 100644
--- a/sha1_file.c
+++ b/sha1_file.c
@@ -1564,6 +1564,36 @@ int unpack_sha1_header(git_zstream *stream, unsigned char *map, unsigned long ma
 	return git_inflate(stream, 0);
 }
 
+static int unpack_sha1_header_literally(git_zstream *stream, unsigned char *map,
+					unsigned long mapsize,
+					struct strbuf *header)
+{
+	unsigned char buffer[32], *cp;
+	unsigned long bufsiz = sizeof(buffer);
+	int status;
+
+	/* Get the data stream */
+	memset(stream, 0, sizeof(*stream));
+	stream->next_in = map;
+	stream->avail_in = mapsize;
+	stream->next_out = buffer;
+	stream->avail_out = bufsiz;
+
+	git_inflate_init(stream);
+
+	do {
+		status = git_inflate(stream, 0);
+		strbuf_add(header, buffer, stream->next_out - buffer);
+		for (cp = buffer; cp < stream->next_out; cp++)
+			if (!*cp)
+				/* Found the NUL at the end of the header */
+				return 0;
+		stream->next_out = buffer;
+		stream->avail_out = bufsiz;
+	} while (status == Z_OK);
+	return -1;
+}
+
 static void *unpack_sha1_rest(git_zstream *stream, void *buffer, unsigned long size, const unsigned char *sha1)
 {
 	int bytes = strlen(buffer) + 1;
@@ -2524,13 +2554,16 @@ struct packed_git *find_sha1_pack(const unsigned char *sha1,
 }
 
 static int sha1_loose_object_info(const unsigned char *sha1,
-				  struct object_info *oi)
+				  struct object_info *oi,
+				  unsigned flags)
 {
 	int status;
 	unsigned long mapsize, size;
 	void *map;
 	git_zstream stream;
 	char hdr[32];
+	struct strbuf hdrbuf = STRBUF_INIT;
+	char *hdrp;
 
 	if (oi->delta_base_sha1)
 		hashclr(oi->delta_base_sha1);
@@ -2557,10 +2590,21 @@ static int sha1_loose_object_info(const unsigned char *sha1,
 		return -1;
 	if (oi->disk_sizep)
 		*oi->disk_sizep = mapsize;
-	if (unpack_sha1_header(&stream, map, mapsize, hdr, sizeof(hdr)) < 0)
-		status = error("unable to unpack %s header",
-			       sha1_to_hex(sha1));
-	else if ((status = parse_sha1_header(hdr, &size)) < 0)
+	if ((flags & LOOKUP_LITERALLY)) {
+		if (unpack_sha1_header_literally(&stream, map, mapsize, &hdrbuf) < 0)
+			status = error("unable to unpack %s header",
+				       sha1_to_hex(sha1));
+		hdrp = hdrbuf.buf;
+	} else {
+		if (unpack_sha1_header(&stream, map, mapsize, hdr, sizeof(hdr)) < 0)
+			status = error("unable to unpack %s header",
+				       sha1_to_hex(sha1));
+		hdrp = hdr;
+	}
+
+	if (status)
+		; /* we already have error condition */
+	else if ((status = parse_sha1_header(hdrp, &size)) < 0)
 		status = error("unable to parse %s header", sha1_to_hex(sha1));
 	else if (oi->sizep)
 		*oi->sizep = size;
@@ -2568,6 +2612,16 @@ static int sha1_loose_object_info(const unsigned char *sha1,
 	munmap(map, mapsize);
 	if (oi->typep)
 		*oi->typep = status;
+	if (oi->typename) {
+		if (0 <= status && typename(status))
+			strbuf_addstr(oi->typename, typename(status));
+		else if ((flags & LOOKUP_LITERALLY)) {
+			size_t typelen = strcspn(hdrbuf.buf, " ");
+			strbuf_add(oi->typename, hdrbuf.buf, typelen);
+		}
+	}
+	if (hdrp == hdrbuf.buf)
+		strbuf_release(&hdrbuf);
 	return 0;
 }
 
@@ -2594,7 +2648,7 @@ int sha1_object_info_extended(const unsigned char *sha1, struct object_info *oi,
 
 	if (!find_pack_entry(real, &e)) {
 		/* Most likely it's a loose object. */
-		if (!sha1_loose_object_info(real, oi)) {
+		if (!sha1_loose_object_info(real, oi, flags)) {
 			oi->whence = OI_LOOSE;
 			return 0;
 		}

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* Re: [PATCH 1/2] sha1_file: Add sha1_object_type_literally and export it.
  2015-02-25 21:55   ` Eric Sunshine
@ 2015-02-26 15:07     ` Karthik Nayak
  0 siblings, 0 replies; 11+ messages in thread
From: Karthik Nayak @ 2015-02-26 15:07 UTC (permalink / raw)
  To: Eric Sunshine; +Cc: Git List

On Wed, 2015-02-25 at 16:55 -0500, Eric Sunshine wrote:
> I had written a longer review but was interrupted for a several hours,
> and upon returning found that David and Junio covered many of the same
> issues or overrode comments I was making, so the below review is pared
> down quite a bit. Junio's proposed approach negates all of the below
> review comments, but they may still be meaningful if kept in mind for
> future submissions.
> 
> On Wed, Feb 25, 2015 at 6:07 AM, Karthik Nayak <karthik.188@gmail.com> wrote:
> > sha1_file: Add sha1_object_type_literally and export it.
> 
> Style: downcase "Add"; drop terminating period.
> 
> > sha1_object_type_literally takes a sha value and
> > gives the type of the given loose object, used by
> > git cat-file -t --literally.
> >
> > Signed-off-by: Karthik Nayak <karthik.188@gmail.com>
> > ---
> > --- a/sha1_file.c
> > +++ b/sha1_file.c
> > @@ -2635,6 +2635,33 @@ int sha1_object_info(const unsigned char *sha1, unsigned long *sizep)
> >         return type;
> >  }
> >
> > +int sha1_object_type_literally(const unsigned char *sha1, char *type)
> 
> This functionality is very specific to the --literally option you're
> adding to cat-file, so it would make more sense to make it private to
> builtin/cat-file.c rather than publishing it globally.
> 
> Also, this is an unsafe contract. The caller does not know how many
> bytes to allocate for 'type', and this new function may write past the
> end of the buffer. It is more common to also pass in the size of the
> 'type' buffer and ensure that you do not write beyond that. Or, if
> this is intended for wider consumption, pass in a strbuf instead.
> 
> > +{
> > +       int status = 0;
> > +       unsigned long mapsize;
> > +       void *map;
> > +       git_zstream stream;
> > +       char hdr[32];
> > +       int i;
> > +
> > +       map = map_sha1_file(sha1, &mapsize);
> > +       if (!map)
> > +               return -1;
> > +       if (unpack_sha1_header(&stream, map, mapsize, hdr, sizeof(hdr)) < 0)
> > +               status = error("unable to unpack %s header",
> > +                              sha1_to_hex(sha1));
> 
> Since 'hdr' unpacking failed, shouldn't you be returning at this point
> rather than continuing to the 'hdr' processing loop?
> 
> > +       for (i = 0; i < 32; i++) {
> > +               if (hdr[i] == ' ') {
> > +                       type[i] = '\0';
> > +                       break;
> > +               }
> > +               type[i] = hdr[i];
> > +       }
> 
> David already mentioned that this loop is suspect. Perhaps take a look
> at, sha1_file.c:parse_sha1_header() for an example of cleaner logic.
> 
> > +
> > +       return status;
> > +}
> > +
> >  static void *read_packed_sha1(const unsigned char *sha1,
> >                               enum object_type *type, unsigned long *size)
> >  {
> > --
> > 2.3.1.129.g11acff1.dirty

Thanks for all your inputs, I will work on the points you've mentioned
considering what David and Junio also have mentioned.

^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2015-02-26 15:07 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-02-25 11:06 [PATCH 0/2] cat-file --literally karthik nayak
2015-02-25 11:07 ` [PATCH 1/2] sha1_file: Add sha1_object_type_literally and export it Karthik Nayak
2015-02-25 18:22   ` David Turner
2015-02-25 19:59     ` karthik nayak
2015-02-25 20:15       ` David Turner
2015-02-25 21:32   ` Junio C Hamano
2015-02-25 22:44     ` Junio C Hamano
2015-02-25 21:55   ` Eric Sunshine
2015-02-26 15:07     ` Karthik Nayak
2015-02-25 11:08 ` [PATCH 2/2] cat-file: add --literally option Karthik Nayak
2015-02-25 22:14   ` Eric Sunshine

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.