All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] [GSOC] ref-filter: add contents:raw atom
@ 2021-05-20  8:49 ZheNing Hu via GitGitGadget
  2021-05-20 11:29 ` Hariom verma
  2021-05-20 16:20 ` Christian Couder
  0 siblings, 2 replies; 8+ messages in thread
From: ZheNing Hu via GitGitGadget @ 2021-05-20  8:49 UTC (permalink / raw)
  To: git
  Cc: Junio C Hamano, Christian Couder, Hariom Verma, ZheNing Hu, ZheNing Hu

From: ZheNing Hu <adlternative@gmail.com>

Add new formatting option %(contents:raw), which will
print all the object contents without any changes.
It will help further to migrate all cat-file formatting
logic from cat-file to ref-filter.

Signed-off-by: ZheNing Hu <adlternative@gmail.com>
---
    [GSOC] ref-filter: add contents:raw atom
    
    Learn from Olga's %(raw):
    https://github.com/git/git/pull/568/commits/bf22dae7ca387dbc92c5586c92e60cd395099399
    
    We can add a %(contents:raw) atom to ref-filter, which can output object
    contents without any change.
    
    %(contents:raw) can work on the refs which point to blob,tree,commit,tag
    objects.
    
    It also support %(*contents:raw) to dereference.
    
    With %(cotent:raw), we can later provide support for printing the
    content of the "raw" object for cat-file --batch.
    
    Why not just use Olga's %(raw)? Because %(contents) can output object
    contents already, we can reuse it, instead of using another atom %(raw).

Published-As: https://github.com/gitgitgadget/git/releases/tag/pr-958%2Fadlternative%2Fref-filter-raw-v1
Fetch-It-Via: git fetch https://github.com/gitgitgadget/git pr-958/adlternative/ref-filter-raw-v1
Pull-Request: https://github.com/gitgitgadget/git/pull/958

 ref-filter.c            | 27 +++++++++++++++++++++------
 t/t6300-for-each-ref.sh | 31 +++++++++++++++++++++++++++++++
 2 files changed, 52 insertions(+), 6 deletions(-)

diff --git a/ref-filter.c b/ref-filter.c
index e2eac50d9508..fc384a194c0c 100644
--- a/ref-filter.c
+++ b/ref-filter.c
@@ -133,7 +133,7 @@ static struct used_atom {
 			unsigned int nobracket : 1, push : 1, push_remote : 1;
 		} remote_ref;
 		struct {
-			enum { C_BARE, C_BODY, C_BODY_DEP, C_LENGTH, C_LINES,
+			enum { C_RAW, C_BARE, C_BODY, C_BODY_DEP, C_LENGTH, C_LINES,
 			       C_SIG, C_SUB, C_SUB_SANITIZE, C_TRAILERS } option;
 			struct process_trailer_options trailer_opts;
 			unsigned int nlines;
@@ -347,6 +347,8 @@ static int contents_atom_parser(const struct ref_format *format, struct used_ato
 {
 	if (!arg)
 		atom->u.contents.option = C_BARE;
+	else if (!strcmp(arg, "raw"))
+		atom->u.contents.option = C_RAW;
 	else if (!strcmp(arg, "body"))
 		atom->u.contents.option = C_BODY;
 	else if (!strcmp(arg, "size"))
@@ -1292,7 +1294,8 @@ static void append_lines(struct strbuf *out, const char *buf, unsigned long size
 }
 
 /* See grab_values */
-static void grab_sub_body_contents(struct atom_value *val, int deref, void *buf)
+static void grab_sub_body_contents(struct atom_value *val, int deref, void *buf,
+				   unsigned long buf_size, enum object_type object_type)
 {
 	int i;
 	const char *subpos = NULL, *bodypos = NULL, *sigpos = NULL;
@@ -1312,6 +1315,13 @@ static void grab_sub_body_contents(struct atom_value *val, int deref, void *buf)
 		    !starts_with(name, "trailers") &&
 		    !starts_with(name, "contents"))
 			continue;
+		if (atom->u.contents.option == C_RAW) {
+			v->s = xmemdupz(buf, buf_size);
+			continue;
+		}
+		if (object_type != OBJ_TAG && object_type != OBJ_COMMIT)
+			continue;
+
 		if (!subpos)
 			find_subpos(buf,
 				    &subpos, &sublen,
@@ -1374,25 +1384,30 @@ static void fill_missing_values(struct atom_value *val)
  * pointed at by the ref itself; otherwise it is the object the
  * ref (which is a tag) refers to.
  */
-static void grab_values(struct atom_value *val, int deref, struct object *obj, void *buf)
+static void grab_values(struct atom_value *val, int deref, struct object *obj, struct expand_data *data)
 {
+	void *buf = data->content;
+	unsigned long buf_size = data->size;
+
 	switch (obj->type) {
 	case OBJ_TAG:
 		grab_tag_values(val, deref, obj);
-		grab_sub_body_contents(val, deref, buf);
+		grab_sub_body_contents(val, deref, buf, buf_size, obj->type);
 		grab_person("tagger", val, deref, buf);
 		break;
 	case OBJ_COMMIT:
 		grab_commit_values(val, deref, obj);
-		grab_sub_body_contents(val, deref, buf);
+		grab_sub_body_contents(val, deref, buf, buf_size, obj->type);
 		grab_person("author", val, deref, buf);
 		grab_person("committer", val, deref, buf);
 		break;
 	case OBJ_TREE:
 		/* grab_tree_values(val, deref, obj, buf, sz); */
+		grab_sub_body_contents(val, deref, buf, buf_size, obj->type);
 		break;
 	case OBJ_BLOB:
 		/* grab_blob_values(val, deref, obj, buf, sz); */
+		grab_sub_body_contents(val, deref, buf, buf_size, obj->type);
 		break;
 	default:
 		die("Eh?  Object of type %d?", obj->type);
@@ -1614,7 +1629,7 @@ static int get_object(struct ref_array_item *ref, int deref, struct object **obj
 			return strbuf_addf_ret(err, -1, _("parse_object_buffer failed on %s for %s"),
 					       oid_to_hex(&oi->oid), ref->refname);
 		}
-		grab_values(ref->value, deref, *obj, oi->content);
+		grab_values(ref->value, deref, *obj, oi);
 	}
 
 	grab_common_values(ref->value, deref, oi);
diff --git a/t/t6300-for-each-ref.sh b/t/t6300-for-each-ref.sh
index 9e0214076b4d..baa3a40a70b1 100755
--- a/t/t6300-for-each-ref.sh
+++ b/t/t6300-for-each-ref.sh
@@ -686,6 +686,17 @@ test_atom refs/tags/signed-empty contents:body ''
 test_atom refs/tags/signed-empty contents:signature "$sig"
 test_atom refs/tags/signed-empty contents "$sig"
 
+test_expect_success 'basic atom: refs/tags/signed-empty contents:raw' '
+	git cat-file tag refs/tags/signed-empty >expected &&
+	git for-each-ref --format="%(contents:raw)" refs/tags/signed-empty >actual &&
+	sanitize_pgp <expected >expected.clean &&
+	sanitize_pgp <actual >actual.clean &&
+	echo "" >>expected.clean &&
+	test_cmp expected.clean actual.clean
+'
+
+test_atom refs/tags/signed-empty *contents:raw $(git cat-file commit HEAD)
+
 test_atom refs/tags/signed-short subject 'subject line'
 test_atom refs/tags/signed-short subject:sanitize 'subject-line'
 test_atom refs/tags/signed-short contents:subject 'subject line'
@@ -695,6 +706,15 @@ test_atom refs/tags/signed-short contents:signature "$sig"
 test_atom refs/tags/signed-short contents "subject line
 $sig"
 
+test_expect_success 'basic atom: refs/tags/signed-short contents:raw' '
+	git cat-file tag refs/tags/signed-short >expected &&
+	git for-each-ref --format="%(contents:raw)" refs/tags/signed-short >actual &&
+	sanitize_pgp <expected >expected.clean &&
+	sanitize_pgp <actual >actual.clean &&
+	echo "" >>expected.clean &&
+	test_cmp expected.clean actual.clean
+'
+
 test_atom refs/tags/signed-long subject 'subject line'
 test_atom refs/tags/signed-long subject:sanitize 'subject-line'
 test_atom refs/tags/signed-long contents:subject 'subject line'
@@ -708,6 +728,15 @@ test_atom refs/tags/signed-long contents "subject line
 body contents
 $sig"
 
+test_expect_success 'basic atom: refs/tags/signed-long contents:raw' '
+	git cat-file tag refs/tags/signed-long >expected &&
+	git for-each-ref --format="%(contents:raw)" refs/tags/signed-long >actual &&
+	sanitize_pgp <expected >expected.clean &&
+	sanitize_pgp <actual >actual.clean &&
+	echo "" >>expected.clean &&
+	test_cmp expected.clean actual.clean
+'
+
 test_expect_success 'set up refs pointing to tree and blob' '
 	git update-ref refs/mytrees/first refs/heads/main^{tree} &&
 	git update-ref refs/myblobs/first refs/heads/main:one
@@ -718,6 +747,7 @@ test_atom refs/mytrees/first contents:subject ""
 test_atom refs/mytrees/first body ""
 test_atom refs/mytrees/first contents:body ""
 test_atom refs/mytrees/first contents:signature ""
+test_atom refs/mytrees/first contents:raw $(git cat-file tree refs/mytrees/first)
 test_atom refs/mytrees/first contents ""
 
 test_atom refs/myblobs/first subject ""
@@ -725,6 +755,7 @@ test_atom refs/myblobs/first contents:subject ""
 test_atom refs/myblobs/first body ""
 test_atom refs/myblobs/first contents:body ""
 test_atom refs/myblobs/first contents:signature ""
+test_atom refs/myblobs/first contents:raw $(git cat-file blob refs/myblobs/first)
 test_atom refs/myblobs/first contents ""
 
 test_expect_success 'set up multiple-sort tags' '

base-commit: 97eea85a0a1ec66d356567808a1e4ca2367e0ce7
-- 
gitgitgadget

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] [GSOC] ref-filter: add contents:raw atom
  2021-05-20  8:49 [PATCH] [GSOC] ref-filter: add contents:raw atom ZheNing Hu via GitGitGadget
@ 2021-05-20 11:29 ` Hariom verma
  2021-05-20 14:34   ` ZheNing Hu
  2021-05-20 16:20 ` Christian Couder
  1 sibling, 1 reply; 8+ messages in thread
From: Hariom verma @ 2021-05-20 11:29 UTC (permalink / raw)
  To: ZheNing Hu via GitGitGadget
  Cc: git, Junio C Hamano, Christian Couder, ZheNing Hu

Hi ZheNing,

On Thu, May 20, 2021 at 2:19 PM ZheNing Hu via GitGitGadget
<gitgitgadget@gmail.com> wrote:
>
> From: ZheNing Hu <adlternative@gmail.com>
>
> diff --git a/ref-filter.c b/ref-filter.c
> index e2eac50d9508..fc384a194c0c 100644
> --- a/ref-filter.c
> +++ b/ref-filter.c
> @@ -133,7 +133,7 @@ static struct used_atom {
>                         unsigned int nobracket : 1, push : 1, push_remote : 1;
>                 } remote_ref;
>                 struct {
> -                       enum { C_BARE, C_BODY, C_BODY_DEP, C_LENGTH, C_LINES,
> +                       enum { C_RAW, C_BARE, C_BODY, C_BODY_DEP, C_LENGTH, C_LINES,

Maybe it's better if you can preserve the alphabetical order?

Thanks.

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] [GSOC] ref-filter: add contents:raw atom
  2021-05-20 11:29 ` Hariom verma
@ 2021-05-20 14:34   ` ZheNing Hu
  0 siblings, 0 replies; 8+ messages in thread
From: ZheNing Hu @ 2021-05-20 14:34 UTC (permalink / raw)
  To: Hariom verma
  Cc: ZheNing Hu via GitGitGadget, git, Junio C Hamano, Christian Couder

Hariom verma <hariom18599@gmail.com> 于2021年5月20日周四 下午7:29写道:
>
> Hi ZheNing,
>
> On Thu, May 20, 2021 at 2:19 PM ZheNing Hu via GitGitGadget
> <gitgitgadget@gmail.com> wrote:
> >
> > From: ZheNing Hu <adlternative@gmail.com>
> >
> > diff --git a/ref-filter.c b/ref-filter.c
> > index e2eac50d9508..fc384a194c0c 100644
> > --- a/ref-filter.c
> > +++ b/ref-filter.c
> > @@ -133,7 +133,7 @@ static struct used_atom {
> >                         unsigned int nobracket : 1, push : 1, push_remote : 1;
> >                 } remote_ref;
> >                 struct {
> > -                       enum { C_BARE, C_BODY, C_BODY_DEP, C_LENGTH, C_LINES,
> > +                       enum { C_RAW, C_BARE, C_BODY, C_BODY_DEP, C_LENGTH, C_LINES,
>
> Maybe it's better if you can preserve the alphabetical order?
>
> Thanks.

Thanks, this is indeed my negligence.
--
ZheNing Hu

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] [GSOC] ref-filter: add contents:raw atom
  2021-05-20  8:49 [PATCH] [GSOC] ref-filter: add contents:raw atom ZheNing Hu via GitGitGadget
  2021-05-20 11:29 ` Hariom verma
@ 2021-05-20 16:20 ` Christian Couder
  2021-05-21  4:43   ` ZheNing Hu
  1 sibling, 1 reply; 8+ messages in thread
From: Christian Couder @ 2021-05-20 16:20 UTC (permalink / raw)
  To: ZheNing Hu via GitGitGadget; +Cc: git, Junio C Hamano, Hariom Verma, ZheNing Hu

On Thu, May 20, 2021 at 10:49 AM ZheNing Hu via GitGitGadget
<gitgitgadget@gmail.com> wrote:
>
> From: ZheNing Hu <adlternative@gmail.com>
>
> Add new formatting option %(contents:raw), which will
> print all the object contents without any changes.

Maybe you could tell how it would be different from %(contents), or in
other words what are the changes that %(contents) makes.

Isn't %(contents) only printing the commit message or the tag message
of a commit or a tag respectively? If %(contents:raw) would print all
the object contents, it could feel strange that it is actually
printing more than %(contents).

Also is %(contents:raw) supposed to print something for a blob or a
tree, while I guess %(contents) is printing nothing for them?

> It will help further to migrate all cat-file formatting
> logic from cat-file to ref-filter.
>
> Signed-off-by: ZheNing Hu <adlternative@gmail.com>

It looks like you rewrote the patch nearly completely, but if you
based your patch on, or got inspired by, Olga's work, it might be nice
to acknowledge this using a trailer (for example "Based-on-patch-by:
..." or "Helped-by:...").

> ---
>     [GSOC] ref-filter: add contents:raw atom
>
>     Learn from Olga's %(raw):
>     https://github.com/git/git/pull/568/commits/bf22dae7ca387dbc92c5586c92e60cd395099399
>
>     We can add a %(contents:raw) atom to ref-filter, which can output object
>     contents without any change.
>
>     %(contents:raw) can work on the refs which point to blob,tree,commit,tag
>     objects.
>
>     It also support %(*contents:raw) to dereference.
>
>     With %(cotent:raw), we can later provide support for printing the

s/cotent/contents/

>     content of the "raw" object for cat-file --batch.

> @@ -1292,7 +1294,8 @@ static void append_lines(struct strbuf *out, const char *buf, unsigned long size
>  }
>
>  /* See grab_values */
> -static void grab_sub_body_contents(struct atom_value *val, int deref, void *buf)
> +static void grab_sub_body_contents(struct atom_value *val, int deref, void *buf,
> +                                  unsigned long buf_size, enum object_type object_type)
>  {
>         int i;
>         const char *subpos = NULL, *bodypos = NULL, *sigpos = NULL;
> @@ -1312,6 +1315,13 @@ static void grab_sub_body_contents(struct atom_value *val, int deref, void *buf)
>                     !starts_with(name, "trailers") &&
>                     !starts_with(name, "contents"))
>                         continue;
> +               if (atom->u.contents.option == C_RAW) {
> +                       v->s = xmemdupz(buf, buf_size);
> +                       continue;
> +               }
> +               if (object_type != OBJ_TAG && object_type != OBJ_COMMIT)
> +                       continue;

When seeing the 2 lines above, I am guessing that, before this patch,
grab_sub_body_contents() couldn't actually be called when object_type
was OBJ_BLOB or OBJ_TREE, but you have made other changes elsewhere so
that now it can. As only the atom->u.contents.option == C_RAW case is
relevant in this case, you added this check. Let's see if I am
right...

>                 if (!subpos)
>                         find_subpos(buf,
>                                     &subpos, &sublen,
> @@ -1374,25 +1384,30 @@ static void fill_missing_values(struct atom_value *val)
>   * pointed at by the ref itself; otherwise it is the object the
>   * ref (which is a tag) refers to.
>   */
> -static void grab_values(struct atom_value *val, int deref, struct object *obj, void *buf)
> +static void grab_values(struct atom_value *val, int deref, struct object *obj, struct expand_data *data)
>  {
> +       void *buf = data->content;
> +       unsigned long buf_size = data->size;
> +
>         switch (obj->type) {
>         case OBJ_TAG:
>                 grab_tag_values(val, deref, obj);
> -               grab_sub_body_contents(val, deref, buf);
> +               grab_sub_body_contents(val, deref, buf, buf_size, obj->type);
>                 grab_person("tagger", val, deref, buf);
>                 break;
>         case OBJ_COMMIT:
>                 grab_commit_values(val, deref, obj);
> -               grab_sub_body_contents(val, deref, buf);
> +               grab_sub_body_contents(val, deref, buf, buf_size, obj->type);
>                 grab_person("author", val, deref, buf);
>                 grab_person("committer", val, deref, buf);
>                 break;
>         case OBJ_TREE:
>                 /* grab_tree_values(val, deref, obj, buf, sz); */
> +               grab_sub_body_contents(val, deref, buf, buf_size, obj->type);
>                 break;
>         case OBJ_BLOB:
>                 /* grab_blob_values(val, deref, obj, buf, sz); */
> +               grab_sub_body_contents(val, deref, buf, buf_size, obj->type);

...ok, I was right above. The issue now is that I wonder if
grab_sub_body_contents() is still a good name for a function that can
be called for a blob or a tree which does not really have a body.

>                 break;
>         default:
>                 die("Eh?  Object of type %d?", obj->type);
> @@ -1614,7 +1629,7 @@ static int get_object(struct ref_array_item *ref, int deref, struct object **obj
>                         return strbuf_addf_ret(err, -1, _("parse_object_buffer failed on %s for %s"),
>                                                oid_to_hex(&oi->oid), ref->refname);
>                 }
> -               grab_values(ref->value, deref, *obj, oi->content);
> +               grab_values(ref->value, deref, *obj, oi);
>         }
>
>         grab_common_values(ref->value, deref, oi);
> diff --git a/t/t6300-for-each-ref.sh b/t/t6300-for-each-ref.sh
> index 9e0214076b4d..baa3a40a70b1 100755
> --- a/t/t6300-for-each-ref.sh
> +++ b/t/t6300-for-each-ref.sh
> @@ -686,6 +686,17 @@ test_atom refs/tags/signed-empty contents:body ''
>  test_atom refs/tags/signed-empty contents:signature "$sig"
>  test_atom refs/tags/signed-empty contents "$sig"
>
> +test_expect_success 'basic atom: refs/tags/signed-empty contents:raw' '
> +       git cat-file tag refs/tags/signed-empty >expected &&
> +       git for-each-ref --format="%(contents:raw)" refs/tags/signed-empty >actual &&
> +       sanitize_pgp <expected >expected.clean &&
> +       sanitize_pgp <actual >actual.clean &&
> +       echo "" >>expected.clean &&
> +       test_cmp expected.clean actual.clean
> +'

For an empty tag %(contents:raw) should produce nothing, ok.

> +test_atom refs/tags/signed-empty *contents:raw $(git cat-file commit HEAD)

Maybe use single quotes around *contents:raw?

The rest looks good to me. Thanks!

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] [GSOC] ref-filter: add contents:raw atom
  2021-05-20 16:20 ` Christian Couder
@ 2021-05-21  4:43   ` ZheNing Hu
  2021-05-21  9:10     ` Christian Couder
  0 siblings, 1 reply; 8+ messages in thread
From: ZheNing Hu @ 2021-05-21  4:43 UTC (permalink / raw)
  To: Christian Couder
  Cc: ZheNing Hu via GitGitGadget, git, Junio C Hamano, Hariom Verma

Christian Couder <christian.couder@gmail.com> 于2021年5月21日周五 上午12:20写道:
>
> On Thu, May 20, 2021 at 10:49 AM ZheNing Hu via GitGitGadget
> <gitgitgadget@gmail.com> wrote:
> >
> > From: ZheNing Hu <adlternative@gmail.com>
> >
> > Add new formatting option %(contents:raw), which will
> > print all the object contents without any changes.
>
> Maybe you could tell how it would be different from %(contents), or in
> other words what are the changes that %(contents) makes.
>
> Isn't %(contents) only printing the commit message or the tag message
> of a commit or a tag respectively? If %(contents:raw) would print all
> the object contents, it could feel strange that it is actually
> printing more than %(contents).
>

Okay, some explanations are indeed missing here:

%(contents) will discard the metadata part of the object file,
and only print the data contents part of it. %(contents:raw)
can will not discard the metadata part of the object file, this
 means that it can print the "raw" content of an object.

In addition, %(contents:raw) can support print commit, blob,
tag, tree objects contents which %(contents) can only support
commit,tag objects.

E.g:

git for-each-ref --format=%(contents:raw) refs/heads/foo

will have the same output as:

git rev-parse refs/heads/foo | git cat-file --batch

> Also is %(contents:raw) supposed to print something for a blob or a
> tree, while I guess %(contents) is printing nothing for them?
>

Now my thoughts are:
Let %(contents) learn to print four kinds of objects.
and then let %(contents:raw) learn to print metadata.
I will split it into two patches.

> > It will help further to migrate all cat-file formatting
> > logic from cat-file to ref-filter.
> >
> > Signed-off-by: ZheNing Hu <adlternative@gmail.com>
>
> It looks like you rewrote the patch nearly completely, but if you
> based your patch on, or got inspired by, Olga's work, it might be nice
> to acknowledge this using a trailer (for example "Based-on-patch-by:
> ..." or "Helped-by:...").
>

Okay, "Based-on-patch-by" would be more appropriate here.

> > @@ -1312,6 +1315,13 @@ static void grab_sub_body_contents(struct atom_value *val, int deref, void *buf)
> >                     !starts_with(name, "trailers") &&
> >                     !starts_with(name, "contents"))
> >                         continue;
> > +               if (atom->u.contents.option == C_RAW) {
> > +                       v->s = xmemdupz(buf, buf_size);
> > +                       continue;
> > +               }
> > +               if (object_type != OBJ_TAG && object_type != OBJ_COMMIT)
> > +                       continue;
>
> When seeing the 2 lines above, I am guessing that, before this patch,
> grab_sub_body_contents() couldn't actually be called when object_type
> was OBJ_BLOB or OBJ_TREE, but you have made other changes elsewhere so
> that now it can. As only the atom->u.contents.option == C_RAW case is
> relevant in this case, you added this check. Let's see if I am
> right...
>
> >                 if (!subpos)
> >                         find_subpos(buf,
> >                                     &subpos, &sublen,
> > @@ -1374,25 +1384,30 @@ static void fill_missing_values(struct atom_value *val)
> >   * pointed at by the ref itself; otherwise it is the object the
> >   * ref (which is a tag) refers to.
> >   */
> > -static void grab_values(struct atom_value *val, int deref, struct object *obj, void *buf)
> > +static void grab_values(struct atom_value *val, int deref, struct object *obj, struct expand_data *data)
> >  {
> > +       void *buf = data->content;
> > +       unsigned long buf_size = data->size;
> > +
> >         switch (obj->type) {
> >         case OBJ_TAG:
> >                 grab_tag_values(val, deref, obj);
> > -               grab_sub_body_contents(val, deref, buf);
> > +               grab_sub_body_contents(val, deref, buf, buf_size, obj->type);
> >                 grab_person("tagger", val, deref, buf);
> >                 break;
> >         case OBJ_COMMIT:
> >                 grab_commit_values(val, deref, obj);
> > -               grab_sub_body_contents(val, deref, buf);
> > +               grab_sub_body_contents(val, deref, buf, buf_size, obj->type);
> >                 grab_person("author", val, deref, buf);
> >                 grab_person("committer", val, deref, buf);
> >                 break;
> >         case OBJ_TREE:
> >                 /* grab_tree_values(val, deref, obj, buf, sz); */
> > +               grab_sub_body_contents(val, deref, buf, buf_size, obj->type);
> >                 break;
> >         case OBJ_BLOB:
> >                 /* grab_blob_values(val, deref, obj, buf, sz); */
> > +               grab_sub_body_contents(val, deref, buf, buf_size, obj->type);
>
> ...ok, I was right above. The issue now is that I wonder if
> grab_sub_body_contents() is still a good name for a function that can
> be called for a blob or a tree which does not really have a body.
>

Makes sense, It might be better to use the new name: grab_contents().

> >                 break;
> >         default:
> >                 die("Eh?  Object of type %d?", obj->type);
> > @@ -1614,7 +1629,7 @@ static int get_object(struct ref_array_item *ref, int deref, struct object **obj
> >                         return strbuf_addf_ret(err, -1, _("parse_object_buffer failed on %s for %s"),
> >                                                oid_to_hex(&oi->oid), ref->refname);
> >                 }
> > -               grab_values(ref->value, deref, *obj, oi->content);
> > +               grab_values(ref->value, deref, *obj, oi);
> >         }
> >
> >         grab_common_values(ref->value, deref, oi);
> > diff --git a/t/t6300-for-each-ref.sh b/t/t6300-for-each-ref.sh
> > index 9e0214076b4d..baa3a40a70b1 100755
> > --- a/t/t6300-for-each-ref.sh
> > +++ b/t/t6300-for-each-ref.sh
> > @@ -686,6 +686,17 @@ test_atom refs/tags/signed-empty contents:body ''
> >  test_atom refs/tags/signed-empty contents:signature "$sig"
> >  test_atom refs/tags/signed-empty contents "$sig"
> >
> > +test_expect_success 'basic atom: refs/tags/signed-empty contents:raw' '
> > +       git cat-file tag refs/tags/signed-empty >expected &&
> > +       git for-each-ref --format="%(contents:raw)" refs/tags/signed-empty >actual &&
> > +       sanitize_pgp <expected >expected.clean &&
> > +       sanitize_pgp <actual >actual.clean &&
> > +       echo "" >>expected.clean &&
> > +       test_cmp expected.clean actual.clean
> > +'
>
> For an empty tag %(contents:raw) should produce nothing, ok.
>
> > +test_atom refs/tags/signed-empty *contents:raw $(git cat-file commit HEAD)
>
> Maybe use single quotes around *contents:raw?
>
> The rest looks good to me. Thanks!

Good suggestion. thank!
--
ZheNing Hu

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] [GSOC] ref-filter: add contents:raw atom
  2021-05-21  4:43   ` ZheNing Hu
@ 2021-05-21  9:10     ` Christian Couder
  2021-05-21 13:12       ` ZheNing Hu
  0 siblings, 1 reply; 8+ messages in thread
From: Christian Couder @ 2021-05-21  9:10 UTC (permalink / raw)
  To: ZheNing Hu; +Cc: ZheNing Hu via GitGitGadget, git, Junio C Hamano, Hariom Verma

On Fri, May 21, 2021 at 6:44 AM ZheNing Hu <adlternative@gmail.com> wrote:
>
> Christian Couder <christian.couder@gmail.com> 于2021年5月21日周五 上午12:20写道:
> >
> > On Thu, May 20, 2021 at 10:49 AM ZheNing Hu via GitGitGadget
> > <gitgitgadget@gmail.com> wrote:
> > >
> > > From: ZheNing Hu <adlternative@gmail.com>
> > >
> > > Add new formatting option %(contents:raw), which will
> > > print all the object contents without any changes.
> >
> > Maybe you could tell how it would be different from %(contents), or in
> > other words what are the changes that %(contents) makes.
> >
> > Isn't %(contents) only printing the commit message or the tag message
> > of a commit or a tag respectively? If %(contents:raw) would print all
> > the object contents, it could feel strange that it is actually
> > printing more than %(contents).
>
> Okay, some explanations are indeed missing here:

By the way I forgot to say that your patch might want to also properly
document %(contents:raw). If the doc was updated as part of the patch,
maybe the whole patch would be easier to understand.

> %(contents) will discard the metadata part of the object file,

It's not clear what you mean when you talk about metadata in objects.
Are you taking about only the headers, like the "tree XXX", "parent
YYY", etc lines in commits, and the "object OOO", "type TTT", etc
lines in tags? Or does it also includes the lines in tree objects that
reference other trees or blobs?

> and only print the data contents part of it. %(contents:raw)
> can will not discard the metadata part of the object file, this

s/can//

>  means that it can print the "raw" content of an object.

The above seems to be changing the definition of %(contents) (as
previously it was only the commit message of a commit or the tag
message a tag) to explain %(contents:raw)...

> In addition, %(contents:raw) can support print commit, blob,
> tag, tree objects contents which %(contents) can only support
> commit,tag objects.

...but this is saying that the definition of %(contents) actually
doesn't change. This doesn't seem logical to me.

If %(contents:raw) can support any kind of object (commit, blob, tag
or tree) then it would be logical that %(contents) also support any
kind of object.

So if you really want to define %(contents:raw) as the raw object
contents, you might want to consider a preparatory patch that would
first change the definition of %(contents) to be the object contents
without the headers. This would keep the same output for any commit or
tag. But for blobs and trees it would print the whole content of the
object in the same way as `git cat-file -p` does, instead of nothing.

I think this acceptable as refs rarely point to something other than
commits, so people are not likely to rely on the fact that %(contents)
currently prints nothing for blobs and trees.

> E.g:
>
> git for-each-ref --format=%(contents:raw) refs/heads/foo
>
> will have the same output as:
>
> git rev-parse refs/heads/foo | git cat-file --batch

Ok, I think that would indeed be useful.

> > Also is %(contents:raw) supposed to print something for a blob or a
> > tree, while I guess %(contents) is printing nothing for them?
>
> Now my thoughts are:
> Let %(contents) learn to print four kinds of objects.
> and then let %(contents:raw) learn to print metadata.
> I will split it into two patches.

Yeah, great!

> > > It will help further to migrate all cat-file formatting
> > > logic from cat-file to ref-filter.
> > >
> > > Signed-off-by: ZheNing Hu <adlternative@gmail.com>
> >
> > It looks like you rewrote the patch nearly completely, but if you
> > based your patch on, or got inspired by, Olga's work, it might be nice
> > to acknowledge this using a trailer (for example "Based-on-patch-by:
> > ..." or "Helped-by:...").
>
> Okay, "Based-on-patch-by" would be more appropriate here.

Nice!

> > > @@ -1312,6 +1315,13 @@ static void grab_sub_body_contents(struct atom_value *val, int deref, void *buf)
> > >                     !starts_with(name, "trailers") &&
> > >                     !starts_with(name, "contents"))
> > >                         continue;
> > > +               if (atom->u.contents.option == C_RAW) {
> > > +                       v->s = xmemdupz(buf, buf_size);
> > > +                       continue;
> > > +               }
> > > +               if (object_type != OBJ_TAG && object_type != OBJ_COMMIT)
> > > +                       continue;
> >
> > When seeing the 2 lines above, I am guessing that, before this patch,
> > grab_sub_body_contents() couldn't actually be called when object_type
> > was OBJ_BLOB or OBJ_TREE, but you have made other changes elsewhere so
> > that now it can. As only the atom->u.contents.option == C_RAW case is
> > relevant in this case, you added this check. Let's see if I am
> > right...
> >
> > >                 if (!subpos)
> > >                         find_subpos(buf,
> > >                                     &subpos, &sublen,
> > > @@ -1374,25 +1384,30 @@ static void fill_missing_values(struct atom_value *val)
> > >   * pointed at by the ref itself; otherwise it is the object the
> > >   * ref (which is a tag) refers to.
> > >   */
> > > -static void grab_values(struct atom_value *val, int deref, struct object *obj, void *buf)
> > > +static void grab_values(struct atom_value *val, int deref, struct object *obj, struct expand_data *data)
> > >  {
> > > +       void *buf = data->content;
> > > +       unsigned long buf_size = data->size;
> > > +
> > >         switch (obj->type) {
> > >         case OBJ_TAG:
> > >                 grab_tag_values(val, deref, obj);
> > > -               grab_sub_body_contents(val, deref, buf);
> > > +               grab_sub_body_contents(val, deref, buf, buf_size, obj->type);
> > >                 grab_person("tagger", val, deref, buf);
> > >                 break;
> > >         case OBJ_COMMIT:
> > >                 grab_commit_values(val, deref, obj);
> > > -               grab_sub_body_contents(val, deref, buf);
> > > +               grab_sub_body_contents(val, deref, buf, buf_size, obj->type);
> > >                 grab_person("author", val, deref, buf);
> > >                 grab_person("committer", val, deref, buf);
> > >                 break;
> > >         case OBJ_TREE:
> > >                 /* grab_tree_values(val, deref, obj, buf, sz); */
> > > +               grab_sub_body_contents(val, deref, buf, buf_size, obj->type);
> > >                 break;
> > >         case OBJ_BLOB:
> > >                 /* grab_blob_values(val, deref, obj, buf, sz); */
> > > +               grab_sub_body_contents(val, deref, buf, buf_size, obj->type);
> >
> > ...ok, I was right above. The issue now is that I wonder if
> > grab_sub_body_contents() is still a good name for a function that can
> > be called for a blob or a tree which does not really have a body.
>
> Makes sense, It might be better to use the new name: grab_contents().

Yeah.

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] [GSOC] ref-filter: add contents:raw atom
  2021-05-21  9:10     ` Christian Couder
@ 2021-05-21 13:12       ` ZheNing Hu
  2021-05-21 13:39         ` ZheNing Hu
  0 siblings, 1 reply; 8+ messages in thread
From: ZheNing Hu @ 2021-05-21 13:12 UTC (permalink / raw)
  To: Christian Couder
  Cc: ZheNing Hu via GitGitGadget, git, Junio C Hamano, Hariom Verma

Christian Couder <christian.couder@gmail.com> 于2021年5月21日周五 下午5:10写道:
>
> > Okay, some explanations are indeed missing here:
>
> By the way I forgot to say that your patch might want to also properly
> document %(contents:raw). If the doc was updated as part of the patch,
> maybe the whole patch would be easier to understand.
>

Indeed so.

> > %(contents) will discard the metadata part of the object file,
>
> It's not clear what you mean when you talk about metadata in objects.
> Are you taking about only the headers, like the "tree XXX", "parent
> YYY", etc lines in commits, and the "object OOO", "type TTT", etc
> lines in tags? Or does it also includes the lines in tree objects that
> reference other trees or blobs?
>

Well, the metadata here only refers to the header of the commit and
tag objects, "tree XXX", "parent YYY"...  The lines in tree objects that
reference other trees or blobs will not output. What we have to do here
is to simulate the behavior of `git cat-file --batch`. But we encounter a
very serious problem here:

Now we want to use for-each-ref to print a ref point to a tree:

git for-each-ref --format="%(contents:raw)" refs/mytrees/first

will output:

100644 one

but

git cat-file tree refs/mytrees/first

will output:

100644 onem�cֈ��q�D�֧hJ)E-100644 two.t���0�+��VjC��eV�ӈq

which is the compressed data, it may contains '\0'.

Whne we use `append_atom()` to add the contents of the tree object
to the buffer, notice that it uses `strbuf_addstr()`, the underlying call
is `strlen()`, which truncates the data we added. Can we have any good
remedies? For example, record the length of "v->s" by "v->s_size" and
use `strbuf_addstr(&state->stack->output, v->s, v->s_size)`?

> > and only print the data contents part of it. %(contents:raw)
> > can will not discard the metadata part of the object file, this
>
> s/can//
>
> >  means that it can print the "raw" content of an object.
>
> The above seems to be changing the definition of %(contents) (as
> previously it was only the commit message of a commit or the tag
> message a tag) to explain %(contents:raw)...
>
> > In addition, %(contents:raw) can support print commit, blob,
> > tag, tree objects contents which %(contents) can only support
> > commit,tag objects.
>
> ...but this is saying that the definition of %(contents) actually
> doesn't change. This doesn't seem logical to me.
>
> If %(contents:raw) can support any kind of object (commit, blob, tag
> or tree) then it would be logical that %(contents) also support any
> kind of object.
>
> So if you really want to define %(contents:raw) as the raw object
> contents, you might want to consider a preparatory patch that would
> first change the definition of %(contents) to be the object contents
> without the headers. This would keep the same output for any commit or
> tag. But for blobs and trees it would print the whole content of the
> object in the same way as `git cat-file -p` does, instead of nothing.
>

Yes, I agree with you. Another thing worth mentioning is:
%(contents:raw) or %(contents) is not similar to "git cat-file -p foo"
they are more like "git cat-file <type> foo" or
"git rev-parse foo | git cat-file --batch", "git cat-file -p foo" It will
process the original data:
For example, a tree object, with "git cat-file -p",
will use "ls-tree" to output all the sub-trees and blobs it connects.

$ git cat-file -p refs/mytrees/first
100644 blob 6dde63d6888cec71ea82449bd6a7684a29452d7f    one
100644 blob f719efd430d52bcfc8566a43b2eb655688d38871    two.t

but with "git cat-file <type>" will output uncompressed data:

git cat-file tree refs/mytrees/first
100644 onem�cֈ��q�D�֧hJ)E-100644 two.t���0�+��VjC��eV�ӈq%

> I think this acceptable as refs rarely point to something other than
> commits, so people are not likely to rely on the fact that %(contents)
> currently prints nothing for blobs and trees.
>
> > E.g:
> >
> > git for-each-ref --format=%(contents:raw) refs/heads/foo
> >
> > will have the same output as:
> >
> > git rev-parse refs/heads/foo | git cat-file --batch
>
> Ok, I think that would indeed be useful.
>
> > > Also is %(contents:raw) supposed to print something for a blob or a
> > > tree, while I guess %(contents) is printing nothing for them?
> >
> > Now my thoughts are:
> > Let %(contents) learn to print four kinds of objects.
> > and then let %(contents:raw) learn to print metadata.
> > I will split it into two patches.
>
> Yeah, great!
>

Thanks!
--
ZheNing Hu

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] [GSOC] ref-filter: add contents:raw atom
  2021-05-21 13:12       ` ZheNing Hu
@ 2021-05-21 13:39         ` ZheNing Hu
  0 siblings, 0 replies; 8+ messages in thread
From: ZheNing Hu @ 2021-05-21 13:39 UTC (permalink / raw)
  To: Christian Couder
  Cc: ZheNing Hu via GitGitGadget, git, Junio C Hamano, Hariom Verma

ZheNing Hu <adlternative@gmail.com> 于2021年5月21日周五 下午9:12写道:
>
>
> Now we want to use for-each-ref to print a ref point to a tree:
>
> git for-each-ref --format="%(contents:raw)" refs/mytrees/first
>
> will output:
>
> 100644 one
>
> but
>
> git cat-file tree refs/mytrees/first
>
> will output:
>
> 100644 onem�cֈ��q�D�֧hJ)E-100644 two.t���0�+��VjC��eV�ӈq
>
> which is the compressed data, it may contains '\0'.
>
> Whne we use `append_atom()` to add the contents of the tree object
> to the buffer, notice that it uses `strbuf_addstr()`, the underlying call
> is `strlen()`, which truncates the data we added. Can we have any good
> remedies? For example, record the length of "v->s" by "v->s_size" and
> use `strbuf_addstr(&state->stack->output, v->s, v->s_size)`?
>

This is not a good method, because `quote_formatting()` in `append_atom()`
will also cause this truncation problem.

--
ZheNing Hu

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2021-05-21 13:39 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-05-20  8:49 [PATCH] [GSOC] ref-filter: add contents:raw atom ZheNing Hu via GitGitGadget
2021-05-20 11:29 ` Hariom verma
2021-05-20 14:34   ` ZheNing Hu
2021-05-20 16:20 ` Christian Couder
2021-05-21  4:43   ` ZheNing Hu
2021-05-21  9:10     ` Christian Couder
2021-05-21 13:12       ` ZheNing Hu
2021-05-21 13:39         ` ZheNing Hu

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.