All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/8] [GSOC][RFC] cat-file: reuse ref-filter logic
@ 2021-06-12 11:14 ZheNing Hu via GitGitGadget
  2021-06-12 11:14 ` [PATCH 1/8] [GSOC] ref-filter: add obj-type check in grab contents ZheNing Hu via GitGitGadget
                   ` (8 more replies)
  0 siblings, 9 replies; 121+ messages in thread
From: ZheNing Hu via GitGitGadget @ 2021-06-12 11:14 UTC (permalink / raw)
  To: git
  Cc: Junio C Hamano, Christian Couder, Hariom Verma, Bagas Sanjaya,
	Jeff King, ZheNing Hu

This patch series make cat-file reuse ref-filter logic, which based on
5a5b5f78 ([GSOC] ref-filter: add %(rest) atom)

 1. Modified the logic of cat-file --batch, use verify_ref_format() and
    format_ref_array_item() to get object data.
 2. Re-implement --textconv, --filters.

Now cat-file can support most ref-filter atoms, like %(tree), %(parent),
%(if)...

There is still an unresolved issue: performance overhead is very large, so
that when we use:

git cat-file --batch --batch-all-objects >/dev/null

on git.git, it may fail.

ZheNing Hu (8):
  [GSOC] ref-filter: add obj-type check in grab contents
  [GSOC] ref-filter: add %(raw) atom
  [GSOC] ref-filter: use non-const ref_format in *_atom_parser()
  [GSOC] ref-filter: add %(rest) atom
  [GSOC] ref-filter: teach get_object() return useful value
  [GSOC] cat-file: reuse ref-filter logic
  [GSOC] cat-file: reuse err buf in batch_objet_write()
  [GSOC] cat-file: re-implement --textconv, --filters options

 Documentation/git-cat-file.txt     |   6 +
 Documentation/git-for-each-ref.txt |   9 +
 builtin/cat-file.c                 | 267 ++++++------------------
 builtin/tag.c                      |   2 +-
 ref-filter.c                       | 320 +++++++++++++++++++++++------
 ref-filter.h                       |  13 +-
 t/t1006-cat-file.sh                | 252 +++++++++++++++++++++++
 t/t3203-branch-output.sh           |   4 +
 t/t6300-for-each-ref.sh            | 211 +++++++++++++++++++
 t/t6301-for-each-ref-errors.sh     |   2 +-
 t/t7004-tag.sh                     |   4 +
 t/t7030-verify-tag.sh              |   4 +
 12 files changed, 818 insertions(+), 276 deletions(-)


base-commit: 1197f1a46360d3ae96bd9c15908a3a6f8e562207
Published-As: https://github.com/gitgitgadget/git/releases/tag/pr-980%2Fadlternative%2Fcat-file-batch-refactor-v1
Fetch-It-Via: git fetch https://github.com/gitgitgadget/git pr-980/adlternative/cat-file-batch-refactor-v1
Pull-Request: https://github.com/gitgitgadget/git/pull/980
-- 
gitgitgadget

^ permalink raw reply	[flat|nested] 121+ messages in thread

* [PATCH 1/8] [GSOC] ref-filter: add obj-type check in grab contents
  2021-06-12 11:14 [PATCH 0/8] [GSOC][RFC] cat-file: reuse ref-filter logic ZheNing Hu via GitGitGadget
@ 2021-06-12 11:14 ` ZheNing Hu via GitGitGadget
  2021-06-17  7:04   ` Ævar Arnfjörð Bjarmason
  2021-06-12 11:14 ` [PATCH 2/8] [GSOC] ref-filter: add %(raw) atom ZheNing Hu via GitGitGadget
                   ` (7 subsequent siblings)
  8 siblings, 1 reply; 121+ messages in thread
From: ZheNing Hu via GitGitGadget @ 2021-06-12 11:14 UTC (permalink / raw)
  To: git
  Cc: Junio C Hamano, Christian Couder, Hariom Verma, Bagas Sanjaya,
	Jeff King, ZheNing Hu, ZheNing Hu

From: ZheNing Hu <adlternative@gmail.com>

Only tag and commit objects use `grab_sub_body_contents()` to grab
object contents in the current codebase.  We want to teach the
function to also handle blobs and trees to get their raw data,
without parsing a blob (whose contents looks like a commit or a tag)
incorrectly as a commit or a tag.

Skip the block of code that is specific to handling commits and tags
early when the given object is of a wrong type to help later
addition to handle other types of objects in this function.

Mentored-by: Christian Couder <christian.couder@gmail.com>
Mentored-by: Hariom Verma <hariom18599@gmail.com>
Helped-by: Junio C Hamano <gitster@pobox.com>
Signed-off-by: ZheNing Hu <adlternative@gmail.com>
---
 ref-filter.c | 24 +++++++++++++++---------
 1 file changed, 15 insertions(+), 9 deletions(-)

diff --git a/ref-filter.c b/ref-filter.c
index 4db0e40ff4c6..5cee6512fbaf 100644
--- a/ref-filter.c
+++ b/ref-filter.c
@@ -1356,11 +1356,12 @@ static void append_lines(struct strbuf *out, const char *buf, unsigned long size
 }
 
 /* See grab_values */
-static void grab_sub_body_contents(struct atom_value *val, int deref, void *buf)
+static void grab_sub_body_contents(struct atom_value *val, int deref, struct expand_data *data)
 {
 	int i;
 	const char *subpos = NULL, *bodypos = NULL, *sigpos = NULL;
 	size_t sublen = 0, bodylen = 0, nonsiglen = 0, siglen = 0;
+	void *buf = data->content;
 
 	for (i = 0; i < used_atom_cnt; i++) {
 		struct used_atom *atom = &used_atom[i];
@@ -1371,10 +1372,13 @@ static void grab_sub_body_contents(struct atom_value *val, int deref, void *buf)
 			continue;
 		if (deref)
 			name++;
-		if (strcmp(name, "body") &&
-		    !starts_with(name, "subject") &&
-		    !starts_with(name, "trailers") &&
-		    !starts_with(name, "contents"))
+
+		if ((data->type != OBJ_TAG &&
+		     data->type != OBJ_COMMIT) ||
+		    (strcmp(name, "body") &&
+		     !starts_with(name, "subject") &&
+		     !starts_with(name, "trailers") &&
+		     !starts_with(name, "contents")))
 			continue;
 		if (!subpos)
 			find_subpos(buf,
@@ -1438,17 +1442,19 @@ static void fill_missing_values(struct atom_value *val)
  * pointed at by the ref itself; otherwise it is the object the
  * ref (which is a tag) refers to.
  */
-static void grab_values(struct atom_value *val, int deref, struct object *obj, void *buf)
+static void grab_values(struct atom_value *val, int deref, struct object *obj, struct expand_data *data)
 {
+	void *buf = data->content;
+
 	switch (obj->type) {
 	case OBJ_TAG:
 		grab_tag_values(val, deref, obj);
-		grab_sub_body_contents(val, deref, buf);
+		grab_sub_body_contents(val, deref, data);
 		grab_person("tagger", val, deref, buf);
 		break;
 	case OBJ_COMMIT:
 		grab_commit_values(val, deref, obj);
-		grab_sub_body_contents(val, deref, buf);
+		grab_sub_body_contents(val, deref, data);
 		grab_person("author", val, deref, buf);
 		grab_person("committer", val, deref, buf);
 		break;
@@ -1678,7 +1684,7 @@ static int get_object(struct ref_array_item *ref, int deref, struct object **obj
 			return strbuf_addf_ret(err, -1, _("parse_object_buffer failed on %s for %s"),
 					       oid_to_hex(&oi->oid), ref->refname);
 		}
-		grab_values(ref->value, deref, *obj, oi->content);
+		grab_values(ref->value, deref, *obj, oi);
 	}
 
 	grab_common_values(ref->value, deref, oi);
-- 
gitgitgadget


^ permalink raw reply	[flat|nested] 121+ messages in thread

* [PATCH 2/8] [GSOC] ref-filter: add %(raw) atom
  2021-06-12 11:14 [PATCH 0/8] [GSOC][RFC] cat-file: reuse ref-filter logic ZheNing Hu via GitGitGadget
  2021-06-12 11:14 ` [PATCH 1/8] [GSOC] ref-filter: add obj-type check in grab contents ZheNing Hu via GitGitGadget
@ 2021-06-12 11:14 ` ZheNing Hu via GitGitGadget
  2021-06-17  7:10   ` Ævar Arnfjörð Bjarmason
  2021-06-12 11:14 ` [PATCH 3/8] [GSOC] ref-filter: use non-const ref_format in *_atom_parser() ZheNing Hu via GitGitGadget
                   ` (6 subsequent siblings)
  8 siblings, 1 reply; 121+ messages in thread
From: ZheNing Hu via GitGitGadget @ 2021-06-12 11:14 UTC (permalink / raw)
  To: git
  Cc: Junio C Hamano, Christian Couder, Hariom Verma, Bagas Sanjaya,
	Jeff King, ZheNing Hu, ZheNing Hu

From: ZheNing Hu <adlternative@gmail.com>

Add new formatting option `%(raw)`, which will print the raw
object data without any changes. It will help further to migrate
all cat-file formatting logic from cat-file to ref-filter.

The raw data of blob, tree objects may contain '\0', but most of
the logic in `ref-filter` depends on the output of the atom being
text (specifically, no embedded NULs in it).

E.g. `quote_formatting()` use `strbuf_addstr()` or `*._quote_buf()`
add the data to the buffer. The raw data of a tree object is
`100644 one\0...`, only the `100644 one` will be added to the buffer,
which is incorrect.

Therefore, add a new member in `struct atom_value`: `s_size`, which
can record raw object size, it can help us add raw object data to
the buffer or compare two buffers which contain raw object data.

Beyond, `--format=%(raw)` cannot be used with `--python`, `--shell`,
`--tcl`, `--perl` because if our binary raw data is passed to a variable
in the host language, the host language may not support arbitrary binary
data in the variables of its string type.

Mentored-by: Christian Couder <christian.couder@gmail.com>
Mentored-by: Hariom Verma <hariom18599@gmail.com>
Helped-by: Felipe Contreras <felipe.contreras@gmail.com>
Helped-by: Phillip Wood <phillip.wood@dunelm.org.uk>
Helped-by: Junio C Hamano <gitster@pobox.com>
Based-on-patch-by: Olga Telezhnaya <olyatelezhnaya@gmail.com>
Signed-off-by: ZheNing Hu <adlternative@gmail.com>
---
 Documentation/git-for-each-ref.txt |   9 ++
 ref-filter.c                       | 139 +++++++++++++++----
 t/t6300-for-each-ref.sh            | 207 +++++++++++++++++++++++++++++
 3 files changed, 328 insertions(+), 27 deletions(-)

diff --git a/Documentation/git-for-each-ref.txt b/Documentation/git-for-each-ref.txt
index 2ae2478de706..7f1f0a1ca3b6 100644
--- a/Documentation/git-for-each-ref.txt
+++ b/Documentation/git-for-each-ref.txt
@@ -235,6 +235,15 @@ and `date` to extract the named component.  For email fields (`authoremail`,
 without angle brackets, and `:localpart` to get the part before the `@` symbol
 out of the trimmed email.
 
+The raw data in an object is `raw`.
+
+raw:size::
+	The raw data size of the object.
+
+Note that `--format=%(raw)` can not be used with `--python`, `--shell`, `--tcl`,
+`--perl` because the host language may not support arbitrary binary data in the
+variables of its string type.
+
 The message in a commit or a tag object is `contents`, from which
 `contents:<part>` can be used to extract various parts out of:
 
diff --git a/ref-filter.c b/ref-filter.c
index 5cee6512fbaf..7822be903071 100644
--- a/ref-filter.c
+++ b/ref-filter.c
@@ -144,6 +144,7 @@ enum atom_type {
 	ATOM_BODY,
 	ATOM_TRAILERS,
 	ATOM_CONTENTS,
+	ATOM_RAW,
 	ATOM_UPSTREAM,
 	ATOM_PUSH,
 	ATOM_SYMREF,
@@ -189,6 +190,9 @@ static struct used_atom {
 			struct process_trailer_options trailer_opts;
 			unsigned int nlines;
 		} contents;
+		struct {
+			enum { RAW_BARE, RAW_LENGTH } option;
+		} raw_data;
 		struct {
 			cmp_status cmp_status;
 			const char *str;
@@ -426,6 +430,18 @@ static int contents_atom_parser(const struct ref_format *format, struct used_ato
 	return 0;
 }
 
+static int raw_atom_parser(const struct ref_format *format, struct used_atom *atom,
+				const char *arg, struct strbuf *err)
+{
+	if (!arg)
+		atom->u.raw_data.option = RAW_BARE;
+	else if (!strcmp(arg, "size"))
+		atom->u.raw_data.option = RAW_LENGTH;
+	else
+		return strbuf_addf_ret(err, -1, _("unrecognized %%(raw) argument: %s"), arg);
+	return 0;
+}
+
 static int oid_atom_parser(const struct ref_format *format, struct used_atom *atom,
 			   const char *arg, struct strbuf *err)
 {
@@ -586,6 +602,7 @@ static struct {
 	[ATOM_BODY] = { "body", SOURCE_OBJ, FIELD_STR, body_atom_parser },
 	[ATOM_TRAILERS] = { "trailers", SOURCE_OBJ, FIELD_STR, trailers_atom_parser },
 	[ATOM_CONTENTS] = { "contents", SOURCE_OBJ, FIELD_STR, contents_atom_parser },
+	[ATOM_RAW] = { "raw", SOURCE_OBJ, FIELD_STR, raw_atom_parser },
 	[ATOM_UPSTREAM] = { "upstream", SOURCE_NONE, FIELD_STR, remote_ref_atom_parser },
 	[ATOM_PUSH] = { "push", SOURCE_NONE, FIELD_STR, remote_ref_atom_parser },
 	[ATOM_SYMREF] = { "symref", SOURCE_NONE, FIELD_STR, refname_atom_parser },
@@ -620,12 +637,15 @@ struct ref_formatting_state {
 
 struct atom_value {
 	const char *s;
+	size_t s_size;
 	int (*handler)(struct atom_value *atomv, struct ref_formatting_state *state,
 		       struct strbuf *err);
 	uintmax_t value; /* used for sorting when not FIELD_STR */
 	struct used_atom *atom;
 };
 
+#define ATOM_VALUE_S_SIZE_INIT (-1)
+
 /*
  * Used to parse format string and sort specifiers
  */
@@ -644,13 +664,6 @@ static int parse_ref_filter_atom(const struct ref_format *format,
 		return strbuf_addf_ret(err, -1, _("malformed field name: %.*s"),
 				       (int)(ep-atom), atom);
 
-	/* Do we have the atom already used elsewhere? */
-	for (i = 0; i < used_atom_cnt; i++) {
-		int len = strlen(used_atom[i].name);
-		if (len == ep - atom && !memcmp(used_atom[i].name, atom, len))
-			return i;
-	}
-
 	/*
 	 * If the atom name has a colon, strip it and everything after
 	 * it off - it specifies the format for this entry, and
@@ -660,6 +673,13 @@ static int parse_ref_filter_atom(const struct ref_format *format,
 	arg = memchr(sp, ':', ep - sp);
 	atom_len = (arg ? arg : ep) - sp;
 
+	/* Do we have the atom already used elsewhere? */
+	for (i = 0; i < used_atom_cnt; i++) {
+		int len = strlen(used_atom[i].name);
+		if (len == ep - atom && !memcmp(used_atom[i].name, atom, len))
+			return i;
+	}
+
 	/* Is the atom a valid one? */
 	for (i = 0; i < ARRAY_SIZE(valid_atom); i++) {
 		int len = strlen(valid_atom[i].name);
@@ -709,11 +729,14 @@ static int parse_ref_filter_atom(const struct ref_format *format,
 	return at;
 }
 
-static void quote_formatting(struct strbuf *s, const char *str, int quote_style)
+static void quote_formatting(struct strbuf *s, const char *str, size_t len, int quote_style)
 {
 	switch (quote_style) {
 	case QUOTE_NONE:
-		strbuf_addstr(s, str);
+		if (len != ATOM_VALUE_S_SIZE_INIT)
+			strbuf_add(s, str, len);
+		else
+			strbuf_addstr(s, str);
 		break;
 	case QUOTE_SHELL:
 		sq_quote_buf(s, str);
@@ -740,9 +763,12 @@ static int append_atom(struct atom_value *v, struct ref_formatting_state *state,
 	 * encountered.
 	 */
 	if (!state->stack->prev)
-		quote_formatting(&state->stack->output, v->s, state->quote_style);
+		quote_formatting(&state->stack->output, v->s, v->s_size, state->quote_style);
 	else
-		strbuf_addstr(&state->stack->output, v->s);
+		if (v->s_size != ATOM_VALUE_S_SIZE_INIT)
+			strbuf_add(&state->stack->output, v->s, v->s_size);
+		else
+			strbuf_addstr(&state->stack->output, v->s);
 	return 0;
 }
 
@@ -842,21 +868,23 @@ static int if_atom_handler(struct atom_value *atomv, struct ref_formatting_state
 	return 0;
 }
 
-static int is_empty(const char *s)
+static int is_empty(struct strbuf *buf)
 {
-	while (*s != '\0') {
-		if (!isspace(*s))
-			return 0;
-		s++;
-	}
-	return 1;
-}
+	const char *cur = buf->buf;
+	const char *end = buf->buf + buf->len;
+
+	while (cur != end && (isspace(*cur)))
+		cur++;
+
+	return cur == end;
+ }
 
 static int then_atom_handler(struct atom_value *atomv, struct ref_formatting_state *state,
 			     struct strbuf *err)
 {
 	struct ref_formatting_stack *cur = state->stack;
 	struct if_then_else *if_then_else = NULL;
+	size_t str_len = 0;
 
 	if (cur->at_end == if_then_else_handler)
 		if_then_else = (struct if_then_else *)cur->at_end_data;
@@ -867,18 +895,22 @@ static int then_atom_handler(struct atom_value *atomv, struct ref_formatting_sta
 	if (if_then_else->else_atom_seen)
 		return strbuf_addf_ret(err, -1, _("format: %%(then) atom used after %%(else)"));
 	if_then_else->then_atom_seen = 1;
+	if (if_then_else->str)
+		str_len = strlen(if_then_else->str);
 	/*
 	 * If the 'equals' or 'notequals' attribute is used then
 	 * perform the required comparison. If not, only non-empty
 	 * strings satisfy the 'if' condition.
 	 */
 	if (if_then_else->cmp_status == COMPARE_EQUAL) {
-		if (!strcmp(if_then_else->str, cur->output.buf))
+		if (str_len == cur->output.len &&
+		    !memcmp(if_then_else->str, cur->output.buf, cur->output.len))
 			if_then_else->condition_satisfied = 1;
 	} else if (if_then_else->cmp_status == COMPARE_UNEQUAL) {
-		if (strcmp(if_then_else->str, cur->output.buf))
+		if (str_len != cur->output.len ||
+		    memcmp(if_then_else->str, cur->output.buf, cur->output.len))
 			if_then_else->condition_satisfied = 1;
-	} else if (cur->output.len && !is_empty(cur->output.buf))
+	} else if (cur->output.len && !is_empty(&cur->output))
 		if_then_else->condition_satisfied = 1;
 	strbuf_reset(&cur->output);
 	return 0;
@@ -924,7 +956,7 @@ static int end_atom_handler(struct atom_value *atomv, struct ref_formatting_stat
 	 * only on the topmost supporting atom.
 	 */
 	if (!current->prev->prev) {
-		quote_formatting(&s, current->output.buf, state->quote_style);
+		quote_formatting(&s, current->output.buf, current->output.len, state->quote_style);
 		strbuf_swap(&current->output, &s);
 	}
 	strbuf_release(&s);
@@ -974,6 +1006,10 @@ int verify_ref_format(struct ref_format *format)
 		at = parse_ref_filter_atom(format, sp + 2, ep, &err);
 		if (at < 0)
 			die("%s", err.buf);
+		if (format->quote_style && used_atom[at].atom_type == ATOM_RAW &&
+		    used_atom[at].u.raw_data.option == RAW_BARE)
+			die(_("--format=%.*s cannot be used with"
+			      "--python, --shell, --tcl, --perl"), (int)(ep - sp - 2), sp + 2);
 		cp = ep + 1;
 
 		if (skip_prefix(used_atom[at].name, "color:", &color))
@@ -1362,17 +1398,29 @@ static void grab_sub_body_contents(struct atom_value *val, int deref, struct exp
 	const char *subpos = NULL, *bodypos = NULL, *sigpos = NULL;
 	size_t sublen = 0, bodylen = 0, nonsiglen = 0, siglen = 0;
 	void *buf = data->content;
+	unsigned long buf_size = data->size;
 
 	for (i = 0; i < used_atom_cnt; i++) {
 		struct used_atom *atom = &used_atom[i];
 		const char *name = atom->name;
 		struct atom_value *v = &val[i];
+		enum atom_type atom_type = atom->atom_type;
 
 		if (!!deref != (*name == '*'))
 			continue;
 		if (deref)
 			name++;
 
+		if (atom_type == ATOM_RAW) {
+			if (atom->u.raw_data.option == RAW_BARE) {
+				v->s = xmemdupz(buf, buf_size);
+				v->s_size = buf_size;
+			} else if (atom->u.raw_data.option == RAW_LENGTH) {
+				v->s = xstrfmt("%"PRIuMAX, (uintmax_t)buf_size);
+			}
+			continue;
+		}
+
 		if ((data->type != OBJ_TAG &&
 		     data->type != OBJ_COMMIT) ||
 		    (strcmp(name, "body") &&
@@ -1460,9 +1508,11 @@ static void grab_values(struct atom_value *val, int deref, struct object *obj, s
 		break;
 	case OBJ_TREE:
 		/* grab_tree_values(val, deref, obj, buf, sz); */
+		grab_sub_body_contents(val, deref, data);
 		break;
 	case OBJ_BLOB:
 		/* grab_blob_values(val, deref, obj, buf, sz); */
+		grab_sub_body_contents(val, deref, data);
 		break;
 	default:
 		die("Eh?  Object of type %d?", obj->type);
@@ -1766,6 +1816,7 @@ static int populate_value(struct ref_array_item *ref, struct strbuf *err)
 		const char *refname;
 		struct branch *branch = NULL;
 
+		v->s_size = ATOM_VALUE_S_SIZE_INIT;
 		v->handler = append_atom;
 		v->atom = atom;
 
@@ -2369,6 +2420,19 @@ static int compare_detached_head(struct ref_array_item *a, struct ref_array_item
 	return 0;
 }
 
+static int memcasecmp(const void *vs1, const void *vs2, size_t n)
+{
+	const char *s1 = vs1, *s2 = vs2;
+	const char *end = s1 + n;
+
+	for (; s1 < end; s1++, s2++) {
+		int diff = tolower(*s1) - tolower(*s2);
+		if (diff)
+			return diff;
+	}
+	return 0;
+}
+
 static int cmp_ref_sorting(struct ref_sorting *s, struct ref_array_item *a, struct ref_array_item *b)
 {
 	struct atom_value *va, *vb;
@@ -2389,10 +2453,30 @@ static int cmp_ref_sorting(struct ref_sorting *s, struct ref_array_item *a, stru
 	} else if (s->sort_flags & REF_SORTING_VERSION) {
 		cmp = versioncmp(va->s, vb->s);
 	} else if (cmp_type == FIELD_STR) {
-		int (*cmp_fn)(const char *, const char *);
-		cmp_fn = s->sort_flags & REF_SORTING_ICASE
-			? strcasecmp : strcmp;
-		cmp = cmp_fn(va->s, vb->s);
+		if (va->s_size == ATOM_VALUE_S_SIZE_INIT &&
+		    vb->s_size == ATOM_VALUE_S_SIZE_INIT) {
+			int (*cmp_fn)(const char *, const char *);
+			cmp_fn = s->sort_flags & REF_SORTING_ICASE
+				? strcasecmp : strcmp;
+			cmp = cmp_fn(va->s, vb->s);
+		} else {
+			size_t a_size = va->s_size == ATOM_VALUE_S_SIZE_INIT ?
+					strlen(va->s) : va->s_size;
+			size_t b_size = vb->s_size == ATOM_VALUE_S_SIZE_INIT ?
+					strlen(vb->s) : vb->s_size;
+			int (*cmp_fn)(const void *, const void *, size_t);
+			cmp_fn = s->sort_flags & REF_SORTING_ICASE
+				? memcasecmp : memcmp;
+
+			cmp = cmp_fn(va->s, vb->s, b_size > a_size ?
+				     a_size : b_size);
+			if (!cmp) {
+				if (a_size > b_size)
+					cmp = 1;
+				else if (a_size < b_size)
+					cmp = -1;
+			}
+		}
 	} else {
 		if (va->value < vb->value)
 			cmp = -1;
@@ -2492,6 +2576,7 @@ int format_ref_array_item(struct ref_array_item *info,
 	}
 	if (format->need_color_reset_at_eol) {
 		struct atom_value resetv;
+		resetv.s_size = ATOM_VALUE_S_SIZE_INIT;
 		resetv.s = GIT_COLOR_RESET;
 		if (append_atom(&resetv, &state, error_buf)) {
 			pop_stack_element(&state.stack);
diff --git a/t/t6300-for-each-ref.sh b/t/t6300-for-each-ref.sh
index 9e0214076b4d..e2867de791e7 100755
--- a/t/t6300-for-each-ref.sh
+++ b/t/t6300-for-each-ref.sh
@@ -130,6 +130,8 @@ test_atom head parent:short=10 ''
 test_atom head numparent 0
 test_atom head object ''
 test_atom head type ''
+test_atom head raw "$(git cat-file commit refs/heads/main)
+"
 test_atom head '*objectname' ''
 test_atom head '*objecttype' ''
 test_atom head author 'A U Thor <author@example.com> 1151968724 +0200'
@@ -221,6 +223,15 @@ test_atom tag contents 'Tagging at 1151968727
 '
 test_atom tag HEAD ' '
 
+test_expect_success 'basic atom: refs/tags/testtag *raw' '
+	git cat-file commit refs/tags/testtag^{} >expected &&
+	git for-each-ref --format="%(*raw)" refs/tags/testtag >actual &&
+	sanitize_pgp <expected >expected.clean &&
+	sanitize_pgp <actual >actual.clean &&
+	echo "" >>expected.clean &&
+	test_cmp expected.clean actual.clean
+'
+
 test_expect_success 'Check invalid atoms names are errors' '
 	test_must_fail git for-each-ref --format="%(INVALID)" refs/heads
 '
@@ -686,6 +697,15 @@ test_atom refs/tags/signed-empty contents:body ''
 test_atom refs/tags/signed-empty contents:signature "$sig"
 test_atom refs/tags/signed-empty contents "$sig"
 
+test_expect_success GPG 'basic atom: refs/tags/signed-empty raw' '
+	git cat-file tag refs/tags/signed-empty >expected &&
+	git for-each-ref --format="%(raw)" refs/tags/signed-empty >actual &&
+	sanitize_pgp <expected >expected.clean &&
+	sanitize_pgp <actual >actual.clean &&
+	echo "" >>expected.clean &&
+	test_cmp expected.clean actual.clean
+'
+
 test_atom refs/tags/signed-short subject 'subject line'
 test_atom refs/tags/signed-short subject:sanitize 'subject-line'
 test_atom refs/tags/signed-short contents:subject 'subject line'
@@ -695,6 +715,15 @@ test_atom refs/tags/signed-short contents:signature "$sig"
 test_atom refs/tags/signed-short contents "subject line
 $sig"
 
+test_expect_success GPG 'basic atom: refs/tags/signed-short raw' '
+	git cat-file tag refs/tags/signed-short >expected &&
+	git for-each-ref --format="%(raw)" refs/tags/signed-short >actual &&
+	sanitize_pgp <expected >expected.clean &&
+	sanitize_pgp <actual >actual.clean &&
+	echo "" >>expected.clean &&
+	test_cmp expected.clean actual.clean
+'
+
 test_atom refs/tags/signed-long subject 'subject line'
 test_atom refs/tags/signed-long subject:sanitize 'subject-line'
 test_atom refs/tags/signed-long contents:subject 'subject line'
@@ -708,6 +737,15 @@ test_atom refs/tags/signed-long contents "subject line
 body contents
 $sig"
 
+test_expect_success GPG 'basic atom: refs/tags/signed-long raw' '
+	git cat-file tag refs/tags/signed-long >expected &&
+	git for-each-ref --format="%(raw)" refs/tags/signed-long >actual &&
+	sanitize_pgp <expected >expected.clean &&
+	sanitize_pgp <actual >actual.clean &&
+	echo "" >>expected.clean &&
+	test_cmp expected.clean actual.clean
+'
+
 test_expect_success 'set up refs pointing to tree and blob' '
 	git update-ref refs/mytrees/first refs/heads/main^{tree} &&
 	git update-ref refs/myblobs/first refs/heads/main:one
@@ -720,6 +758,16 @@ test_atom refs/mytrees/first contents:body ""
 test_atom refs/mytrees/first contents:signature ""
 test_atom refs/mytrees/first contents ""
 
+test_expect_success 'basic atom: refs/mytrees/first raw' '
+	git cat-file tree refs/mytrees/first >expected &&
+	echo "" >>expected &&
+	git for-each-ref --format="%(raw)" refs/mytrees/first >actual &&
+	test_cmp expected actual &&
+	git cat-file -s refs/mytrees/first >expected &&
+	git for-each-ref --format="%(raw:size)" refs/mytrees/first >actual &&
+	test_cmp expected actual
+'
+
 test_atom refs/myblobs/first subject ""
 test_atom refs/myblobs/first contents:subject ""
 test_atom refs/myblobs/first body ""
@@ -727,6 +775,165 @@ test_atom refs/myblobs/first contents:body ""
 test_atom refs/myblobs/first contents:signature ""
 test_atom refs/myblobs/first contents ""
 
+test_expect_success 'basic atom: refs/myblobs/first raw' '
+	git cat-file blob refs/myblobs/first >expected &&
+	echo "" >>expected &&
+	git for-each-ref --format="%(raw)" refs/myblobs/first >actual &&
+	test_cmp expected actual &&
+	git cat-file -s refs/myblobs/first >expected &&
+	git for-each-ref --format="%(raw:size)" refs/myblobs/first >actual &&
+	test_cmp expected actual
+'
+
+test_expect_success 'set up refs pointing to binary blob' '
+	printf "a\0b\0c" >blob1 &&
+	printf "a\0c\0b" >blob2 &&
+	printf "\0a\0b\0c" >blob3 &&
+	printf "abc" >blob4 &&
+	printf "\0 \0 \0 " >blob5 &&
+	printf "\0 \0a\0 " >blob6 &&
+	printf "  " >blob7 &&
+	>blob8 &&
+	git hash-object blob1 -w | xargs git update-ref refs/myblobs/blob1 &&
+	git hash-object blob2 -w | xargs git update-ref refs/myblobs/blob2 &&
+	git hash-object blob3 -w | xargs git update-ref refs/myblobs/blob3 &&
+	git hash-object blob4 -w | xargs git update-ref refs/myblobs/blob4 &&
+	git hash-object blob5 -w | xargs git update-ref refs/myblobs/blob5 &&
+	git hash-object blob6 -w | xargs git update-ref refs/myblobs/blob6 &&
+	git hash-object blob7 -w | xargs git update-ref refs/myblobs/blob7 &&
+	git hash-object blob8 -w | xargs git update-ref refs/myblobs/blob8
+'
+
+test_expect_success 'Verify sorts with raw' '
+	cat >expected <<-EOF &&
+	refs/myblobs/blob8
+	refs/myblobs/blob5
+	refs/myblobs/blob6
+	refs/myblobs/blob3
+	refs/myblobs/blob7
+	refs/mytrees/first
+	refs/myblobs/first
+	refs/myblobs/blob1
+	refs/myblobs/blob2
+	refs/myblobs/blob4
+	refs/heads/main
+	EOF
+	git for-each-ref --format="%(refname)" --sort=raw \
+		refs/heads/main refs/myblobs/ refs/mytrees/first >actual &&
+	test_cmp expected actual
+'
+
+test_expect_success 'Verify sorts with raw:size' '
+	cat >expected <<-EOF &&
+	refs/myblobs/blob8
+	refs/myblobs/first
+	refs/myblobs/blob7
+	refs/heads/main
+	refs/myblobs/blob4
+	refs/myblobs/blob1
+	refs/myblobs/blob2
+	refs/myblobs/blob3
+	refs/myblobs/blob5
+	refs/myblobs/blob6
+	refs/mytrees/first
+	EOF
+	git for-each-ref --format="%(refname)" --sort=raw:size \
+		refs/heads/main refs/myblobs/ refs/mytrees/first >actual &&
+	test_cmp expected actual
+'
+
+test_expect_success 'validate raw atom with %(if:equals)' '
+	cat >expected <<-EOF &&
+	not equals
+	not equals
+	not equals
+	not equals
+	not equals
+	not equals
+	refs/myblobs/blob4
+	not equals
+	not equals
+	not equals
+	not equals
+	not equals
+	EOF
+	git for-each-ref --format="%(if:equals=abc)%(raw)%(then)%(refname)%(else)not equals%(end)" \
+		refs/myblobs/ refs/heads/ >actual &&
+	test_cmp expected actual
+'
+test_expect_success 'validate raw atom with %(if:notequals)' '
+	cat >expected <<-EOF &&
+	refs/heads/ambiguous
+	refs/heads/main
+	refs/heads/newtag
+	refs/myblobs/blob1
+	refs/myblobs/blob2
+	refs/myblobs/blob3
+	equals
+	refs/myblobs/blob5
+	refs/myblobs/blob6
+	refs/myblobs/blob7
+	refs/myblobs/blob8
+	refs/myblobs/first
+	EOF
+	git for-each-ref --format="%(if:notequals=abc)%(raw)%(then)%(refname)%(else)equals%(end)" \
+		refs/myblobs/ refs/heads/ >actual &&
+	test_cmp expected actual
+'
+
+test_expect_success 'empty raw refs with %(if)' '
+	cat >expected <<-EOF &&
+	refs/myblobs/blob1 not empty
+	refs/myblobs/blob2 not empty
+	refs/myblobs/blob3 not empty
+	refs/myblobs/blob4 not empty
+	refs/myblobs/blob5 not empty
+	refs/myblobs/blob6 not empty
+	refs/myblobs/blob7 empty
+	refs/myblobs/blob8 empty
+	refs/myblobs/first not empty
+	EOF
+	git for-each-ref --format="%(refname) %(if)%(raw)%(then)not empty%(else)empty%(end)" \
+		refs/myblobs/ >actual &&
+	test_cmp expected actual
+'
+
+test_expect_success '%(raw) with --python must failed' '
+	test_must_fail git for-each-ref --format="%(raw)" --python
+'
+
+test_expect_success '%(raw) with --tcl must failed' '
+	test_must_fail git for-each-ref --format="%(raw)" --tcl
+'
+
+test_expect_success '%(raw) with --perl must failed' '
+	test_must_fail git for-each-ref --format="%(raw)" --perl
+'
+
+test_expect_success '%(raw) with --shell must failed' '
+	test_must_fail git for-each-ref --format="%(raw)" --shell
+'
+
+test_expect_success '%(raw) with --shell and --sort=raw must failed' '
+	test_must_fail git for-each-ref --format="%(raw)" --sort=raw --shell
+'
+
+test_expect_success '%(raw:size) with --shell' '
+	git for-each-ref --format="%(raw:size)" | while read line
+	do
+		echo "'\''$line'\''" >>expect
+	done &&
+	git for-each-ref --format="%(raw:size)" --shell >actual &&
+	test_cmp expect actual
+'
+
+test_expect_success 'for-each-ref --format compare with cat-file --batch' '
+	git rev-parse refs/mytrees/first | git cat-file --batch >expected &&
+	git for-each-ref --format="%(objectname) %(objecttype) %(objectsize)
+%(raw)" refs/mytrees/first >actual &&
+	test_cmp expected actual
+'
+
 test_expect_success 'set up multiple-sort tags' '
 	for when in 100000 200000
 	do
-- 
gitgitgadget


^ permalink raw reply	[flat|nested] 121+ messages in thread

* [PATCH 3/8] [GSOC] ref-filter: use non-const ref_format in *_atom_parser()
  2021-06-12 11:14 [PATCH 0/8] [GSOC][RFC] cat-file: reuse ref-filter logic ZheNing Hu via GitGitGadget
  2021-06-12 11:14 ` [PATCH 1/8] [GSOC] ref-filter: add obj-type check in grab contents ZheNing Hu via GitGitGadget
  2021-06-12 11:14 ` [PATCH 2/8] [GSOC] ref-filter: add %(raw) atom ZheNing Hu via GitGitGadget
@ 2021-06-12 11:14 ` ZheNing Hu via GitGitGadget
  2021-06-12 11:14 ` [PATCH 4/8] [GSOC] ref-filter: add %(rest) atom ZheNing Hu via GitGitGadget
                   ` (5 subsequent siblings)
  8 siblings, 0 replies; 121+ messages in thread
From: ZheNing Hu via GitGitGadget @ 2021-06-12 11:14 UTC (permalink / raw)
  To: git
  Cc: Junio C Hamano, Christian Couder, Hariom Verma, Bagas Sanjaya,
	Jeff King, ZheNing Hu, ZheNing Hu

From: ZheNing Hu <adlternative@gmail.com>

Use non-const ref_format in *_atom_parser(), which can help us
modify the members of ref_format in *_atom_parser().

Mentored-by: Christian Couder <christian.couder@gmail.com>
Mentored-by: Hariom Verma <hariom18599@gmail.com>
Signed-off-by: ZheNing Hu <adlternative@gmail.com>
---
 builtin/tag.c |  2 +-
 ref-filter.c  | 44 ++++++++++++++++++++++----------------------
 ref-filter.h  |  4 ++--
 3 files changed, 25 insertions(+), 25 deletions(-)

diff --git a/builtin/tag.c b/builtin/tag.c
index 82fcfc098242..452558ec9575 100644
--- a/builtin/tag.c
+++ b/builtin/tag.c
@@ -146,7 +146,7 @@ static int verify_tag(const char *name, const char *ref,
 		      const struct object_id *oid, void *cb_data)
 {
 	int flags;
-	const struct ref_format *format = cb_data;
+	struct ref_format *format = cb_data;
 	flags = GPG_VERIFY_VERBOSE;
 
 	if (format->format)
diff --git a/ref-filter.c b/ref-filter.c
index 7822be903071..af8c15aef44d 100644
--- a/ref-filter.c
+++ b/ref-filter.c
@@ -226,7 +226,7 @@ static int strbuf_addf_ret(struct strbuf *sb, int ret, const char *fmt, ...)
 	return ret;
 }
 
-static int color_atom_parser(const struct ref_format *format, struct used_atom *atom,
+static int color_atom_parser(struct ref_format *format, struct used_atom *atom,
 			     const char *color_value, struct strbuf *err)
 {
 	if (!color_value)
@@ -264,7 +264,7 @@ static int refname_atom_parser_internal(struct refname_atom *atom, const char *a
 	return 0;
 }
 
-static int remote_ref_atom_parser(const struct ref_format *format, struct used_atom *atom,
+static int remote_ref_atom_parser(struct ref_format *format, struct used_atom *atom,
 				  const char *arg, struct strbuf *err)
 {
 	struct string_list params = STRING_LIST_INIT_DUP;
@@ -311,7 +311,7 @@ static int remote_ref_atom_parser(const struct ref_format *format, struct used_a
 	return 0;
 }
 
-static int objecttype_atom_parser(const struct ref_format *format, struct used_atom *atom,
+static int objecttype_atom_parser(struct ref_format *format, struct used_atom *atom,
 				  const char *arg, struct strbuf *err)
 {
 	if (arg)
@@ -323,7 +323,7 @@ static int objecttype_atom_parser(const struct ref_format *format, struct used_a
 	return 0;
 }
 
-static int objectsize_atom_parser(const struct ref_format *format, struct used_atom *atom,
+static int objectsize_atom_parser(struct ref_format *format, struct used_atom *atom,
 				  const char *arg, struct strbuf *err)
 {
 	if (!arg) {
@@ -343,7 +343,7 @@ static int objectsize_atom_parser(const struct ref_format *format, struct used_a
 	return 0;
 }
 
-static int deltabase_atom_parser(const struct ref_format *format, struct used_atom *atom,
+static int deltabase_atom_parser(struct ref_format *format, struct used_atom *atom,
 				 const char *arg, struct strbuf *err)
 {
 	if (arg)
@@ -355,7 +355,7 @@ static int deltabase_atom_parser(const struct ref_format *format, struct used_at
 	return 0;
 }
 
-static int body_atom_parser(const struct ref_format *format, struct used_atom *atom,
+static int body_atom_parser(struct ref_format *format, struct used_atom *atom,
 			    const char *arg, struct strbuf *err)
 {
 	if (arg)
@@ -364,7 +364,7 @@ static int body_atom_parser(const struct ref_format *format, struct used_atom *a
 	return 0;
 }
 
-static int subject_atom_parser(const struct ref_format *format, struct used_atom *atom,
+static int subject_atom_parser(struct ref_format *format, struct used_atom *atom,
 			       const char *arg, struct strbuf *err)
 {
 	if (!arg)
@@ -376,7 +376,7 @@ static int subject_atom_parser(const struct ref_format *format, struct used_atom
 	return 0;
 }
 
-static int trailers_atom_parser(const struct ref_format *format, struct used_atom *atom,
+static int trailers_atom_parser(struct ref_format *format, struct used_atom *atom,
 				const char *arg, struct strbuf *err)
 {
 	atom->u.contents.trailer_opts.no_divider = 1;
@@ -402,7 +402,7 @@ static int trailers_atom_parser(const struct ref_format *format, struct used_ato
 	return 0;
 }
 
-static int contents_atom_parser(const struct ref_format *format, struct used_atom *atom,
+static int contents_atom_parser(struct ref_format *format, struct used_atom *atom,
 				const char *arg, struct strbuf *err)
 {
 	if (!arg)
@@ -430,7 +430,7 @@ static int contents_atom_parser(const struct ref_format *format, struct used_ato
 	return 0;
 }
 
-static int raw_atom_parser(const struct ref_format *format, struct used_atom *atom,
+static int raw_atom_parser(struct ref_format *format, struct used_atom *atom,
 				const char *arg, struct strbuf *err)
 {
 	if (!arg)
@@ -442,7 +442,7 @@ static int raw_atom_parser(const struct ref_format *format, struct used_atom *at
 	return 0;
 }
 
-static int oid_atom_parser(const struct ref_format *format, struct used_atom *atom,
+static int oid_atom_parser(struct ref_format *format, struct used_atom *atom,
 			   const char *arg, struct strbuf *err)
 {
 	if (!arg)
@@ -461,7 +461,7 @@ static int oid_atom_parser(const struct ref_format *format, struct used_atom *at
 	return 0;
 }
 
-static int person_email_atom_parser(const struct ref_format *format, struct used_atom *atom,
+static int person_email_atom_parser(struct ref_format *format, struct used_atom *atom,
 				    const char *arg, struct strbuf *err)
 {
 	if (!arg)
@@ -475,7 +475,7 @@ static int person_email_atom_parser(const struct ref_format *format, struct used
 	return 0;
 }
 
-static int refname_atom_parser(const struct ref_format *format, struct used_atom *atom,
+static int refname_atom_parser(struct ref_format *format, struct used_atom *atom,
 			       const char *arg, struct strbuf *err)
 {
 	return refname_atom_parser_internal(&atom->u.refname, arg, atom->name, err);
@@ -492,7 +492,7 @@ static align_type parse_align_position(const char *s)
 	return -1;
 }
 
-static int align_atom_parser(const struct ref_format *format, struct used_atom *atom,
+static int align_atom_parser(struct ref_format *format, struct used_atom *atom,
 			     const char *arg, struct strbuf *err)
 {
 	struct align *align = &atom->u.align;
@@ -544,7 +544,7 @@ static int align_atom_parser(const struct ref_format *format, struct used_atom *
 	return 0;
 }
 
-static int if_atom_parser(const struct ref_format *format, struct used_atom *atom,
+static int if_atom_parser(struct ref_format *format, struct used_atom *atom,
 			  const char *arg, struct strbuf *err)
 {
 	if (!arg) {
@@ -559,7 +559,7 @@ static int if_atom_parser(const struct ref_format *format, struct used_atom *ato
 	return 0;
 }
 
-static int head_atom_parser(const struct ref_format *format, struct used_atom *atom,
+static int head_atom_parser(struct ref_format *format, struct used_atom *atom,
 			    const char *arg, struct strbuf *unused_err)
 {
 	atom->u.head = resolve_refdup("HEAD", RESOLVE_REF_READING, NULL, NULL);
@@ -570,7 +570,7 @@ static struct {
 	const char *name;
 	info_source source;
 	cmp_type cmp_type;
-	int (*parser)(const struct ref_format *format, struct used_atom *atom,
+	int (*parser)(struct ref_format *format, struct used_atom *atom,
 		      const char *arg, struct strbuf *err);
 } valid_atom[] = {
 	[ATOM_REFNAME] = { "refname", SOURCE_NONE, FIELD_STR, refname_atom_parser },
@@ -649,7 +649,7 @@ struct atom_value {
 /*
  * Used to parse format string and sort specifiers
  */
-static int parse_ref_filter_atom(const struct ref_format *format,
+static int parse_ref_filter_atom(struct ref_format *format,
 				 const char *atom, const char *ep,
 				 struct strbuf *err)
 {
@@ -2546,9 +2546,9 @@ static void append_literal(const char *cp, const char *ep, struct ref_formatting
 }
 
 int format_ref_array_item(struct ref_array_item *info,
-			   const struct ref_format *format,
-			   struct strbuf *final_buf,
-			   struct strbuf *error_buf)
+			  struct ref_format *format,
+			  struct strbuf *final_buf,
+			  struct strbuf *error_buf)
 {
 	const char *cp, *sp, *ep;
 	struct ref_formatting_state state = REF_FORMATTING_STATE_INIT;
@@ -2593,7 +2593,7 @@ int format_ref_array_item(struct ref_array_item *info,
 }
 
 void pretty_print_ref(const char *name, const struct object_id *oid,
-		      const struct ref_format *format)
+		      struct ref_format *format)
 {
 	struct ref_array_item *ref_item;
 	struct strbuf output = STRBUF_INIT;
diff --git a/ref-filter.h b/ref-filter.h
index baf72a718965..74fb423fc89f 100644
--- a/ref-filter.h
+++ b/ref-filter.h
@@ -116,7 +116,7 @@ void ref_array_sort(struct ref_sorting *sort, struct ref_array *array);
 void ref_sorting_set_sort_flags_all(struct ref_sorting *sorting, unsigned int mask, int on);
 /*  Based on the given format and quote_style, fill the strbuf */
 int format_ref_array_item(struct ref_array_item *info,
-			  const struct ref_format *format,
+			  struct ref_format *format,
 			  struct strbuf *final_buf,
 			  struct strbuf *error_buf);
 /*  Parse a single sort specifier and add it to the list */
@@ -137,7 +137,7 @@ void setup_ref_filter_porcelain_msg(void);
  * name must be a fully qualified refname.
  */
 void pretty_print_ref(const char *name, const struct object_id *oid,
-		      const struct ref_format *format);
+		      struct ref_format *format);
 
 /*
  * Push a single ref onto the array; this can be used to construct your own
-- 
gitgitgadget


^ permalink raw reply	[flat|nested] 121+ messages in thread

* [PATCH 4/8] [GSOC] ref-filter: add %(rest) atom
  2021-06-12 11:14 [PATCH 0/8] [GSOC][RFC] cat-file: reuse ref-filter logic ZheNing Hu via GitGitGadget
                   ` (2 preceding siblings ...)
  2021-06-12 11:14 ` [PATCH 3/8] [GSOC] ref-filter: use non-const ref_format in *_atom_parser() ZheNing Hu via GitGitGadget
@ 2021-06-12 11:14 ` ZheNing Hu via GitGitGadget
  2021-06-12 11:14 ` [PATCH 5/8] [GSOC] ref-filter: teach get_object() return useful value ZheNing Hu via GitGitGadget
                   ` (4 subsequent siblings)
  8 siblings, 0 replies; 121+ messages in thread
From: ZheNing Hu via GitGitGadget @ 2021-06-12 11:14 UTC (permalink / raw)
  To: git
  Cc: Junio C Hamano, Christian Couder, Hariom Verma, Bagas Sanjaya,
	Jeff King, ZheNing Hu, ZheNing Hu

From: ZheNing Hu <adlternative@gmail.com>

In order to let "cat-file --batch=%(rest)" use the ref-filter
interface, add %(rest) atom for ref-filter. "git for-each-ref",
"git branch", "git tag" and "git verify-tag" will reject %(rest)
by default.

Mentored-by: Christian Couder <christian.couder@gmail.com>
Mentored-by: Hariom Verma <hariom18599@gmail.com>
Signed-off-by: ZheNing Hu <adlternative@gmail.com>
---
 ref-filter.c             | 21 +++++++++++++++++++++
 ref-filter.h             |  5 ++++-
 t/t3203-branch-output.sh |  4 ++++
 t/t6300-for-each-ref.sh  |  4 ++++
 t/t7004-tag.sh           |  4 ++++
 t/t7030-verify-tag.sh    |  4 ++++
 6 files changed, 41 insertions(+), 1 deletion(-)

diff --git a/ref-filter.c b/ref-filter.c
index af8c15aef44d..8868cf98f090 100644
--- a/ref-filter.c
+++ b/ref-filter.c
@@ -157,6 +157,7 @@ enum atom_type {
 	ATOM_IF,
 	ATOM_THEN,
 	ATOM_ELSE,
+	ATOM_REST,
 };
 
 /*
@@ -559,6 +560,15 @@ static int if_atom_parser(struct ref_format *format, struct used_atom *atom,
 	return 0;
 }
 
+static int rest_atom_parser(struct ref_format *format, struct used_atom *atom,
+			    const char *arg, struct strbuf *err)
+{
+	if (arg)
+		return strbuf_addf_ret(err, -1, _("%%(rest) does not take arguments"));
+	format->use_rest = 1;
+	return 0;
+}
+
 static int head_atom_parser(struct ref_format *format, struct used_atom *atom,
 			    const char *arg, struct strbuf *unused_err)
 {
@@ -615,6 +625,7 @@ static struct {
 	[ATOM_IF] = { "if", SOURCE_NONE, FIELD_STR, if_atom_parser },
 	[ATOM_THEN] = { "then", SOURCE_NONE },
 	[ATOM_ELSE] = { "else", SOURCE_NONE },
+	[ATOM_REST] = { "rest", SOURCE_NONE, FIELD_STR, rest_atom_parser },
 	/*
 	 * Please update $__git_ref_fieldlist in git-completion.bash
 	 * when you add new atoms
@@ -1006,6 +1017,9 @@ int verify_ref_format(struct ref_format *format)
 		at = parse_ref_filter_atom(format, sp + 2, ep, &err);
 		if (at < 0)
 			die("%s", err.buf);
+		if (used_atom[at].atom_type == ATOM_REST)
+			die("this command reject atom %%(%.*s)", (int)(ep - sp - 2), sp + 2);
+
 		if (format->quote_style && used_atom[at].atom_type == ATOM_RAW &&
 		    used_atom[at].u.raw_data.option == RAW_BARE)
 			die(_("--format=%.*s cannot be used with"
@@ -1920,6 +1934,12 @@ static int populate_value(struct ref_array_item *ref, struct strbuf *err)
 			v->handler = else_atom_handler;
 			v->s = xstrdup("");
 			continue;
+		} else if (atom_type == ATOM_REST) {
+			if (ref->rest)
+				v->s = xstrdup(ref->rest);
+			else
+				v->s = xstrdup("");
+			continue;
 		} else
 			continue;
 
@@ -2137,6 +2157,7 @@ static struct ref_array_item *new_ref_array_item(const char *refname,
 
 	FLEX_ALLOC_STR(ref, refname, refname);
 	oidcpy(&ref->objectname, oid);
+	ref->rest = NULL;
 
 	return ref;
 }
diff --git a/ref-filter.h b/ref-filter.h
index 74fb423fc89f..9dc07476a584 100644
--- a/ref-filter.h
+++ b/ref-filter.h
@@ -38,6 +38,7 @@ struct ref_sorting {
 
 struct ref_array_item {
 	struct object_id objectname;
+	const char *rest;
 	int flag;
 	unsigned int kind;
 	const char *symref;
@@ -76,14 +77,16 @@ struct ref_format {
 	 * verify_ref_format() afterwards to finalize.
 	 */
 	const char *format;
+	const char *rest;
 	int quote_style;
+	int use_rest;
 	int use_color;
 
 	/* Internal state to ref-filter */
 	int need_color_reset_at_eol;
 };
 
-#define REF_FORMAT_INIT { NULL, 0, -1 }
+#define REF_FORMAT_INIT { NULL, NULL, 0, 0, -1 }
 
 /*  Macros for checking --merged and --no-merged options */
 #define _OPT_MERGED_NO_MERGED(option, filter, h) \
diff --git a/t/t3203-branch-output.sh b/t/t3203-branch-output.sh
index 5325b9f67a00..2780ec8803fd 100755
--- a/t/t3203-branch-output.sh
+++ b/t/t3203-branch-output.sh
@@ -340,6 +340,10 @@ test_expect_success 'git branch --format option' '
 	test_cmp expect actual
 '
 
+test_expect_success 'git branch with --format=%(rest) must failed' '
+	test_must_fail git branch --format="%(rest)" >actual
+'
+
 test_expect_success 'worktree colors correct' '
 	cat >expect <<-EOF &&
 	* <GREEN>(HEAD detached from fromtag)<RESET>
diff --git a/t/t6300-for-each-ref.sh b/t/t6300-for-each-ref.sh
index e2867de791e7..8c97c3b877c6 100755
--- a/t/t6300-for-each-ref.sh
+++ b/t/t6300-for-each-ref.sh
@@ -1187,6 +1187,10 @@ test_expect_success 'basic atom: head contents:trailers' '
 	test_cmp expect actual.clean
 '
 
+test_expect_success 'basic atom: rest must failed' '
+	test_must_fail git for-each-ref --format="%(rest)" refs/heads/main
+'
+
 test_expect_success 'trailer parsing not fooled by --- line' '
 	git commit --allow-empty -F - <<-\EOF &&
 	this is the subject
diff --git a/t/t7004-tag.sh b/t/t7004-tag.sh
index 2f72c5c6883e..9fc4c4323949 100755
--- a/t/t7004-tag.sh
+++ b/t/t7004-tag.sh
@@ -1998,6 +1998,10 @@ test_expect_success '--format should list tags as per format given' '
 	test_cmp expect actual
 '
 
+test_expect_success 'git tag -l with --format="%(rest)" must failed' '
+	test_must_fail git tag -l --format="%(rest)" "v1*"
+'
+
 test_expect_success "set up color tests" '
 	echo "<RED>v1.0<RESET>" >expect.color &&
 	echo "v1.0" >expect.bare &&
diff --git a/t/t7030-verify-tag.sh b/t/t7030-verify-tag.sh
index 3cefde9602bf..785b32eb88f9 100755
--- a/t/t7030-verify-tag.sh
+++ b/t/t7030-verify-tag.sh
@@ -194,6 +194,10 @@ test_expect_success GPG 'verifying tag with --format' '
 	test_cmp expect actual
 '
 
+test_expect_success GPG 'verifying tag with --format="%(rest)" must failed' '
+	test_must_fail git verify-tag --format="%(rest)" "fourth-signed"
+'
+
 test_expect_success GPG 'verifying a forged tag with --format should fail silently' '
 	test_must_fail git verify-tag --format="tagname : %(tag)" $(cat forged1.tag) >actual-forged &&
 	test_must_be_empty actual-forged
-- 
gitgitgadget


^ permalink raw reply	[flat|nested] 121+ messages in thread

* [PATCH 5/8] [GSOC] ref-filter: teach get_object() return useful value
  2021-06-12 11:14 [PATCH 0/8] [GSOC][RFC] cat-file: reuse ref-filter logic ZheNing Hu via GitGitGadget
                   ` (3 preceding siblings ...)
  2021-06-12 11:14 ` [PATCH 4/8] [GSOC] ref-filter: add %(rest) atom ZheNing Hu via GitGitGadget
@ 2021-06-12 11:14 ` ZheNing Hu via GitGitGadget
  2021-06-12 20:09   ` Christian Couder
  2021-06-17  7:22   ` Ævar Arnfjörð Bjarmason
  2021-06-12 11:14 ` [PATCH 6/8] [GSOC] cat-file: reuse ref-filter logic ZheNing Hu via GitGitGadget
                   ` (3 subsequent siblings)
  8 siblings, 2 replies; 121+ messages in thread
From: ZheNing Hu via GitGitGadget @ 2021-06-12 11:14 UTC (permalink / raw)
  To: git
  Cc: Junio C Hamano, Christian Couder, Hariom Verma, Bagas Sanjaya,
	Jeff King, ZheNing Hu, ZheNing Hu

From: ZheNing Hu <adlternative@gmail.com>

Let `populate_value()`, `get_ref_atom_value()` and
`format_ref_array_item()` get the return value of `get_value()`
correctly. This can help us later let `cat-file --batch` get the
correct error message and return value of `get_value()`.

Mentored-by: Christian Couder <christian.couder@gmail.com>
Mentored-by: Hariom Verma <hariom18599@gmail.com>
Signed-off-by: ZheNing Hu <adlternative@gmail.com>
---
 ref-filter.c | 19 +++++++++++--------
 1 file changed, 11 insertions(+), 8 deletions(-)

diff --git a/ref-filter.c b/ref-filter.c
index 8868cf98f090..420c0bf9384f 100644
--- a/ref-filter.c
+++ b/ref-filter.c
@@ -1808,7 +1808,7 @@ static char *get_worktree_path(const struct used_atom *atom, const struct ref_ar
 static int populate_value(struct ref_array_item *ref, struct strbuf *err)
 {
 	struct object *obj;
-	int i;
+	int i, ret = 0;
 	struct object_info empty = OBJECT_INFO_INIT;
 
 	CALLOC_ARRAY(ref->value, used_atom_cnt);
@@ -1965,8 +1965,8 @@ static int populate_value(struct ref_array_item *ref, struct strbuf *err)
 
 
 	oi.oid = ref->objectname;
-	if (get_object(ref, 0, &obj, &oi, err))
-		return -1;
+	if ((ret = get_object(ref, 0, &obj, &oi, err)))
+		return ret;
 
 	/*
 	 * If there is no atom that wants to know about tagged
@@ -1997,9 +1997,11 @@ static int populate_value(struct ref_array_item *ref, struct strbuf *err)
 static int get_ref_atom_value(struct ref_array_item *ref, int atom,
 			      struct atom_value **v, struct strbuf *err)
 {
+	int ret = 0;
+
 	if (!ref->value) {
-		if (populate_value(ref, err))
-			return -1;
+		if ((ret = populate_value(ref, err)))
+			return ret;
 		fill_missing_values(ref->value);
 	}
 	*v = &ref->value[atom];
@@ -2573,6 +2575,7 @@ int format_ref_array_item(struct ref_array_item *info,
 {
 	const char *cp, *sp, *ep;
 	struct ref_formatting_state state = REF_FORMATTING_STATE_INIT;
+	int ret = 0;
 
 	state.quote_style = format->quote_style;
 	push_stack_element(&state.stack);
@@ -2585,10 +2588,10 @@ int format_ref_array_item(struct ref_array_item *info,
 		if (cp < sp)
 			append_literal(cp, sp, &state);
 		pos = parse_ref_filter_atom(format, sp + 2, ep, error_buf);
-		if (pos < 0 || get_ref_atom_value(info, pos, &atomv, error_buf) ||
+		if (pos < 0 || (ret = get_ref_atom_value(info, pos, &atomv, error_buf)) ||
 		    atomv->handler(atomv, &state, error_buf)) {
 			pop_stack_element(&state.stack);
-			return -1;
+			return ret ? ret : -1;
 		}
 	}
 	if (*cp) {
@@ -2610,7 +2613,7 @@ int format_ref_array_item(struct ref_array_item *info,
 	}
 	strbuf_addbuf(final_buf, &state.stack->output);
 	pop_stack_element(&state.stack);
-	return 0;
+	return ret;
 }
 
 void pretty_print_ref(const char *name, const struct object_id *oid,
-- 
gitgitgadget


^ permalink raw reply	[flat|nested] 121+ messages in thread

* [PATCH 6/8] [GSOC] cat-file: reuse ref-filter logic
  2021-06-12 11:14 [PATCH 0/8] [GSOC][RFC] cat-file: reuse ref-filter logic ZheNing Hu via GitGitGadget
                   ` (4 preceding siblings ...)
  2021-06-12 11:14 ` [PATCH 5/8] [GSOC] ref-filter: teach get_object() return useful value ZheNing Hu via GitGitGadget
@ 2021-06-12 11:14 ` ZheNing Hu via GitGitGadget
  2021-06-12 11:14 ` [PATCH 7/8] [GSOC] cat-file: reuse err buf in batch_objet_write() ZheNing Hu via GitGitGadget
                   ` (2 subsequent siblings)
  8 siblings, 0 replies; 121+ messages in thread
From: ZheNing Hu via GitGitGadget @ 2021-06-12 11:14 UTC (permalink / raw)
  To: git
  Cc: Junio C Hamano, Christian Couder, Hariom Verma, Bagas Sanjaya,
	Jeff King, ZheNing Hu, ZheNing Hu

From: ZheNing Hu <adlternative@gmail.com>

In order to let cat-file use ref-filter logic, the following
methods are used:

1. Add `cat_file_mode` member in struct `ref_format`, this can
help us reject atoms in verify_ref_format() which cat-file
cannot use, e.g. `%(refname)`, `%(push)`, `%(upstream)`...
2. Change the type of member `format` in struct `batch_options`
to `ref_format`, We can add format data in it.
3. Let `batch_objects()` add atoms to format, and use
`verify_ref_format()` to check atoms.
4. Use `has_object_file()` in `batch_one_object()` to check
whether the input object exists.
5. Use `format_ref_array_item()` in `batch_object_write()` to
get the formatted data corresponding to the object. If the
return value of `format_ref_array_item()` is equals to zero,
use `batch_write()` to print object data; else if the return
value less than zero, use `die()` to print the error message
and exit; else return value greater than zero, only print the
error message, but not exit.
6. Let get_object() return 1 and print "<oid> missing" instead
of returning -1 and printing "missing object <oid> for <refname>",
this can help `format_ref_array_item()` just report that the
object is missing without letting Git exit.

Most of the atoms in `for-each-ref --format` are now supported,
such as `%(tree)`, `%(parent)`, `%(author)`, `%(tagger)`, `%(if)`,
`%(then)`, `%(else)`, `%(end)`. But these atoms will be rejected:
`%(refname)`, `%(symref)`, `%(upstream)`, `%(push)`, `%(worktreepath)`,
`%(flag)`, `%(HEAD)`, because our objects don't have refname.

Mentored-by: Christian Couder <christian.couder@gmail.com>
Mentored-by: Hariom Verma <hariom18599@gmail.com>
Signed-off-by: ZheNing Hu <adlternative@gmail.com>
---
 Documentation/git-cat-file.txt |   6 +
 builtin/cat-file.c             | 250 +++++++-------------------------
 ref-filter.c                   |  15 +-
 ref-filter.h                   |   3 +-
 t/t1006-cat-file.sh            | 252 +++++++++++++++++++++++++++++++++
 t/t6301-for-each-ref-errors.sh |   2 +-
 6 files changed, 324 insertions(+), 204 deletions(-)

diff --git a/Documentation/git-cat-file.txt b/Documentation/git-cat-file.txt
index 4eb0421b3fd9..ef8ab952b2fa 100644
--- a/Documentation/git-cat-file.txt
+++ b/Documentation/git-cat-file.txt
@@ -226,6 +226,12 @@ newline. The available atoms are:
 	after that first run of whitespace (i.e., the "rest" of the
 	line) are output in place of the `%(rest)` atom.
 
+Note that most of the atoms in `for-each-ref --format` are now supported,
+such as `%(tree)`, `%(parent)`, `%(author)`, `%(tagger)`, `%(if)`,
+`%(then)`, `%(else)`, `%(end)`. But these atoms will be rejected:
+`%(refname)`, `%(symref)`, `%(upstream)`, `%(push)`, `%(worktreepath)`,
+`%(flag)`, `%(HEAD)`. See linkgit:git-for-each-ref[1].
+
 If no format is specified, the default format is `%(objectname)
 %(objecttype) %(objectsize)`.
 
diff --git a/builtin/cat-file.c b/builtin/cat-file.c
index 5ebf13359e83..0bc524e656e1 100644
--- a/builtin/cat-file.c
+++ b/builtin/cat-file.c
@@ -16,6 +16,7 @@
 #include "packfile.h"
 #include "object-store.h"
 #include "promisor-remote.h"
+#include "ref-filter.h"
 
 struct batch_options {
 	int enabled;
@@ -25,7 +26,7 @@ struct batch_options {
 	int all_objects;
 	int unordered;
 	int cmdmode; /* may be 'w' or 'c' for --filters or --textconv */
-	const char *format;
+	struct ref_format format;
 };
 
 static const char *force_path;
@@ -195,99 +196,10 @@ static int cat_one_file(int opt, const char *exp_type, const char *obj_name,
 
 struct expand_data {
 	struct object_id oid;
-	enum object_type type;
-	unsigned long size;
-	off_t disk_size;
 	const char *rest;
-	struct object_id delta_base_oid;
-
-	/*
-	 * If mark_query is true, we do not expand anything, but rather
-	 * just mark the object_info with items we wish to query.
-	 */
-	int mark_query;
-
-	/*
-	 * Whether to split the input on whitespace before feeding it to
-	 * get_sha1; this is decided during the mark_query phase based on
-	 * whether we have a %(rest) token in our format.
-	 */
 	int split_on_whitespace;
-
-	/*
-	 * After a mark_query run, this object_info is set up to be
-	 * passed to oid_object_info_extended. It will point to the data
-	 * elements above, so you can retrieve the response from there.
-	 */
-	struct object_info info;
-
-	/*
-	 * This flag will be true if the requested batch format and options
-	 * don't require us to call oid_object_info, which can then be
-	 * optimized out.
-	 */
-	unsigned skip_object_info : 1;
 };
 
-static int is_atom(const char *atom, const char *s, int slen)
-{
-	int alen = strlen(atom);
-	return alen == slen && !memcmp(atom, s, alen);
-}
-
-static void expand_atom(struct strbuf *sb, const char *atom, int len,
-			void *vdata)
-{
-	struct expand_data *data = vdata;
-
-	if (is_atom("objectname", atom, len)) {
-		if (!data->mark_query)
-			strbuf_addstr(sb, oid_to_hex(&data->oid));
-	} else if (is_atom("objecttype", atom, len)) {
-		if (data->mark_query)
-			data->info.typep = &data->type;
-		else
-			strbuf_addstr(sb, type_name(data->type));
-	} else if (is_atom("objectsize", atom, len)) {
-		if (data->mark_query)
-			data->info.sizep = &data->size;
-		else
-			strbuf_addf(sb, "%"PRIuMAX , (uintmax_t)data->size);
-	} else if (is_atom("objectsize:disk", atom, len)) {
-		if (data->mark_query)
-			data->info.disk_sizep = &data->disk_size;
-		else
-			strbuf_addf(sb, "%"PRIuMAX, (uintmax_t)data->disk_size);
-	} else if (is_atom("rest", atom, len)) {
-		if (data->mark_query)
-			data->split_on_whitespace = 1;
-		else if (data->rest)
-			strbuf_addstr(sb, data->rest);
-	} else if (is_atom("deltabase", atom, len)) {
-		if (data->mark_query)
-			data->info.delta_base_oid = &data->delta_base_oid;
-		else
-			strbuf_addstr(sb,
-				      oid_to_hex(&data->delta_base_oid));
-	} else
-		die("unknown format element: %.*s", len, atom);
-}
-
-static size_t expand_format(struct strbuf *sb, const char *start, void *data)
-{
-	const char *end;
-
-	if (*start != '(')
-		return 0;
-	end = strchr(start + 1, ')');
-	if (!end)
-		die("format element '%s' does not end in ')'", start);
-
-	expand_atom(sb, start + 1, end - start - 1, data);
-
-	return end - start + 1;
-}
-
 static void batch_write(struct batch_options *opt, const void *data, int len)
 {
 	if (opt->buffer_output) {
@@ -297,86 +209,31 @@ static void batch_write(struct batch_options *opt, const void *data, int len)
 		write_or_die(1, data, len);
 }
 
-static void print_object_or_die(struct batch_options *opt, struct expand_data *data)
-{
-	const struct object_id *oid = &data->oid;
-
-	assert(data->info.typep);
-
-	if (data->type == OBJ_BLOB) {
-		if (opt->buffer_output)
-			fflush(stdout);
-		if (opt->cmdmode) {
-			char *contents;
-			unsigned long size;
-
-			if (!data->rest)
-				die("missing path for '%s'", oid_to_hex(oid));
-
-			if (opt->cmdmode == 'w') {
-				if (filter_object(data->rest, 0100644, oid,
-						  &contents, &size))
-					die("could not convert '%s' %s",
-					    oid_to_hex(oid), data->rest);
-			} else if (opt->cmdmode == 'c') {
-				enum object_type type;
-				if (!textconv_object(the_repository,
-						     data->rest, 0100644, oid,
-						     1, &contents, &size))
-					contents = read_object_file(oid,
-								    &type,
-								    &size);
-				if (!contents)
-					die("could not convert '%s' %s",
-					    oid_to_hex(oid), data->rest);
-			} else
-				BUG("invalid cmdmode: %c", opt->cmdmode);
-			batch_write(opt, contents, size);
-			free(contents);
-		} else {
-			stream_blob(oid);
-		}
-	}
-	else {
-		enum object_type type;
-		unsigned long size;
-		void *contents;
-
-		contents = read_object_file(oid, &type, &size);
-		if (!contents)
-			die("object %s disappeared", oid_to_hex(oid));
-		if (type != data->type)
-			die("object %s changed type!?", oid_to_hex(oid));
-		if (data->info.sizep && size != data->size)
-			die("object %s changed size!?", oid_to_hex(oid));
-
-		batch_write(opt, contents, size);
-		free(contents);
-	}
-}
 
 static void batch_object_write(const char *obj_name,
 			       struct strbuf *scratch,
 			       struct batch_options *opt,
 			       struct expand_data *data)
 {
-	if (!data->skip_object_info &&
-	    oid_object_info_extended(the_repository, &data->oid, &data->info,
-				     OBJECT_INFO_LOOKUP_REPLACE) < 0) {
-		printf("%s missing\n",
-		       obj_name ? obj_name : oid_to_hex(&data->oid));
-		fflush(stdout);
-		return;
-	}
+	int ret = 0;
+	struct strbuf err = STRBUF_INIT;
+	struct ref_array_item item = { data->oid, data->rest };
 
 	strbuf_reset(scratch);
-	strbuf_expand(scratch, opt->format, expand_format, data);
-	strbuf_addch(scratch, '\n');
-	batch_write(opt, scratch->buf, scratch->len);
 
-	if (opt->print_contents) {
-		print_object_or_die(opt, data);
-		batch_write(opt, "\n", 1);
+	ret = format_ref_array_item(&item, &opt->format, scratch, &err);
+	if (!ret) {
+		strbuf_addch(scratch, '\n');
+		batch_write(opt, scratch->buf, scratch->len);
+		strbuf_release(&err);
+	} else if (ret < 0) {
+		die("%s\n", err.buf);
+		strbuf_release(&err);
+	} else {
+		/* when ret > 0 , don't call die and print the err to stdout*/
+		printf("%s\n", err.buf);
+		fflush(stdout);
+		strbuf_release(&err);
 	}
 }
 
@@ -428,6 +285,13 @@ static void batch_one_object(const char *obj_name,
 		return;
 	}
 
+	if (!has_object_file(&data->oid)) {
+		printf("%s missing\n",
+		       obj_name ? obj_name : oid_to_hex(&data->oid));
+		fflush(stdout);
+		return;
+	}
+
 	batch_object_write(obj_name, scratch, opt, data);
 }
 
@@ -488,42 +352,34 @@ static int batch_unordered_packed(const struct object_id *oid,
 	return batch_unordered_object(oid, data);
 }
 
-static int batch_objects(struct batch_options *opt)
+static const char * const cat_file_usage[] = {
+	N_("git cat-file (-t [--allow-unknown-type] | -s [--allow-unknown-type] | -e | -p | <type> | --textconv | --filters) [--path=<path>] <object>"),
+	N_("git cat-file (--batch[=<format>] | --batch-check[=<format>]) [--follow-symlinks] [--textconv | --filters]"),
+	NULL
+};
+
+static int batch_objects(struct batch_options *opt, const struct option *options)
 {
 	struct strbuf input = STRBUF_INIT;
 	struct strbuf output = STRBUF_INIT;
+	struct strbuf format = STRBUF_INIT;
 	struct expand_data data;
 	int save_warning;
 	int retval = 0;
 
-	if (!opt->format)
-		opt->format = "%(objectname) %(objecttype) %(objectsize)";
-
-	/*
-	 * Expand once with our special mark_query flag, which will prime the
-	 * object_info to be handed to oid_object_info_extended for each
-	 * object.
-	 */
 	memset(&data, 0, sizeof(data));
-	data.mark_query = 1;
-	strbuf_expand(&output, opt->format, expand_format, &data);
-	data.mark_query = 0;
-	strbuf_release(&output);
-	if (opt->cmdmode)
-		data.split_on_whitespace = 1;
-
-	if (opt->all_objects) {
-		struct object_info empty = OBJECT_INFO_INIT;
-		if (!memcmp(&data.info, &empty, sizeof(empty)))
-			data.skip_object_info = 1;
-	}
-
-	/*
-	 * If we are printing out the object, then always fill in the type,
-	 * since we will want to decide whether or not to stream.
-	 */
+	if (!opt->format.format)
+		strbuf_addstr(&format, "%(objectname) %(objecttype) %(objectsize)");
+	else
+		strbuf_addstr(&format, opt->format.format);
 	if (opt->print_contents)
-		data.info.typep = &data.type;
+		strbuf_addstr(&format, "\n%(raw)");
+	opt->format.format = format.buf;
+	if (verify_ref_format(&opt->format))
+		usage_with_options(cat_file_usage, options);
+
+	if (opt->cmdmode || opt->format.use_rest)
+		data.split_on_whitespace = 1;
 
 	if (opt->all_objects) {
 		struct object_cb_data cb;
@@ -556,6 +412,7 @@ static int batch_objects(struct batch_options *opt)
 			oid_array_clear(&sa);
 		}
 
+		strbuf_release(&format);
 		strbuf_release(&output);
 		return 0;
 	}
@@ -587,19 +444,13 @@ static int batch_objects(struct batch_options *opt)
 
 		batch_one_object(input.buf, &output, opt, &data);
 	}
-
+	strbuf_release(&format);
 	strbuf_release(&input);
 	strbuf_release(&output);
 	warn_on_object_refname_ambiguity = save_warning;
 	return retval;
 }
 
-static const char * const cat_file_usage[] = {
-	N_("git cat-file (-t [--allow-unknown-type] | -s [--allow-unknown-type] | -e | -p | <type> | --textconv | --filters) [--path=<path>] <object>"),
-	N_("git cat-file (--batch[=<format>] | --batch-check[=<format>]) [--follow-symlinks] [--textconv | --filters]"),
-	NULL
-};
-
 static int git_cat_file_config(const char *var, const char *value, void *cb)
 {
 	if (userdiff_config(var, value) < 0)
@@ -622,7 +473,7 @@ static int batch_option_callback(const struct option *opt,
 
 	bo->enabled = 1;
 	bo->print_contents = !strcmp(opt->long_name, "batch");
-	bo->format = arg;
+	bo->format.format = arg;
 
 	return 0;
 }
@@ -631,7 +482,9 @@ int cmd_cat_file(int argc, const char **argv, const char *prefix)
 {
 	int opt = 0;
 	const char *exp_type = NULL, *obj_name = NULL;
-	struct batch_options batch = {0};
+	struct batch_options batch = {
+		.format = REF_FORMAT_INIT
+	};
 	int unknown_type = 0;
 
 	const struct option options[] = {
@@ -670,6 +523,7 @@ int cmd_cat_file(int argc, const char **argv, const char *prefix)
 	git_config(git_cat_file_config, NULL);
 
 	batch.buffer_output = -1;
+	batch.format.cat_file_mode = 1;
 	argc = parse_options(argc, argv, prefix, options, cat_file_usage, 0);
 
 	if (opt) {
@@ -713,7 +567,7 @@ int cmd_cat_file(int argc, const char **argv, const char *prefix)
 		batch.buffer_output = batch.all_objects;
 
 	if (batch.enabled)
-		return batch_objects(&batch);
+		return batch_objects(&batch, options);
 
 	if (unknown_type && opt != 't' && opt != 's')
 		die("git cat-file --allow-unknown-type: use with -s or -t");
diff --git a/ref-filter.c b/ref-filter.c
index 420c0bf9384f..d4c88d496698 100644
--- a/ref-filter.c
+++ b/ref-filter.c
@@ -1017,8 +1017,15 @@ int verify_ref_format(struct ref_format *format)
 		at = parse_ref_filter_atom(format, sp + 2, ep, &err);
 		if (at < 0)
 			die("%s", err.buf);
-		if (used_atom[at].atom_type == ATOM_REST)
-			die("this command reject atom %%(%.*s)", (int)(ep - sp - 2), sp + 2);
+		if ((!format->cat_file_mode && used_atom[at].atom_type == ATOM_REST) ||
+		    (format->cat_file_mode && (used_atom[at].atom_type == ATOM_FLAG ||
+					       used_atom[at].atom_type == ATOM_HEAD ||
+					       used_atom[at].atom_type == ATOM_PUSH ||
+					       used_atom[at].atom_type == ATOM_REFNAME ||
+					       used_atom[at].atom_type == ATOM_SYMREF ||
+					       used_atom[at].atom_type == ATOM_UPSTREAM ||
+					       used_atom[at].atom_type == ATOM_WORKTREEPATH)))
+			die(_("this command reject atom %%(%.*s)"), (int)(ep - sp - 2), sp + 2);
 
 		if (format->quote_style && used_atom[at].atom_type == ATOM_RAW &&
 		    used_atom[at].u.raw_data.option == RAW_BARE)
@@ -1735,8 +1742,8 @@ static int get_object(struct ref_array_item *ref, int deref, struct object **obj
 	}
 	if (oid_object_info_extended(the_repository, &oi->oid, &oi->info,
 				     OBJECT_INFO_LOOKUP_REPLACE))
-		return strbuf_addf_ret(err, -1, _("missing object %s for %s"),
-				       oid_to_hex(&oi->oid), ref->refname);
+		return strbuf_addf_ret(err, 1, _("%s missing"),
+				       oid_to_hex(&oi->oid));
 	if (oi->info.disk_sizep && oi->disk_size < 0)
 		BUG("Object size is less than zero.");
 
diff --git a/ref-filter.h b/ref-filter.h
index 9dc07476a584..bece9583cf18 100644
--- a/ref-filter.h
+++ b/ref-filter.h
@@ -78,6 +78,7 @@ struct ref_format {
 	 */
 	const char *format;
 	const char *rest;
+	int cat_file_mode;
 	int quote_style;
 	int use_rest;
 	int use_color;
@@ -86,7 +87,7 @@ struct ref_format {
 	int need_color_reset_at_eol;
 };
 
-#define REF_FORMAT_INIT { NULL, NULL, 0, 0, -1 }
+#define REF_FORMAT_INIT { NULL, NULL, 0, 0, 0, -1 }
 
 /*  Macros for checking --merged and --no-merged options */
 #define _OPT_MERGED_NO_MERGED(option, filter, h) \
diff --git a/t/t1006-cat-file.sh b/t/t1006-cat-file.sh
index 5d2dc99b74ad..5efa7397cfbc 100755
--- a/t/t1006-cat-file.sh
+++ b/t/t1006-cat-file.sh
@@ -586,4 +586,256 @@ test_expect_success 'cat-file --unordered works' '
 	test_cmp expect actual
 '
 
+. "$TEST_DIRECTORY"/lib-gpg.sh
+. "$TEST_DIRECTORY"/lib-terminal.sh
+
+test_expect_success 'cat-file --batch|--batch-check setup' '
+	echo 1>blob1 &&
+	printf "a\0b\0\c" >blob2 &&
+	git add blob1 blob2 &&
+	git commit -m "Commit Message" &&
+	git branch -M main &&
+	git tag -a -m "v0.0.0" testtag &&
+	git update-ref refs/myblobs/blob1 HEAD:blob1 &&
+	git update-ref refs/myblobs/blob2 HEAD:blob2 &&
+	git update-ref refs/mytrees/tree1 HEAD^{tree}
+'
+
+batch_test_atom() {
+	if test "$3" = "fail"
+	then
+		test_expect_${4:-success} $PREREQ "basic atom: $1 $2 mast failed" "
+			test_must_fail git cat-file --batch-check='$2' >bad <<-EOF
+			$1
+			EOF
+		"
+	else
+		test_expect_${4:-success} $PREREQ "basic atom: $1 $2" "
+			git for-each-ref --format='$2' $1 >expected &&
+			git cat-file --batch-check='$2' >actual <<-EOF &&
+			$1
+			EOF
+			sanitize_pgp <actual >actual.clean &&
+			cmp expected actual.clean
+		"
+	fi
+}
+
+batch_test_atom refs/heads/main '%(refname)' fail
+batch_test_atom refs/heads/main '%(refname:)' fail
+batch_test_atom refs/heads/main '%(refname:short)' fail
+batch_test_atom refs/heads/main '%(refname:lstrip=1)' fail
+batch_test_atom refs/heads/main '%(refname:lstrip=2)' fail
+batch_test_atom refs/heads/main '%(refname:lstrip=-1)' fail
+batch_test_atom refs/heads/main '%(refname:lstrip=-2)' fail
+batch_test_atom refs/heads/main '%(refname:rstrip=1)' fail
+batch_test_atom refs/heads/main '%(refname:rstrip=2)' fail
+batch_test_atom refs/heads/main '%(refname:rstrip=-1)' fail
+batch_test_atom refs/heads/main '%(refname:rstrip=-2)' fail
+batch_test_atom refs/heads/main '%(refname:strip=1)' fail
+batch_test_atom refs/heads/main '%(refname:strip=2)' fail
+batch_test_atom refs/heads/main '%(refname:strip=-1)' fail
+batch_test_atom refs/heads/main '%(refname:strip=-2)' fail
+batch_test_atom refs/heads/main '%(upstream)' fail
+batch_test_atom refs/heads/main '%(upstream:short)' fail
+batch_test_atom refs/heads/main '%(upstream:lstrip=2)' fail
+batch_test_atom refs/heads/main '%(upstream:lstrip=-2)' fail
+batch_test_atom refs/heads/main '%(upstream:rstrip=2)' fail
+batch_test_atom refs/heads/main '%(upstream:rstrip=-2)' fail
+batch_test_atom refs/heads/main '%(upstream:strip=2)' fail
+batch_test_atom refs/heads/main '%(upstream:strip=-2)' fail
+batch_test_atom refs/heads/main '%(push)' fail
+batch_test_atom refs/heads/main '%(push:short)' fail
+batch_test_atom refs/heads/main '%(push:lstrip=1)' fail
+batch_test_atom refs/heads/main '%(push:lstrip=-1)' fail
+batch_test_atom refs/heads/main '%(push:rstrip=1)' fail
+batch_test_atom refs/heads/main '%(push:rstrip=-1)' fail
+batch_test_atom refs/heads/main '%(push:strip=1)' fail
+batch_test_atom refs/heads/main '%(push:strip=-1)' fail
+batch_test_atom refs/heads/main '%(objecttype)'
+batch_test_atom refs/heads/main '%(objectsize)'
+batch_test_atom refs/heads/main '%(objectsize:disk)'
+batch_test_atom refs/heads/main '%(deltabase)'
+batch_test_atom refs/heads/main '%(objectname)'
+batch_test_atom refs/heads/main '%(objectname:short)'
+batch_test_atom refs/heads/main '%(objectname:short=1)'
+batch_test_atom refs/heads/main '%(objectname:short=10)'
+batch_test_atom refs/heads/main '%(tree)'
+batch_test_atom refs/heads/main '%(tree:short)'
+batch_test_atom refs/heads/main '%(tree:short=1)'
+batch_test_atom refs/heads/main '%(tree:short=10)'
+batch_test_atom refs/heads/main '%(parent)'
+batch_test_atom refs/heads/main '%(parent:short)'
+batch_test_atom refs/heads/main '%(parent:short=1)'
+batch_test_atom refs/heads/main '%(parent:short=10)'
+batch_test_atom refs/heads/main '%(numparent)'
+batch_test_atom refs/heads/main '%(object)'
+batch_test_atom refs/heads/main '%(type)'
+batch_test_atom refs/heads/main '%(raw)'
+batch_test_atom refs/heads/main '%(*objectname)'
+batch_test_atom refs/heads/main '%(*objecttype)'
+batch_test_atom refs/heads/main '%(author)'
+batch_test_atom refs/heads/main '%(authorname)'
+batch_test_atom refs/heads/main '%(authoremail)'
+batch_test_atom refs/heads/main '%(authoremail:trim)'
+batch_test_atom refs/heads/main '%(authoremail:localpart)'
+batch_test_atom refs/heads/main '%(authordate)'
+batch_test_atom refs/heads/main '%(committer)'
+batch_test_atom refs/heads/main '%(committername)'
+batch_test_atom refs/heads/main '%(committeremail)'
+batch_test_atom refs/heads/main '%(committeremail:trim)'
+batch_test_atom refs/heads/main '%(committeremail:localpart)'
+batch_test_atom refs/heads/main '%(committerdate)'
+batch_test_atom refs/heads/main '%(tag)'
+batch_test_atom refs/heads/main '%(tagger)'
+batch_test_atom refs/heads/main '%(taggername)'
+batch_test_atom refs/heads/main '%(taggeremail)'
+batch_test_atom refs/heads/main '%(taggeremail:trim)'
+batch_test_atom refs/heads/main '%(taggeremail:localpart)'
+batch_test_atom refs/heads/main '%(taggerdate)'
+batch_test_atom refs/heads/main '%(creator)'
+batch_test_atom refs/heads/main '%(creatordate)'
+batch_test_atom refs/heads/main '%(subject)'
+batch_test_atom refs/heads/main '%(subject:sanitize)'
+batch_test_atom refs/heads/main '%(contents:subject)'
+batch_test_atom refs/heads/main '%(body)'
+batch_test_atom refs/heads/main '%(contents:body)'
+batch_test_atom refs/heads/main '%(contents:signature)'
+batch_test_atom refs/heads/main '%(contents)'
+batch_test_atom refs/heads/main '%(HEAD)' fail
+batch_test_atom refs/heads/main '%(upstream:track)' fail
+batch_test_atom refs/heads/main '%(upstream:trackshort)' fail
+batch_test_atom refs/heads/main '%(upstream:track,nobracket)' fail
+batch_test_atom refs/heads/main '%(upstream:nobracket,track)' fail
+batch_test_atom refs/heads/main '%(push:track)' fail
+batch_test_atom refs/heads/main '%(push:trackshort)' fail
+batch_test_atom refs/heads/main '%(worktreepath)' fail
+batch_test_atom refs/heads/main '%(symref)' fail
+batch_test_atom refs/heads/main '%(flag)' fail
+
+batch_test_atom refs/tags/testtag '%(refname)' fail
+batch_test_atom refs/tags/testtag '%(refname:short)' fail
+batch_test_atom refs/tags/testtag '%(upstream)' fail
+batch_test_atom refs/tags/testtag '%(push)' fail
+batch_test_atom refs/tags/testtag '%(objecttype)'
+batch_test_atom refs/tags/testtag '%(objectsize)'
+batch_test_atom refs/tags/testtag '%(objectsize:disk)'
+batch_test_atom refs/tags/testtag '%(*objectsize:disk)'
+batch_test_atom refs/tags/testtag '%(deltabase)'
+batch_test_atom refs/tags/testtag '%(*deltabase)'
+batch_test_atom refs/tags/testtag '%(objectname)'
+batch_test_atom refs/tags/testtag '%(objectname:short)'
+batch_test_atom refs/tags/testtag '%(tree)'
+batch_test_atom refs/tags/testtag '%(tree:short)'
+batch_test_atom refs/tags/testtag '%(tree:short=1)'
+batch_test_atom refs/tags/testtag '%(tree:short=10)'
+batch_test_atom refs/tags/testtag '%(parent)'
+batch_test_atom refs/tags/testtag '%(parent:short)'
+batch_test_atom refs/tags/testtag '%(parent:short=1)'
+batch_test_atom refs/tags/testtag '%(parent:short=10)'
+batch_test_atom refs/tags/testtag '%(numparent)'
+batch_test_atom refs/tags/testtag '%(object)'
+batch_test_atom refs/tags/testtag '%(type)'
+batch_test_atom refs/tags/testtag '%(*objectname)'
+batch_test_atom refs/tags/testtag '%(*objecttype)'
+batch_test_atom refs/tags/testtag '%(author)'
+batch_test_atom refs/tags/testtag '%(authorname)'
+batch_test_atom refs/tags/testtag '%(authoremail)'
+batch_test_atom refs/tags/testtag '%(authoremail:trim)'
+batch_test_atom refs/tags/testtag '%(authoremail:localpart)'
+batch_test_atom refs/tags/testtag '%(authordate)'
+batch_test_atom refs/tags/testtag '%(committer)'
+batch_test_atom refs/tags/testtag '%(committername)'
+batch_test_atom refs/tags/testtag '%(committeremail)'
+batch_test_atom refs/tags/testtag '%(committeremail:trim)'
+batch_test_atom refs/tags/testtag '%(committeremail:localpart)'
+batch_test_atom refs/tags/testtag '%(committerdate)'
+batch_test_atom refs/tags/testtag '%(tag)'
+batch_test_atom refs/tags/testtag '%(tagger)'
+batch_test_atom refs/tags/testtag '%(taggername)'
+batch_test_atom refs/tags/testtag '%(taggeremail)'
+batch_test_atom refs/tags/testtag '%(taggeremail:trim)'
+batch_test_atom refs/tags/testtag '%(taggeremail:localpart)'
+batch_test_atom refs/tags/testtag '%(taggerdate)'
+batch_test_atom refs/tags/testtag '%(creator)'
+batch_test_atom refs/tags/testtag '%(creatordate)'
+batch_test_atom refs/tags/testtag '%(subject)'
+batch_test_atom refs/tags/testtag '%(subject:sanitize)'
+batch_test_atom refs/tags/testtag '%(contents:subject)'
+batch_test_atom refs/tags/testtag '%(body)'
+batch_test_atom refs/tags/testtag '%(contents:body)'
+batch_test_atom refs/tags/testtag '%(contents:signature)'
+batch_test_atom refs/tags/testtag '%(contents)'
+batch_test_atom refs/tags/testtag '%(HEAD)' fail
+
+batch_test_atom refs/myblobs/blob1 '%(refname)' fail
+batch_test_atom refs/myblobs/blob1 '%(upstream)' fail
+batch_test_atom refs/myblobs/blob1 '%(push)' fail
+batch_test_atom refs/myblobs/blob1 '%(HEAD)' fail
+
+batch_test_atom refs/myblobs/blob1 '%(objectname)'
+batch_test_atom refs/myblobs/blob1 '%(objecttype)'
+batch_test_atom refs/myblobs/blob1 '%(objectsize)'
+batch_test_atom refs/myblobs/blob1 '%(objectsize:disk)'
+batch_test_atom refs/myblobs/blob1 '%(deltabase)'
+
+batch_test_atom refs/myblobs/blob1 '%(contents)'
+batch_test_atom refs/myblobs/blob2 '%(contents)'
+
+batch_test_atom refs/myblobs/blob1 '%(raw)'
+batch_test_atom refs/myblobs/blob2 '%(raw)'
+batch_test_atom refs/mytrees/tree1 '%(raw)'
+
+batch_test_atom refs/myblobs/blob1 '%(raw:size)'
+batch_test_atom refs/myblobs/blob2 '%(raw:size)'
+batch_test_atom refs/mytrees/tree1 '%(raw:size)'
+
+batch_test_atom refs/myblobs/blob1 '%(if:equals=blob)%(objecttype)%(then)commit%(else)not commit%(end)'
+batch_test_atom refs/myblobs/blob2 '%(if:equals=blob)%(objecttype)%(then)commit%(else)not commit%(end)'
+batch_test_atom refs/mytrees/tree1 '%(if:equals=tree)%(objecttype)%(then)tree%(else)not tree%(end)'
+
+batch_test_atom refs/heads/main '%(align:60) objectname is %(objectname)%(end)|%(objectname)'
+batch_test_atom refs/heads/main '%(align:left,60) objectname is %(objectname)%(end)|%(objectname)'
+batch_test_atom refs/heads/main '%(align:middle,60) objectname is %(objectname)%(end)|%(objectname)'
+batch_test_atom refs/heads/main '%(align:60,right) objectname is %(objectname)%(end)|%(objectname)'
+
+batch_test_atom refs/heads/main 'VALID'
+batch_test_atom refs/heads/main '%(INVALID)' fail
+batch_test_atom refs/heads/main '%(authordate:INVALID)' fail
+
+test_expect_success 'cat-file refs/heads/main refs/tags/testtag %(rest)' '
+	cat >expected <<-EOF &&
+	123 commit 123
+	456 tag 456
+	EOF
+	git cat-file --batch-check="%(rest) %(objecttype) %(rest)" >actual <<-EOF &&
+	refs/heads/main 123
+	refs/tags/testtag 456
+	EOF
+	test_cmp expected actual
+'
+
+batch_test_atom refs/heads/main '%(objectname) %(objecttype) %(objectsize)
+%(raw)'
+batch_test_atom refs/tags/testtag '%(objectname) %(objecttype) %(objectsize)
+%(raw)'
+batch_test_atom refs/myblobs/blob1 '%(objectname) %(objecttype) %(objectsize)
+%(raw)'
+batch_test_atom refs/myblobs/blob2 '%(objectname) %(objecttype) %(objectsize)
+%(raw)'
+
+
+test_expect_success 'cat-file --batch equals to --batch-check with atoms' '
+	git cat-file --batch-check="%(objectname) %(objecttype) %(objectsize)
+%(raw)" >expected <<-EOF &&
+	refs/heads/main
+	refs/tags/testtag
+	EOF
+	git cat-file --batch >actual <<-EOF &&
+	refs/heads/main
+	refs/tags/testtag
+	EOF
+	cmp expected actual
+'
+
 test_done
diff --git a/t/t6301-for-each-ref-errors.sh b/t/t6301-for-each-ref-errors.sh
index 40edf9dab534..3553f84a00c1 100755
--- a/t/t6301-for-each-ref-errors.sh
+++ b/t/t6301-for-each-ref-errors.sh
@@ -41,7 +41,7 @@ test_expect_success 'Missing objects are reported correctly' '
 	r=refs/heads/missing &&
 	echo $MISSING >.git/$r &&
 	test_when_finished "rm -f .git/$r" &&
-	echo "fatal: missing object $MISSING for $r" >missing-err &&
+	echo "fatal: $MISSING missing" >missing-err &&
 	test_must_fail git for-each-ref 2>err &&
 	test_cmp missing-err err &&
 	(
-- 
gitgitgadget


^ permalink raw reply	[flat|nested] 121+ messages in thread

* [PATCH 7/8] [GSOC] cat-file: reuse err buf in batch_objet_write()
  2021-06-12 11:14 [PATCH 0/8] [GSOC][RFC] cat-file: reuse ref-filter logic ZheNing Hu via GitGitGadget
                   ` (5 preceding siblings ...)
  2021-06-12 11:14 ` [PATCH 6/8] [GSOC] cat-file: reuse ref-filter logic ZheNing Hu via GitGitGadget
@ 2021-06-12 11:14 ` ZheNing Hu via GitGitGadget
  2021-06-17  7:16   ` Ævar Arnfjörð Bjarmason
  2021-06-12 11:14 ` [PATCH 8/8] [GSOC] cat-file: re-implement --textconv, --filters options ZheNing Hu via GitGitGadget
  2021-06-15 13:28 ` [PATCH v2 0/9] [GSOC][RFC] cat-file: reuse ref-filter logic ZheNing Hu via GitGitGadget
  8 siblings, 1 reply; 121+ messages in thread
From: ZheNing Hu via GitGitGadget @ 2021-06-12 11:14 UTC (permalink / raw)
  To: git
  Cc: Junio C Hamano, Christian Couder, Hariom Verma, Bagas Sanjaya,
	Jeff King, ZheNing Hu, ZheNing Hu

From: ZheNing Hu <adlternative@gmail.com>

Reuse the `err` buffer in batch_object_write(), as the
buffer `scratch` does. This will reduce the overhead
of multiple allocations of memory of the err buffer.

Mentored-by: Christian Couder <christian.couder@gmail.com>
Mentored-by: Hariom Verma <hariom18599@gmail.com>
Signed-off-by: ZheNing Hu <adlternative@gmail.com>
---
 builtin/cat-file.c | 24 ++++++++++++++----------
 1 file changed, 14 insertions(+), 10 deletions(-)

diff --git a/builtin/cat-file.c b/builtin/cat-file.c
index 0bc524e656e1..1a73c3d23dde 100644
--- a/builtin/cat-file.c
+++ b/builtin/cat-file.c
@@ -212,33 +212,32 @@ static void batch_write(struct batch_options *opt, const void *data, int len)
 
 static void batch_object_write(const char *obj_name,
 			       struct strbuf *scratch,
+			       struct strbuf *err,
 			       struct batch_options *opt,
 			       struct expand_data *data)
 {
 	int ret = 0;
-	struct strbuf err = STRBUF_INIT;
 	struct ref_array_item item = { data->oid, data->rest };
 
 	strbuf_reset(scratch);
+	strbuf_reset(err);
 
-	ret = format_ref_array_item(&item, &opt->format, scratch, &err);
+	ret = format_ref_array_item(&item, &opt->format, scratch, err);
 	if (!ret) {
 		strbuf_addch(scratch, '\n');
 		batch_write(opt, scratch->buf, scratch->len);
-		strbuf_release(&err);
 	} else if (ret < 0) {
-		die("%s\n", err.buf);
-		strbuf_release(&err);
+		die("%s\n", err->buf);
 	} else {
 		/* when ret > 0 , don't call die and print the err to stdout*/
-		printf("%s\n", err.buf);
+		printf("%s\n", err->buf);
 		fflush(stdout);
-		strbuf_release(&err);
 	}
 }
 
 static void batch_one_object(const char *obj_name,
 			     struct strbuf *scratch,
+			     struct strbuf *err,
 			     struct batch_options *opt,
 			     struct expand_data *data)
 {
@@ -292,7 +291,7 @@ static void batch_one_object(const char *obj_name,
 		return;
 	}
 
-	batch_object_write(obj_name, scratch, opt, data);
+	batch_object_write(obj_name, scratch, err, opt, data);
 }
 
 struct object_cb_data {
@@ -300,13 +299,14 @@ struct object_cb_data {
 	struct expand_data *expand;
 	struct oidset *seen;
 	struct strbuf *scratch;
+	struct strbuf *err;
 };
 
 static int batch_object_cb(const struct object_id *oid, void *vdata)
 {
 	struct object_cb_data *data = vdata;
 	oidcpy(&data->expand->oid, oid);
-	batch_object_write(NULL, data->scratch, data->opt, data->expand);
+	batch_object_write(NULL, data->scratch, data->err, data->opt, data->expand);
 	return 0;
 }
 
@@ -362,6 +362,7 @@ static int batch_objects(struct batch_options *opt, const struct option *options
 {
 	struct strbuf input = STRBUF_INIT;
 	struct strbuf output = STRBUF_INIT;
+	struct strbuf err = STRBUF_INIT;
 	struct strbuf format = STRBUF_INIT;
 	struct expand_data data;
 	int save_warning;
@@ -390,6 +391,7 @@ static int batch_objects(struct batch_options *opt, const struct option *options
 		cb.opt = opt;
 		cb.expand = &data;
 		cb.scratch = &output;
+		cb.err = &err;
 
 		if (opt->unordered) {
 			struct oidset seen = OIDSET_INIT;
@@ -414,6 +416,7 @@ static int batch_objects(struct batch_options *opt, const struct option *options
 
 		strbuf_release(&format);
 		strbuf_release(&output);
+		strbuf_release(&err);
 		return 0;
 	}
 
@@ -442,11 +445,12 @@ static int batch_objects(struct batch_options *opt, const struct option *options
 			data.rest = p;
 		}
 
-		batch_one_object(input.buf, &output, opt, &data);
+		batch_one_object(input.buf, &output, &err, opt, &data);
 	}
 	strbuf_release(&format);
 	strbuf_release(&input);
 	strbuf_release(&output);
+	strbuf_release(&err);
 	warn_on_object_refname_ambiguity = save_warning;
 	return retval;
 }
-- 
gitgitgadget


^ permalink raw reply	[flat|nested] 121+ messages in thread

* [PATCH 8/8] [GSOC] cat-file: re-implement --textconv, --filters options
  2021-06-12 11:14 [PATCH 0/8] [GSOC][RFC] cat-file: reuse ref-filter logic ZheNing Hu via GitGitGadget
                   ` (6 preceding siblings ...)
  2021-06-12 11:14 ` [PATCH 7/8] [GSOC] cat-file: reuse err buf in batch_objet_write() ZheNing Hu via GitGitGadget
@ 2021-06-12 11:14 ` ZheNing Hu via GitGitGadget
  2021-06-17  7:18   ` Ævar Arnfjörð Bjarmason
  2021-06-15 13:28 ` [PATCH v2 0/9] [GSOC][RFC] cat-file: reuse ref-filter logic ZheNing Hu via GitGitGadget
  8 siblings, 1 reply; 121+ messages in thread
From: ZheNing Hu via GitGitGadget @ 2021-06-12 11:14 UTC (permalink / raw)
  To: git
  Cc: Junio C Hamano, Christian Couder, Hariom Verma, Bagas Sanjaya,
	Jeff King, ZheNing Hu, ZheNing Hu

From: ZheNing Hu <adlternative@gmail.com>

After cat-file reuses the ref-filter logic, we re-implement the
functions of --textconv and --filters options.

Add members `use_textconv` and `use_filters` in struct `ref_format`,
and use global variables `use_filters` and `use_textconv` in
`ref-filter.c`, so that we can filter the content of the object
in get_object(). Use `actual_oi` to record the real expand_data:
it may point to the original `oi` or the `act_oi` processed by
`textconv_object()` or `convert_to_working_tree()`. `grab_values()`
will grab the contents of `actual_oi` and `grab_common_values()`
to grab the contents of origin `oi`, this ensures that `%(objectsize)`
still uses the size of the unfiltered data.

In `get_object()`, we made an optimization: Firstly, get the size and
type of the object instead of directly getting the object data.
If using --textconv, after successfully obtaining the filtered object
data, an extra oid_object_info_extended() will be skipped, which can
reduce the cost of object data copy; If using --filter, the data of
the object first will be getted first, and then convert_to_working_tree()
will be used to get the filtered object data.

Mentored-by: Christian Couder <christian.couder@gmail.com>
Mentored-by: Hariom Verma <hariom18599@gmail.com>
Signed-off-by: ZheNing Hu <adlternative@gmail.com>
---
 builtin/cat-file.c |  5 ++++
 ref-filter.c       | 66 ++++++++++++++++++++++++++++++++++++++++++++--
 ref-filter.h       |  5 ++--
 3 files changed, 72 insertions(+), 4 deletions(-)

diff --git a/builtin/cat-file.c b/builtin/cat-file.c
index 1a73c3d23dde..3fde2587201b 100644
--- a/builtin/cat-file.c
+++ b/builtin/cat-file.c
@@ -376,6 +376,11 @@ static int batch_objects(struct batch_options *opt, const struct option *options
 	if (opt->print_contents)
 		strbuf_addstr(&format, "\n%(raw)");
 	opt->format.format = format.buf;
+	if (opt->cmdmode == 'c')
+		opt->format.use_textconv = 1;
+	if (opt->cmdmode == 'w')
+		opt->format.use_filters = 1;
+
 	if (verify_ref_format(&opt->format))
 		usage_with_options(cat_file_usage, options);
 
diff --git a/ref-filter.c b/ref-filter.c
index d4c88d496698..8264ef7d2786 100644
--- a/ref-filter.c
+++ b/ref-filter.c
@@ -1,3 +1,4 @@
+#define USE_THE_INDEX_COMPATIBILITY_MACROS
 #include "builtin.h"
 #include "cache.h"
 #include "parse-options.h"
@@ -84,6 +85,9 @@ static struct expand_data {
 	struct object_info info;
 } oi, oi_deref;
 
+int use_filters;
+int use_textconv;
+
 struct ref_to_worktree_entry {
 	struct hashmap_entry ent;
 	struct worktree *wt; /* key is wt->head_ref */
@@ -1027,6 +1031,9 @@ int verify_ref_format(struct ref_format *format)
 					       used_atom[at].atom_type == ATOM_WORKTREEPATH)))
 			die(_("this command reject atom %%(%.*s)"), (int)(ep - sp - 2), sp + 2);
 
+		use_filters = format->use_filters;
+		use_textconv = format->use_textconv;
+
 		if (format->quote_style && used_atom[at].atom_type == ATOM_RAW &&
 		    used_atom[at].u.raw_data.option == RAW_BARE)
 			die(_("--format=%.*s cannot be used with"
@@ -1735,10 +1742,41 @@ static int get_object(struct ref_array_item *ref, int deref, struct object **obj
 {
 	/* parse_object_buffer() will set eaten to 0 if free() will be needed */
 	int eaten = 1;
+	struct expand_data *actual_oi = oi;
+	struct expand_data act_oi = {0};
+
 	if (oi->info.contentp) {
 		/* We need to know that to use parse_object_buffer properly */
+		void **temp_contentp = oi->info.contentp;
+		oi->info.contentp = NULL;
 		oi->info.sizep = &oi->size;
 		oi->info.typep = &oi->type;
+
+		/* get the type and size */
+		if (oid_object_info_extended(the_repository, &oi->oid, &oi->info,
+					OBJECT_INFO_LOOKUP_REPLACE))
+			return strbuf_addf_ret(err, 1, _("%s missing"),
+					       oid_to_hex(&oi->oid));
+
+		oi->info.sizep = NULL;
+		oi->info.typep = NULL;
+		oi->info.contentp = temp_contentp;
+
+		if (use_textconv) {
+			act_oi = *oi;
+
+			if(!ref->rest)
+				return strbuf_addf_ret(err, -1, _("missing path for '%s'"),
+						       oid_to_hex(&act_oi.oid));
+			if (act_oi.type == OBJ_BLOB) {
+				if (textconv_object(the_repository,
+						    ref->rest, 0100644, &act_oi.oid,
+						    1, (char **)(&act_oi.content), &act_oi.size)) {
+					actual_oi = &act_oi;
+					goto success;
+				}
+			}
+		}
 	}
 	if (oid_object_info_extended(the_repository, &oi->oid, &oi->info,
 				     OBJECT_INFO_LOOKUP_REPLACE))
@@ -1748,19 +1786,43 @@ static int get_object(struct ref_array_item *ref, int deref, struct object **obj
 		BUG("Object size is less than zero.");
 
 	if (oi->info.contentp) {
-		*obj = parse_object_buffer(the_repository, &oi->oid, oi->type, oi->size, oi->content, &eaten);
+		if (use_filters) {
+			if(!ref->rest)
+				return strbuf_addf_ret(err, -1, _("missing path for '%s'"),
+						       oid_to_hex(&oi->oid));
+			if (oi->type == OBJ_BLOB) {
+				struct strbuf strbuf = STRBUF_INIT;
+				struct checkout_metadata meta;
+				act_oi = *oi;
+
+				init_checkout_metadata(&meta, NULL, NULL, &act_oi.oid);
+				if (convert_to_working_tree(&the_index, ref->rest, act_oi.content, act_oi.size, &strbuf, &meta)) {
+					act_oi.size = strbuf.len;
+					act_oi.content = strbuf_detach(&strbuf, NULL);
+					actual_oi = &act_oi;
+				} else {
+					die("could not convert '%s' %s",
+					    oid_to_hex(&oi->oid), ref->rest);
+				}
+			}
+		}
+
+success:
+		*obj = parse_object_buffer(the_repository, &actual_oi->oid, actual_oi->type, actual_oi->size, actual_oi->content, &eaten);
 		if (!*obj) {
 			if (!eaten)
 				free(oi->content);
 			return strbuf_addf_ret(err, -1, _("parse_object_buffer failed on %s for %s"),
 					       oid_to_hex(&oi->oid), ref->refname);
 		}
-		grab_values(ref->value, deref, *obj, oi);
+		grab_values(ref->value, deref, *obj, actual_oi);
 	}
 
 	grab_common_values(ref->value, deref, oi);
 	if (!eaten)
 		free(oi->content);
+	if (actual_oi != oi)
+		free(actual_oi->content);
 	return 0;
 }
 
diff --git a/ref-filter.h b/ref-filter.h
index bece9583cf18..cf7bad4e8b49 100644
--- a/ref-filter.h
+++ b/ref-filter.h
@@ -80,14 +80,15 @@ struct ref_format {
 	const char *rest;
 	int cat_file_mode;
 	int quote_style;
+	int use_textconv;
+	int use_filters;
 	int use_rest;
 	int use_color;
-
 	/* Internal state to ref-filter */
 	int need_color_reset_at_eol;
 };
 
-#define REF_FORMAT_INIT { NULL, NULL, 0, 0, 0, -1 }
+#define REF_FORMAT_INIT { NULL, NULL, 0, 0, 0, 0, 0, -1 }
 
 /*  Macros for checking --merged and --no-merged options */
 #define _OPT_MERGED_NO_MERGED(option, filter, h) \
-- 
gitgitgadget

^ permalink raw reply	[flat|nested] 121+ messages in thread

* Re: [PATCH 5/8] [GSOC] ref-filter: teach get_object() return useful value
  2021-06-12 11:14 ` [PATCH 5/8] [GSOC] ref-filter: teach get_object() return useful value ZheNing Hu via GitGitGadget
@ 2021-06-12 20:09   ` Christian Couder
  2021-06-13 10:11     ` ZheNing Hu
  2021-06-17  7:22   ` Ævar Arnfjörð Bjarmason
  1 sibling, 1 reply; 121+ messages in thread
From: Christian Couder @ 2021-06-12 20:09 UTC (permalink / raw)
  To: ZheNing Hu via GitGitGadget
  Cc: git, Junio C Hamano, Hariom Verma, Bagas Sanjaya, Jeff King, ZheNing Hu

On Sat, Jun 12, 2021 at 1:14 PM ZheNing Hu via GitGitGadget
<gitgitgadget@gmail.com> wrote:
>
> From: ZheNing Hu <adlternative@gmail.com>
>
> Let `populate_value()`, `get_ref_atom_value()` and
> `format_ref_array_item()` get the return value of `get_value()`
> correctly. This can help us later let `cat-file --batch` get the
> correct error message and return value of `get_value()`.

Is it get_object() or get_value()?

^ permalink raw reply	[flat|nested] 121+ messages in thread

* Re: [PATCH 5/8] [GSOC] ref-filter: teach get_object() return useful value
  2021-06-12 20:09   ` Christian Couder
@ 2021-06-13 10:11     ` ZheNing Hu
  0 siblings, 0 replies; 121+ messages in thread
From: ZheNing Hu @ 2021-06-13 10:11 UTC (permalink / raw)
  To: Christian Couder
  Cc: ZheNing Hu via GitGitGadget, git, Junio C Hamano, Hariom Verma,
	Bagas Sanjaya, Jeff King

Christian Couder <christian.couder@gmail.com> 于2021年6月13日周日 上午4:09写道:
>
> On Sat, Jun 12, 2021 at 1:14 PM ZheNing Hu via GitGitGadget
> <gitgitgadget@gmail.com> wrote:
> >
> > From: ZheNing Hu <adlternative@gmail.com>
> >
> > Let `populate_value()`, `get_ref_atom_value()` and
> > `format_ref_array_item()` get the return value of `get_value()`
> > correctly. This can help us later let `cat-file --batch` get the
> > correct error message and return value of `get_value()`.
>
> Is it get_object() or get_value()?

Oh, it's get_object().

Thanks for pointing out:)
--
ZheNing Hu

^ permalink raw reply	[flat|nested] 121+ messages in thread

* [PATCH v2 0/9] [GSOC][RFC] cat-file: reuse ref-filter logic
  2021-06-12 11:14 [PATCH 0/8] [GSOC][RFC] cat-file: reuse ref-filter logic ZheNing Hu via GitGitGadget
                   ` (7 preceding siblings ...)
  2021-06-12 11:14 ` [PATCH 8/8] [GSOC] cat-file: re-implement --textconv, --filters options ZheNing Hu via GitGitGadget
@ 2021-06-15 13:28 ` ZheNing Hu via GitGitGadget
  2021-06-15 13:28   ` [PATCH v2 1/9] [GSOC] ref-filter: add obj-type check in grab contents ZheNing Hu via GitGitGadget
                     ` (11 more replies)
  8 siblings, 12 replies; 121+ messages in thread
From: ZheNing Hu via GitGitGadget @ 2021-06-15 13:28 UTC (permalink / raw)
  To: git
  Cc: Junio C Hamano, Christian Couder, Hariom Verma, Bagas Sanjaya,
	Jeff King, ZheNing Hu

This patch series make cat-file reuse ref-filter logic, which based on
5a5b5f78 ([GSOC] ref-filter: add %(rest) atom)

Change from last version:

 1. Use free_array_item_internal() to solve the memory leak problem.
 2. Change commit message of ([GSOC] ref-filter: teach get_object() return
    useful value).

ZheNing Hu (9):
  [GSOC] ref-filter: add obj-type check in grab contents
  [GSOC] ref-filter: add %(raw) atom
  [GSOC] ref-filter: use non-const ref_format in *_atom_parser()
  [GSOC] ref-filter: add %(rest) atom
  [GSOC] ref-filter: teach get_object() return useful value
  [GSOC] ref-filter: introduce free_array_item_internal() function
  [GSOC] cat-file: reuse ref-filter logic
  [GSOC] cat-file: reuse err buf in batch_objet_write()
  [GSOC] cat-file: re-implement --textconv, --filters options

 Documentation/git-cat-file.txt     |   6 +
 Documentation/git-for-each-ref.txt |   9 +
 builtin/cat-file.c                 | 267 ++++++-----------------
 builtin/tag.c                      |   2 +-
 ref-filter.c                       | 331 ++++++++++++++++++++++-------
 ref-filter.h                       |  14 +-
 t/t1006-cat-file.sh                | 252 ++++++++++++++++++++++
 t/t3203-branch-output.sh           |   4 +
 t/t6300-for-each-ref.sh            | 211 ++++++++++++++++++
 t/t6301-for-each-ref-errors.sh     |   2 +-
 t/t7004-tag.sh                     |   4 +
 t/t7030-verify-tag.sh              |   4 +
 12 files changed, 829 insertions(+), 277 deletions(-)


base-commit: 1197f1a46360d3ae96bd9c15908a3a6f8e562207
Published-As: https://github.com/gitgitgadget/git/releases/tag/pr-980%2Fadlternative%2Fcat-file-batch-refactor-v2
Fetch-It-Via: git fetch https://github.com/gitgitgadget/git pr-980/adlternative/cat-file-batch-refactor-v2
Pull-Request: https://github.com/gitgitgadget/git/pull/980

Range-diff vs v1:

  1:  48d256db5c34 =  1:  48d256db5c34 [GSOC] ref-filter: add obj-type check in grab contents
  2:  abee6a03becb =  2:  abee6a03becb [GSOC] ref-filter: add %(raw) atom
  3:  c99d1d070a18 =  3:  c99d1d070a18 [GSOC] ref-filter: use non-const ref_format in *_atom_parser()
  4:  5a5b5f78aeea =  4:  5a5b5f78aeea [GSOC] ref-filter: add %(rest) atom
  5:  c208b8a45d66 !  5:  49063372e003 [GSOC] ref-filter: teach get_object() return useful value
     @@ Commit message
          [GSOC] ref-filter: teach get_object() return useful value
      
          Let `populate_value()`, `get_ref_atom_value()` and
     -    `format_ref_array_item()` get the return value of `get_value()`
     +    `format_ref_array_item()` get the return value of `get_object()`
          correctly. This can help us later let `cat-file --batch` get the
     -    correct error message and return value of `get_value()`.
     +    correct error message and return value of `get_object()`.
      
          Mentored-by: Christian Couder <christian.couder@gmail.com>
          Mentored-by: Hariom Verma <hariom18599@gmail.com>
  -:  ------------ >  6:  d2f2563eb76a [GSOC] ref-filter: introduce free_array_item_internal() function
  6:  44ebf75e2e93 !  7:  765337a46ab0 [GSOC] cat-file: reuse ref-filter logic
     @@ builtin/cat-file.c: static void batch_write(struct batch_options *opt, const voi
      +	if (!ret) {
      +		strbuf_addch(scratch, '\n');
      +		batch_write(opt, scratch->buf, scratch->len);
     -+		strbuf_release(&err);
      +	} else if (ret < 0) {
      +		die("%s\n", err.buf);
     -+		strbuf_release(&err);
      +	} else {
      +		/* when ret > 0 , don't call die and print the err to stdout*/
      +		printf("%s\n", err.buf);
      +		fflush(stdout);
     -+		strbuf_release(&err);
       	}
     ++	free_array_item_internal(&item);
     ++	strbuf_release(&err);
       }
       
     + static void batch_one_object(const char *obj_name,
      @@ builtin/cat-file.c: static void batch_one_object(const char *obj_name,
       		return;
       	}
     @@ builtin/cat-file.c: static int batch_objects(struct batch_options *opt)
       		return 0;
       	}
      @@ builtin/cat-file.c: static int batch_objects(struct batch_options *opt)
     - 
       		batch_one_object(input.buf, &output, opt, &data);
       	}
     --
     + 
      +	strbuf_release(&format);
       	strbuf_release(&input);
       	strbuf_release(&output);
  7:  d31059c391d0 !  8:  058b304686fd [GSOC] cat-file: reuse err buf in batch_objet_write()
     @@ builtin/cat-file.c: static void batch_write(struct batch_options *opt, const voi
       	if (!ret) {
       		strbuf_addch(scratch, '\n');
       		batch_write(opt, scratch->buf, scratch->len);
     --		strbuf_release(&err);
       	} else if (ret < 0) {
      -		die("%s\n", err.buf);
     --		strbuf_release(&err);
      +		die("%s\n", err->buf);
       	} else {
       		/* when ret > 0 , don't call die and print the err to stdout*/
      -		printf("%s\n", err.buf);
      +		printf("%s\n", err->buf);
       		fflush(stdout);
     --		strbuf_release(&err);
       	}
     + 	free_array_item_internal(&item);
     +-	strbuf_release(&err);
       }
       
       static void batch_one_object(const char *obj_name,
     @@ builtin/cat-file.c: static int batch_objects(struct batch_options *opt, const st
      -		batch_one_object(input.buf, &output, opt, &data);
      +		batch_one_object(input.buf, &output, &err, opt, &data);
       	}
     + 
       	strbuf_release(&format);
       	strbuf_release(&input);
       	strbuf_release(&output);
  8:  0004d5b24a0f !  9:  cbf7d51933ea [GSOC] cat-file: re-implement --textconv, --filters options
     @@ ref-filter.h: struct ref_format {
      +	int use_filters;
       	int use_rest;
       	int use_color;
     --
     - 	/* Internal state to ref-filter */
     + 
     +@@ ref-filter.h: struct ref_format {
       	int need_color_reset_at_eol;
       };
       

-- 
gitgitgadget

^ permalink raw reply	[flat|nested] 121+ messages in thread

* [PATCH v2 1/9] [GSOC] ref-filter: add obj-type check in grab contents
  2021-06-15 13:28 ` [PATCH v2 0/9] [GSOC][RFC] cat-file: reuse ref-filter logic ZheNing Hu via GitGitGadget
@ 2021-06-15 13:28   ` ZheNing Hu via GitGitGadget
  2021-06-15 13:28   ` [PATCH v2 2/9] [GSOC] ref-filter: add %(raw) atom ZheNing Hu via GitGitGadget
                     ` (10 subsequent siblings)
  11 siblings, 0 replies; 121+ messages in thread
From: ZheNing Hu via GitGitGadget @ 2021-06-15 13:28 UTC (permalink / raw)
  To: git
  Cc: Junio C Hamano, Christian Couder, Hariom Verma, Bagas Sanjaya,
	Jeff King, ZheNing Hu, ZheNing Hu

From: ZheNing Hu <adlternative@gmail.com>

Only tag and commit objects use `grab_sub_body_contents()` to grab
object contents in the current codebase.  We want to teach the
function to also handle blobs and trees to get their raw data,
without parsing a blob (whose contents looks like a commit or a tag)
incorrectly as a commit or a tag.

Skip the block of code that is specific to handling commits and tags
early when the given object is of a wrong type to help later
addition to handle other types of objects in this function.

Mentored-by: Christian Couder <christian.couder@gmail.com>
Mentored-by: Hariom Verma <hariom18599@gmail.com>
Helped-by: Junio C Hamano <gitster@pobox.com>
Signed-off-by: ZheNing Hu <adlternative@gmail.com>
---
 ref-filter.c | 24 +++++++++++++++---------
 1 file changed, 15 insertions(+), 9 deletions(-)

diff --git a/ref-filter.c b/ref-filter.c
index 4db0e40ff4c6..5cee6512fbaf 100644
--- a/ref-filter.c
+++ b/ref-filter.c
@@ -1356,11 +1356,12 @@ static void append_lines(struct strbuf *out, const char *buf, unsigned long size
 }
 
 /* See grab_values */
-static void grab_sub_body_contents(struct atom_value *val, int deref, void *buf)
+static void grab_sub_body_contents(struct atom_value *val, int deref, struct expand_data *data)
 {
 	int i;
 	const char *subpos = NULL, *bodypos = NULL, *sigpos = NULL;
 	size_t sublen = 0, bodylen = 0, nonsiglen = 0, siglen = 0;
+	void *buf = data->content;
 
 	for (i = 0; i < used_atom_cnt; i++) {
 		struct used_atom *atom = &used_atom[i];
@@ -1371,10 +1372,13 @@ static void grab_sub_body_contents(struct atom_value *val, int deref, void *buf)
 			continue;
 		if (deref)
 			name++;
-		if (strcmp(name, "body") &&
-		    !starts_with(name, "subject") &&
-		    !starts_with(name, "trailers") &&
-		    !starts_with(name, "contents"))
+
+		if ((data->type != OBJ_TAG &&
+		     data->type != OBJ_COMMIT) ||
+		    (strcmp(name, "body") &&
+		     !starts_with(name, "subject") &&
+		     !starts_with(name, "trailers") &&
+		     !starts_with(name, "contents")))
 			continue;
 		if (!subpos)
 			find_subpos(buf,
@@ -1438,17 +1442,19 @@ static void fill_missing_values(struct atom_value *val)
  * pointed at by the ref itself; otherwise it is the object the
  * ref (which is a tag) refers to.
  */
-static void grab_values(struct atom_value *val, int deref, struct object *obj, void *buf)
+static void grab_values(struct atom_value *val, int deref, struct object *obj, struct expand_data *data)
 {
+	void *buf = data->content;
+
 	switch (obj->type) {
 	case OBJ_TAG:
 		grab_tag_values(val, deref, obj);
-		grab_sub_body_contents(val, deref, buf);
+		grab_sub_body_contents(val, deref, data);
 		grab_person("tagger", val, deref, buf);
 		break;
 	case OBJ_COMMIT:
 		grab_commit_values(val, deref, obj);
-		grab_sub_body_contents(val, deref, buf);
+		grab_sub_body_contents(val, deref, data);
 		grab_person("author", val, deref, buf);
 		grab_person("committer", val, deref, buf);
 		break;
@@ -1678,7 +1684,7 @@ static int get_object(struct ref_array_item *ref, int deref, struct object **obj
 			return strbuf_addf_ret(err, -1, _("parse_object_buffer failed on %s for %s"),
 					       oid_to_hex(&oi->oid), ref->refname);
 		}
-		grab_values(ref->value, deref, *obj, oi->content);
+		grab_values(ref->value, deref, *obj, oi);
 	}
 
 	grab_common_values(ref->value, deref, oi);
-- 
gitgitgadget


^ permalink raw reply	[flat|nested] 121+ messages in thread

* [PATCH v2 2/9] [GSOC] ref-filter: add %(raw) atom
  2021-06-15 13:28 ` [PATCH v2 0/9] [GSOC][RFC] cat-file: reuse ref-filter logic ZheNing Hu via GitGitGadget
  2021-06-15 13:28   ` [PATCH v2 1/9] [GSOC] ref-filter: add obj-type check in grab contents ZheNing Hu via GitGitGadget
@ 2021-06-15 13:28   ` ZheNing Hu via GitGitGadget
  2021-06-15 13:28   ` [PATCH v2 3/9] [GSOC] ref-filter: use non-const ref_format in *_atom_parser() ZheNing Hu via GitGitGadget
                     ` (9 subsequent siblings)
  11 siblings, 0 replies; 121+ messages in thread
From: ZheNing Hu via GitGitGadget @ 2021-06-15 13:28 UTC (permalink / raw)
  To: git
  Cc: Junio C Hamano, Christian Couder, Hariom Verma, Bagas Sanjaya,
	Jeff King, ZheNing Hu, ZheNing Hu

From: ZheNing Hu <adlternative@gmail.com>

Add new formatting option `%(raw)`, which will print the raw
object data without any changes. It will help further to migrate
all cat-file formatting logic from cat-file to ref-filter.

The raw data of blob, tree objects may contain '\0', but most of
the logic in `ref-filter` depends on the output of the atom being
text (specifically, no embedded NULs in it).

E.g. `quote_formatting()` use `strbuf_addstr()` or `*._quote_buf()`
add the data to the buffer. The raw data of a tree object is
`100644 one\0...`, only the `100644 one` will be added to the buffer,
which is incorrect.

Therefore, add a new member in `struct atom_value`: `s_size`, which
can record raw object size, it can help us add raw object data to
the buffer or compare two buffers which contain raw object data.

Beyond, `--format=%(raw)` cannot be used with `--python`, `--shell`,
`--tcl`, `--perl` because if our binary raw data is passed to a variable
in the host language, the host language may not support arbitrary binary
data in the variables of its string type.

Mentored-by: Christian Couder <christian.couder@gmail.com>
Mentored-by: Hariom Verma <hariom18599@gmail.com>
Helped-by: Felipe Contreras <felipe.contreras@gmail.com>
Helped-by: Phillip Wood <phillip.wood@dunelm.org.uk>
Helped-by: Junio C Hamano <gitster@pobox.com>
Based-on-patch-by: Olga Telezhnaya <olyatelezhnaya@gmail.com>
Signed-off-by: ZheNing Hu <adlternative@gmail.com>
---
 Documentation/git-for-each-ref.txt |   9 ++
 ref-filter.c                       | 139 +++++++++++++++----
 t/t6300-for-each-ref.sh            | 207 +++++++++++++++++++++++++++++
 3 files changed, 328 insertions(+), 27 deletions(-)

diff --git a/Documentation/git-for-each-ref.txt b/Documentation/git-for-each-ref.txt
index 2ae2478de706..7f1f0a1ca3b6 100644
--- a/Documentation/git-for-each-ref.txt
+++ b/Documentation/git-for-each-ref.txt
@@ -235,6 +235,15 @@ and `date` to extract the named component.  For email fields (`authoremail`,
 without angle brackets, and `:localpart` to get the part before the `@` symbol
 out of the trimmed email.
 
+The raw data in an object is `raw`.
+
+raw:size::
+	The raw data size of the object.
+
+Note that `--format=%(raw)` can not be used with `--python`, `--shell`, `--tcl`,
+`--perl` because the host language may not support arbitrary binary data in the
+variables of its string type.
+
 The message in a commit or a tag object is `contents`, from which
 `contents:<part>` can be used to extract various parts out of:
 
diff --git a/ref-filter.c b/ref-filter.c
index 5cee6512fbaf..7822be903071 100644
--- a/ref-filter.c
+++ b/ref-filter.c
@@ -144,6 +144,7 @@ enum atom_type {
 	ATOM_BODY,
 	ATOM_TRAILERS,
 	ATOM_CONTENTS,
+	ATOM_RAW,
 	ATOM_UPSTREAM,
 	ATOM_PUSH,
 	ATOM_SYMREF,
@@ -189,6 +190,9 @@ static struct used_atom {
 			struct process_trailer_options trailer_opts;
 			unsigned int nlines;
 		} contents;
+		struct {
+			enum { RAW_BARE, RAW_LENGTH } option;
+		} raw_data;
 		struct {
 			cmp_status cmp_status;
 			const char *str;
@@ -426,6 +430,18 @@ static int contents_atom_parser(const struct ref_format *format, struct used_ato
 	return 0;
 }
 
+static int raw_atom_parser(const struct ref_format *format, struct used_atom *atom,
+				const char *arg, struct strbuf *err)
+{
+	if (!arg)
+		atom->u.raw_data.option = RAW_BARE;
+	else if (!strcmp(arg, "size"))
+		atom->u.raw_data.option = RAW_LENGTH;
+	else
+		return strbuf_addf_ret(err, -1, _("unrecognized %%(raw) argument: %s"), arg);
+	return 0;
+}
+
 static int oid_atom_parser(const struct ref_format *format, struct used_atom *atom,
 			   const char *arg, struct strbuf *err)
 {
@@ -586,6 +602,7 @@ static struct {
 	[ATOM_BODY] = { "body", SOURCE_OBJ, FIELD_STR, body_atom_parser },
 	[ATOM_TRAILERS] = { "trailers", SOURCE_OBJ, FIELD_STR, trailers_atom_parser },
 	[ATOM_CONTENTS] = { "contents", SOURCE_OBJ, FIELD_STR, contents_atom_parser },
+	[ATOM_RAW] = { "raw", SOURCE_OBJ, FIELD_STR, raw_atom_parser },
 	[ATOM_UPSTREAM] = { "upstream", SOURCE_NONE, FIELD_STR, remote_ref_atom_parser },
 	[ATOM_PUSH] = { "push", SOURCE_NONE, FIELD_STR, remote_ref_atom_parser },
 	[ATOM_SYMREF] = { "symref", SOURCE_NONE, FIELD_STR, refname_atom_parser },
@@ -620,12 +637,15 @@ struct ref_formatting_state {
 
 struct atom_value {
 	const char *s;
+	size_t s_size;
 	int (*handler)(struct atom_value *atomv, struct ref_formatting_state *state,
 		       struct strbuf *err);
 	uintmax_t value; /* used for sorting when not FIELD_STR */
 	struct used_atom *atom;
 };
 
+#define ATOM_VALUE_S_SIZE_INIT (-1)
+
 /*
  * Used to parse format string and sort specifiers
  */
@@ -644,13 +664,6 @@ static int parse_ref_filter_atom(const struct ref_format *format,
 		return strbuf_addf_ret(err, -1, _("malformed field name: %.*s"),
 				       (int)(ep-atom), atom);
 
-	/* Do we have the atom already used elsewhere? */
-	for (i = 0; i < used_atom_cnt; i++) {
-		int len = strlen(used_atom[i].name);
-		if (len == ep - atom && !memcmp(used_atom[i].name, atom, len))
-			return i;
-	}
-
 	/*
 	 * If the atom name has a colon, strip it and everything after
 	 * it off - it specifies the format for this entry, and
@@ -660,6 +673,13 @@ static int parse_ref_filter_atom(const struct ref_format *format,
 	arg = memchr(sp, ':', ep - sp);
 	atom_len = (arg ? arg : ep) - sp;
 
+	/* Do we have the atom already used elsewhere? */
+	for (i = 0; i < used_atom_cnt; i++) {
+		int len = strlen(used_atom[i].name);
+		if (len == ep - atom && !memcmp(used_atom[i].name, atom, len))
+			return i;
+	}
+
 	/* Is the atom a valid one? */
 	for (i = 0; i < ARRAY_SIZE(valid_atom); i++) {
 		int len = strlen(valid_atom[i].name);
@@ -709,11 +729,14 @@ static int parse_ref_filter_atom(const struct ref_format *format,
 	return at;
 }
 
-static void quote_formatting(struct strbuf *s, const char *str, int quote_style)
+static void quote_formatting(struct strbuf *s, const char *str, size_t len, int quote_style)
 {
 	switch (quote_style) {
 	case QUOTE_NONE:
-		strbuf_addstr(s, str);
+		if (len != ATOM_VALUE_S_SIZE_INIT)
+			strbuf_add(s, str, len);
+		else
+			strbuf_addstr(s, str);
 		break;
 	case QUOTE_SHELL:
 		sq_quote_buf(s, str);
@@ -740,9 +763,12 @@ static int append_atom(struct atom_value *v, struct ref_formatting_state *state,
 	 * encountered.
 	 */
 	if (!state->stack->prev)
-		quote_formatting(&state->stack->output, v->s, state->quote_style);
+		quote_formatting(&state->stack->output, v->s, v->s_size, state->quote_style);
 	else
-		strbuf_addstr(&state->stack->output, v->s);
+		if (v->s_size != ATOM_VALUE_S_SIZE_INIT)
+			strbuf_add(&state->stack->output, v->s, v->s_size);
+		else
+			strbuf_addstr(&state->stack->output, v->s);
 	return 0;
 }
 
@@ -842,21 +868,23 @@ static int if_atom_handler(struct atom_value *atomv, struct ref_formatting_state
 	return 0;
 }
 
-static int is_empty(const char *s)
+static int is_empty(struct strbuf *buf)
 {
-	while (*s != '\0') {
-		if (!isspace(*s))
-			return 0;
-		s++;
-	}
-	return 1;
-}
+	const char *cur = buf->buf;
+	const char *end = buf->buf + buf->len;
+
+	while (cur != end && (isspace(*cur)))
+		cur++;
+
+	return cur == end;
+ }
 
 static int then_atom_handler(struct atom_value *atomv, struct ref_formatting_state *state,
 			     struct strbuf *err)
 {
 	struct ref_formatting_stack *cur = state->stack;
 	struct if_then_else *if_then_else = NULL;
+	size_t str_len = 0;
 
 	if (cur->at_end == if_then_else_handler)
 		if_then_else = (struct if_then_else *)cur->at_end_data;
@@ -867,18 +895,22 @@ static int then_atom_handler(struct atom_value *atomv, struct ref_formatting_sta
 	if (if_then_else->else_atom_seen)
 		return strbuf_addf_ret(err, -1, _("format: %%(then) atom used after %%(else)"));
 	if_then_else->then_atom_seen = 1;
+	if (if_then_else->str)
+		str_len = strlen(if_then_else->str);
 	/*
 	 * If the 'equals' or 'notequals' attribute is used then
 	 * perform the required comparison. If not, only non-empty
 	 * strings satisfy the 'if' condition.
 	 */
 	if (if_then_else->cmp_status == COMPARE_EQUAL) {
-		if (!strcmp(if_then_else->str, cur->output.buf))
+		if (str_len == cur->output.len &&
+		    !memcmp(if_then_else->str, cur->output.buf, cur->output.len))
 			if_then_else->condition_satisfied = 1;
 	} else if (if_then_else->cmp_status == COMPARE_UNEQUAL) {
-		if (strcmp(if_then_else->str, cur->output.buf))
+		if (str_len != cur->output.len ||
+		    memcmp(if_then_else->str, cur->output.buf, cur->output.len))
 			if_then_else->condition_satisfied = 1;
-	} else if (cur->output.len && !is_empty(cur->output.buf))
+	} else if (cur->output.len && !is_empty(&cur->output))
 		if_then_else->condition_satisfied = 1;
 	strbuf_reset(&cur->output);
 	return 0;
@@ -924,7 +956,7 @@ static int end_atom_handler(struct atom_value *atomv, struct ref_formatting_stat
 	 * only on the topmost supporting atom.
 	 */
 	if (!current->prev->prev) {
-		quote_formatting(&s, current->output.buf, state->quote_style);
+		quote_formatting(&s, current->output.buf, current->output.len, state->quote_style);
 		strbuf_swap(&current->output, &s);
 	}
 	strbuf_release(&s);
@@ -974,6 +1006,10 @@ int verify_ref_format(struct ref_format *format)
 		at = parse_ref_filter_atom(format, sp + 2, ep, &err);
 		if (at < 0)
 			die("%s", err.buf);
+		if (format->quote_style && used_atom[at].atom_type == ATOM_RAW &&
+		    used_atom[at].u.raw_data.option == RAW_BARE)
+			die(_("--format=%.*s cannot be used with"
+			      "--python, --shell, --tcl, --perl"), (int)(ep - sp - 2), sp + 2);
 		cp = ep + 1;
 
 		if (skip_prefix(used_atom[at].name, "color:", &color))
@@ -1362,17 +1398,29 @@ static void grab_sub_body_contents(struct atom_value *val, int deref, struct exp
 	const char *subpos = NULL, *bodypos = NULL, *sigpos = NULL;
 	size_t sublen = 0, bodylen = 0, nonsiglen = 0, siglen = 0;
 	void *buf = data->content;
+	unsigned long buf_size = data->size;
 
 	for (i = 0; i < used_atom_cnt; i++) {
 		struct used_atom *atom = &used_atom[i];
 		const char *name = atom->name;
 		struct atom_value *v = &val[i];
+		enum atom_type atom_type = atom->atom_type;
 
 		if (!!deref != (*name == '*'))
 			continue;
 		if (deref)
 			name++;
 
+		if (atom_type == ATOM_RAW) {
+			if (atom->u.raw_data.option == RAW_BARE) {
+				v->s = xmemdupz(buf, buf_size);
+				v->s_size = buf_size;
+			} else if (atom->u.raw_data.option == RAW_LENGTH) {
+				v->s = xstrfmt("%"PRIuMAX, (uintmax_t)buf_size);
+			}
+			continue;
+		}
+
 		if ((data->type != OBJ_TAG &&
 		     data->type != OBJ_COMMIT) ||
 		    (strcmp(name, "body") &&
@@ -1460,9 +1508,11 @@ static void grab_values(struct atom_value *val, int deref, struct object *obj, s
 		break;
 	case OBJ_TREE:
 		/* grab_tree_values(val, deref, obj, buf, sz); */
+		grab_sub_body_contents(val, deref, data);
 		break;
 	case OBJ_BLOB:
 		/* grab_blob_values(val, deref, obj, buf, sz); */
+		grab_sub_body_contents(val, deref, data);
 		break;
 	default:
 		die("Eh?  Object of type %d?", obj->type);
@@ -1766,6 +1816,7 @@ static int populate_value(struct ref_array_item *ref, struct strbuf *err)
 		const char *refname;
 		struct branch *branch = NULL;
 
+		v->s_size = ATOM_VALUE_S_SIZE_INIT;
 		v->handler = append_atom;
 		v->atom = atom;
 
@@ -2369,6 +2420,19 @@ static int compare_detached_head(struct ref_array_item *a, struct ref_array_item
 	return 0;
 }
 
+static int memcasecmp(const void *vs1, const void *vs2, size_t n)
+{
+	const char *s1 = vs1, *s2 = vs2;
+	const char *end = s1 + n;
+
+	for (; s1 < end; s1++, s2++) {
+		int diff = tolower(*s1) - tolower(*s2);
+		if (diff)
+			return diff;
+	}
+	return 0;
+}
+
 static int cmp_ref_sorting(struct ref_sorting *s, struct ref_array_item *a, struct ref_array_item *b)
 {
 	struct atom_value *va, *vb;
@@ -2389,10 +2453,30 @@ static int cmp_ref_sorting(struct ref_sorting *s, struct ref_array_item *a, stru
 	} else if (s->sort_flags & REF_SORTING_VERSION) {
 		cmp = versioncmp(va->s, vb->s);
 	} else if (cmp_type == FIELD_STR) {
-		int (*cmp_fn)(const char *, const char *);
-		cmp_fn = s->sort_flags & REF_SORTING_ICASE
-			? strcasecmp : strcmp;
-		cmp = cmp_fn(va->s, vb->s);
+		if (va->s_size == ATOM_VALUE_S_SIZE_INIT &&
+		    vb->s_size == ATOM_VALUE_S_SIZE_INIT) {
+			int (*cmp_fn)(const char *, const char *);
+			cmp_fn = s->sort_flags & REF_SORTING_ICASE
+				? strcasecmp : strcmp;
+			cmp = cmp_fn(va->s, vb->s);
+		} else {
+			size_t a_size = va->s_size == ATOM_VALUE_S_SIZE_INIT ?
+					strlen(va->s) : va->s_size;
+			size_t b_size = vb->s_size == ATOM_VALUE_S_SIZE_INIT ?
+					strlen(vb->s) : vb->s_size;
+			int (*cmp_fn)(const void *, const void *, size_t);
+			cmp_fn = s->sort_flags & REF_SORTING_ICASE
+				? memcasecmp : memcmp;
+
+			cmp = cmp_fn(va->s, vb->s, b_size > a_size ?
+				     a_size : b_size);
+			if (!cmp) {
+				if (a_size > b_size)
+					cmp = 1;
+				else if (a_size < b_size)
+					cmp = -1;
+			}
+		}
 	} else {
 		if (va->value < vb->value)
 			cmp = -1;
@@ -2492,6 +2576,7 @@ int format_ref_array_item(struct ref_array_item *info,
 	}
 	if (format->need_color_reset_at_eol) {
 		struct atom_value resetv;
+		resetv.s_size = ATOM_VALUE_S_SIZE_INIT;
 		resetv.s = GIT_COLOR_RESET;
 		if (append_atom(&resetv, &state, error_buf)) {
 			pop_stack_element(&state.stack);
diff --git a/t/t6300-for-each-ref.sh b/t/t6300-for-each-ref.sh
index 9e0214076b4d..e2867de791e7 100755
--- a/t/t6300-for-each-ref.sh
+++ b/t/t6300-for-each-ref.sh
@@ -130,6 +130,8 @@ test_atom head parent:short=10 ''
 test_atom head numparent 0
 test_atom head object ''
 test_atom head type ''
+test_atom head raw "$(git cat-file commit refs/heads/main)
+"
 test_atom head '*objectname' ''
 test_atom head '*objecttype' ''
 test_atom head author 'A U Thor <author@example.com> 1151968724 +0200'
@@ -221,6 +223,15 @@ test_atom tag contents 'Tagging at 1151968727
 '
 test_atom tag HEAD ' '
 
+test_expect_success 'basic atom: refs/tags/testtag *raw' '
+	git cat-file commit refs/tags/testtag^{} >expected &&
+	git for-each-ref --format="%(*raw)" refs/tags/testtag >actual &&
+	sanitize_pgp <expected >expected.clean &&
+	sanitize_pgp <actual >actual.clean &&
+	echo "" >>expected.clean &&
+	test_cmp expected.clean actual.clean
+'
+
 test_expect_success 'Check invalid atoms names are errors' '
 	test_must_fail git for-each-ref --format="%(INVALID)" refs/heads
 '
@@ -686,6 +697,15 @@ test_atom refs/tags/signed-empty contents:body ''
 test_atom refs/tags/signed-empty contents:signature "$sig"
 test_atom refs/tags/signed-empty contents "$sig"
 
+test_expect_success GPG 'basic atom: refs/tags/signed-empty raw' '
+	git cat-file tag refs/tags/signed-empty >expected &&
+	git for-each-ref --format="%(raw)" refs/tags/signed-empty >actual &&
+	sanitize_pgp <expected >expected.clean &&
+	sanitize_pgp <actual >actual.clean &&
+	echo "" >>expected.clean &&
+	test_cmp expected.clean actual.clean
+'
+
 test_atom refs/tags/signed-short subject 'subject line'
 test_atom refs/tags/signed-short subject:sanitize 'subject-line'
 test_atom refs/tags/signed-short contents:subject 'subject line'
@@ -695,6 +715,15 @@ test_atom refs/tags/signed-short contents:signature "$sig"
 test_atom refs/tags/signed-short contents "subject line
 $sig"
 
+test_expect_success GPG 'basic atom: refs/tags/signed-short raw' '
+	git cat-file tag refs/tags/signed-short >expected &&
+	git for-each-ref --format="%(raw)" refs/tags/signed-short >actual &&
+	sanitize_pgp <expected >expected.clean &&
+	sanitize_pgp <actual >actual.clean &&
+	echo "" >>expected.clean &&
+	test_cmp expected.clean actual.clean
+'
+
 test_atom refs/tags/signed-long subject 'subject line'
 test_atom refs/tags/signed-long subject:sanitize 'subject-line'
 test_atom refs/tags/signed-long contents:subject 'subject line'
@@ -708,6 +737,15 @@ test_atom refs/tags/signed-long contents "subject line
 body contents
 $sig"
 
+test_expect_success GPG 'basic atom: refs/tags/signed-long raw' '
+	git cat-file tag refs/tags/signed-long >expected &&
+	git for-each-ref --format="%(raw)" refs/tags/signed-long >actual &&
+	sanitize_pgp <expected >expected.clean &&
+	sanitize_pgp <actual >actual.clean &&
+	echo "" >>expected.clean &&
+	test_cmp expected.clean actual.clean
+'
+
 test_expect_success 'set up refs pointing to tree and blob' '
 	git update-ref refs/mytrees/first refs/heads/main^{tree} &&
 	git update-ref refs/myblobs/first refs/heads/main:one
@@ -720,6 +758,16 @@ test_atom refs/mytrees/first contents:body ""
 test_atom refs/mytrees/first contents:signature ""
 test_atom refs/mytrees/first contents ""
 
+test_expect_success 'basic atom: refs/mytrees/first raw' '
+	git cat-file tree refs/mytrees/first >expected &&
+	echo "" >>expected &&
+	git for-each-ref --format="%(raw)" refs/mytrees/first >actual &&
+	test_cmp expected actual &&
+	git cat-file -s refs/mytrees/first >expected &&
+	git for-each-ref --format="%(raw:size)" refs/mytrees/first >actual &&
+	test_cmp expected actual
+'
+
 test_atom refs/myblobs/first subject ""
 test_atom refs/myblobs/first contents:subject ""
 test_atom refs/myblobs/first body ""
@@ -727,6 +775,165 @@ test_atom refs/myblobs/first contents:body ""
 test_atom refs/myblobs/first contents:signature ""
 test_atom refs/myblobs/first contents ""
 
+test_expect_success 'basic atom: refs/myblobs/first raw' '
+	git cat-file blob refs/myblobs/first >expected &&
+	echo "" >>expected &&
+	git for-each-ref --format="%(raw)" refs/myblobs/first >actual &&
+	test_cmp expected actual &&
+	git cat-file -s refs/myblobs/first >expected &&
+	git for-each-ref --format="%(raw:size)" refs/myblobs/first >actual &&
+	test_cmp expected actual
+'
+
+test_expect_success 'set up refs pointing to binary blob' '
+	printf "a\0b\0c" >blob1 &&
+	printf "a\0c\0b" >blob2 &&
+	printf "\0a\0b\0c" >blob3 &&
+	printf "abc" >blob4 &&
+	printf "\0 \0 \0 " >blob5 &&
+	printf "\0 \0a\0 " >blob6 &&
+	printf "  " >blob7 &&
+	>blob8 &&
+	git hash-object blob1 -w | xargs git update-ref refs/myblobs/blob1 &&
+	git hash-object blob2 -w | xargs git update-ref refs/myblobs/blob2 &&
+	git hash-object blob3 -w | xargs git update-ref refs/myblobs/blob3 &&
+	git hash-object blob4 -w | xargs git update-ref refs/myblobs/blob4 &&
+	git hash-object blob5 -w | xargs git update-ref refs/myblobs/blob5 &&
+	git hash-object blob6 -w | xargs git update-ref refs/myblobs/blob6 &&
+	git hash-object blob7 -w | xargs git update-ref refs/myblobs/blob7 &&
+	git hash-object blob8 -w | xargs git update-ref refs/myblobs/blob8
+'
+
+test_expect_success 'Verify sorts with raw' '
+	cat >expected <<-EOF &&
+	refs/myblobs/blob8
+	refs/myblobs/blob5
+	refs/myblobs/blob6
+	refs/myblobs/blob3
+	refs/myblobs/blob7
+	refs/mytrees/first
+	refs/myblobs/first
+	refs/myblobs/blob1
+	refs/myblobs/blob2
+	refs/myblobs/blob4
+	refs/heads/main
+	EOF
+	git for-each-ref --format="%(refname)" --sort=raw \
+		refs/heads/main refs/myblobs/ refs/mytrees/first >actual &&
+	test_cmp expected actual
+'
+
+test_expect_success 'Verify sorts with raw:size' '
+	cat >expected <<-EOF &&
+	refs/myblobs/blob8
+	refs/myblobs/first
+	refs/myblobs/blob7
+	refs/heads/main
+	refs/myblobs/blob4
+	refs/myblobs/blob1
+	refs/myblobs/blob2
+	refs/myblobs/blob3
+	refs/myblobs/blob5
+	refs/myblobs/blob6
+	refs/mytrees/first
+	EOF
+	git for-each-ref --format="%(refname)" --sort=raw:size \
+		refs/heads/main refs/myblobs/ refs/mytrees/first >actual &&
+	test_cmp expected actual
+'
+
+test_expect_success 'validate raw atom with %(if:equals)' '
+	cat >expected <<-EOF &&
+	not equals
+	not equals
+	not equals
+	not equals
+	not equals
+	not equals
+	refs/myblobs/blob4
+	not equals
+	not equals
+	not equals
+	not equals
+	not equals
+	EOF
+	git for-each-ref --format="%(if:equals=abc)%(raw)%(then)%(refname)%(else)not equals%(end)" \
+		refs/myblobs/ refs/heads/ >actual &&
+	test_cmp expected actual
+'
+test_expect_success 'validate raw atom with %(if:notequals)' '
+	cat >expected <<-EOF &&
+	refs/heads/ambiguous
+	refs/heads/main
+	refs/heads/newtag
+	refs/myblobs/blob1
+	refs/myblobs/blob2
+	refs/myblobs/blob3
+	equals
+	refs/myblobs/blob5
+	refs/myblobs/blob6
+	refs/myblobs/blob7
+	refs/myblobs/blob8
+	refs/myblobs/first
+	EOF
+	git for-each-ref --format="%(if:notequals=abc)%(raw)%(then)%(refname)%(else)equals%(end)" \
+		refs/myblobs/ refs/heads/ >actual &&
+	test_cmp expected actual
+'
+
+test_expect_success 'empty raw refs with %(if)' '
+	cat >expected <<-EOF &&
+	refs/myblobs/blob1 not empty
+	refs/myblobs/blob2 not empty
+	refs/myblobs/blob3 not empty
+	refs/myblobs/blob4 not empty
+	refs/myblobs/blob5 not empty
+	refs/myblobs/blob6 not empty
+	refs/myblobs/blob7 empty
+	refs/myblobs/blob8 empty
+	refs/myblobs/first not empty
+	EOF
+	git for-each-ref --format="%(refname) %(if)%(raw)%(then)not empty%(else)empty%(end)" \
+		refs/myblobs/ >actual &&
+	test_cmp expected actual
+'
+
+test_expect_success '%(raw) with --python must failed' '
+	test_must_fail git for-each-ref --format="%(raw)" --python
+'
+
+test_expect_success '%(raw) with --tcl must failed' '
+	test_must_fail git for-each-ref --format="%(raw)" --tcl
+'
+
+test_expect_success '%(raw) with --perl must failed' '
+	test_must_fail git for-each-ref --format="%(raw)" --perl
+'
+
+test_expect_success '%(raw) with --shell must failed' '
+	test_must_fail git for-each-ref --format="%(raw)" --shell
+'
+
+test_expect_success '%(raw) with --shell and --sort=raw must failed' '
+	test_must_fail git for-each-ref --format="%(raw)" --sort=raw --shell
+'
+
+test_expect_success '%(raw:size) with --shell' '
+	git for-each-ref --format="%(raw:size)" | while read line
+	do
+		echo "'\''$line'\''" >>expect
+	done &&
+	git for-each-ref --format="%(raw:size)" --shell >actual &&
+	test_cmp expect actual
+'
+
+test_expect_success 'for-each-ref --format compare with cat-file --batch' '
+	git rev-parse refs/mytrees/first | git cat-file --batch >expected &&
+	git for-each-ref --format="%(objectname) %(objecttype) %(objectsize)
+%(raw)" refs/mytrees/first >actual &&
+	test_cmp expected actual
+'
+
 test_expect_success 'set up multiple-sort tags' '
 	for when in 100000 200000
 	do
-- 
gitgitgadget


^ permalink raw reply	[flat|nested] 121+ messages in thread

* [PATCH v2 3/9] [GSOC] ref-filter: use non-const ref_format in *_atom_parser()
  2021-06-15 13:28 ` [PATCH v2 0/9] [GSOC][RFC] cat-file: reuse ref-filter logic ZheNing Hu via GitGitGadget
  2021-06-15 13:28   ` [PATCH v2 1/9] [GSOC] ref-filter: add obj-type check in grab contents ZheNing Hu via GitGitGadget
  2021-06-15 13:28   ` [PATCH v2 2/9] [GSOC] ref-filter: add %(raw) atom ZheNing Hu via GitGitGadget
@ 2021-06-15 13:28   ` ZheNing Hu via GitGitGadget
  2021-06-15 13:29   ` [PATCH v2 4/9] [GSOC] ref-filter: add %(rest) atom ZheNing Hu via GitGitGadget
                     ` (8 subsequent siblings)
  11 siblings, 0 replies; 121+ messages in thread
From: ZheNing Hu via GitGitGadget @ 2021-06-15 13:28 UTC (permalink / raw)
  To: git
  Cc: Junio C Hamano, Christian Couder, Hariom Verma, Bagas Sanjaya,
	Jeff King, ZheNing Hu, ZheNing Hu

From: ZheNing Hu <adlternative@gmail.com>

Use non-const ref_format in *_atom_parser(), which can help us
modify the members of ref_format in *_atom_parser().

Mentored-by: Christian Couder <christian.couder@gmail.com>
Mentored-by: Hariom Verma <hariom18599@gmail.com>
Signed-off-by: ZheNing Hu <adlternative@gmail.com>
---
 builtin/tag.c |  2 +-
 ref-filter.c  | 44 ++++++++++++++++++++++----------------------
 ref-filter.h  |  4 ++--
 3 files changed, 25 insertions(+), 25 deletions(-)

diff --git a/builtin/tag.c b/builtin/tag.c
index 82fcfc098242..452558ec9575 100644
--- a/builtin/tag.c
+++ b/builtin/tag.c
@@ -146,7 +146,7 @@ static int verify_tag(const char *name, const char *ref,
 		      const struct object_id *oid, void *cb_data)
 {
 	int flags;
-	const struct ref_format *format = cb_data;
+	struct ref_format *format = cb_data;
 	flags = GPG_VERIFY_VERBOSE;
 
 	if (format->format)
diff --git a/ref-filter.c b/ref-filter.c
index 7822be903071..af8c15aef44d 100644
--- a/ref-filter.c
+++ b/ref-filter.c
@@ -226,7 +226,7 @@ static int strbuf_addf_ret(struct strbuf *sb, int ret, const char *fmt, ...)
 	return ret;
 }
 
-static int color_atom_parser(const struct ref_format *format, struct used_atom *atom,
+static int color_atom_parser(struct ref_format *format, struct used_atom *atom,
 			     const char *color_value, struct strbuf *err)
 {
 	if (!color_value)
@@ -264,7 +264,7 @@ static int refname_atom_parser_internal(struct refname_atom *atom, const char *a
 	return 0;
 }
 
-static int remote_ref_atom_parser(const struct ref_format *format, struct used_atom *atom,
+static int remote_ref_atom_parser(struct ref_format *format, struct used_atom *atom,
 				  const char *arg, struct strbuf *err)
 {
 	struct string_list params = STRING_LIST_INIT_DUP;
@@ -311,7 +311,7 @@ static int remote_ref_atom_parser(const struct ref_format *format, struct used_a
 	return 0;
 }
 
-static int objecttype_atom_parser(const struct ref_format *format, struct used_atom *atom,
+static int objecttype_atom_parser(struct ref_format *format, struct used_atom *atom,
 				  const char *arg, struct strbuf *err)
 {
 	if (arg)
@@ -323,7 +323,7 @@ static int objecttype_atom_parser(const struct ref_format *format, struct used_a
 	return 0;
 }
 
-static int objectsize_atom_parser(const struct ref_format *format, struct used_atom *atom,
+static int objectsize_atom_parser(struct ref_format *format, struct used_atom *atom,
 				  const char *arg, struct strbuf *err)
 {
 	if (!arg) {
@@ -343,7 +343,7 @@ static int objectsize_atom_parser(const struct ref_format *format, struct used_a
 	return 0;
 }
 
-static int deltabase_atom_parser(const struct ref_format *format, struct used_atom *atom,
+static int deltabase_atom_parser(struct ref_format *format, struct used_atom *atom,
 				 const char *arg, struct strbuf *err)
 {
 	if (arg)
@@ -355,7 +355,7 @@ static int deltabase_atom_parser(const struct ref_format *format, struct used_at
 	return 0;
 }
 
-static int body_atom_parser(const struct ref_format *format, struct used_atom *atom,
+static int body_atom_parser(struct ref_format *format, struct used_atom *atom,
 			    const char *arg, struct strbuf *err)
 {
 	if (arg)
@@ -364,7 +364,7 @@ static int body_atom_parser(const struct ref_format *format, struct used_atom *a
 	return 0;
 }
 
-static int subject_atom_parser(const struct ref_format *format, struct used_atom *atom,
+static int subject_atom_parser(struct ref_format *format, struct used_atom *atom,
 			       const char *arg, struct strbuf *err)
 {
 	if (!arg)
@@ -376,7 +376,7 @@ static int subject_atom_parser(const struct ref_format *format, struct used_atom
 	return 0;
 }
 
-static int trailers_atom_parser(const struct ref_format *format, struct used_atom *atom,
+static int trailers_atom_parser(struct ref_format *format, struct used_atom *atom,
 				const char *arg, struct strbuf *err)
 {
 	atom->u.contents.trailer_opts.no_divider = 1;
@@ -402,7 +402,7 @@ static int trailers_atom_parser(const struct ref_format *format, struct used_ato
 	return 0;
 }
 
-static int contents_atom_parser(const struct ref_format *format, struct used_atom *atom,
+static int contents_atom_parser(struct ref_format *format, struct used_atom *atom,
 				const char *arg, struct strbuf *err)
 {
 	if (!arg)
@@ -430,7 +430,7 @@ static int contents_atom_parser(const struct ref_format *format, struct used_ato
 	return 0;
 }
 
-static int raw_atom_parser(const struct ref_format *format, struct used_atom *atom,
+static int raw_atom_parser(struct ref_format *format, struct used_atom *atom,
 				const char *arg, struct strbuf *err)
 {
 	if (!arg)
@@ -442,7 +442,7 @@ static int raw_atom_parser(const struct ref_format *format, struct used_atom *at
 	return 0;
 }
 
-static int oid_atom_parser(const struct ref_format *format, struct used_atom *atom,
+static int oid_atom_parser(struct ref_format *format, struct used_atom *atom,
 			   const char *arg, struct strbuf *err)
 {
 	if (!arg)
@@ -461,7 +461,7 @@ static int oid_atom_parser(const struct ref_format *format, struct used_atom *at
 	return 0;
 }
 
-static int person_email_atom_parser(const struct ref_format *format, struct used_atom *atom,
+static int person_email_atom_parser(struct ref_format *format, struct used_atom *atom,
 				    const char *arg, struct strbuf *err)
 {
 	if (!arg)
@@ -475,7 +475,7 @@ static int person_email_atom_parser(const struct ref_format *format, struct used
 	return 0;
 }
 
-static int refname_atom_parser(const struct ref_format *format, struct used_atom *atom,
+static int refname_atom_parser(struct ref_format *format, struct used_atom *atom,
 			       const char *arg, struct strbuf *err)
 {
 	return refname_atom_parser_internal(&atom->u.refname, arg, atom->name, err);
@@ -492,7 +492,7 @@ static align_type parse_align_position(const char *s)
 	return -1;
 }
 
-static int align_atom_parser(const struct ref_format *format, struct used_atom *atom,
+static int align_atom_parser(struct ref_format *format, struct used_atom *atom,
 			     const char *arg, struct strbuf *err)
 {
 	struct align *align = &atom->u.align;
@@ -544,7 +544,7 @@ static int align_atom_parser(const struct ref_format *format, struct used_atom *
 	return 0;
 }
 
-static int if_atom_parser(const struct ref_format *format, struct used_atom *atom,
+static int if_atom_parser(struct ref_format *format, struct used_atom *atom,
 			  const char *arg, struct strbuf *err)
 {
 	if (!arg) {
@@ -559,7 +559,7 @@ static int if_atom_parser(const struct ref_format *format, struct used_atom *ato
 	return 0;
 }
 
-static int head_atom_parser(const struct ref_format *format, struct used_atom *atom,
+static int head_atom_parser(struct ref_format *format, struct used_atom *atom,
 			    const char *arg, struct strbuf *unused_err)
 {
 	atom->u.head = resolve_refdup("HEAD", RESOLVE_REF_READING, NULL, NULL);
@@ -570,7 +570,7 @@ static struct {
 	const char *name;
 	info_source source;
 	cmp_type cmp_type;
-	int (*parser)(const struct ref_format *format, struct used_atom *atom,
+	int (*parser)(struct ref_format *format, struct used_atom *atom,
 		      const char *arg, struct strbuf *err);
 } valid_atom[] = {
 	[ATOM_REFNAME] = { "refname", SOURCE_NONE, FIELD_STR, refname_atom_parser },
@@ -649,7 +649,7 @@ struct atom_value {
 /*
  * Used to parse format string and sort specifiers
  */
-static int parse_ref_filter_atom(const struct ref_format *format,
+static int parse_ref_filter_atom(struct ref_format *format,
 				 const char *atom, const char *ep,
 				 struct strbuf *err)
 {
@@ -2546,9 +2546,9 @@ static void append_literal(const char *cp, const char *ep, struct ref_formatting
 }
 
 int format_ref_array_item(struct ref_array_item *info,
-			   const struct ref_format *format,
-			   struct strbuf *final_buf,
-			   struct strbuf *error_buf)
+			  struct ref_format *format,
+			  struct strbuf *final_buf,
+			  struct strbuf *error_buf)
 {
 	const char *cp, *sp, *ep;
 	struct ref_formatting_state state = REF_FORMATTING_STATE_INIT;
@@ -2593,7 +2593,7 @@ int format_ref_array_item(struct ref_array_item *info,
 }
 
 void pretty_print_ref(const char *name, const struct object_id *oid,
-		      const struct ref_format *format)
+		      struct ref_format *format)
 {
 	struct ref_array_item *ref_item;
 	struct strbuf output = STRBUF_INIT;
diff --git a/ref-filter.h b/ref-filter.h
index baf72a718965..74fb423fc89f 100644
--- a/ref-filter.h
+++ b/ref-filter.h
@@ -116,7 +116,7 @@ void ref_array_sort(struct ref_sorting *sort, struct ref_array *array);
 void ref_sorting_set_sort_flags_all(struct ref_sorting *sorting, unsigned int mask, int on);
 /*  Based on the given format and quote_style, fill the strbuf */
 int format_ref_array_item(struct ref_array_item *info,
-			  const struct ref_format *format,
+			  struct ref_format *format,
 			  struct strbuf *final_buf,
 			  struct strbuf *error_buf);
 /*  Parse a single sort specifier and add it to the list */
@@ -137,7 +137,7 @@ void setup_ref_filter_porcelain_msg(void);
  * name must be a fully qualified refname.
  */
 void pretty_print_ref(const char *name, const struct object_id *oid,
-		      const struct ref_format *format);
+		      struct ref_format *format);
 
 /*
  * Push a single ref onto the array; this can be used to construct your own
-- 
gitgitgadget


^ permalink raw reply	[flat|nested] 121+ messages in thread

* [PATCH v2 4/9] [GSOC] ref-filter: add %(rest) atom
  2021-06-15 13:28 ` [PATCH v2 0/9] [GSOC][RFC] cat-file: reuse ref-filter logic ZheNing Hu via GitGitGadget
                     ` (2 preceding siblings ...)
  2021-06-15 13:28   ` [PATCH v2 3/9] [GSOC] ref-filter: use non-const ref_format in *_atom_parser() ZheNing Hu via GitGitGadget
@ 2021-06-15 13:29   ` ZheNing Hu via GitGitGadget
  2021-06-15 13:29   ` [PATCH v2 5/9] [GSOC] ref-filter: teach get_object() return useful value ZheNing Hu via GitGitGadget
                     ` (7 subsequent siblings)
  11 siblings, 0 replies; 121+ messages in thread
From: ZheNing Hu via GitGitGadget @ 2021-06-15 13:29 UTC (permalink / raw)
  To: git
  Cc: Junio C Hamano, Christian Couder, Hariom Verma, Bagas Sanjaya,
	Jeff King, ZheNing Hu, ZheNing Hu

From: ZheNing Hu <adlternative@gmail.com>

In order to let "cat-file --batch=%(rest)" use the ref-filter
interface, add %(rest) atom for ref-filter. "git for-each-ref",
"git branch", "git tag" and "git verify-tag" will reject %(rest)
by default.

Mentored-by: Christian Couder <christian.couder@gmail.com>
Mentored-by: Hariom Verma <hariom18599@gmail.com>
Signed-off-by: ZheNing Hu <adlternative@gmail.com>
---
 ref-filter.c             | 21 +++++++++++++++++++++
 ref-filter.h             |  5 ++++-
 t/t3203-branch-output.sh |  4 ++++
 t/t6300-for-each-ref.sh  |  4 ++++
 t/t7004-tag.sh           |  4 ++++
 t/t7030-verify-tag.sh    |  4 ++++
 6 files changed, 41 insertions(+), 1 deletion(-)

diff --git a/ref-filter.c b/ref-filter.c
index af8c15aef44d..8868cf98f090 100644
--- a/ref-filter.c
+++ b/ref-filter.c
@@ -157,6 +157,7 @@ enum atom_type {
 	ATOM_IF,
 	ATOM_THEN,
 	ATOM_ELSE,
+	ATOM_REST,
 };
 
 /*
@@ -559,6 +560,15 @@ static int if_atom_parser(struct ref_format *format, struct used_atom *atom,
 	return 0;
 }
 
+static int rest_atom_parser(struct ref_format *format, struct used_atom *atom,
+			    const char *arg, struct strbuf *err)
+{
+	if (arg)
+		return strbuf_addf_ret(err, -1, _("%%(rest) does not take arguments"));
+	format->use_rest = 1;
+	return 0;
+}
+
 static int head_atom_parser(struct ref_format *format, struct used_atom *atom,
 			    const char *arg, struct strbuf *unused_err)
 {
@@ -615,6 +625,7 @@ static struct {
 	[ATOM_IF] = { "if", SOURCE_NONE, FIELD_STR, if_atom_parser },
 	[ATOM_THEN] = { "then", SOURCE_NONE },
 	[ATOM_ELSE] = { "else", SOURCE_NONE },
+	[ATOM_REST] = { "rest", SOURCE_NONE, FIELD_STR, rest_atom_parser },
 	/*
 	 * Please update $__git_ref_fieldlist in git-completion.bash
 	 * when you add new atoms
@@ -1006,6 +1017,9 @@ int verify_ref_format(struct ref_format *format)
 		at = parse_ref_filter_atom(format, sp + 2, ep, &err);
 		if (at < 0)
 			die("%s", err.buf);
+		if (used_atom[at].atom_type == ATOM_REST)
+			die("this command reject atom %%(%.*s)", (int)(ep - sp - 2), sp + 2);
+
 		if (format->quote_style && used_atom[at].atom_type == ATOM_RAW &&
 		    used_atom[at].u.raw_data.option == RAW_BARE)
 			die(_("--format=%.*s cannot be used with"
@@ -1920,6 +1934,12 @@ static int populate_value(struct ref_array_item *ref, struct strbuf *err)
 			v->handler = else_atom_handler;
 			v->s = xstrdup("");
 			continue;
+		} else if (atom_type == ATOM_REST) {
+			if (ref->rest)
+				v->s = xstrdup(ref->rest);
+			else
+				v->s = xstrdup("");
+			continue;
 		} else
 			continue;
 
@@ -2137,6 +2157,7 @@ static struct ref_array_item *new_ref_array_item(const char *refname,
 
 	FLEX_ALLOC_STR(ref, refname, refname);
 	oidcpy(&ref->objectname, oid);
+	ref->rest = NULL;
 
 	return ref;
 }
diff --git a/ref-filter.h b/ref-filter.h
index 74fb423fc89f..9dc07476a584 100644
--- a/ref-filter.h
+++ b/ref-filter.h
@@ -38,6 +38,7 @@ struct ref_sorting {
 
 struct ref_array_item {
 	struct object_id objectname;
+	const char *rest;
 	int flag;
 	unsigned int kind;
 	const char *symref;
@@ -76,14 +77,16 @@ struct ref_format {
 	 * verify_ref_format() afterwards to finalize.
 	 */
 	const char *format;
+	const char *rest;
 	int quote_style;
+	int use_rest;
 	int use_color;
 
 	/* Internal state to ref-filter */
 	int need_color_reset_at_eol;
 };
 
-#define REF_FORMAT_INIT { NULL, 0, -1 }
+#define REF_FORMAT_INIT { NULL, NULL, 0, 0, -1 }
 
 /*  Macros for checking --merged and --no-merged options */
 #define _OPT_MERGED_NO_MERGED(option, filter, h) \
diff --git a/t/t3203-branch-output.sh b/t/t3203-branch-output.sh
index 5325b9f67a00..2780ec8803fd 100755
--- a/t/t3203-branch-output.sh
+++ b/t/t3203-branch-output.sh
@@ -340,6 +340,10 @@ test_expect_success 'git branch --format option' '
 	test_cmp expect actual
 '
 
+test_expect_success 'git branch with --format=%(rest) must failed' '
+	test_must_fail git branch --format="%(rest)" >actual
+'
+
 test_expect_success 'worktree colors correct' '
 	cat >expect <<-EOF &&
 	* <GREEN>(HEAD detached from fromtag)<RESET>
diff --git a/t/t6300-for-each-ref.sh b/t/t6300-for-each-ref.sh
index e2867de791e7..8c97c3b877c6 100755
--- a/t/t6300-for-each-ref.sh
+++ b/t/t6300-for-each-ref.sh
@@ -1187,6 +1187,10 @@ test_expect_success 'basic atom: head contents:trailers' '
 	test_cmp expect actual.clean
 '
 
+test_expect_success 'basic atom: rest must failed' '
+	test_must_fail git for-each-ref --format="%(rest)" refs/heads/main
+'
+
 test_expect_success 'trailer parsing not fooled by --- line' '
 	git commit --allow-empty -F - <<-\EOF &&
 	this is the subject
diff --git a/t/t7004-tag.sh b/t/t7004-tag.sh
index 2f72c5c6883e..9fc4c4323949 100755
--- a/t/t7004-tag.sh
+++ b/t/t7004-tag.sh
@@ -1998,6 +1998,10 @@ test_expect_success '--format should list tags as per format given' '
 	test_cmp expect actual
 '
 
+test_expect_success 'git tag -l with --format="%(rest)" must failed' '
+	test_must_fail git tag -l --format="%(rest)" "v1*"
+'
+
 test_expect_success "set up color tests" '
 	echo "<RED>v1.0<RESET>" >expect.color &&
 	echo "v1.0" >expect.bare &&
diff --git a/t/t7030-verify-tag.sh b/t/t7030-verify-tag.sh
index 3cefde9602bf..785b32eb88f9 100755
--- a/t/t7030-verify-tag.sh
+++ b/t/t7030-verify-tag.sh
@@ -194,6 +194,10 @@ test_expect_success GPG 'verifying tag with --format' '
 	test_cmp expect actual
 '
 
+test_expect_success GPG 'verifying tag with --format="%(rest)" must failed' '
+	test_must_fail git verify-tag --format="%(rest)" "fourth-signed"
+'
+
 test_expect_success GPG 'verifying a forged tag with --format should fail silently' '
 	test_must_fail git verify-tag --format="tagname : %(tag)" $(cat forged1.tag) >actual-forged &&
 	test_must_be_empty actual-forged
-- 
gitgitgadget


^ permalink raw reply	[flat|nested] 121+ messages in thread

* [PATCH v2 5/9] [GSOC] ref-filter: teach get_object() return useful value
  2021-06-15 13:28 ` [PATCH v2 0/9] [GSOC][RFC] cat-file: reuse ref-filter logic ZheNing Hu via GitGitGadget
                     ` (3 preceding siblings ...)
  2021-06-15 13:29   ` [PATCH v2 4/9] [GSOC] ref-filter: add %(rest) atom ZheNing Hu via GitGitGadget
@ 2021-06-15 13:29   ` ZheNing Hu via GitGitGadget
  2021-06-16  7:36     ` Junio C Hamano
  2021-06-15 13:29   ` [PATCH v2 6/9] [GSOC] ref-filter: introduce free_array_item_internal() function ZheNing Hu via GitGitGadget
                     ` (6 subsequent siblings)
  11 siblings, 1 reply; 121+ messages in thread
From: ZheNing Hu via GitGitGadget @ 2021-06-15 13:29 UTC (permalink / raw)
  To: git
  Cc: Junio C Hamano, Christian Couder, Hariom Verma, Bagas Sanjaya,
	Jeff King, ZheNing Hu, ZheNing Hu

From: ZheNing Hu <adlternative@gmail.com>

Let `populate_value()`, `get_ref_atom_value()` and
`format_ref_array_item()` get the return value of `get_object()`
correctly. This can help us later let `cat-file --batch` get the
correct error message and return value of `get_object()`.

Mentored-by: Christian Couder <christian.couder@gmail.com>
Mentored-by: Hariom Verma <hariom18599@gmail.com>
Signed-off-by: ZheNing Hu <adlternative@gmail.com>
---
 ref-filter.c | 19 +++++++++++--------
 1 file changed, 11 insertions(+), 8 deletions(-)

diff --git a/ref-filter.c b/ref-filter.c
index 8868cf98f090..420c0bf9384f 100644
--- a/ref-filter.c
+++ b/ref-filter.c
@@ -1808,7 +1808,7 @@ static char *get_worktree_path(const struct used_atom *atom, const struct ref_ar
 static int populate_value(struct ref_array_item *ref, struct strbuf *err)
 {
 	struct object *obj;
-	int i;
+	int i, ret = 0;
 	struct object_info empty = OBJECT_INFO_INIT;
 
 	CALLOC_ARRAY(ref->value, used_atom_cnt);
@@ -1965,8 +1965,8 @@ static int populate_value(struct ref_array_item *ref, struct strbuf *err)
 
 
 	oi.oid = ref->objectname;
-	if (get_object(ref, 0, &obj, &oi, err))
-		return -1;
+	if ((ret = get_object(ref, 0, &obj, &oi, err)))
+		return ret;
 
 	/*
 	 * If there is no atom that wants to know about tagged
@@ -1997,9 +1997,11 @@ static int populate_value(struct ref_array_item *ref, struct strbuf *err)
 static int get_ref_atom_value(struct ref_array_item *ref, int atom,
 			      struct atom_value **v, struct strbuf *err)
 {
+	int ret = 0;
+
 	if (!ref->value) {
-		if (populate_value(ref, err))
-			return -1;
+		if ((ret = populate_value(ref, err)))
+			return ret;
 		fill_missing_values(ref->value);
 	}
 	*v = &ref->value[atom];
@@ -2573,6 +2575,7 @@ int format_ref_array_item(struct ref_array_item *info,
 {
 	const char *cp, *sp, *ep;
 	struct ref_formatting_state state = REF_FORMATTING_STATE_INIT;
+	int ret = 0;
 
 	state.quote_style = format->quote_style;
 	push_stack_element(&state.stack);
@@ -2585,10 +2588,10 @@ int format_ref_array_item(struct ref_array_item *info,
 		if (cp < sp)
 			append_literal(cp, sp, &state);
 		pos = parse_ref_filter_atom(format, sp + 2, ep, error_buf);
-		if (pos < 0 || get_ref_atom_value(info, pos, &atomv, error_buf) ||
+		if (pos < 0 || (ret = get_ref_atom_value(info, pos, &atomv, error_buf)) ||
 		    atomv->handler(atomv, &state, error_buf)) {
 			pop_stack_element(&state.stack);
-			return -1;
+			return ret ? ret : -1;
 		}
 	}
 	if (*cp) {
@@ -2610,7 +2613,7 @@ int format_ref_array_item(struct ref_array_item *info,
 	}
 	strbuf_addbuf(final_buf, &state.stack->output);
 	pop_stack_element(&state.stack);
-	return 0;
+	return ret;
 }
 
 void pretty_print_ref(const char *name, const struct object_id *oid,
-- 
gitgitgadget


^ permalink raw reply	[flat|nested] 121+ messages in thread

* [PATCH v2 6/9] [GSOC] ref-filter: introduce free_array_item_internal() function
  2021-06-15 13:28 ` [PATCH v2 0/9] [GSOC][RFC] cat-file: reuse ref-filter logic ZheNing Hu via GitGitGadget
                     ` (4 preceding siblings ...)
  2021-06-15 13:29   ` [PATCH v2 5/9] [GSOC] ref-filter: teach get_object() return useful value ZheNing Hu via GitGitGadget
@ 2021-06-15 13:29   ` ZheNing Hu via GitGitGadget
  2021-06-16  7:49     ` Junio C Hamano
  2021-06-15 13:29   ` [PATCH v2 7/9] [GSOC] cat-file: reuse ref-filter logic ZheNing Hu via GitGitGadget
                     ` (5 subsequent siblings)
  11 siblings, 1 reply; 121+ messages in thread
From: ZheNing Hu via GitGitGadget @ 2021-06-15 13:29 UTC (permalink / raw)
  To: git
  Cc: Junio C Hamano, Christian Couder, Hariom Verma, Bagas Sanjaya,
	Jeff King, ZheNing Hu, ZheNing Hu

From: ZheNing Hu <adlternative@gmail.com>

Introduce free_array_item_internal() for freeing ref_array_item value.
It will be called internally by free_array_item(), and it will help
`cat-file --batch` free ref_array_item's memory later.

Mentored-by: Christian Couder <christian.couder@gmail.com>
Mentored-by: Hariom Verma <hariom18599@gmail.com>
Signed-off-by: ZheNing Hu <adlternative@gmail.com>
---
 ref-filter.c | 11 ++++++++---
 ref-filter.h |  2 ++
 2 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/ref-filter.c b/ref-filter.c
index 420c0bf9384f..aa2ce634106b 100644
--- a/ref-filter.c
+++ b/ref-filter.c
@@ -2282,16 +2282,21 @@ static int ref_filter_handler(const char *refname, const struct object_id *oid,
 	return 0;
 }
 
-/*  Free memory allocated for a ref_array_item */
-static void free_array_item(struct ref_array_item *item)
+void free_array_item_internal(struct ref_array_item *item)
 {
-	free((char *)item->symref);
 	if (item->value) {
 		int i;
 		for (i = 0; i < used_atom_cnt; i++)
 			free((char *)item->value[i].s);
 		free(item->value);
 	}
+}
+
+/*  Free memory allocated for a ref_array_item */
+static void free_array_item(struct ref_array_item *item)
+{
+	free((char *)item->symref);
+	free_array_item_internal(item);
 	free(item);
 }
 
diff --git a/ref-filter.h b/ref-filter.h
index 9dc07476a584..d4531fef5f91 100644
--- a/ref-filter.h
+++ b/ref-filter.h
@@ -111,6 +111,8 @@ struct ref_format {
 int filter_refs(struct ref_array *array, struct ref_filter *filter, unsigned int type);
 /*  Clear all memory allocated to ref_array */
 void ref_array_clear(struct ref_array *array);
+/* Free array item's value */
+void free_array_item_internal(struct ref_array_item *item);
 /*  Used to verify if the given format is correct and to parse out the used atoms */
 int verify_ref_format(struct ref_format *format);
 /*  Sort the given ref_array as per the ref_sorting provided */
-- 
gitgitgadget


^ permalink raw reply	[flat|nested] 121+ messages in thread

* [PATCH v2 7/9] [GSOC] cat-file: reuse ref-filter logic
  2021-06-15 13:28 ` [PATCH v2 0/9] [GSOC][RFC] cat-file: reuse ref-filter logic ZheNing Hu via GitGitGadget
                     ` (5 preceding siblings ...)
  2021-06-15 13:29   ` [PATCH v2 6/9] [GSOC] ref-filter: introduce free_array_item_internal() function ZheNing Hu via GitGitGadget
@ 2021-06-15 13:29   ` ZheNing Hu via GitGitGadget
  2021-06-15 13:29   ` [PATCH v2 8/9] [GSOC] cat-file: reuse err buf in batch_objet_write() ZheNing Hu via GitGitGadget
                     ` (4 subsequent siblings)
  11 siblings, 0 replies; 121+ messages in thread
From: ZheNing Hu via GitGitGadget @ 2021-06-15 13:29 UTC (permalink / raw)
  To: git
  Cc: Junio C Hamano, Christian Couder, Hariom Verma, Bagas Sanjaya,
	Jeff King, ZheNing Hu, ZheNing Hu

From: ZheNing Hu <adlternative@gmail.com>

In order to let cat-file use ref-filter logic, the following
methods are used:

1. Add `cat_file_mode` member in struct `ref_format`, this can
help us reject atoms in verify_ref_format() which cat-file
cannot use, e.g. `%(refname)`, `%(push)`, `%(upstream)`...
2. Change the type of member `format` in struct `batch_options`
to `ref_format`, We can add format data in it.
3. Let `batch_objects()` add atoms to format, and use
`verify_ref_format()` to check atoms.
4. Use `has_object_file()` in `batch_one_object()` to check
whether the input object exists.
5. Use `format_ref_array_item()` in `batch_object_write()` to
get the formatted data corresponding to the object. If the
return value of `format_ref_array_item()` is equals to zero,
use `batch_write()` to print object data; else if the return
value less than zero, use `die()` to print the error message
and exit; else return value greater than zero, only print the
error message, but not exit.
6. Let get_object() return 1 and print "<oid> missing" instead
of returning -1 and printing "missing object <oid> for <refname>",
this can help `format_ref_array_item()` just report that the
object is missing without letting Git exit.

Most of the atoms in `for-each-ref --format` are now supported,
such as `%(tree)`, `%(parent)`, `%(author)`, `%(tagger)`, `%(if)`,
`%(then)`, `%(else)`, `%(end)`. But these atoms will be rejected:
`%(refname)`, `%(symref)`, `%(upstream)`, `%(push)`, `%(worktreepath)`,
`%(flag)`, `%(HEAD)`, because our objects don't have refname.

Mentored-by: Christian Couder <christian.couder@gmail.com>
Mentored-by: Hariom Verma <hariom18599@gmail.com>
Signed-off-by: ZheNing Hu <adlternative@gmail.com>
---
 Documentation/git-cat-file.txt |   6 +
 builtin/cat-file.c             | 248 +++++++-------------------------
 ref-filter.c                   |  15 +-
 ref-filter.h                   |   3 +-
 t/t1006-cat-file.sh            | 252 +++++++++++++++++++++++++++++++++
 t/t6301-for-each-ref-errors.sh |   2 +-
 6 files changed, 323 insertions(+), 203 deletions(-)

diff --git a/Documentation/git-cat-file.txt b/Documentation/git-cat-file.txt
index 4eb0421b3fd9..ef8ab952b2fa 100644
--- a/Documentation/git-cat-file.txt
+++ b/Documentation/git-cat-file.txt
@@ -226,6 +226,12 @@ newline. The available atoms are:
 	after that first run of whitespace (i.e., the "rest" of the
 	line) are output in place of the `%(rest)` atom.
 
+Note that most of the atoms in `for-each-ref --format` are now supported,
+such as `%(tree)`, `%(parent)`, `%(author)`, `%(tagger)`, `%(if)`,
+`%(then)`, `%(else)`, `%(end)`. But these atoms will be rejected:
+`%(refname)`, `%(symref)`, `%(upstream)`, `%(push)`, `%(worktreepath)`,
+`%(flag)`, `%(HEAD)`. See linkgit:git-for-each-ref[1].
+
 If no format is specified, the default format is `%(objectname)
 %(objecttype) %(objectsize)`.
 
diff --git a/builtin/cat-file.c b/builtin/cat-file.c
index 5ebf13359e83..026d9405636a 100644
--- a/builtin/cat-file.c
+++ b/builtin/cat-file.c
@@ -16,6 +16,7 @@
 #include "packfile.h"
 #include "object-store.h"
 #include "promisor-remote.h"
+#include "ref-filter.h"
 
 struct batch_options {
 	int enabled;
@@ -25,7 +26,7 @@ struct batch_options {
 	int all_objects;
 	int unordered;
 	int cmdmode; /* may be 'w' or 'c' for --filters or --textconv */
-	const char *format;
+	struct ref_format format;
 };
 
 static const char *force_path;
@@ -195,99 +196,10 @@ static int cat_one_file(int opt, const char *exp_type, const char *obj_name,
 
 struct expand_data {
 	struct object_id oid;
-	enum object_type type;
-	unsigned long size;
-	off_t disk_size;
 	const char *rest;
-	struct object_id delta_base_oid;
-
-	/*
-	 * If mark_query is true, we do not expand anything, but rather
-	 * just mark the object_info with items we wish to query.
-	 */
-	int mark_query;
-
-	/*
-	 * Whether to split the input on whitespace before feeding it to
-	 * get_sha1; this is decided during the mark_query phase based on
-	 * whether we have a %(rest) token in our format.
-	 */
 	int split_on_whitespace;
-
-	/*
-	 * After a mark_query run, this object_info is set up to be
-	 * passed to oid_object_info_extended. It will point to the data
-	 * elements above, so you can retrieve the response from there.
-	 */
-	struct object_info info;
-
-	/*
-	 * This flag will be true if the requested batch format and options
-	 * don't require us to call oid_object_info, which can then be
-	 * optimized out.
-	 */
-	unsigned skip_object_info : 1;
 };
 
-static int is_atom(const char *atom, const char *s, int slen)
-{
-	int alen = strlen(atom);
-	return alen == slen && !memcmp(atom, s, alen);
-}
-
-static void expand_atom(struct strbuf *sb, const char *atom, int len,
-			void *vdata)
-{
-	struct expand_data *data = vdata;
-
-	if (is_atom("objectname", atom, len)) {
-		if (!data->mark_query)
-			strbuf_addstr(sb, oid_to_hex(&data->oid));
-	} else if (is_atom("objecttype", atom, len)) {
-		if (data->mark_query)
-			data->info.typep = &data->type;
-		else
-			strbuf_addstr(sb, type_name(data->type));
-	} else if (is_atom("objectsize", atom, len)) {
-		if (data->mark_query)
-			data->info.sizep = &data->size;
-		else
-			strbuf_addf(sb, "%"PRIuMAX , (uintmax_t)data->size);
-	} else if (is_atom("objectsize:disk", atom, len)) {
-		if (data->mark_query)
-			data->info.disk_sizep = &data->disk_size;
-		else
-			strbuf_addf(sb, "%"PRIuMAX, (uintmax_t)data->disk_size);
-	} else if (is_atom("rest", atom, len)) {
-		if (data->mark_query)
-			data->split_on_whitespace = 1;
-		else if (data->rest)
-			strbuf_addstr(sb, data->rest);
-	} else if (is_atom("deltabase", atom, len)) {
-		if (data->mark_query)
-			data->info.delta_base_oid = &data->delta_base_oid;
-		else
-			strbuf_addstr(sb,
-				      oid_to_hex(&data->delta_base_oid));
-	} else
-		die("unknown format element: %.*s", len, atom);
-}
-
-static size_t expand_format(struct strbuf *sb, const char *start, void *data)
-{
-	const char *end;
-
-	if (*start != '(')
-		return 0;
-	end = strchr(start + 1, ')');
-	if (!end)
-		die("format element '%s' does not end in ')'", start);
-
-	expand_atom(sb, start + 1, end - start - 1, data);
-
-	return end - start + 1;
-}
-
 static void batch_write(struct batch_options *opt, const void *data, int len)
 {
 	if (opt->buffer_output) {
@@ -297,87 +209,31 @@ static void batch_write(struct batch_options *opt, const void *data, int len)
 		write_or_die(1, data, len);
 }
 
-static void print_object_or_die(struct batch_options *opt, struct expand_data *data)
-{
-	const struct object_id *oid = &data->oid;
-
-	assert(data->info.typep);
-
-	if (data->type == OBJ_BLOB) {
-		if (opt->buffer_output)
-			fflush(stdout);
-		if (opt->cmdmode) {
-			char *contents;
-			unsigned long size;
-
-			if (!data->rest)
-				die("missing path for '%s'", oid_to_hex(oid));
-
-			if (opt->cmdmode == 'w') {
-				if (filter_object(data->rest, 0100644, oid,
-						  &contents, &size))
-					die("could not convert '%s' %s",
-					    oid_to_hex(oid), data->rest);
-			} else if (opt->cmdmode == 'c') {
-				enum object_type type;
-				if (!textconv_object(the_repository,
-						     data->rest, 0100644, oid,
-						     1, &contents, &size))
-					contents = read_object_file(oid,
-								    &type,
-								    &size);
-				if (!contents)
-					die("could not convert '%s' %s",
-					    oid_to_hex(oid), data->rest);
-			} else
-				BUG("invalid cmdmode: %c", opt->cmdmode);
-			batch_write(opt, contents, size);
-			free(contents);
-		} else {
-			stream_blob(oid);
-		}
-	}
-	else {
-		enum object_type type;
-		unsigned long size;
-		void *contents;
-
-		contents = read_object_file(oid, &type, &size);
-		if (!contents)
-			die("object %s disappeared", oid_to_hex(oid));
-		if (type != data->type)
-			die("object %s changed type!?", oid_to_hex(oid));
-		if (data->info.sizep && size != data->size)
-			die("object %s changed size!?", oid_to_hex(oid));
-
-		batch_write(opt, contents, size);
-		free(contents);
-	}
-}
 
 static void batch_object_write(const char *obj_name,
 			       struct strbuf *scratch,
 			       struct batch_options *opt,
 			       struct expand_data *data)
 {
-	if (!data->skip_object_info &&
-	    oid_object_info_extended(the_repository, &data->oid, &data->info,
-				     OBJECT_INFO_LOOKUP_REPLACE) < 0) {
-		printf("%s missing\n",
-		       obj_name ? obj_name : oid_to_hex(&data->oid));
-		fflush(stdout);
-		return;
-	}
+	int ret = 0;
+	struct strbuf err = STRBUF_INIT;
+	struct ref_array_item item = { data->oid, data->rest };
 
 	strbuf_reset(scratch);
-	strbuf_expand(scratch, opt->format, expand_format, data);
-	strbuf_addch(scratch, '\n');
-	batch_write(opt, scratch->buf, scratch->len);
 
-	if (opt->print_contents) {
-		print_object_or_die(opt, data);
-		batch_write(opt, "\n", 1);
+	ret = format_ref_array_item(&item, &opt->format, scratch, &err);
+	if (!ret) {
+		strbuf_addch(scratch, '\n');
+		batch_write(opt, scratch->buf, scratch->len);
+	} else if (ret < 0) {
+		die("%s\n", err.buf);
+	} else {
+		/* when ret > 0 , don't call die and print the err to stdout*/
+		printf("%s\n", err.buf);
+		fflush(stdout);
 	}
+	free_array_item_internal(&item);
+	strbuf_release(&err);
 }
 
 static void batch_one_object(const char *obj_name,
@@ -428,6 +284,13 @@ static void batch_one_object(const char *obj_name,
 		return;
 	}
 
+	if (!has_object_file(&data->oid)) {
+		printf("%s missing\n",
+		       obj_name ? obj_name : oid_to_hex(&data->oid));
+		fflush(stdout);
+		return;
+	}
+
 	batch_object_write(obj_name, scratch, opt, data);
 }
 
@@ -488,42 +351,34 @@ static int batch_unordered_packed(const struct object_id *oid,
 	return batch_unordered_object(oid, data);
 }
 
-static int batch_objects(struct batch_options *opt)
+static const char * const cat_file_usage[] = {
+	N_("git cat-file (-t [--allow-unknown-type] | -s [--allow-unknown-type] | -e | -p | <type> | --textconv | --filters) [--path=<path>] <object>"),
+	N_("git cat-file (--batch[=<format>] | --batch-check[=<format>]) [--follow-symlinks] [--textconv | --filters]"),
+	NULL
+};
+
+static int batch_objects(struct batch_options *opt, const struct option *options)
 {
 	struct strbuf input = STRBUF_INIT;
 	struct strbuf output = STRBUF_INIT;
+	struct strbuf format = STRBUF_INIT;
 	struct expand_data data;
 	int save_warning;
 	int retval = 0;
 
-	if (!opt->format)
-		opt->format = "%(objectname) %(objecttype) %(objectsize)";
-
-	/*
-	 * Expand once with our special mark_query flag, which will prime the
-	 * object_info to be handed to oid_object_info_extended for each
-	 * object.
-	 */
 	memset(&data, 0, sizeof(data));
-	data.mark_query = 1;
-	strbuf_expand(&output, opt->format, expand_format, &data);
-	data.mark_query = 0;
-	strbuf_release(&output);
-	if (opt->cmdmode)
-		data.split_on_whitespace = 1;
-
-	if (opt->all_objects) {
-		struct object_info empty = OBJECT_INFO_INIT;
-		if (!memcmp(&data.info, &empty, sizeof(empty)))
-			data.skip_object_info = 1;
-	}
-
-	/*
-	 * If we are printing out the object, then always fill in the type,
-	 * since we will want to decide whether or not to stream.
-	 */
+	if (!opt->format.format)
+		strbuf_addstr(&format, "%(objectname) %(objecttype) %(objectsize)");
+	else
+		strbuf_addstr(&format, opt->format.format);
 	if (opt->print_contents)
-		data.info.typep = &data.type;
+		strbuf_addstr(&format, "\n%(raw)");
+	opt->format.format = format.buf;
+	if (verify_ref_format(&opt->format))
+		usage_with_options(cat_file_usage, options);
+
+	if (opt->cmdmode || opt->format.use_rest)
+		data.split_on_whitespace = 1;
 
 	if (opt->all_objects) {
 		struct object_cb_data cb;
@@ -556,6 +411,7 @@ static int batch_objects(struct batch_options *opt)
 			oid_array_clear(&sa);
 		}
 
+		strbuf_release(&format);
 		strbuf_release(&output);
 		return 0;
 	}
@@ -588,18 +444,13 @@ static int batch_objects(struct batch_options *opt)
 		batch_one_object(input.buf, &output, opt, &data);
 	}
 
+	strbuf_release(&format);
 	strbuf_release(&input);
 	strbuf_release(&output);
 	warn_on_object_refname_ambiguity = save_warning;
 	return retval;
 }
 
-static const char * const cat_file_usage[] = {
-	N_("git cat-file (-t [--allow-unknown-type] | -s [--allow-unknown-type] | -e | -p | <type> | --textconv | --filters) [--path=<path>] <object>"),
-	N_("git cat-file (--batch[=<format>] | --batch-check[=<format>]) [--follow-symlinks] [--textconv | --filters]"),
-	NULL
-};
-
 static int git_cat_file_config(const char *var, const char *value, void *cb)
 {
 	if (userdiff_config(var, value) < 0)
@@ -622,7 +473,7 @@ static int batch_option_callback(const struct option *opt,
 
 	bo->enabled = 1;
 	bo->print_contents = !strcmp(opt->long_name, "batch");
-	bo->format = arg;
+	bo->format.format = arg;
 
 	return 0;
 }
@@ -631,7 +482,9 @@ int cmd_cat_file(int argc, const char **argv, const char *prefix)
 {
 	int opt = 0;
 	const char *exp_type = NULL, *obj_name = NULL;
-	struct batch_options batch = {0};
+	struct batch_options batch = {
+		.format = REF_FORMAT_INIT
+	};
 	int unknown_type = 0;
 
 	const struct option options[] = {
@@ -670,6 +523,7 @@ int cmd_cat_file(int argc, const char **argv, const char *prefix)
 	git_config(git_cat_file_config, NULL);
 
 	batch.buffer_output = -1;
+	batch.format.cat_file_mode = 1;
 	argc = parse_options(argc, argv, prefix, options, cat_file_usage, 0);
 
 	if (opt) {
@@ -713,7 +567,7 @@ int cmd_cat_file(int argc, const char **argv, const char *prefix)
 		batch.buffer_output = batch.all_objects;
 
 	if (batch.enabled)
-		return batch_objects(&batch);
+		return batch_objects(&batch, options);
 
 	if (unknown_type && opt != 't' && opt != 's')
 		die("git cat-file --allow-unknown-type: use with -s or -t");
diff --git a/ref-filter.c b/ref-filter.c
index aa2ce634106b..660e52f97f79 100644
--- a/ref-filter.c
+++ b/ref-filter.c
@@ -1017,8 +1017,15 @@ int verify_ref_format(struct ref_format *format)
 		at = parse_ref_filter_atom(format, sp + 2, ep, &err);
 		if (at < 0)
 			die("%s", err.buf);
-		if (used_atom[at].atom_type == ATOM_REST)
-			die("this command reject atom %%(%.*s)", (int)(ep - sp - 2), sp + 2);
+		if ((!format->cat_file_mode && used_atom[at].atom_type == ATOM_REST) ||
+		    (format->cat_file_mode && (used_atom[at].atom_type == ATOM_FLAG ||
+					       used_atom[at].atom_type == ATOM_HEAD ||
+					       used_atom[at].atom_type == ATOM_PUSH ||
+					       used_atom[at].atom_type == ATOM_REFNAME ||
+					       used_atom[at].atom_type == ATOM_SYMREF ||
+					       used_atom[at].atom_type == ATOM_UPSTREAM ||
+					       used_atom[at].atom_type == ATOM_WORKTREEPATH)))
+			die(_("this command reject atom %%(%.*s)"), (int)(ep - sp - 2), sp + 2);
 
 		if (format->quote_style && used_atom[at].atom_type == ATOM_RAW &&
 		    used_atom[at].u.raw_data.option == RAW_BARE)
@@ -1735,8 +1742,8 @@ static int get_object(struct ref_array_item *ref, int deref, struct object **obj
 	}
 	if (oid_object_info_extended(the_repository, &oi->oid, &oi->info,
 				     OBJECT_INFO_LOOKUP_REPLACE))
-		return strbuf_addf_ret(err, -1, _("missing object %s for %s"),
-				       oid_to_hex(&oi->oid), ref->refname);
+		return strbuf_addf_ret(err, 1, _("%s missing"),
+				       oid_to_hex(&oi->oid));
 	if (oi->info.disk_sizep && oi->disk_size < 0)
 		BUG("Object size is less than zero.");
 
diff --git a/ref-filter.h b/ref-filter.h
index d4531fef5f91..3db07729f46d 100644
--- a/ref-filter.h
+++ b/ref-filter.h
@@ -78,6 +78,7 @@ struct ref_format {
 	 */
 	const char *format;
 	const char *rest;
+	int cat_file_mode;
 	int quote_style;
 	int use_rest;
 	int use_color;
@@ -86,7 +87,7 @@ struct ref_format {
 	int need_color_reset_at_eol;
 };
 
-#define REF_FORMAT_INIT { NULL, NULL, 0, 0, -1 }
+#define REF_FORMAT_INIT { NULL, NULL, 0, 0, 0, -1 }
 
 /*  Macros for checking --merged and --no-merged options */
 #define _OPT_MERGED_NO_MERGED(option, filter, h) \
diff --git a/t/t1006-cat-file.sh b/t/t1006-cat-file.sh
index 5d2dc99b74ad..5efa7397cfbc 100755
--- a/t/t1006-cat-file.sh
+++ b/t/t1006-cat-file.sh
@@ -586,4 +586,256 @@ test_expect_success 'cat-file --unordered works' '
 	test_cmp expect actual
 '
 
+. "$TEST_DIRECTORY"/lib-gpg.sh
+. "$TEST_DIRECTORY"/lib-terminal.sh
+
+test_expect_success 'cat-file --batch|--batch-check setup' '
+	echo 1>blob1 &&
+	printf "a\0b\0\c" >blob2 &&
+	git add blob1 blob2 &&
+	git commit -m "Commit Message" &&
+	git branch -M main &&
+	git tag -a -m "v0.0.0" testtag &&
+	git update-ref refs/myblobs/blob1 HEAD:blob1 &&
+	git update-ref refs/myblobs/blob2 HEAD:blob2 &&
+	git update-ref refs/mytrees/tree1 HEAD^{tree}
+'
+
+batch_test_atom() {
+	if test "$3" = "fail"
+	then
+		test_expect_${4:-success} $PREREQ "basic atom: $1 $2 mast failed" "
+			test_must_fail git cat-file --batch-check='$2' >bad <<-EOF
+			$1
+			EOF
+		"
+	else
+		test_expect_${4:-success} $PREREQ "basic atom: $1 $2" "
+			git for-each-ref --format='$2' $1 >expected &&
+			git cat-file --batch-check='$2' >actual <<-EOF &&
+			$1
+			EOF
+			sanitize_pgp <actual >actual.clean &&
+			cmp expected actual.clean
+		"
+	fi
+}
+
+batch_test_atom refs/heads/main '%(refname)' fail
+batch_test_atom refs/heads/main '%(refname:)' fail
+batch_test_atom refs/heads/main '%(refname:short)' fail
+batch_test_atom refs/heads/main '%(refname:lstrip=1)' fail
+batch_test_atom refs/heads/main '%(refname:lstrip=2)' fail
+batch_test_atom refs/heads/main '%(refname:lstrip=-1)' fail
+batch_test_atom refs/heads/main '%(refname:lstrip=-2)' fail
+batch_test_atom refs/heads/main '%(refname:rstrip=1)' fail
+batch_test_atom refs/heads/main '%(refname:rstrip=2)' fail
+batch_test_atom refs/heads/main '%(refname:rstrip=-1)' fail
+batch_test_atom refs/heads/main '%(refname:rstrip=-2)' fail
+batch_test_atom refs/heads/main '%(refname:strip=1)' fail
+batch_test_atom refs/heads/main '%(refname:strip=2)' fail
+batch_test_atom refs/heads/main '%(refname:strip=-1)' fail
+batch_test_atom refs/heads/main '%(refname:strip=-2)' fail
+batch_test_atom refs/heads/main '%(upstream)' fail
+batch_test_atom refs/heads/main '%(upstream:short)' fail
+batch_test_atom refs/heads/main '%(upstream:lstrip=2)' fail
+batch_test_atom refs/heads/main '%(upstream:lstrip=-2)' fail
+batch_test_atom refs/heads/main '%(upstream:rstrip=2)' fail
+batch_test_atom refs/heads/main '%(upstream:rstrip=-2)' fail
+batch_test_atom refs/heads/main '%(upstream:strip=2)' fail
+batch_test_atom refs/heads/main '%(upstream:strip=-2)' fail
+batch_test_atom refs/heads/main '%(push)' fail
+batch_test_atom refs/heads/main '%(push:short)' fail
+batch_test_atom refs/heads/main '%(push:lstrip=1)' fail
+batch_test_atom refs/heads/main '%(push:lstrip=-1)' fail
+batch_test_atom refs/heads/main '%(push:rstrip=1)' fail
+batch_test_atom refs/heads/main '%(push:rstrip=-1)' fail
+batch_test_atom refs/heads/main '%(push:strip=1)' fail
+batch_test_atom refs/heads/main '%(push:strip=-1)' fail
+batch_test_atom refs/heads/main '%(objecttype)'
+batch_test_atom refs/heads/main '%(objectsize)'
+batch_test_atom refs/heads/main '%(objectsize:disk)'
+batch_test_atom refs/heads/main '%(deltabase)'
+batch_test_atom refs/heads/main '%(objectname)'
+batch_test_atom refs/heads/main '%(objectname:short)'
+batch_test_atom refs/heads/main '%(objectname:short=1)'
+batch_test_atom refs/heads/main '%(objectname:short=10)'
+batch_test_atom refs/heads/main '%(tree)'
+batch_test_atom refs/heads/main '%(tree:short)'
+batch_test_atom refs/heads/main '%(tree:short=1)'
+batch_test_atom refs/heads/main '%(tree:short=10)'
+batch_test_atom refs/heads/main '%(parent)'
+batch_test_atom refs/heads/main '%(parent:short)'
+batch_test_atom refs/heads/main '%(parent:short=1)'
+batch_test_atom refs/heads/main '%(parent:short=10)'
+batch_test_atom refs/heads/main '%(numparent)'
+batch_test_atom refs/heads/main '%(object)'
+batch_test_atom refs/heads/main '%(type)'
+batch_test_atom refs/heads/main '%(raw)'
+batch_test_atom refs/heads/main '%(*objectname)'
+batch_test_atom refs/heads/main '%(*objecttype)'
+batch_test_atom refs/heads/main '%(author)'
+batch_test_atom refs/heads/main '%(authorname)'
+batch_test_atom refs/heads/main '%(authoremail)'
+batch_test_atom refs/heads/main '%(authoremail:trim)'
+batch_test_atom refs/heads/main '%(authoremail:localpart)'
+batch_test_atom refs/heads/main '%(authordate)'
+batch_test_atom refs/heads/main '%(committer)'
+batch_test_atom refs/heads/main '%(committername)'
+batch_test_atom refs/heads/main '%(committeremail)'
+batch_test_atom refs/heads/main '%(committeremail:trim)'
+batch_test_atom refs/heads/main '%(committeremail:localpart)'
+batch_test_atom refs/heads/main '%(committerdate)'
+batch_test_atom refs/heads/main '%(tag)'
+batch_test_atom refs/heads/main '%(tagger)'
+batch_test_atom refs/heads/main '%(taggername)'
+batch_test_atom refs/heads/main '%(taggeremail)'
+batch_test_atom refs/heads/main '%(taggeremail:trim)'
+batch_test_atom refs/heads/main '%(taggeremail:localpart)'
+batch_test_atom refs/heads/main '%(taggerdate)'
+batch_test_atom refs/heads/main '%(creator)'
+batch_test_atom refs/heads/main '%(creatordate)'
+batch_test_atom refs/heads/main '%(subject)'
+batch_test_atom refs/heads/main '%(subject:sanitize)'
+batch_test_atom refs/heads/main '%(contents:subject)'
+batch_test_atom refs/heads/main '%(body)'
+batch_test_atom refs/heads/main '%(contents:body)'
+batch_test_atom refs/heads/main '%(contents:signature)'
+batch_test_atom refs/heads/main '%(contents)'
+batch_test_atom refs/heads/main '%(HEAD)' fail
+batch_test_atom refs/heads/main '%(upstream:track)' fail
+batch_test_atom refs/heads/main '%(upstream:trackshort)' fail
+batch_test_atom refs/heads/main '%(upstream:track,nobracket)' fail
+batch_test_atom refs/heads/main '%(upstream:nobracket,track)' fail
+batch_test_atom refs/heads/main '%(push:track)' fail
+batch_test_atom refs/heads/main '%(push:trackshort)' fail
+batch_test_atom refs/heads/main '%(worktreepath)' fail
+batch_test_atom refs/heads/main '%(symref)' fail
+batch_test_atom refs/heads/main '%(flag)' fail
+
+batch_test_atom refs/tags/testtag '%(refname)' fail
+batch_test_atom refs/tags/testtag '%(refname:short)' fail
+batch_test_atom refs/tags/testtag '%(upstream)' fail
+batch_test_atom refs/tags/testtag '%(push)' fail
+batch_test_atom refs/tags/testtag '%(objecttype)'
+batch_test_atom refs/tags/testtag '%(objectsize)'
+batch_test_atom refs/tags/testtag '%(objectsize:disk)'
+batch_test_atom refs/tags/testtag '%(*objectsize:disk)'
+batch_test_atom refs/tags/testtag '%(deltabase)'
+batch_test_atom refs/tags/testtag '%(*deltabase)'
+batch_test_atom refs/tags/testtag '%(objectname)'
+batch_test_atom refs/tags/testtag '%(objectname:short)'
+batch_test_atom refs/tags/testtag '%(tree)'
+batch_test_atom refs/tags/testtag '%(tree:short)'
+batch_test_atom refs/tags/testtag '%(tree:short=1)'
+batch_test_atom refs/tags/testtag '%(tree:short=10)'
+batch_test_atom refs/tags/testtag '%(parent)'
+batch_test_atom refs/tags/testtag '%(parent:short)'
+batch_test_atom refs/tags/testtag '%(parent:short=1)'
+batch_test_atom refs/tags/testtag '%(parent:short=10)'
+batch_test_atom refs/tags/testtag '%(numparent)'
+batch_test_atom refs/tags/testtag '%(object)'
+batch_test_atom refs/tags/testtag '%(type)'
+batch_test_atom refs/tags/testtag '%(*objectname)'
+batch_test_atom refs/tags/testtag '%(*objecttype)'
+batch_test_atom refs/tags/testtag '%(author)'
+batch_test_atom refs/tags/testtag '%(authorname)'
+batch_test_atom refs/tags/testtag '%(authoremail)'
+batch_test_atom refs/tags/testtag '%(authoremail:trim)'
+batch_test_atom refs/tags/testtag '%(authoremail:localpart)'
+batch_test_atom refs/tags/testtag '%(authordate)'
+batch_test_atom refs/tags/testtag '%(committer)'
+batch_test_atom refs/tags/testtag '%(committername)'
+batch_test_atom refs/tags/testtag '%(committeremail)'
+batch_test_atom refs/tags/testtag '%(committeremail:trim)'
+batch_test_atom refs/tags/testtag '%(committeremail:localpart)'
+batch_test_atom refs/tags/testtag '%(committerdate)'
+batch_test_atom refs/tags/testtag '%(tag)'
+batch_test_atom refs/tags/testtag '%(tagger)'
+batch_test_atom refs/tags/testtag '%(taggername)'
+batch_test_atom refs/tags/testtag '%(taggeremail)'
+batch_test_atom refs/tags/testtag '%(taggeremail:trim)'
+batch_test_atom refs/tags/testtag '%(taggeremail:localpart)'
+batch_test_atom refs/tags/testtag '%(taggerdate)'
+batch_test_atom refs/tags/testtag '%(creator)'
+batch_test_atom refs/tags/testtag '%(creatordate)'
+batch_test_atom refs/tags/testtag '%(subject)'
+batch_test_atom refs/tags/testtag '%(subject:sanitize)'
+batch_test_atom refs/tags/testtag '%(contents:subject)'
+batch_test_atom refs/tags/testtag '%(body)'
+batch_test_atom refs/tags/testtag '%(contents:body)'
+batch_test_atom refs/tags/testtag '%(contents:signature)'
+batch_test_atom refs/tags/testtag '%(contents)'
+batch_test_atom refs/tags/testtag '%(HEAD)' fail
+
+batch_test_atom refs/myblobs/blob1 '%(refname)' fail
+batch_test_atom refs/myblobs/blob1 '%(upstream)' fail
+batch_test_atom refs/myblobs/blob1 '%(push)' fail
+batch_test_atom refs/myblobs/blob1 '%(HEAD)' fail
+
+batch_test_atom refs/myblobs/blob1 '%(objectname)'
+batch_test_atom refs/myblobs/blob1 '%(objecttype)'
+batch_test_atom refs/myblobs/blob1 '%(objectsize)'
+batch_test_atom refs/myblobs/blob1 '%(objectsize:disk)'
+batch_test_atom refs/myblobs/blob1 '%(deltabase)'
+
+batch_test_atom refs/myblobs/blob1 '%(contents)'
+batch_test_atom refs/myblobs/blob2 '%(contents)'
+
+batch_test_atom refs/myblobs/blob1 '%(raw)'
+batch_test_atom refs/myblobs/blob2 '%(raw)'
+batch_test_atom refs/mytrees/tree1 '%(raw)'
+
+batch_test_atom refs/myblobs/blob1 '%(raw:size)'
+batch_test_atom refs/myblobs/blob2 '%(raw:size)'
+batch_test_atom refs/mytrees/tree1 '%(raw:size)'
+
+batch_test_atom refs/myblobs/blob1 '%(if:equals=blob)%(objecttype)%(then)commit%(else)not commit%(end)'
+batch_test_atom refs/myblobs/blob2 '%(if:equals=blob)%(objecttype)%(then)commit%(else)not commit%(end)'
+batch_test_atom refs/mytrees/tree1 '%(if:equals=tree)%(objecttype)%(then)tree%(else)not tree%(end)'
+
+batch_test_atom refs/heads/main '%(align:60) objectname is %(objectname)%(end)|%(objectname)'
+batch_test_atom refs/heads/main '%(align:left,60) objectname is %(objectname)%(end)|%(objectname)'
+batch_test_atom refs/heads/main '%(align:middle,60) objectname is %(objectname)%(end)|%(objectname)'
+batch_test_atom refs/heads/main '%(align:60,right) objectname is %(objectname)%(end)|%(objectname)'
+
+batch_test_atom refs/heads/main 'VALID'
+batch_test_atom refs/heads/main '%(INVALID)' fail
+batch_test_atom refs/heads/main '%(authordate:INVALID)' fail
+
+test_expect_success 'cat-file refs/heads/main refs/tags/testtag %(rest)' '
+	cat >expected <<-EOF &&
+	123 commit 123
+	456 tag 456
+	EOF
+	git cat-file --batch-check="%(rest) %(objecttype) %(rest)" >actual <<-EOF &&
+	refs/heads/main 123
+	refs/tags/testtag 456
+	EOF
+	test_cmp expected actual
+'
+
+batch_test_atom refs/heads/main '%(objectname) %(objecttype) %(objectsize)
+%(raw)'
+batch_test_atom refs/tags/testtag '%(objectname) %(objecttype) %(objectsize)
+%(raw)'
+batch_test_atom refs/myblobs/blob1 '%(objectname) %(objecttype) %(objectsize)
+%(raw)'
+batch_test_atom refs/myblobs/blob2 '%(objectname) %(objecttype) %(objectsize)
+%(raw)'
+
+
+test_expect_success 'cat-file --batch equals to --batch-check with atoms' '
+	git cat-file --batch-check="%(objectname) %(objecttype) %(objectsize)
+%(raw)" >expected <<-EOF &&
+	refs/heads/main
+	refs/tags/testtag
+	EOF
+	git cat-file --batch >actual <<-EOF &&
+	refs/heads/main
+	refs/tags/testtag
+	EOF
+	cmp expected actual
+'
+
 test_done
diff --git a/t/t6301-for-each-ref-errors.sh b/t/t6301-for-each-ref-errors.sh
index 40edf9dab534..3553f84a00c1 100755
--- a/t/t6301-for-each-ref-errors.sh
+++ b/t/t6301-for-each-ref-errors.sh
@@ -41,7 +41,7 @@ test_expect_success 'Missing objects are reported correctly' '
 	r=refs/heads/missing &&
 	echo $MISSING >.git/$r &&
 	test_when_finished "rm -f .git/$r" &&
-	echo "fatal: missing object $MISSING for $r" >missing-err &&
+	echo "fatal: $MISSING missing" >missing-err &&
 	test_must_fail git for-each-ref 2>err &&
 	test_cmp missing-err err &&
 	(
-- 
gitgitgadget


^ permalink raw reply	[flat|nested] 121+ messages in thread

* [PATCH v2 8/9] [GSOC] cat-file: reuse err buf in batch_objet_write()
  2021-06-15 13:28 ` [PATCH v2 0/9] [GSOC][RFC] cat-file: reuse ref-filter logic ZheNing Hu via GitGitGadget
                     ` (6 preceding siblings ...)
  2021-06-15 13:29   ` [PATCH v2 7/9] [GSOC] cat-file: reuse ref-filter logic ZheNing Hu via GitGitGadget
@ 2021-06-15 13:29   ` ZheNing Hu via GitGitGadget
  2021-06-15 13:29   ` [PATCH v2 9/9] [GSOC] cat-file: re-implement --textconv, --filters options ZheNing Hu via GitGitGadget
                     ` (3 subsequent siblings)
  11 siblings, 0 replies; 121+ messages in thread
From: ZheNing Hu via GitGitGadget @ 2021-06-15 13:29 UTC (permalink / raw)
  To: git
  Cc: Junio C Hamano, Christian Couder, Hariom Verma, Bagas Sanjaya,
	Jeff King, ZheNing Hu, ZheNing Hu

From: ZheNing Hu <adlternative@gmail.com>

Reuse the `err` buffer in batch_object_write(), as the
buffer `scratch` does. This will reduce the overhead
of multiple allocations of memory of the err buffer.

Mentored-by: Christian Couder <christian.couder@gmail.com>
Mentored-by: Hariom Verma <hariom18599@gmail.com>
Signed-off-by: ZheNing Hu <adlternative@gmail.com>
---
 builtin/cat-file.c | 22 ++++++++++++++--------
 1 file changed, 14 insertions(+), 8 deletions(-)

diff --git a/builtin/cat-file.c b/builtin/cat-file.c
index 026d9405636a..6766efb06f41 100644
--- a/builtin/cat-file.c
+++ b/builtin/cat-file.c
@@ -212,32 +212,33 @@ static void batch_write(struct batch_options *opt, const void *data, int len)
 
 static void batch_object_write(const char *obj_name,
 			       struct strbuf *scratch,
+			       struct strbuf *err,
 			       struct batch_options *opt,
 			       struct expand_data *data)
 {
 	int ret = 0;
-	struct strbuf err = STRBUF_INIT;
 	struct ref_array_item item = { data->oid, data->rest };
 
 	strbuf_reset(scratch);
+	strbuf_reset(err);
 
-	ret = format_ref_array_item(&item, &opt->format, scratch, &err);
+	ret = format_ref_array_item(&item, &opt->format, scratch, err);
 	if (!ret) {
 		strbuf_addch(scratch, '\n');
 		batch_write(opt, scratch->buf, scratch->len);
 	} else if (ret < 0) {
-		die("%s\n", err.buf);
+		die("%s\n", err->buf);
 	} else {
 		/* when ret > 0 , don't call die and print the err to stdout*/
-		printf("%s\n", err.buf);
+		printf("%s\n", err->buf);
 		fflush(stdout);
 	}
 	free_array_item_internal(&item);
-	strbuf_release(&err);
 }
 
 static void batch_one_object(const char *obj_name,
 			     struct strbuf *scratch,
+			     struct strbuf *err,
 			     struct batch_options *opt,
 			     struct expand_data *data)
 {
@@ -291,7 +292,7 @@ static void batch_one_object(const char *obj_name,
 		return;
 	}
 
-	batch_object_write(obj_name, scratch, opt, data);
+	batch_object_write(obj_name, scratch, err, opt, data);
 }
 
 struct object_cb_data {
@@ -299,13 +300,14 @@ struct object_cb_data {
 	struct expand_data *expand;
 	struct oidset *seen;
 	struct strbuf *scratch;
+	struct strbuf *err;
 };
 
 static int batch_object_cb(const struct object_id *oid, void *vdata)
 {
 	struct object_cb_data *data = vdata;
 	oidcpy(&data->expand->oid, oid);
-	batch_object_write(NULL, data->scratch, data->opt, data->expand);
+	batch_object_write(NULL, data->scratch, data->err, data->opt, data->expand);
 	return 0;
 }
 
@@ -361,6 +363,7 @@ static int batch_objects(struct batch_options *opt, const struct option *options
 {
 	struct strbuf input = STRBUF_INIT;
 	struct strbuf output = STRBUF_INIT;
+	struct strbuf err = STRBUF_INIT;
 	struct strbuf format = STRBUF_INIT;
 	struct expand_data data;
 	int save_warning;
@@ -389,6 +392,7 @@ static int batch_objects(struct batch_options *opt, const struct option *options
 		cb.opt = opt;
 		cb.expand = &data;
 		cb.scratch = &output;
+		cb.err = &err;
 
 		if (opt->unordered) {
 			struct oidset seen = OIDSET_INIT;
@@ -413,6 +417,7 @@ static int batch_objects(struct batch_options *opt, const struct option *options
 
 		strbuf_release(&format);
 		strbuf_release(&output);
+		strbuf_release(&err);
 		return 0;
 	}
 
@@ -441,12 +446,13 @@ static int batch_objects(struct batch_options *opt, const struct option *options
 			data.rest = p;
 		}
 
-		batch_one_object(input.buf, &output, opt, &data);
+		batch_one_object(input.buf, &output, &err, opt, &data);
 	}
 
 	strbuf_release(&format);
 	strbuf_release(&input);
 	strbuf_release(&output);
+	strbuf_release(&err);
 	warn_on_object_refname_ambiguity = save_warning;
 	return retval;
 }
-- 
gitgitgadget


^ permalink raw reply	[flat|nested] 121+ messages in thread

* [PATCH v2 9/9] [GSOC] cat-file: re-implement --textconv, --filters options
  2021-06-15 13:28 ` [PATCH v2 0/9] [GSOC][RFC] cat-file: reuse ref-filter logic ZheNing Hu via GitGitGadget
                     ` (7 preceding siblings ...)
  2021-06-15 13:29   ` [PATCH v2 8/9] [GSOC] cat-file: reuse err buf in batch_objet_write() ZheNing Hu via GitGitGadget
@ 2021-06-15 13:29   ` ZheNing Hu via GitGitGadget
  2021-06-16  7:29   ` [PATCH v2 0/9] [GSOC][RFC] cat-file: reuse ref-filter logic Junio C Hamano
                     ` (2 subsequent siblings)
  11 siblings, 0 replies; 121+ messages in thread
From: ZheNing Hu via GitGitGadget @ 2021-06-15 13:29 UTC (permalink / raw)
  To: git
  Cc: Junio C Hamano, Christian Couder, Hariom Verma, Bagas Sanjaya,
	Jeff King, ZheNing Hu, ZheNing Hu

From: ZheNing Hu <adlternative@gmail.com>

After cat-file reuses the ref-filter logic, we re-implement the
functions of --textconv and --filters options.

Add members `use_textconv` and `use_filters` in struct `ref_format`,
and use global variables `use_filters` and `use_textconv` in
`ref-filter.c`, so that we can filter the content of the object
in get_object(). Use `actual_oi` to record the real expand_data:
it may point to the original `oi` or the `act_oi` processed by
`textconv_object()` or `convert_to_working_tree()`. `grab_values()`
will grab the contents of `actual_oi` and `grab_common_values()`
to grab the contents of origin `oi`, this ensures that `%(objectsize)`
still uses the size of the unfiltered data.

In `get_object()`, we made an optimization: Firstly, get the size and
type of the object instead of directly getting the object data.
If using --textconv, after successfully obtaining the filtered object
data, an extra oid_object_info_extended() will be skipped, which can
reduce the cost of object data copy; If using --filter, the data of
the object first will be getted first, and then convert_to_working_tree()
will be used to get the filtered object data.

Mentored-by: Christian Couder <christian.couder@gmail.com>
Mentored-by: Hariom Verma <hariom18599@gmail.com>
Signed-off-by: ZheNing Hu <adlternative@gmail.com>
---
 builtin/cat-file.c |  5 ++++
 ref-filter.c       | 66 ++++++++++++++++++++++++++++++++++++++++++++--
 ref-filter.h       |  4 ++-
 3 files changed, 72 insertions(+), 3 deletions(-)

diff --git a/builtin/cat-file.c b/builtin/cat-file.c
index 6766efb06f41..2978b8990e8d 100644
--- a/builtin/cat-file.c
+++ b/builtin/cat-file.c
@@ -377,6 +377,11 @@ static int batch_objects(struct batch_options *opt, const struct option *options
 	if (opt->print_contents)
 		strbuf_addstr(&format, "\n%(raw)");
 	opt->format.format = format.buf;
+	if (opt->cmdmode == 'c')
+		opt->format.use_textconv = 1;
+	if (opt->cmdmode == 'w')
+		opt->format.use_filters = 1;
+
 	if (verify_ref_format(&opt->format))
 		usage_with_options(cat_file_usage, options);
 
diff --git a/ref-filter.c b/ref-filter.c
index 660e52f97f79..5e4aaa94db8a 100644
--- a/ref-filter.c
+++ b/ref-filter.c
@@ -1,3 +1,4 @@
+#define USE_THE_INDEX_COMPATIBILITY_MACROS
 #include "builtin.h"
 #include "cache.h"
 #include "parse-options.h"
@@ -84,6 +85,9 @@ static struct expand_data {
 	struct object_info info;
 } oi, oi_deref;
 
+int use_filters;
+int use_textconv;
+
 struct ref_to_worktree_entry {
 	struct hashmap_entry ent;
 	struct worktree *wt; /* key is wt->head_ref */
@@ -1027,6 +1031,9 @@ int verify_ref_format(struct ref_format *format)
 					       used_atom[at].atom_type == ATOM_WORKTREEPATH)))
 			die(_("this command reject atom %%(%.*s)"), (int)(ep - sp - 2), sp + 2);
 
+		use_filters = format->use_filters;
+		use_textconv = format->use_textconv;
+
 		if (format->quote_style && used_atom[at].atom_type == ATOM_RAW &&
 		    used_atom[at].u.raw_data.option == RAW_BARE)
 			die(_("--format=%.*s cannot be used with"
@@ -1735,10 +1742,41 @@ static int get_object(struct ref_array_item *ref, int deref, struct object **obj
 {
 	/* parse_object_buffer() will set eaten to 0 if free() will be needed */
 	int eaten = 1;
+	struct expand_data *actual_oi = oi;
+	struct expand_data act_oi = {0};
+
 	if (oi->info.contentp) {
 		/* We need to know that to use parse_object_buffer properly */
+		void **temp_contentp = oi->info.contentp;
+		oi->info.contentp = NULL;
 		oi->info.sizep = &oi->size;
 		oi->info.typep = &oi->type;
+
+		/* get the type and size */
+		if (oid_object_info_extended(the_repository, &oi->oid, &oi->info,
+					OBJECT_INFO_LOOKUP_REPLACE))
+			return strbuf_addf_ret(err, 1, _("%s missing"),
+					       oid_to_hex(&oi->oid));
+
+		oi->info.sizep = NULL;
+		oi->info.typep = NULL;
+		oi->info.contentp = temp_contentp;
+
+		if (use_textconv) {
+			act_oi = *oi;
+
+			if(!ref->rest)
+				return strbuf_addf_ret(err, -1, _("missing path for '%s'"),
+						       oid_to_hex(&act_oi.oid));
+			if (act_oi.type == OBJ_BLOB) {
+				if (textconv_object(the_repository,
+						    ref->rest, 0100644, &act_oi.oid,
+						    1, (char **)(&act_oi.content), &act_oi.size)) {
+					actual_oi = &act_oi;
+					goto success;
+				}
+			}
+		}
 	}
 	if (oid_object_info_extended(the_repository, &oi->oid, &oi->info,
 				     OBJECT_INFO_LOOKUP_REPLACE))
@@ -1748,19 +1786,43 @@ static int get_object(struct ref_array_item *ref, int deref, struct object **obj
 		BUG("Object size is less than zero.");
 
 	if (oi->info.contentp) {
-		*obj = parse_object_buffer(the_repository, &oi->oid, oi->type, oi->size, oi->content, &eaten);
+		if (use_filters) {
+			if(!ref->rest)
+				return strbuf_addf_ret(err, -1, _("missing path for '%s'"),
+						       oid_to_hex(&oi->oid));
+			if (oi->type == OBJ_BLOB) {
+				struct strbuf strbuf = STRBUF_INIT;
+				struct checkout_metadata meta;
+				act_oi = *oi;
+
+				init_checkout_metadata(&meta, NULL, NULL, &act_oi.oid);
+				if (convert_to_working_tree(&the_index, ref->rest, act_oi.content, act_oi.size, &strbuf, &meta)) {
+					act_oi.size = strbuf.len;
+					act_oi.content = strbuf_detach(&strbuf, NULL);
+					actual_oi = &act_oi;
+				} else {
+					die("could not convert '%s' %s",
+					    oid_to_hex(&oi->oid), ref->rest);
+				}
+			}
+		}
+
+success:
+		*obj = parse_object_buffer(the_repository, &actual_oi->oid, actual_oi->type, actual_oi->size, actual_oi->content, &eaten);
 		if (!*obj) {
 			if (!eaten)
 				free(oi->content);
 			return strbuf_addf_ret(err, -1, _("parse_object_buffer failed on %s for %s"),
 					       oid_to_hex(&oi->oid), ref->refname);
 		}
-		grab_values(ref->value, deref, *obj, oi);
+		grab_values(ref->value, deref, *obj, actual_oi);
 	}
 
 	grab_common_values(ref->value, deref, oi);
 	if (!eaten)
 		free(oi->content);
+	if (actual_oi != oi)
+		free(actual_oi->content);
 	return 0;
 }
 
diff --git a/ref-filter.h b/ref-filter.h
index 3db07729f46d..56a5ec4c9eaa 100644
--- a/ref-filter.h
+++ b/ref-filter.h
@@ -80,6 +80,8 @@ struct ref_format {
 	const char *rest;
 	int cat_file_mode;
 	int quote_style;
+	int use_textconv;
+	int use_filters;
 	int use_rest;
 	int use_color;
 
@@ -87,7 +89,7 @@ struct ref_format {
 	int need_color_reset_at_eol;
 };
 
-#define REF_FORMAT_INIT { NULL, NULL, 0, 0, 0, -1 }
+#define REF_FORMAT_INIT { NULL, NULL, 0, 0, 0, 0, 0, -1 }
 
 /*  Macros for checking --merged and --no-merged options */
 #define _OPT_MERGED_NO_MERGED(option, filter, h) \
-- 
gitgitgadget

^ permalink raw reply	[flat|nested] 121+ messages in thread

* Re: [PATCH v2 0/9] [GSOC][RFC] cat-file: reuse ref-filter logic
  2021-06-15 13:28 ` [PATCH v2 0/9] [GSOC][RFC] cat-file: reuse ref-filter logic ZheNing Hu via GitGitGadget
                     ` (8 preceding siblings ...)
  2021-06-15 13:29   ` [PATCH v2 9/9] [GSOC] cat-file: re-implement --textconv, --filters options ZheNing Hu via GitGitGadget
@ 2021-06-16  7:29   ` Junio C Hamano
  2021-06-17  6:07     ` ZheNing Hu
  2021-06-17  7:26   ` Ævar Arnfjörð Bjarmason
  2021-06-19  7:02   ` [PATCH v3 00/10] " ZheNing Hu via GitGitGadget
  11 siblings, 1 reply; 121+ messages in thread
From: Junio C Hamano @ 2021-06-16  7:29 UTC (permalink / raw)
  To: ZheNing Hu via GitGitGadget
  Cc: git, Christian Couder, Hariom Verma, Bagas Sanjaya, Jeff King,
	ZheNing Hu

"ZheNing Hu via GitGitGadget" <gitgitgadget@gmail.com> writes:

> This patch series make cat-file reuse ref-filter logic, which based on
> 5a5b5f78 ([GSOC] ref-filter: add %(rest) atom)

Hmph, does anybody have 5a5b5f78?

The way to deal with this and avoid resending the same patches
(assuming that this is not a 9-patch series, but only 5 of them are
new) is to rebase your topic on 723bc66d (ref-filter: add %(rest)
atom, 2021-06-09), which will allow you to discard the 4 earlier
patches, and force push, with base set to 723bc66d, I think, but I
am not a GGG user, so there may need an extra step or two on top of
that.

^ permalink raw reply	[flat|nested] 121+ messages in thread

* Re: [PATCH v2 5/9] [GSOC] ref-filter: teach get_object() return useful value
  2021-06-15 13:29   ` [PATCH v2 5/9] [GSOC] ref-filter: teach get_object() return useful value ZheNing Hu via GitGitGadget
@ 2021-06-16  7:36     ` Junio C Hamano
  2021-06-17  7:23       ` ZheNing Hu
  0 siblings, 1 reply; 121+ messages in thread
From: Junio C Hamano @ 2021-06-16  7:36 UTC (permalink / raw)
  To: ZheNing Hu via GitGitGadget
  Cc: git, Christian Couder, Hariom Verma, Bagas Sanjaya, Jeff King,
	ZheNing Hu

"ZheNing Hu via GitGitGadget" <gitgitgadget@gmail.com> writes:

> From: ZheNing Hu <adlternative@gmail.com>
>
> Let `populate_value()`, `get_ref_atom_value()` and
> `format_ref_array_item()` get the return value of `get_object()`
> correctly.

The "get" the value correctly, I think.  What you are teaching them
is to pass the return value from get_object() through the callchain
to their callers.

The readers will be helped if you say what kind of errors
get_object() wants to tell its callers, not just "-1" is for error,
which is what populate_value() assumes to be sufficient.  In other
words, which non-zero returns from get_object() are interesting and
why?

> @@ -1997,9 +1997,11 @@ static int populate_value(struct ref_array_item *ref, struct strbuf *err)
>  static int get_ref_atom_value(struct ref_array_item *ref, int atom,
>  			      struct atom_value **v, struct strbuf *err)
>  {
> +	int ret = 0;
> +
>  	if (!ref->value) {
> -		if (populate_value(ref, err))
> -			return -1;
> +		if ((ret = populate_value(ref, err)))
> +			return ret;

The new variable only needs to be in this scope, and does not have
to be shown to the entire function.

> @@ -2573,6 +2575,7 @@ int format_ref_array_item(struct ref_array_item *info,
>  {
>  	const char *cp, *sp, *ep;
>  	struct ref_formatting_state state = REF_FORMATTING_STATE_INIT;
> +	int ret = 0;

This is dubious...

>  	state.quote_style = format->quote_style;
>  	push_stack_element(&state.stack);
> @@ -2585,10 +2588,10 @@ int format_ref_array_item(struct ref_array_item *info,
>  		if (cp < sp)
>  			append_literal(cp, sp, &state);
>  		pos = parse_ref_filter_atom(format, sp + 2, ep, error_buf);
> -		if (pos < 0 || get_ref_atom_value(info, pos, &atomv, error_buf) ||
> +		if (pos < 0 || (ret = get_ref_atom_value(info, pos, &atomv, error_buf)) ||

Here, if "ret" gets assigned any non-zero value, the condition is
satisfied, and ...

>  		    atomv->handler(atomv, &state, error_buf)) {
>  			pop_stack_element(&state.stack);
> -			return -1;
> +			return ret ? ret : -1;

... the control flow will leave this function.  Therefore, ...

>  		}
>  	}
>  	if (*cp) {
> @@ -2610,7 +2613,7 @@ int format_ref_array_item(struct ref_array_item *info,
>  	}
>  	strbuf_addbuf(final_buf, &state.stack->output);
>  	pop_stack_element(&state.stack);
> -	return 0;
> +	return ret;

... at this point, "ret" can never be anything other than zero.  Am
I misreading the patch?

If I am not misreading the patch, then "ret" does not have to be
globally visible in this function---it can have the same scope as
"pos".

>  }
>  
>  void pretty_print_ref(const char *name, const struct object_id *oid,

^ permalink raw reply	[flat|nested] 121+ messages in thread

* Re: [PATCH v2 6/9] [GSOC] ref-filter: introduce free_array_item_internal() function
  2021-06-15 13:29   ` [PATCH v2 6/9] [GSOC] ref-filter: introduce free_array_item_internal() function ZheNing Hu via GitGitGadget
@ 2021-06-16  7:49     ` Junio C Hamano
  2021-06-17  8:03       ` ZheNing Hu
  0 siblings, 1 reply; 121+ messages in thread
From: Junio C Hamano @ 2021-06-16  7:49 UTC (permalink / raw)
  To: ZheNing Hu via GitGitGadget
  Cc: git, Christian Couder, Hariom Verma, Bagas Sanjaya, Jeff King,
	ZheNing Hu

"ZheNing Hu via GitGitGadget" <gitgitgadget@gmail.com> writes:

> From: ZheNing Hu <adlternative@gmail.com>
>
> Introduce free_array_item_internal() for freeing ref_array_item value.
> It will be called internally by free_array_item(), and it will help
> `cat-file --batch` free ref_array_item's memory later.

As a file local static function, the horrible name free_array_item()
was tolerable.  But before exposing a name like that to the outside
world, think twice if that is specific enough, and it is not.  There
are 47 different kinds of "array"s we use in the system, but this
new helper function only works with ref_array_item and not on items
in any other kinds of arrays.

> -/*  Free memory allocated for a ref_array_item */
> -static void free_array_item(struct ref_array_item *item)
> +void free_array_item_internal(struct ref_array_item *item)
>  {

And "internal" is a horrible name to have as an external name.  You
probably can come up with a more appropriate name when you imagine
yourself explaining to somebody who is relatively new to this part
of the codebase what the difference between free_array_item() and
this new helper is, where the difference comes from, why the symref
member (and no other member) is so special, etc.

I _think_ what is special is not the .symref but is the .value
field, IOW, you are trying to come up with an interface to free the
value part of ref_array_item without touching other things.  But it
is not helpful at all to readers if you do not explain why you want
to do so.  Why is the .value member so special?  The ability to
clear only the .value member without touching other members is useful
because ...?

In any case, assuming that you'd establish why the .value member is
so special to deserve an externally callable function, when external
callers do not have to be able to free the item as a whole (i.e.
free_array_item() is still file-scope static), in the proposed log
message in an updated patch, I would imagine that 

    free_ref_array_item_value()

would be a more suitable name than the _internal thing.  When it
happens, you might want to rename the static one to
free_ref_array_item() to match, even if it does not have external
callers.

^ permalink raw reply	[flat|nested] 121+ messages in thread

* Re: [PATCH v2 0/9] [GSOC][RFC] cat-file: reuse ref-filter logic
  2021-06-16  7:29   ` [PATCH v2 0/9] [GSOC][RFC] cat-file: reuse ref-filter logic Junio C Hamano
@ 2021-06-17  6:07     ` ZheNing Hu
  0 siblings, 0 replies; 121+ messages in thread
From: ZheNing Hu @ 2021-06-17  6:07 UTC (permalink / raw)
  To: Junio C Hamano
  Cc: ZheNing Hu via GitGitGadget, Git List, Christian Couder,
	Hariom Verma, Bagas Sanjaya, Jeff King

Junio C Hamano <gitster@pobox.com> 于2021年6月16日周三 下午3:29写道:
>
> "ZheNing Hu via GitGitGadget" <gitgitgadget@gmail.com> writes:
>
> > This patch series make cat-file reuse ref-filter logic, which based on
> > 5a5b5f78 ([GSOC] ref-filter: add %(rest) atom)
>
> Hmph, does anybody have 5a5b5f78?
>
> The way to deal with this and avoid resending the same patches
> (assuming that this is not a 9-patch series, but only 5 of them are
> new) is to rebase your topic on 723bc66d (ref-filter: add %(rest)
> atom, 2021-06-09), which will allow you to discard the 4 earlier
> patches, and force push, with base set to 723bc66d, I think, but I
> am not a GGG user, so there may need an extra step or two on top of
> that.

Oh, 5a5b5f78 is in gitgitgadget.git and 723bc66d in git.git, their commit
messages are different. Later I will rebase my new patch to zh/xxx.

Thanks.
--
ZheNing Hu

^ permalink raw reply	[flat|nested] 121+ messages in thread

* Re: [PATCH 1/8] [GSOC] ref-filter: add obj-type check in grab contents
  2021-06-12 11:14 ` [PATCH 1/8] [GSOC] ref-filter: add obj-type check in grab contents ZheNing Hu via GitGitGadget
@ 2021-06-17  7:04   ` Ævar Arnfjörð Bjarmason
  2021-06-17  7:28     ` Junio C Hamano
  0 siblings, 1 reply; 121+ messages in thread
From: Ævar Arnfjörð Bjarmason @ 2021-06-17  7:04 UTC (permalink / raw)
  To: ZheNing Hu via GitGitGadget
  Cc: git, Junio C Hamano, Christian Couder, Hariom Verma,
	Bagas Sanjaya, Jeff King, ZheNing Hu


On Sat, Jun 12 2021, ZheNing Hu via GitGitGadget wrote:

> From: ZheNing Hu <adlternative@gmail.com>
>
> Only tag and commit objects use `grab_sub_body_contents()` to grab
> object contents in the current codebase.  We want to teach the
> function to also handle blobs and trees to get their raw data,
> without parsing a blob (whose contents looks like a commit or a tag)
> incorrectly as a commit or a tag.
>
> Skip the block of code that is specific to handling commits and tags
> early when the given object is of a wrong type to help later
> addition to handle other types of objects in this function.
>
> Mentored-by: Christian Couder <christian.couder@gmail.com>
> Mentored-by: Hariom Verma <hariom18599@gmail.com>
> Helped-by: Junio C Hamano <gitster@pobox.com>
> Signed-off-by: ZheNing Hu <adlternative@gmail.com>
> ---
>  ref-filter.c | 24 +++++++++++++++---------
>  1 file changed, 15 insertions(+), 9 deletions(-)
>
> diff --git a/ref-filter.c b/ref-filter.c
> index 4db0e40ff4c6..5cee6512fbaf 100644
> --- a/ref-filter.c
> +++ b/ref-filter.c
> @@ -1356,11 +1356,12 @@ static void append_lines(struct strbuf *out, const char *buf, unsigned long size
>  }
>  
>  /* See grab_values */
> -static void grab_sub_body_contents(struct atom_value *val, int deref, void *buf)
> +static void grab_sub_body_contents(struct atom_value *val, int deref, struct expand_data *data)
>  {
>  	int i;
>  	const char *subpos = NULL, *bodypos = NULL, *sigpos = NULL;
>  	size_t sublen = 0, bodylen = 0, nonsiglen = 0, siglen = 0;
> +	void *buf = data->content;
>  
>  	for (i = 0; i < used_atom_cnt; i++) {
>  		struct used_atom *atom = &used_atom[i];
> @@ -1371,10 +1372,13 @@ static void grab_sub_body_contents(struct atom_value *val, int deref, void *buf)
>  			continue;
>  		if (deref)
>  			name++;
> -		if (strcmp(name, "body") &&
> -		    !starts_with(name, "subject") &&
> -		    !starts_with(name, "trailers") &&
> -		    !starts_with(name, "contents"))
> +
> +		if ((data->type != OBJ_TAG &&
> +		     data->type != OBJ_COMMIT) ||
> +		    (strcmp(name, "body") &&
> +		     !starts_with(name, "subject") &&
> +		     !starts_with(name, "trailers") &&
> +		     !starts_with(name, "contents")))

We have 4 "real" object types, commit, tree, blob, tag. Do you really
mean "not tag or commit" here, don't you mean "is tree or blob" instead?
I.e. do we really want to pass OBJ_NONE etc. here?

>  			continue;
>  		if (!subpos)
>  			find_subpos(buf,
> @@ -1438,17 +1442,19 @@ static void fill_missing_values(struct atom_value *val)
>   * pointed at by the ref itself; otherwise it is the object the
>   * ref (which is a tag) refers to.
>   */
> -static void grab_values(struct atom_value *val, int deref, struct object *obj, void *buf)
> +static void grab_values(struct atom_value *val, int deref, struct object *obj, struct expand_data *data)
>  {
> +	void *buf = data->content;
> +
>  	switch (obj->type) {
>  	case OBJ_TAG:
>  		grab_tag_values(val, deref, obj);
> -		grab_sub_body_contents(val, deref, buf);
> +		grab_sub_body_contents(val, deref, data);
>  		grab_person("tagger", val, deref, buf);
>  		break;
>  	case OBJ_COMMIT:
>  		grab_commit_values(val, deref, obj);
> -		grab_sub_body_contents(val, deref, buf);
> +		grab_sub_body_contents(val, deref, data);
>  		grab_person("author", val, deref, buf);
>  		grab_person("committer", val, deref, buf);
>  		break;
> @@ -1678,7 +1684,7 @@ static int get_object(struct ref_array_item *ref, int deref, struct object **obj
>  			return strbuf_addf_ret(err, -1, _("parse_object_buffer failed on %s for %s"),
>  					       oid_to_hex(&oi->oid), ref->refname);
>  		}
> -		grab_values(ref->value, deref, *obj, oi->content);
> +		grab_values(ref->value, deref, *obj, oi);
>  	}
>  
>  	grab_common_values(ref->value, deref, oi);


^ permalink raw reply	[flat|nested] 121+ messages in thread

* Re: [PATCH 2/8] [GSOC] ref-filter: add %(raw) atom
  2021-06-12 11:14 ` [PATCH 2/8] [GSOC] ref-filter: add %(raw) atom ZheNing Hu via GitGitGadget
@ 2021-06-17  7:10   ` Ævar Arnfjörð Bjarmason
  2021-06-17  7:34     ` Junio C Hamano
  2021-06-17  9:22     ` ZheNing Hu
  0 siblings, 2 replies; 121+ messages in thread
From: Ævar Arnfjörð Bjarmason @ 2021-06-17  7:10 UTC (permalink / raw)
  To: ZheNing Hu via GitGitGadget
  Cc: git, Junio C Hamano, Christian Couder, Hariom Verma,
	Bagas Sanjaya, Jeff King, ZheNing Hu


On Sat, Jun 12 2021, ZheNing Hu via GitGitGadget wrote:

> From: ZheNing Hu <adlternative@gmail.com>
>
> Add new formatting option `%(raw)`, which will print the raw
> object data without any changes. It will help further to migrate
> all cat-file formatting logic from cat-file to ref-filter.

Nice goal and feature to have.

> The raw data of blob, tree objects may contain '\0', but most of
> the logic in `ref-filter` depends on the output of the atom being
> text (specifically, no embedded NULs in it).
>
> E.g. `quote_formatting()` use `strbuf_addstr()` or `*._quote_buf()`
> add the data to the buffer. The raw data of a tree object is
> `100644 one\0...`, only the `100644 one` will be added to the buffer,
> which is incorrect.
>
> Therefore, add a new member in `struct atom_value`: `s_size`, which
> can record raw object size, it can help us add raw object data to
> the buffer or compare two buffers which contain raw object data.

Most of the functions that deal with this already use a strbuf in some
way, before we had a const char *, now there's a size_t to go along with
it, why not simply use a strbuf in the struct for the data? You'll then
get the size and \0 handling for free, and any functions to deal with
conversion can stick to the strbuf API, there seems to be a lot of back
and forth now.

> Beyond, `--format=%(raw)` cannot be used with `--python`, `--shell`,
> `--tcl`, `--perl` because if our binary raw data is passed to a variable
> in the host language, the host language may not support arbitrary binary
> data in the variables of its string type.

Perl at least deals with that just fine, and to the extent that it
doesn't any new problems here would have nothing to do with \0 being in
the data. Perl doesn't have a notion of "binary has \0 in it", it always
supports \0, it has a notion of "is it utf-8 or not?", so any encoding
problems wouldn't be new. I'd think that the same would be true of
Python, but I'm not sure.


> +test_expect_success 'basic atom: refs/tags/testtag *raw' '
> +	git cat-file commit refs/tags/testtag^{} >expected &&
> +	git for-each-ref --format="%(*raw)" refs/tags/testtag >actual &&
> +	sanitize_pgp <expected >expected.clean &&
> +	sanitize_pgp <actual >actual.clean &&
> +	echo "" >>expected.clean &&

Just "echo" will do, ditto for the rest. Also odd to go back and forth
between populating expected.clean & actual.clean.


> +test_expect_success 'set up refs pointing to binary blob' '
> +	printf "a\0b\0c" >blob1 &&
> +	printf "a\0c\0b" >blob2 &&
> +	printf "\0a\0b\0c" >blob3 &&
> +	printf "abc" >blob4 &&
> +	printf "\0 \0 \0 " >blob5 &&
> +	printf "\0 \0a\0 " >blob6 &&
> +	printf "  " >blob7 &&
> +	>blob8 &&
> +	git hash-object blob1 -w | xargs git update-ref refs/myblobs/blob1 &&
> +	git hash-object blob2 -w | xargs git update-ref refs/myblobs/blob2 &&
> +	git hash-object blob3 -w | xargs git update-ref refs/myblobs/blob3 &&
> +	git hash-object blob4 -w | xargs git update-ref refs/myblobs/blob4 &&
> +	git hash-object blob5 -w | xargs git update-ref refs/myblobs/blob5 &&
> +	git hash-object blob6 -w | xargs git update-ref refs/myblobs/blob6 &&
> +	git hash-object blob7 -w | xargs git update-ref refs/myblobs/blob7 &&
> +	git hash-object blob8 -w | xargs git update-ref refs/myblobs/blob8

Hrm, xargs just to avoid:

    git update-ref ... $(git hash-object) ?

> +test_expect_success '%(raw) with --python must failed' '
> +	test_must_fail git for-each-ref --format="%(raw)" --python
> +'
> +
> +test_expect_success '%(raw) with --tcl must failed' '
> +	test_must_fail git for-each-ref --format="%(raw)" --tcl
> +'
> +
> +test_expect_success '%(raw) with --perl must failed' '
> +	test_must_fail git for-each-ref --format="%(raw)" --perl
> +'
> +
> +test_expect_success '%(raw) with --shell must failed' '
> +	test_must_fail git for-each-ref --format="%(raw)" --shell
> +'
> +
> +test_expect_success '%(raw) with --shell and --sort=raw must failed' '
> +	test_must_fail git for-each-ref --format="%(raw)" --sort=raw --shell
> +'

s/must failed/must fail/, but see question above about encoding in these
languages...

^ permalink raw reply	[flat|nested] 121+ messages in thread

* Re: [PATCH 7/8] [GSOC] cat-file: reuse err buf in batch_objet_write()
  2021-06-12 11:14 ` [PATCH 7/8] [GSOC] cat-file: reuse err buf in batch_objet_write() ZheNing Hu via GitGitGadget
@ 2021-06-17  7:16   ` Ævar Arnfjörð Bjarmason
  2021-06-17  8:05     ` ZheNing Hu
  0 siblings, 1 reply; 121+ messages in thread
From: Ævar Arnfjörð Bjarmason @ 2021-06-17  7:16 UTC (permalink / raw)
  To: ZheNing Hu via GitGitGadget
  Cc: git, Junio C Hamano, Christian Couder, Hariom Verma,
	Bagas Sanjaya, Jeff King, ZheNing Hu




On Sat, Jun 12 2021, ZheNing Hu via GitGitGadget wrote:

Subject: s/objet/object/

^ permalink raw reply	[flat|nested] 121+ messages in thread

* Re: [PATCH 8/8] [GSOC] cat-file: re-implement --textconv, --filters options
  2021-06-12 11:14 ` [PATCH 8/8] [GSOC] cat-file: re-implement --textconv, --filters options ZheNing Hu via GitGitGadget
@ 2021-06-17  7:18   ` Ævar Arnfjörð Bjarmason
  2021-06-17  9:53     ` ZheNing Hu
  0 siblings, 1 reply; 121+ messages in thread
From: Ævar Arnfjörð Bjarmason @ 2021-06-17  7:18 UTC (permalink / raw)
  To: ZheNing Hu via GitGitGadget
  Cc: git, Junio C Hamano, Christian Couder, Hariom Verma,
	Bagas Sanjaya, Jeff King, ZheNing Hu


On Sat, Jun 12 2021, ZheNing Hu via GitGitGadget wrote:

> From: ZheNing Hu <adlternative@gmail.com>

>  	opt->format.format = format.buf;
> +	if (opt->cmdmode == 'c')
> +		opt->format.use_textconv = 1;
> +	if (opt->cmdmode == 'w')
> +		opt->format.use_filters = 1;
> +


Style nit: if/if -> if/else if, both can't be true.

> +		/* get the type and size */
> +		if (oid_object_info_extended(the_repository, &oi->oid, &oi->info,
> +					OBJECT_INFO_LOOKUP_REPLACE))
> +			return strbuf_addf_ret(err, 1, _("%s missing"),
> +					       oid_to_hex(&oi->oid));
> +
> +		oi->info.sizep = NULL;
> +		oi->info.typep = NULL;
> +		oi->info.contentp = temp_contentp;

Here we have a call function and error if bad, then proceed to populate
stuff (good)...

> +
> +		if (use_textconv) {
> +			act_oi = *oi;
> +
> +			if(!ref->rest)
> +				return strbuf_addf_ret(err, -1, _("missing path for '%s'"),
> +						       oid_to_hex(&act_oi.oid));
> +			if (act_oi.type == OBJ_BLOB) {
> +				if (textconv_object(the_repository,
> +						    ref->rest, 0100644, &act_oi.oid,
> +						    1, (char **)(&act_oi.content), &act_oi.size)) {
> +					actual_oi = &act_oi;
> +					goto success;
> +				}
> +			}
> +		}


Maybe change the if (A) { if (B) {} ) to if (A && B) {} ?

>  	}
>  	if (oid_object_info_extended(the_repository, &oi->oid, &oi->info,
>  				     OBJECT_INFO_LOOKUP_REPLACE))
> @@ -1748,19 +1786,43 @@ static int get_object(struct ref_array_item *ref, int deref, struct object **obj
>  		BUG("Object size is less than zero.");
>  
>  	if (oi->info.contentp) {
> -		*obj = parse_object_buffer(the_repository, &oi->oid, oi->type, oi->size, oi->content, &eaten);
> +		if (use_filters) {
> +			if(!ref->rest)
> +				return strbuf_addf_ret(err, -1, _("missing path for '%s'"),
> +						       oid_to_hex(&oi->oid));
> +			if (oi->type == OBJ_BLOB) {
> +				struct strbuf strbuf = STRBUF_INIT;
> +				struct checkout_metadata meta;
> +				act_oi = *oi;
> +
> +				init_checkout_metadata(&meta, NULL, NULL, &act_oi.oid);
> +				if (convert_to_working_tree(&the_index, ref->rest, act_oi.content, act_oi.size, &strbuf, &meta)) {
> +					act_oi.size = strbuf.len;
> +					act_oi.content = strbuf_detach(&strbuf, NULL);
> +					actual_oi = &act_oi;
> +				} else {
> +					die("could not convert '%s' %s",
> +					    oid_to_hex(&oi->oid), ref->rest);
> +				}

... but here instead of "if (!x) { bad } do stuff" we have if (x) {do
stuff} else { bad }". Better to get the die out of the way, and avoid
the indentation on the "do stuff" IMO.


> -#define REF_FORMAT_INIT { NULL, NULL, 0, 0, 0, -1 }
> +#define REF_FORMAT_INIT { NULL, NULL, 0, 0, 0, 0, 0, -1 }

Not a new problem, but an earlier cleanup to simply change this to
designated initializers would be welcome, see recent work of mine in
fsck.h for an example.

I.e. we keep churning on changing this *_INIT just to populate the one
-1 field at the end, can also be simply:

    #define FOO_INIT { .that_field = 1 }


^ permalink raw reply	[flat|nested] 121+ messages in thread

* Re: [PATCH 5/8] [GSOC] ref-filter: teach get_object() return useful value
  2021-06-12 11:14 ` [PATCH 5/8] [GSOC] ref-filter: teach get_object() return useful value ZheNing Hu via GitGitGadget
  2021-06-12 20:09   ` Christian Couder
@ 2021-06-17  7:22   ` Ævar Arnfjörð Bjarmason
  2021-06-17 10:01     ` ZheNing Hu
  1 sibling, 1 reply; 121+ messages in thread
From: Ævar Arnfjörð Bjarmason @ 2021-06-17  7:22 UTC (permalink / raw)
  To: ZheNing Hu via GitGitGadget
  Cc: git, Junio C Hamano, Christian Couder, Hariom Verma,
	Bagas Sanjaya, Jeff King, ZheNing Hu


On Sat, Jun 12 2021, ZheNing Hu via GitGitGadget wrote:

> From: ZheNing Hu <adlternative@gmail.com>
>
> Let `populate_value()`, `get_ref_atom_value()` and
> `format_ref_array_item()` get the return value of `get_value()`
> correctly. This can help us later let `cat-file --batch` get the
> correct error message and return value of `get_value()`.
>
> Mentored-by: Christian Couder <christian.couder@gmail.com>
> Mentored-by: Hariom Verma <hariom18599@gmail.com>
> Signed-off-by: ZheNing Hu <adlternative@gmail.com>
> ---
>  ref-filter.c | 19 +++++++++++--------
>  1 file changed, 11 insertions(+), 8 deletions(-)
>
> diff --git a/ref-filter.c b/ref-filter.c
> index 8868cf98f090..420c0bf9384f 100644
> --- a/ref-filter.c
> +++ b/ref-filter.c
> @@ -1808,7 +1808,7 @@ static char *get_worktree_path(const struct used_atom *atom, const struct ref_ar
>  static int populate_value(struct ref_array_item *ref, struct strbuf *err)
>  {
>  	struct object *obj;
> -	int i;
> +	int i, ret = 0;

The usual style is to not bunch up variables based on type, but only if
they're related, i.e. we'd do:

    if i, j; /* proceed to use i and j in two for-loops */

But:

    int i; /* for the for-loop */
    int ret = 0; /* for our return value */

(Without the comments)

>  	struct object_info empty = OBJECT_INFO_INIT;
>  
>  	CALLOC_ARRAY(ref->value, used_atom_cnt);
> @@ -1965,8 +1965,8 @@ static int populate_value(struct ref_array_item *ref, struct strbuf *err)
>  
>  
>  	oi.oid = ref->objectname;
> -	if (get_object(ref, 0, &obj, &oi, err))
> -		return -1;
> +	if ((ret = get_object(ref, 0, &obj, &oi, err)))
> +		return ret;

Maybe more personal style, I'd just write this as:

    ret = x();
    if (!ret)
        return ret;

Makes it easier to read and balance parens in your head for the common
case...

> @@ -2585,10 +2588,10 @@ int format_ref_array_item(struct ref_array_item *info,
>  		if (cp < sp)
>  			append_literal(cp, sp, &state);
>  		pos = parse_ref_filter_atom(format, sp + 2, ep, error_buf);
> -		if (pos < 0 || get_ref_atom_value(info, pos, &atomv, error_buf) ||
> +		if (pos < 0 || (ret = get_ref_atom_value(info, pos, &atomv, error_buf)) ||
>  		    atomv->handler(atomv, &state, error_buf)) {
>  			pop_stack_element(&state.stack);

... and use that mental energy on readist stuff like this, which I'd
just leave as the inline assignment.

^ permalink raw reply	[flat|nested] 121+ messages in thread

* Re: [PATCH v2 5/9] [GSOC] ref-filter: teach get_object() return useful value
  2021-06-16  7:36     ` Junio C Hamano
@ 2021-06-17  7:23       ` ZheNing Hu
  0 siblings, 0 replies; 121+ messages in thread
From: ZheNing Hu @ 2021-06-17  7:23 UTC (permalink / raw)
  To: Junio C Hamano
  Cc: ZheNing Hu via GitGitGadget, Git List, Christian Couder,
	Hariom Verma, Bagas Sanjaya, Jeff King

Junio C Hamano <gitster@pobox.com> 于2021年6月16日周三 下午3:36写道:
>
> "ZheNing Hu via GitGitGadget" <gitgitgadget@gmail.com> writes:
>
> > From: ZheNing Hu <adlternative@gmail.com>
> >
> > Let `populate_value()`, `get_ref_atom_value()` and
> > `format_ref_array_item()` get the return value of `get_object()`
> > correctly.
>
> The "get" the value correctly, I think.  What you are teaching them
> is to pass the return value from get_object() through the callchain
> to their callers.
>

Yes, this is exactly what I meant.

> The readers will be helped if you say what kind of errors
> get_object() wants to tell its callers, not just "-1" is for error,
> which is what populate_value() assumes to be sufficient.  In other
> words, which non-zero returns from get_object() are interesting and
> why?
>

As stated in 765337a, We can just print the error without exiting if the
return value of format_ref_array_item() is greater than 0. Therefore,
the current patch is to make get_object() return a value other than
-1 when an error occurs.

> > @@ -1997,9 +1997,11 @@ static int populate_value(struct ref_array_item *ref, struct strbuf *err)
> >  static int get_ref_atom_value(struct ref_array_item *ref, int atom,
> >                             struct atom_value **v, struct strbuf *err)
> >  {
> > +     int ret = 0;
> > +
> >       if (!ref->value) {
> > -             if (populate_value(ref, err))
> > -                     return -1;
> > +             if ((ret = populate_value(ref, err)))
> > +                     return ret;
>
> The new variable only needs to be in this scope, and does not have
> to be shown to the entire function.
>

Makes sense.

> > @@ -2573,6 +2575,7 @@ int format_ref_array_item(struct ref_array_item *info,
> >  {
> >       const char *cp, *sp, *ep;
> >       struct ref_formatting_state state = REF_FORMATTING_STATE_INIT;
> > +     int ret = 0;
>
> This is dubious...
>
> >       state.quote_style = format->quote_style;
> >       push_stack_element(&state.stack);
> > @@ -2585,10 +2588,10 @@ int format_ref_array_item(struct ref_array_item *info,
> >               if (cp < sp)
> >                       append_literal(cp, sp, &state);
> >               pos = parse_ref_filter_atom(format, sp + 2, ep, error_buf);
> > -             if (pos < 0 || get_ref_atom_value(info, pos, &atomv, error_buf) ||
> > +             if (pos < 0 || (ret = get_ref_atom_value(info, pos, &atomv, error_buf)) ||
>
> Here, if "ret" gets assigned any non-zero value, the condition is
> satisfied, and ...
>
> >                   atomv->handler(atomv, &state, error_buf)) {
> >                       pop_stack_element(&state.stack);
> > -                     return -1;
> > +                     return ret ? ret : -1;
>
> ... the control flow will leave this function.  Therefore, ...
>
> >               }
> >       }
> >       if (*cp) {
> > @@ -2610,7 +2613,7 @@ int format_ref_array_item(struct ref_array_item *info,
> >       }
> >       strbuf_addbuf(final_buf, &state.stack->output);
> >       pop_stack_element(&state.stack);
> > -     return 0;
> > +     return ret;
>
> ... at this point, "ret" can never be anything other than zero.  Am
> I misreading the patch?
>
> If I am not misreading the patch, then "ret" does not have to be
> globally visible in this function---it can have the same scope as
> "pos".
>

You are right, It is correct to only return 0 here at the moment.

Thanks.
--
ZheNing Hu

^ permalink raw reply	[flat|nested] 121+ messages in thread

* Re: [PATCH v2 0/9] [GSOC][RFC] cat-file: reuse ref-filter logic
  2021-06-15 13:28 ` [PATCH v2 0/9] [GSOC][RFC] cat-file: reuse ref-filter logic ZheNing Hu via GitGitGadget
                     ` (9 preceding siblings ...)
  2021-06-16  7:29   ` [PATCH v2 0/9] [GSOC][RFC] cat-file: reuse ref-filter logic Junio C Hamano
@ 2021-06-17  7:26   ` Ævar Arnfjörð Bjarmason
  2021-06-17 10:02     ` ZheNing Hu
  2021-06-19  7:02   ` [PATCH v3 00/10] " ZheNing Hu via GitGitGadget
  11 siblings, 1 reply; 121+ messages in thread
From: Ævar Arnfjörð Bjarmason @ 2021-06-17  7:26 UTC (permalink / raw)
  To: ZheNing Hu via GitGitGadget
  Cc: git, Junio C Hamano, Christian Couder, Hariom Verma,
	Bagas Sanjaya, Jeff King, ZheNing Hu


On Tue, Jun 15 2021, ZheNing Hu via GitGitGadget wrote:

> This patch series make cat-file reuse ref-filter logic, which based on
> 5a5b5f78 ([GSOC] ref-filter: add %(rest) atom)
>
> Change from last version:
>
>  1. Use free_array_item_internal() to solve the memory leak problem.
>  2. Change commit message of ([GSOC] ref-filter: teach get_object() return
>     useful value).

I left some comments, but saw after the fact that I'd replied to the v1
E-Mails by accident, but anyway, the comments were all on things that
are also in v2, so it worked out in the end. Sorry about the confusion.

^ permalink raw reply	[flat|nested] 121+ messages in thread

* Re: [PATCH 1/8] [GSOC] ref-filter: add obj-type check in grab contents
  2021-06-17  7:04   ` Ævar Arnfjörð Bjarmason
@ 2021-06-17  7:28     ` Junio C Hamano
  0 siblings, 0 replies; 121+ messages in thread
From: Junio C Hamano @ 2021-06-17  7:28 UTC (permalink / raw)
  To: Ævar Arnfjörð Bjarmason
  Cc: ZheNing Hu via GitGitGadget, git, Christian Couder, Hariom Verma,
	Bagas Sanjaya, Jeff King, ZheNing Hu

Ævar Arnfjörð Bjarmason <avarab@gmail.com> writes:

>> -		if (strcmp(name, "body") &&
>> -		    !starts_with(name, "subject") &&
>> -		    !starts_with(name, "trailers") &&
>> -		    !starts_with(name, "contents"))
>> +
>> +		if ((data->type != OBJ_TAG &&
>> +		     data->type != OBJ_COMMIT) ||
>> +		    (strcmp(name, "body") &&
>> +		     !starts_with(name, "subject") &&
>> +		     !starts_with(name, "trailers") &&
>> +		     !starts_with(name, "contents")))
>
> We have 4 "real" object types, commit, tree, blob, tag. Do you really
> mean "not tag or commit" here, don't you mean "is tree or blob" instead?
> I.e. do we really want to pass OBJ_NONE etc. here?
>
>>  			continue;

If somebody throws OBJ_NONE at us by mistake, we do not want to
handle such an object and try to extract the subject member from it
anyway, no?

The intent of the code here, before the patch, is that "what we do
after the control flow passes this point is about the body, subject,
trailers, and contents request, so everybody else should go to the
next iteration".  The caller used to give us an object compatible
with these four types of requests, now the caller may throw others,
hence "by the way, we know these four kinds of requests make sense
only for tags and commits, so everybody else should go to the next
iteration, too" would be a natural thing to add.  So in that sense,
I prefer it over "we know these four types of requests do not make
sense for blobs and trees".

^ permalink raw reply	[flat|nested] 121+ messages in thread

* Re: [PATCH 2/8] [GSOC] ref-filter: add %(raw) atom
  2021-06-17  7:10   ` Ævar Arnfjörð Bjarmason
@ 2021-06-17  7:34     ` Junio C Hamano
  2021-06-17  9:22     ` ZheNing Hu
  1 sibling, 0 replies; 121+ messages in thread
From: Junio C Hamano @ 2021-06-17  7:34 UTC (permalink / raw)
  To: Ævar Arnfjörð Bjarmason
  Cc: ZheNing Hu via GitGitGadget, git, Christian Couder, Hariom Verma,
	Bagas Sanjaya, Jeff King, ZheNing Hu

Ævar Arnfjörð Bjarmason <avarab@gmail.com> writes:

>> Beyond, `--format=%(raw)` cannot be used with `--python`, `--shell`,
>> `--tcl`, `--perl` because if our binary raw data is passed to a variable
>> in the host language, the host language may not support arbitrary binary
>> data in the variables of its string type.
>
> Perl at least deals with that just fine, and to the extent that it
> doesn't any new problems here would have nothing to do with \0 being in
> the data. Perl doesn't have a notion of "binary has \0 in it", it always
> supports \0, it has a notion of "is it utf-8 or not?", so any encoding
> problems wouldn't be new. I'd think that the same would be true of
> Python, but I'm not sure.

During an earlier iteration long time ago, as we knew Perl is
capable of handling sequence of bytes including NUL, it was decided
to start more strict by rejecting binary for all languages, which
can later be loosened, to limit the scope of the initial
implementation.

>> +	git hash-object blob1 -w | xargs git update-ref refs/myblobs/blob1 &&
>> +	git hash-object blob2 -w | xargs git update-ref refs/myblobs/blob2 &&
>> +	git hash-object blob3 -w | xargs git update-ref refs/myblobs/blob3 &&
>> +	git hash-object blob4 -w | xargs git update-ref refs/myblobs/blob4 &&
>> +	git hash-object blob5 -w | xargs git update-ref refs/myblobs/blob5 &&
>> +	git hash-object blob6 -w | xargs git update-ref refs/myblobs/blob6 &&
>> +	git hash-object blob7 -w | xargs git update-ref refs/myblobs/blob7 &&
>> +	git hash-object blob8 -w | xargs git update-ref refs/myblobs/blob8
>
> Hrm, xargs just to avoid:
>
>     git update-ref ... $(git hash-object) ?

That's horrible.  Thanks for noticing.

We'd want to catch segfaults from both hash-object and update-ref.
One way to do so may be

	O=$(git hash-object -w blob1) &&
	git update-ref refs/myblobs/blob1 "$O"

Thanks for a review.

^ permalink raw reply	[flat|nested] 121+ messages in thread

* Re: [PATCH v2 6/9] [GSOC] ref-filter: introduce free_array_item_internal() function
  2021-06-16  7:49     ` Junio C Hamano
@ 2021-06-17  8:03       ` ZheNing Hu
  0 siblings, 0 replies; 121+ messages in thread
From: ZheNing Hu @ 2021-06-17  8:03 UTC (permalink / raw)
  To: Junio C Hamano
  Cc: ZheNing Hu via GitGitGadget, Git List, Christian Couder,
	Hariom Verma, Bagas Sanjaya, Jeff King

Junio C Hamano <gitster@pobox.com> 于2021年6月16日周三 下午3:49写道:
>
> "ZheNing Hu via GitGitGadget" <gitgitgadget@gmail.com> writes:
>
> > From: ZheNing Hu <adlternative@gmail.com>
> >
> > Introduce free_array_item_internal() for freeing ref_array_item value.
> > It will be called internally by free_array_item(), and it will help
> > `cat-file --batch` free ref_array_item's memory later.
>
> As a file local static function, the horrible name free_array_item()
> was tolerable.  But before exposing a name like that to the outside
> world, think twice if that is specific enough, and it is not.  There
> are 47 different kinds of "array"s we use in the system, but this
> new helper function only works with ref_array_item and not on items
> in any other kinds of arrays.
>

Well, this is indeed ignored by me.

So free_array_item() --> free_ref_array_item() and free_array_item()
--> free_ref_array_item().

> > -/*  Free memory allocated for a ref_array_item */
> > -static void free_array_item(struct ref_array_item *item)
> > +void free_array_item_internal(struct ref_array_item *item)
> >  {
>
> And "internal" is a horrible name to have as an external name.  You
> probably can come up with a more appropriate name when you imagine
> yourself explaining to somebody who is relatively new to this part
> of the codebase what the difference between free_array_item() and
> this new helper is, where the difference comes from, why the symref
> member (and no other member) is so special, etc.
>

Yeah, "internal" is an incorrect naming. The main purpose here is
to only free a ref_array_item's value.

> I _think_ what is special is not the .symref but is the .value
> field, IOW, you are trying to come up with an interface to free the
> value part of ref_array_item without touching other things.  But it
> is not helpful at all to readers if you do not explain why you want
> to do so.  Why is the .value member so special?  The ability to
> clear only the .value member without touching other members is useful
> because ...?
>

Because batch_object_write() use a ref_array_item which is allocated on the
stack, and it's member symref is not used at all. ref_array_item's symref and
itself do not need free. The original free_array_item() will free all
dynamically
allocated content in ref_array_item. It cannot meet our requirements
at this time:
only free the ref_array_item's value.

> In any case, assuming that you'd establish why the .value member is
> so special to deserve an externally callable function, when external
> callers do not have to be able to free the item as a whole (i.e.
> free_array_item() is still file-scope static), in the proposed log
> message in an updated patch, I would imagine that
>
>     free_ref_array_item_value()
>
> would be a more suitable name than the _internal thing.  When it
> happens, you might want to rename the static one to
> free_ref_array_item() to match, even if it does not have external
> callers.

Make sence.

--
ZheNing Hu

^ permalink raw reply	[flat|nested] 121+ messages in thread

* Re: [PATCH 7/8] [GSOC] cat-file: reuse err buf in batch_objet_write()
  2021-06-17  7:16   ` Ævar Arnfjörð Bjarmason
@ 2021-06-17  8:05     ` ZheNing Hu
  0 siblings, 0 replies; 121+ messages in thread
From: ZheNing Hu @ 2021-06-17  8:05 UTC (permalink / raw)
  To: Ævar Arnfjörð Bjarmason
  Cc: ZheNing Hu via GitGitGadget, Git List, Junio C Hamano,
	Christian Couder, Hariom Verma, Bagas Sanjaya, Jeff King

Ævar Arnfjörð Bjarmason <avarab@gmail.com> 于2021年6月17日周四 下午3:17写道:
>
>
>
>
> On Sat, Jun 12 2021, ZheNing Hu via GitGitGadget wrote:
>
> Subject: s/objet/object/

OK.

--
ZheNing Hu

^ permalink raw reply	[flat|nested] 121+ messages in thread

* Re: [PATCH 2/8] [GSOC] ref-filter: add %(raw) atom
  2021-06-17  7:10   ` Ævar Arnfjörð Bjarmason
  2021-06-17  7:34     ` Junio C Hamano
@ 2021-06-17  9:22     ` ZheNing Hu
  2021-06-17 14:37       ` Ævar Arnfjörð Bjarmason
  1 sibling, 1 reply; 121+ messages in thread
From: ZheNing Hu @ 2021-06-17  9:22 UTC (permalink / raw)
  To: Ævar Arnfjörð Bjarmason
  Cc: ZheNing Hu via GitGitGadget, Git List, Junio C Hamano,
	Christian Couder, Hariom Verma, Bagas Sanjaya, Jeff King

Ævar Arnfjörð Bjarmason <avarab@gmail.com> 于2021年6月17日周四 下午3:16写道:
>
> > The raw data of blob, tree objects may contain '\0', but most of
> > the logic in `ref-filter` depends on the output of the atom being
> > text (specifically, no embedded NULs in it).
> >
> > E.g. `quote_formatting()` use `strbuf_addstr()` or `*._quote_buf()`
> > add the data to the buffer. The raw data of a tree object is
> > `100644 one\0...`, only the `100644 one` will be added to the buffer,
> > which is incorrect.
> >
> > Therefore, add a new member in `struct atom_value`: `s_size`, which
> > can record raw object size, it can help us add raw object data to
> > the buffer or compare two buffers which contain raw object data.
>
> Most of the functions that deal with this already use a strbuf in some
> way, before we had a const char *, now there's a size_t to go along with
> it, why not simply use a strbuf in the struct for the data? You'll then
> get the size and \0 handling for free, and any functions to deal with
> conversion can stick to the strbuf API, there seems to be a lot of back
> and forth now.
>

Yes, strbuf is a suitable choice when using <str,len> pair.
But if replace v->s with strbuf, the possible changes will be larger.

> > Beyond, `--format=%(raw)` cannot be used with `--python`, `--shell`,
> > `--tcl`, `--perl` because if our binary raw data is passed to a variable
> > in the host language, the host language may not support arbitrary binary
> > data in the variables of its string type.
>
> Perl at least deals with that just fine, and to the extent that it
> doesn't any new problems here would have nothing to do with \0 being in
> the data. Perl doesn't have a notion of "binary has \0 in it", it always
> supports \0, it has a notion of "is it utf-8 or not?", so any encoding
> problems wouldn't be new. I'd think that the same would be true of
> Python, but I'm not sure.
>

Not python safe. See [1].
Regarding the perl language, I support Junio's point of view: it can be
re-supported in the future.

>
> > +test_expect_success 'basic atom: refs/tags/testtag *raw' '
> > +     git cat-file commit refs/tags/testtag^{} >expected &&
> > +     git for-each-ref --format="%(*raw)" refs/tags/testtag >actual &&
> > +     sanitize_pgp <expected >expected.clean &&
> > +     sanitize_pgp <actual >actual.clean &&
> > +     echo "" >>expected.clean &&
>
> Just "echo" will do, ditto for the rest. Also odd to go back and forth
> between populating expected.clean & actual.clean.
>

Are you saying that sanitize_pgp is not needed?

>
> > +test_expect_success 'set up refs pointing to binary blob' '
> > +     printf "a\0b\0c" >blob1 &&
> > +     printf "a\0c\0b" >blob2 &&
> > +     printf "\0a\0b\0c" >blob3 &&
> > +     printf "abc" >blob4 &&
> > +     printf "\0 \0 \0 " >blob5 &&
> > +     printf "\0 \0a\0 " >blob6 &&
> > +     printf "  " >blob7 &&
> > +     >blob8 &&
> > +     git hash-object blob1 -w | xargs git update-ref refs/myblobs/blob1 &&
> > +     git hash-object blob2 -w | xargs git update-ref refs/myblobs/blob2 &&
> > +     git hash-object blob3 -w | xargs git update-ref refs/myblobs/blob3 &&
> > +     git hash-object blob4 -w | xargs git update-ref refs/myblobs/blob4 &&
> > +     git hash-object blob5 -w | xargs git update-ref refs/myblobs/blob5 &&
> > +     git hash-object blob6 -w | xargs git update-ref refs/myblobs/blob6 &&
> > +     git hash-object blob7 -w | xargs git update-ref refs/myblobs/blob7 &&
> > +     git hash-object blob8 -w | xargs git update-ref refs/myblobs/blob8
>
> Hrm, xargs just to avoid:
>
>     git update-ref ... $(git hash-object) ?
>

I didn’t think about it, just for convenience.

> > +test_expect_success '%(raw) with --python must failed' '
> > +     test_must_fail git for-each-ref --format="%(raw)" --python
> > +'
> > +
> > +test_expect_success '%(raw) with --tcl must failed' '
> > +     test_must_fail git for-each-ref --format="%(raw)" --tcl
> > +'
> > +
> > +test_expect_success '%(raw) with --perl must failed' '
> > +     test_must_fail git for-each-ref --format="%(raw)" --perl
> > +'
> > +
> > +test_expect_success '%(raw) with --shell must failed' '
> > +     test_must_fail git for-each-ref --format="%(raw)" --shell
> > +'
> > +
> > +test_expect_success '%(raw) with --shell and --sort=raw must failed' '
> > +     test_must_fail git for-each-ref --format="%(raw)" --sort=raw --shell
> > +'
>
> s/must failed/must fail/, but see question above about encoding in these
> languages...


[1]: https://lore.kernel.org/git/CAOLTT8QR_GRm4TYk0E_eazQ+unVQODc-3L+b4V5JUN5jtZR8uA@mail.gmail.com/

Thanks for a review.
--
ZheNing Hu

^ permalink raw reply	[flat|nested] 121+ messages in thread

* Re: [PATCH 8/8] [GSOC] cat-file: re-implement --textconv, --filters options
  2021-06-17  7:18   ` Ævar Arnfjörð Bjarmason
@ 2021-06-17  9:53     ` ZheNing Hu
  0 siblings, 0 replies; 121+ messages in thread
From: ZheNing Hu @ 2021-06-17  9:53 UTC (permalink / raw)
  To: Ævar Arnfjörð Bjarmason
  Cc: ZheNing Hu via GitGitGadget, Git List, Junio C Hamano,
	Christian Couder, Hariom Verma, Bagas Sanjaya, Jeff King

Ævar Arnfjörð Bjarmason <avarab@gmail.com> 于2021年6月17日周四 下午3:22写道:
>
>
> On Sat, Jun 12 2021, ZheNing Hu via GitGitGadget wrote:
>
> > From: ZheNing Hu <adlternative@gmail.com>
>
> >       opt->format.format = format.buf;
> > +     if (opt->cmdmode == 'c')
> > +             opt->format.use_textconv = 1;
> > +     if (opt->cmdmode == 'w')
> > +             opt->format.use_filters = 1;
> > +
>
>
> Style nit: if/if -> if/else if, both can't be true.
>

Ok.

> > +             /* get the type and size */
> > +             if (oid_object_info_extended(the_repository, &oi->oid, &oi->info,
> > +                                     OBJECT_INFO_LOOKUP_REPLACE))
> > +                     return strbuf_addf_ret(err, 1, _("%s missing"),
> > +                                            oid_to_hex(&oi->oid));
> > +
> > +             oi->info.sizep = NULL;
> > +             oi->info.typep = NULL;
> > +             oi->info.contentp = temp_contentp;
>
> Here we have a call function and error if bad, then proceed to populate
> stuff (good)...
>
> > +
> > +             if (use_textconv) {
> > +                     act_oi = *oi;
> > +
> > +                     if(!ref->rest)
> > +                             return strbuf_addf_ret(err, -1, _("missing path for '%s'"),
> > +                                                    oid_to_hex(&act_oi.oid));
> > +                     if (act_oi.type == OBJ_BLOB) {
> > +                             if (textconv_object(the_repository,
> > +                                                 ref->rest, 0100644, &act_oi.oid,
> > +                                                 1, (char **)(&act_oi.content), &act_oi.size)) {
> > +                                     actual_oi = &act_oi;
> > +                                     goto success;
> > +                             }
> > +                     }
> > +             }
>
>
> Maybe change the if (A) { if (B) {} ) to if (A && B) {} ?
>

Maybe it's something like this:

@@ -1762,19 +1762,17 @@ static int get_object(struct ref_array_item
*ref, int deref, struct object **obj
                oi->info.typep = NULL;
                oi->info.contentp = temp_contentp;

-               if (use_textconv) {
-                       act_oi = *oi;
+               if (use_textconv && !ref->rest)
+                       return strbuf_addf_ret(err, -1, _("missing
path for '%s'"),
+                                              oid_to_hex(&act_oi.oid));

-                       if(!ref->rest)
-                               return strbuf_addf_ret(err, -1,
_("missing path for '%s'"),
-                                                      oid_to_hex(&act_oi.oid));
-                       if (act_oi.type == OBJ_BLOB) {
-                               if (textconv_object(the_repository,
-                                                   ref->rest,
0100644, &act_oi.oid,
-                                                   1, (char
**)(&act_oi.content), &act_oi.size)) {
-                                       actual_oi = &act_oi;
-                                       goto success;
-                               }
+               if (use_textconv && oi->type == OBJ_BLOB) {
+                       act_oi = *oi;
+                       if (textconv_object(the_repository,
+                                           ref->rest, 0100644, &act_oi.oid,
+                                           1, (char
**)(&act_oi.content), &act_oi.size)) {
+                               actual_oi = &act_oi;
+                               goto success;
                        }
                }
        }

> >       }
> >       if (oid_object_info_extended(the_repository, &oi->oid, &oi->info,
> >                                    OBJECT_INFO_LOOKUP_REPLACE))
> > @@ -1748,19 +1786,43 @@ static int get_object(struct ref_array_item *ref, int deref, struct object **obj
> >               BUG("Object size is less than zero.");
> >
> >       if (oi->info.contentp) {
> > -             *obj = parse_object_buffer(the_repository, &oi->oid, oi->type, oi->size, oi->content, &eaten);
> > +             if (use_filters) {
> > +                     if(!ref->rest)
> > +                             return strbuf_addf_ret(err, -1, _("missing path for '%s'"),
> > +                                                    oid_to_hex(&oi->oid));
> > +                     if (oi->type == OBJ_BLOB) {
> > +                             struct strbuf strbuf = STRBUF_INIT;
> > +                             struct checkout_metadata meta;
> > +                             act_oi = *oi;
> > +
> > +                             init_checkout_metadata(&meta, NULL, NULL, &act_oi.oid);
> > +                             if (convert_to_working_tree(&the_index, ref->rest, act_oi.content, act_oi.size, &strbuf, &meta)) {
> > +                                     act_oi.size = strbuf.len;
> > +                                     act_oi.content = strbuf_detach(&strbuf, NULL);
> > +                                     actual_oi = &act_oi;
> > +                             } else {
> > +                                     die("could not convert '%s' %s",
> > +                                         oid_to_hex(&oi->oid), ref->rest);
> > +                             }
>
> ... but here instead of "if (!x) { bad } do stuff" we have if (x) {do
> stuff} else { bad }". Better to get the die out of the way, and avoid
> the indentation on the "do stuff" IMO.
>

And this part:

@@ -1786,25 +1784,21 @@ static int get_object(struct ref_array_item
*ref, int deref, struct object **obj
                BUG("Object size is less than zero.");

        if (oi->info.contentp) {
-               if (use_filters) {
-                       if(!ref->rest)
-                               return strbuf_addf_ret(err, -1,
_("missing path for '%s'"),
-                                                      oid_to_hex(&oi->oid));
-                       if (oi->type == OBJ_BLOB) {
-                               struct strbuf strbuf = STRBUF_INIT;
-                               struct checkout_metadata meta;
-                               act_oi = *oi;
-
-                               init_checkout_metadata(&meta, NULL,
NULL, &act_oi.oid);
-                               if
(convert_to_working_tree(&the_index, ref->rest, act_oi.content,
act_oi.size, &strbuf, &meta)) {
-                                       act_oi.size = strbuf.len;
-                                       act_oi.content =
strbuf_detach(&strbuf, NULL);
-                                       actual_oi = &act_oi;
-                               } else {
-                                       die("could not convert '%s' %s",
-                                           oid_to_hex(&oi->oid), ref->rest);
-                               }
-                       }
+               if (use_filters && !ref->rest)
+                       return strbuf_addf_ret(err, -1, _("missing
path for '%s'"),
+                                               oid_to_hex(&oi->oid));
+               if (use_filters && oi->type == OBJ_BLOB) {
+                       struct strbuf strbuf = STRBUF_INIT;
+                       struct checkout_metadata meta;
+                       act_oi = *oi;
+
+                       init_checkout_metadata(&meta, NULL, NULL, &act_oi.oid);
+                       if (!convert_to_working_tree(&the_index,
ref->rest, act_oi.content, act_oi.size, &strbuf, &meta))
+                               die("could not convert '%s' %s",
+                                       oid_to_hex(&oi->oid), ref->rest);
+                       act_oi.size = strbuf.len;
+                       act_oi.content = strbuf_detach(&strbuf, NULL);
+                       actual_oi = &act_oi;
                }


>
> > -#define REF_FORMAT_INIT { NULL, NULL, 0, 0, 0, -1 }
> > +#define REF_FORMAT_INIT { NULL, NULL, 0, 0, 0, 0, 0, -1 }
>
> Not a new problem, but an earlier cleanup to simply change this to
> designated initializers would be welcome, see recent work of mine in
> fsck.h for an example.
>
> I.e. we keep churning on changing this *_INIT just to populate the one
> -1 field at the end, can also be simply:
>
>     #define FOO_INIT { .that_field = 1 }
>

I agree, this method is better. We don't need to care about which members
so many "0" point to.

Thanks.
--
ZheNing Hu

^ permalink raw reply	[flat|nested] 121+ messages in thread

* Re: [PATCH 5/8] [GSOC] ref-filter: teach get_object() return useful value
  2021-06-17  7:22   ` Ævar Arnfjörð Bjarmason
@ 2021-06-17 10:01     ` ZheNing Hu
  0 siblings, 0 replies; 121+ messages in thread
From: ZheNing Hu @ 2021-06-17 10:01 UTC (permalink / raw)
  To: Ævar Arnfjörð Bjarmason
  Cc: ZheNing Hu via GitGitGadget, Git List, Junio C Hamano,
	Christian Couder, Hariom Verma, Bagas Sanjaya, Jeff King

Ævar Arnfjörð Bjarmason <avarab@gmail.com> 于2021年6月17日周四 下午3:25写道:
>
>
> On Sat, Jun 12 2021, ZheNing Hu via GitGitGadget wrote:
>
> > From: ZheNing Hu <adlternative@gmail.com>
> >
> > Let `populate_value()`, `get_ref_atom_value()` and
> > `format_ref_array_item()` get the return value of `get_value()`
> > correctly. This can help us later let `cat-file --batch` get the
> > correct error message and return value of `get_value()`.
> >
> > Mentored-by: Christian Couder <christian.couder@gmail.com>
> > Mentored-by: Hariom Verma <hariom18599@gmail.com>
> > Signed-off-by: ZheNing Hu <adlternative@gmail.com>
> > ---
> >  ref-filter.c | 19 +++++++++++--------
> >  1 file changed, 11 insertions(+), 8 deletions(-)
> >
> > diff --git a/ref-filter.c b/ref-filter.c
> > index 8868cf98f090..420c0bf9384f 100644
> > --- a/ref-filter.c
> > +++ b/ref-filter.c
> > @@ -1808,7 +1808,7 @@ static char *get_worktree_path(const struct used_atom *atom, const struct ref_ar
> >  static int populate_value(struct ref_array_item *ref, struct strbuf *err)
> >  {
> >       struct object *obj;
> > -     int i;
> > +     int i, ret = 0;
>
> The usual style is to not bunch up variables based on type, but only if
> they're related, i.e. we'd do:
>
>     if i, j; /* proceed to use i and j in two for-loops */
>
> But:
>
>     int i; /* for the for-loop */
>     int ret = 0; /* for our return value */
>
> (Without the comments)
>

I agree.

> >       struct object_info empty = OBJECT_INFO_INIT;
> >
> >       CALLOC_ARRAY(ref->value, used_atom_cnt);
> > @@ -1965,8 +1965,8 @@ static int populate_value(struct ref_array_item *ref, struct strbuf *err)
> >
> >
> >       oi.oid = ref->objectname;
> > -     if (get_object(ref, 0, &obj, &oi, err))
> > -             return -1;
> > +     if ((ret = get_object(ref, 0, &obj, &oi, err)))
> > +             return ret;
>
> Maybe more personal style, I'd just write this as:
>
>     ret = x();
>     if (!ret)
>         return ret;
>
> Makes it easier to read and balance parens in your head for the common
> case...
>

Yeah. This way it will be more readable.

> > @@ -2585,10 +2588,10 @@ int format_ref_array_item(struct ref_array_item *info,
> >               if (cp < sp)
> >                       append_literal(cp, sp, &state);
> >               pos = parse_ref_filter_atom(format, sp + 2, ep, error_buf);
> > -             if (pos < 0 || get_ref_atom_value(info, pos, &atomv, error_buf) ||
> > +             if (pos < 0 || (ret = get_ref_atom_value(info, pos, &atomv, error_buf)) ||
> >                   atomv->handler(atomv, &state, error_buf)) {
> >                       pop_stack_element(&state.stack);
>
> ... and use that mental energy on readist stuff like this, which I'd
> just leave as the inline assignment.

Indeed, inline assignment must be used in this case.

Thanks. I will pay attention to these style issues later.
--
ZheNing Hu

^ permalink raw reply	[flat|nested] 121+ messages in thread

* Re: [PATCH v2 0/9] [GSOC][RFC] cat-file: reuse ref-filter logic
  2021-06-17  7:26   ` Ævar Arnfjörð Bjarmason
@ 2021-06-17 10:02     ` ZheNing Hu
  0 siblings, 0 replies; 121+ messages in thread
From: ZheNing Hu @ 2021-06-17 10:02 UTC (permalink / raw)
  To: Ævar Arnfjörð Bjarmason
  Cc: ZheNing Hu via GitGitGadget, Git List, Junio C Hamano,
	Christian Couder, Hariom Verma, Bagas Sanjaya, Jeff King

Ævar Arnfjörð Bjarmason <avarab@gmail.com> 于2021年6月17日周四 下午3:27写道:
>
>
> On Tue, Jun 15 2021, ZheNing Hu via GitGitGadget wrote:
>
> > This patch series make cat-file reuse ref-filter logic, which based on
> > 5a5b5f78 ([GSOC] ref-filter: add %(rest) atom)
> >
> > Change from last version:
> >
> >  1. Use free_array_item_internal() to solve the memory leak problem.
> >  2. Change commit message of ([GSOC] ref-filter: teach get_object() return
> >     useful value).
>
> I left some comments, but saw after the fact that I'd replied to the v1
> E-Mails by accident, but anyway, the comments were all on things that
> are also in v2, so it worked out in the end. Sorry about the confusion.

It's okay. Your comments are very useful.

Thanks.
--
ZheNing Hu

^ permalink raw reply	[flat|nested] 121+ messages in thread

* Re: [PATCH 2/8] [GSOC] ref-filter: add %(raw) atom
  2021-06-17  9:22     ` ZheNing Hu
@ 2021-06-17 14:37       ` Ævar Arnfjörð Bjarmason
  2021-06-17 16:14         ` ZheNing Hu
  0 siblings, 1 reply; 121+ messages in thread
From: Ævar Arnfjörð Bjarmason @ 2021-06-17 14:37 UTC (permalink / raw)
  To: ZheNing Hu
  Cc: ZheNing Hu via GitGitGadget, Git List, Junio C Hamano,
	Christian Couder, Hariom Verma, Bagas Sanjaya, Jeff King


On Thu, Jun 17 2021, ZheNing Hu wrote:

> Ævar Arnfjörð Bjarmason <avarab@gmail.com> 于2021年6月17日周四 下午3:16写道:
>>
>> > The raw data of blob, tree objects may contain '\0', but most of
>> > the logic in `ref-filter` depends on the output of the atom being
>> > text (specifically, no embedded NULs in it).
>> >
>> > E.g. `quote_formatting()` use `strbuf_addstr()` or `*._quote_buf()`
>> > add the data to the buffer. The raw data of a tree object is
>> > `100644 one\0...`, only the `100644 one` will be added to the buffer,
>> > which is incorrect.
>> >
>> > Therefore, add a new member in `struct atom_value`: `s_size`, which
>> > can record raw object size, it can help us add raw object data to
>> > the buffer or compare two buffers which contain raw object data.
>>
>> Most of the functions that deal with this already use a strbuf in some
>> way, before we had a const char *, now there's a size_t to go along with
>> it, why not simply use a strbuf in the struct for the data? You'll then
>> get the size and \0 handling for free, and any functions to deal with
>> conversion can stick to the strbuf API, there seems to be a lot of back
>> and forth now.
>>
>
> Yes, strbuf is a suitable choice when using <str,len> pair.
> But if replace v->s with strbuf, the possible changes will be larger.

I for one would like to see it done that way, those changes are usually
easy to read. Also it seems a large part of 2/8 is extra new code
because we didn't do that, e.g. getting length differently if something
is a strbuf or not, passing char*/size_t pairs to new functions etc.

>> > Beyond, `--format=%(raw)` cannot be used with `--python`, `--shell`,
>> > `--tcl`, `--perl` because if our binary raw data is passed to a variable
>> > in the host language, the host language may not support arbitrary binary
>> > data in the variables of its string type.
>>
>> Perl at least deals with that just fine, and to the extent that it
>> doesn't any new problems here would have nothing to do with \0 being in
>> the data. Perl doesn't have a notion of "binary has \0 in it", it always
>> supports \0, it has a notion of "is it utf-8 or not?", so any encoding
>> problems wouldn't be new. I'd think that the same would be true of
>> Python, but I'm not sure.
>>
>
> Not python safe. See [1].
> Regarding the perl language, I support Junio's point of view: it can be
> re-supported in the future.

Ah, I'd missed that. Anyway, if it's easy it seems you discovered that
Perl deals with it correctly, so we could just have it support this.

>>
>> > +test_expect_success 'basic atom: refs/tags/testtag *raw' '
>> > +     git cat-file commit refs/tags/testtag^{} >expected &&
>> > +     git for-each-ref --format="%(*raw)" refs/tags/testtag >actual &&
>> > +     sanitize_pgp <expected >expected.clean &&
>> > +     sanitize_pgp <actual >actual.clean &&
>> > +     echo "" >>expected.clean &&
>>
>> Just "echo" will do, ditto for the rest. Also odd to go back and forth
>> between populating expected.clean & actual.clean.
>>
>
> Are you saying that sanitize_pgp is not needed?

No that instead of:

    echo "" >x

You can do:

    echo >x

And also that going back and forth between populating different files is
confusing, i.e. this:


    echo a >x
    echo c >y
    echo b >>x

is better as:

    echo a >x
    echo b >>x
    echo c >y


>>
>> > +test_expect_success 'set up refs pointing to binary blob' '
>> > +     printf "a\0b\0c" >blob1 &&
>> > +     printf "a\0c\0b" >blob2 &&
>> > +     printf "\0a\0b\0c" >blob3 &&
>> > +     printf "abc" >blob4 &&
>> > +     printf "\0 \0 \0 " >blob5 &&
>> > +     printf "\0 \0a\0 " >blob6 &&
>> > +     printf "  " >blob7 &&
>> > +     >blob8 &&
>> > +     git hash-object blob1 -w | xargs git update-ref refs/myblobs/blob1 &&
>> > +     git hash-object blob2 -w | xargs git update-ref refs/myblobs/blob2 &&
>> > +     git hash-object blob3 -w | xargs git update-ref refs/myblobs/blob3 &&
>> > +     git hash-object blob4 -w | xargs git update-ref refs/myblobs/blob4 &&
>> > +     git hash-object blob5 -w | xargs git update-ref refs/myblobs/blob5 &&
>> > +     git hash-object blob6 -w | xargs git update-ref refs/myblobs/blob6 &&
>> > +     git hash-object blob7 -w | xargs git update-ref refs/myblobs/blob7 &&
>> > +     git hash-object blob8 -w | xargs git update-ref refs/myblobs/blob8
>>
>> Hrm, xargs just to avoid:
>>
>>     git update-ref ... $(git hash-object) ?
>>
>
> I didn’t think about it, just for convenience.

*nod*, Junio had a good suggestion.

>> > +test_expect_success '%(raw) with --python must failed' '
>> > +     test_must_fail git for-each-ref --format="%(raw)" --python
>> > +'
>> > +
>> > +test_expect_success '%(raw) with --tcl must failed' '
>> > +     test_must_fail git for-each-ref --format="%(raw)" --tcl
>> > +'
>> > +
>> > +test_expect_success '%(raw) with --perl must failed' '
>> > +     test_must_fail git for-each-ref --format="%(raw)" --perl
>> > +'
>> > +
>> > +test_expect_success '%(raw) with --shell must failed' '
>> > +     test_must_fail git for-each-ref --format="%(raw)" --shell
>> > +'
>> > +
>> > +test_expect_success '%(raw) with --shell and --sort=raw must failed' '
>> > +     test_must_fail git for-each-ref --format="%(raw)" --sort=raw --shell
>> > +'
>>
>> s/must failed/must fail/, but see question above about encoding in these
>> languages...
>
>
> [1]: https://lore.kernel.org/git/CAOLTT8QR_GRm4TYk0E_eazQ+unVQODc-3L+b4V5JUN5jtZR8uA@mail.gmail.com/
>
> Thanks for a review.


^ permalink raw reply	[flat|nested] 121+ messages in thread

* Re: [PATCH 2/8] [GSOC] ref-filter: add %(raw) atom
  2021-06-17 14:37       ` Ævar Arnfjörð Bjarmason
@ 2021-06-17 16:14         ` ZheNing Hu
  2021-06-18 10:49           ` Ævar Arnfjörð Bjarmason
  0 siblings, 1 reply; 121+ messages in thread
From: ZheNing Hu @ 2021-06-17 16:14 UTC (permalink / raw)
  To: Ævar Arnfjörð Bjarmason
  Cc: ZheNing Hu via GitGitGadget, Git List, Junio C Hamano,
	Christian Couder, Hariom Verma, Bagas Sanjaya, Jeff King

Ævar Arnfjörð Bjarmason <avarab@gmail.com> 于2021年6月17日周四 下午10:45写道:
>
>
> On Thu, Jun 17 2021, ZheNing Hu wrote:
>
> >
> > Yes, strbuf is a suitable choice when using <str,len> pair.
> > But if replace v->s with strbuf, the possible changes will be larger.
>
> I for one would like to see it done that way, those changes are usually
> easy to read. Also it seems a large part of 2/8 is extra new code
> because we didn't do that, e.g. getting length differently if something
> is a strbuf or not, passing char*/size_t pairs to new functions etc.
>

After some refactoring, I found that there are two problems:
1. There are a lot of codes like this in ref-filter to fill v->s:

v->s = show_ref(...)
v->s = copy_email(...)

It is very difficult to modify here: We know that show_ref()
or copy_email() will allocate a block of memory to v->s, but
if v->s is a strbuf, what should we do? In copy_email(), we
can pass the v->s to copy_email() and use strbuf_add()/strbuf_addstr()
instead of xstrdup() and xmemdupz(). But show_ref() will call
external functions like shorten_unambiguous_ref(), we don’t know
whether it will return us NULL or a dynamically allocated memory.
If continue to pass v->s to the inner function, it is not a feasible
method. Or we can use strbuf_attach() + strlen(), I'm not sure
this is a good method.

2. See:

-       for (i = 0; i < used_atom_cnt; i++) {
+       for (i = 0; i < used_atom_cnt; i++) {
                struct atom_value *v = &ref->value[i];
-               if (v->s == NULL && used_atom[i].source == SOURCE_NONE)
+               if (v->s.len == 0 && used_atom[i].source == SOURCE_NONE)
                        return strbuf_addf_ret(err, -1, _("missing
object %s for %s"),

oid_to_hex(&ref->objectname), ref->refname);
        }

In the case of using strbuf, I don’t know how to distinguish between an empty
strbuf and NULL. It can be easily distinguished by using c-style "const char*".

> >
> > Not python safe. See [1].
> > Regarding the perl language, I support Junio's point of view: it can be
> > re-supported in the future.
>
> Ah, I'd missed that. Anyway, if it's easy it seems you discovered that
> Perl deals with it correctly, so we could just have it support this.
>

Well, it's ok, support for perl will be put in a separate commit.

> >>
> >> > +test_expect_success 'basic atom: refs/tags/testtag *raw' '
> >> > +     git cat-file commit refs/tags/testtag^{} >expected &&
> >> > +     git for-each-ref --format="%(*raw)" refs/tags/testtag >actual &&
> >> > +     sanitize_pgp <expected >expected.clean &&
> >> > +     sanitize_pgp <actual >actual.clean &&
> >> > +     echo "" >>expected.clean &&
> >>
> >> Just "echo" will do, ditto for the rest. Also odd to go back and forth
> >> between populating expected.clean & actual.clean.
> >>
> >
> > Are you saying that sanitize_pgp is not needed?
>
> No that instead of:
>
>     echo "" >x
>
> You can do:
>
>     echo >x
>
> And also that going back and forth between populating different files is
> confusing, i.e. this:
>
>
>     echo a >x
>     echo c >y
>     echo b >>x
>
> is better as:
>
>     echo a >x
>     echo b >>x
>     echo c >y
>
>

Thanks, I get what you meant now.

> >>
> >> > +test_expect_success 'set up refs pointing to binary blob' '
> >> > +     printf "a\0b\0c" >blob1 &&
> >> > +     printf "a\0c\0b" >blob2 &&
> >> > +     printf "\0a\0b\0c" >blob3 &&
> >> > +     printf "abc" >blob4 &&
> >> > +     printf "\0 \0 \0 " >blob5 &&
> >> > +     printf "\0 \0a\0 " >blob6 &&
> >> > +     printf "  " >blob7 &&
> >> > +     >blob8 &&
> >> > +     git hash-object blob1 -w | xargs git update-ref refs/myblobs/blob1 &&
> >> > +     git hash-object blob2 -w | xargs git update-ref refs/myblobs/blob2 &&
> >> > +     git hash-object blob3 -w | xargs git update-ref refs/myblobs/blob3 &&
> >> > +     git hash-object blob4 -w | xargs git update-ref refs/myblobs/blob4 &&
> >> > +     git hash-object blob5 -w | xargs git update-ref refs/myblobs/blob5 &&
> >> > +     git hash-object blob6 -w | xargs git update-ref refs/myblobs/blob6 &&
> >> > +     git hash-object blob7 -w | xargs git update-ref refs/myblobs/blob7 &&
> >> > +     git hash-object blob8 -w | xargs git update-ref refs/myblobs/blob8
> >>
> >> Hrm, xargs just to avoid:
> >>
> >>     git update-ref ... $(git hash-object) ?
> >>
> >
> > I didn’t think about it, just for convenience.
>
> *nod*, Junio had a good suggestion.
>

ok.

Thanks.
--
ZheNing Hu

^ permalink raw reply	[flat|nested] 121+ messages in thread

* Re: [PATCH 2/8] [GSOC] ref-filter: add %(raw) atom
  2021-06-17 16:14         ` ZheNing Hu
@ 2021-06-18 10:49           ` Ævar Arnfjörð Bjarmason
  2021-06-18 13:47             ` Christian Couder
  0 siblings, 1 reply; 121+ messages in thread
From: Ævar Arnfjörð Bjarmason @ 2021-06-18 10:49 UTC (permalink / raw)
  To: ZheNing Hu
  Cc: ZheNing Hu via GitGitGadget, Git List, Junio C Hamano,
	Christian Couder, Hariom Verma, Bagas Sanjaya, Jeff King


On Fri, Jun 18 2021, ZheNing Hu wrote:

> Ævar Arnfjörð Bjarmason <avarab@gmail.com> 于2021年6月17日周四 下午10:45写道:
>>
>>
>> On Thu, Jun 17 2021, ZheNing Hu wrote:
>>
>> >
>> > Yes, strbuf is a suitable choice when using <str,len> pair.
>> > But if replace v->s with strbuf, the possible changes will be larger.
>>
>> I for one would like to see it done that way, those changes are usually
>> easy to read. Also it seems a large part of 2/8 is extra new code
>> because we didn't do that, e.g. getting length differently if something
>> is a strbuf or not, passing char*/size_t pairs to new functions etc.
>>
>
> After some refactoring, I found that there are two problems:
> 1. There are a lot of codes like this in ref-filter to fill v->s:
>
> v->s = show_ref(...)
> v->s = copy_email(...)
>
> It is very difficult to modify here: We know that show_ref()
> or copy_email() will allocate a block of memory to v->s, but
> if v->s is a strbuf, what should we do? In copy_email(), we
> can pass the v->s to copy_email() and use strbuf_add()/strbuf_addstr()
> instead of xstrdup() and xmemdupz(). But show_ref() will call
> external functions like shorten_unambiguous_ref(), we don’t know
> whether it will return us NULL or a dynamically allocated memory.
> If continue to pass v->s to the inner function, it is not a feasible
> method. Or we can use strbuf_attach() + strlen(), I'm not sure
> this is a good method.
>
> 2. See:
>
> -       for (i = 0; i < used_atom_cnt; i++) {
> +       for (i = 0; i < used_atom_cnt; i++) {
>                 struct atom_value *v = &ref->value[i];
> -               if (v->s == NULL && used_atom[i].source == SOURCE_NONE)
> +               if (v->s.len == 0 && used_atom[i].source == SOURCE_NONE)
>                         return strbuf_addf_ret(err, -1, _("missing
> object %s for %s"),
>
> oid_to_hex(&ref->objectname), ref->refname);
>         }
>
> In the case of using strbuf, I don’t know how to distinguish between an empty
> strbuf and NULL. It can be easily distinguished by using c-style "const char*".

Yes, sometimes it's just too much of a hassle, looking at
shorten_unambiguous_ref() which returns a xstrdup()'d value that could
indeed be strbuf_attach'd. I haven't tried the conversion myself,
perhaps it's too much hassle.

Just a suggestion from reading your patch in isolation.


>> >
>> > Not python safe. See [1].
>> > Regarding the perl language, I support Junio's point of view: it can be
>> > re-supported in the future.
>>
>> Ah, I'd missed that. Anyway, if it's easy it seems you discovered that
>> Perl deals with it correctly, so we could just have it support this.
>>
>
> Well, it's ok, support for perl will be put in a separate commit.
>
>> >>
>> >> > +test_expect_success 'basic atom: refs/tags/testtag *raw' '
>> >> > +     git cat-file commit refs/tags/testtag^{} >expected &&
>> >> > +     git for-each-ref --format="%(*raw)" refs/tags/testtag >actual &&
>> >> > +     sanitize_pgp <expected >expected.clean &&
>> >> > +     sanitize_pgp <actual >actual.clean &&
>> >> > +     echo "" >>expected.clean &&
>> >>
>> >> Just "echo" will do, ditto for the rest. Also odd to go back and forth
>> >> between populating expected.clean & actual.clean.
>> >>
>> >
>> > Are you saying that sanitize_pgp is not needed?
>>
>> No that instead of:
>>
>>     echo "" >x
>>
>> You can do:
>>
>>     echo >x
>>
>> And also that going back and forth between populating different files is
>> confusing, i.e. this:
>>
>>
>>     echo a >x
>>     echo c >y
>>     echo b >>x
>>
>> is better as:
>>
>>     echo a >x
>>     echo b >>x
>>     echo c >y
>>
>>
>
> Thanks, I get what you meant now.
>
>> >>
>> >> > +test_expect_success 'set up refs pointing to binary blob' '
>> >> > +     printf "a\0b\0c" >blob1 &&
>> >> > +     printf "a\0c\0b" >blob2 &&
>> >> > +     printf "\0a\0b\0c" >blob3 &&
>> >> > +     printf "abc" >blob4 &&
>> >> > +     printf "\0 \0 \0 " >blob5 &&
>> >> > +     printf "\0 \0a\0 " >blob6 &&
>> >> > +     printf "  " >blob7 &&
>> >> > +     >blob8 &&
>> >> > +     git hash-object blob1 -w | xargs git update-ref refs/myblobs/blob1 &&
>> >> > +     git hash-object blob2 -w | xargs git update-ref refs/myblobs/blob2 &&
>> >> > +     git hash-object blob3 -w | xargs git update-ref refs/myblobs/blob3 &&
>> >> > +     git hash-object blob4 -w | xargs git update-ref refs/myblobs/blob4 &&
>> >> > +     git hash-object blob5 -w | xargs git update-ref refs/myblobs/blob5 &&
>> >> > +     git hash-object blob6 -w | xargs git update-ref refs/myblobs/blob6 &&
>> >> > +     git hash-object blob7 -w | xargs git update-ref refs/myblobs/blob7 &&
>> >> > +     git hash-object blob8 -w | xargs git update-ref refs/myblobs/blob8
>> >>
>> >> Hrm, xargs just to avoid:
>> >>
>> >>     git update-ref ... $(git hash-object) ?
>> >>
>> >
>> > I didn’t think about it, just for convenience.
>>
>> *nod*, Junio had a good suggestion.
>>
>
> ok.
>
> Thanks.


^ permalink raw reply	[flat|nested] 121+ messages in thread

* Re: [PATCH 2/8] [GSOC] ref-filter: add %(raw) atom
  2021-06-18 10:49           ` Ævar Arnfjörð Bjarmason
@ 2021-06-18 13:47             ` Christian Couder
  0 siblings, 0 replies; 121+ messages in thread
From: Christian Couder @ 2021-06-18 13:47 UTC (permalink / raw)
  To: Ævar Arnfjörð Bjarmason
  Cc: ZheNing Hu, ZheNing Hu via GitGitGadget, Git List,
	Junio C Hamano, Hariom Verma, Bagas Sanjaya, Jeff King

On Fri, Jun 18, 2021 at 12:51 PM Ævar Arnfjörð Bjarmason
<avarab@gmail.com> wrote:

> On Fri, Jun 18 2021, ZheNing Hu wrote:

> > After some refactoring, I found that there are two problems:
> > 1. There are a lot of codes like this in ref-filter to fill v->s:
> >
> > v->s = show_ref(...)
> > v->s = copy_email(...)
> >
> > It is very difficult to modify here: We know that show_ref()
> > or copy_email() will allocate a block of memory to v->s, but
> > if v->s is a strbuf, what should we do? In copy_email(), we
> > can pass the v->s to copy_email() and use strbuf_add()/strbuf_addstr()
> > instead of xstrdup() and xmemdupz(). But show_ref() will call
> > external functions like shorten_unambiguous_ref(), we don’t know
> > whether it will return us NULL or a dynamically allocated memory.
> > If continue to pass v->s to the inner function, it is not a feasible
> > method. Or we can use strbuf_attach() + strlen(), I'm not sure
> > this is a good method.

If you resend this patch, it might be a good idea to add a short
version of the above explanations into the commit message.

[...]

> > In the case of using strbuf, I don’t know how to distinguish between an empty
> > strbuf and NULL. It can be easily distinguished by using c-style "const char*".

Maybe this could also be part of the explanation.

> Yes, sometimes it's just too much of a hassle, looking at
> shorten_unambiguous_ref() which returns a xstrdup()'d value that could
> indeed be strbuf_attach'd. I haven't tried the conversion myself,
> perhaps it's too much hassle.
>
> Just a suggestion from reading your patch in isolation.

Yeah, thanks for the review anyway!

^ permalink raw reply	[flat|nested] 121+ messages in thread

* [PATCH v3 00/10] [GSOC][RFC] cat-file: reuse ref-filter logic
  2021-06-15 13:28 ` [PATCH v2 0/9] [GSOC][RFC] cat-file: reuse ref-filter logic ZheNing Hu via GitGitGadget
                     ` (10 preceding siblings ...)
  2021-06-17  7:26   ` Ævar Arnfjörð Bjarmason
@ 2021-06-19  7:02   ` ZheNing Hu via GitGitGadget
  2021-06-19  7:02     ` [PATCH v3 01/10] [GSOC] ref-filter: add obj-type check in grab contents ZheNing Hu via GitGitGadget
                       ` (10 more replies)
  11 siblings, 11 replies; 121+ messages in thread
From: ZheNing Hu via GitGitGadget @ 2021-06-19  7:02 UTC (permalink / raw)
  To: git
  Cc: Junio C Hamano, Christian Couder, Hariom Verma, Bagas Sanjaya,
	Jeff King, Ævar Arnfjörð Bjarmason, ZheNing Hu

This patch series make cat-file reuse ref-filter logic. As part of the
content related to zh/ref-filter-raw-data has been modified, this patch
currently sets the base on zh/ref-filter-atom-type until
zh/ref-filter-raw-data is stable And will rebase to it.

f72ad9c~1..39a0d93 of this patch belongs to zh/ref-filter-raw-data. Change
from last version:

 1. Let --format=%(raw) re-support --perl.
 2. Correct some errors in the test.
 3. Add the reason why strbuf is not used to replace atom_value's member s
    in the commit message.

39a0d93..86ac3bc of this patch is another part. Change from last version:

 1. Modify free_array_item_internal() to free_ref_array_item_value().
 2. Change some errors of the code style.
 3. Change some commit messages.

ZheNing Hu (10):
  [GSOC] ref-filter: add obj-type check in grab contents
  [GSOC] ref-filter: add %(raw) atom
  [GSOC] ref-filter: --format=%(raw) re-support --perl
  [GSOC] ref-filter: use non-const ref_format in *_atom_parser()
  [GSOC] ref-filter: add %(rest) atom
  [GSOC] ref-filter: pass get_object() return value to their callers
  [GSOC] ref-filter: introduce free_ref_array_item_value() function
  [GSOC] cat-file: reuse ref-filter logic
  [GSOC] cat-file: reuse err buf in batch_object_write()
  [GSOC] cat-file: re-implement --textconv, --filters options

 Documentation/git-cat-file.txt     |   6 +
 Documentation/git-for-each-ref.txt |   9 +
 builtin/cat-file.c                 | 267 ++++++-----------------
 builtin/tag.c                      |   2 +-
 quote.c                            |  17 ++
 quote.h                            |   1 +
 ref-filter.c                       | 331 +++++++++++++++++++++++------
 ref-filter.h                       |  14 +-
 t/t1006-cat-file.sh                | 252 ++++++++++++++++++++++
 t/t3203-branch-output.sh           |   4 +
 t/t6300-for-each-ref.sh            | 235 ++++++++++++++++++++
 t/t6301-for-each-ref-errors.sh     |   2 +-
 t/t7004-tag.sh                     |   4 +
 t/t7030-verify-tag.sh              |   4 +
 14 files changed, 872 insertions(+), 276 deletions(-)


base-commit: 1197f1a46360d3ae96bd9c15908a3a6f8e562207
Published-As: https://github.com/gitgitgadget/git/releases/tag/pr-980%2Fadlternative%2Fcat-file-batch-refactor-v3
Fetch-It-Via: git fetch https://github.com/gitgitgadget/git pr-980/adlternative/cat-file-batch-refactor-v3
Pull-Request: https://github.com/gitgitgadget/git/pull/980

Range-diff vs v2:

  1:  48d256db5c34 =  1:  f72ad9cc5e8b [GSOC] ref-filter: add obj-type check in grab contents
  2:  abee6a03becb !  2:  ab497d66c116 [GSOC] ref-filter: add %(raw) atom
     @@ Commit message
          `100644 one\0...`, only the `100644 one` will be added to the buffer,
          which is incorrect.
      
     -    Therefore, add a new member in `struct atom_value`: `s_size`, which
     -    can record raw object size, it can help us add raw object data to
     -    the buffer or compare two buffers which contain raw object data.
     +    Therefore, we need to find a way to record the length of the
     +    atom_value's member `s`. Although strbuf can already record the
     +    string and its length, if we want to replace the type of atom_value's
     +    member `s` with strbuf, many places in ref-filter that are filled
     +    with dynamically allocated mermory in `v->s` are not easy to replace.
     +    At the same time, we need to check if `v->s == NULL` in
     +    populate_value(), and strbuf cannot easily distinguish NULL and empty
     +    strings, but c-style "const char *" can do it. So add a new member in
     +    `struct atom_value`: `s_size`, which can record raw object size, it
     +    can help us add raw object data to the buffer or compare two buffers
     +    which contain raw object data.
      
          Beyond, `--format=%(raw)` cannot be used with `--python`, `--shell`,
     -    `--tcl`, `--perl` because if our binary raw data is passed to a variable
     -    in the host language, the host language may not support arbitrary binary
     -    data in the variables of its string type.
     +    `--tcl`, `--perl` because if our binary raw data is passed to a
     +    variable in the host language, the host language may not support
     +    arbitrary binary data in the variables of its string type.
      
          Mentored-by: Christian Couder <christian.couder@gmail.com>
          Mentored-by: Hariom Verma <hariom18599@gmail.com>
     +    Helped-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
          Helped-by: Felipe Contreras <felipe.contreras@gmail.com>
          Helped-by: Phillip Wood <phillip.wood@dunelm.org.uk>
          Helped-by: Junio C Hamano <gitster@pobox.com>
     @@ t/t6300-for-each-ref.sh: test_atom tag contents 'Tagging at 1151968727
      +	git cat-file commit refs/tags/testtag^{} >expected &&
      +	git for-each-ref --format="%(*raw)" refs/tags/testtag >actual &&
      +	sanitize_pgp <expected >expected.clean &&
     ++	echo >>expected.clean &&
      +	sanitize_pgp <actual >actual.clean &&
     -+	echo "" >>expected.clean &&
      +	test_cmp expected.clean actual.clean
      +'
      +
     @@ t/t6300-for-each-ref.sh: test_atom refs/tags/signed-empty contents:body ''
      +	git cat-file tag refs/tags/signed-empty >expected &&
      +	git for-each-ref --format="%(raw)" refs/tags/signed-empty >actual &&
      +	sanitize_pgp <expected >expected.clean &&
     ++	echo >>expected.clean &&
      +	sanitize_pgp <actual >actual.clean &&
     -+	echo "" >>expected.clean &&
      +	test_cmp expected.clean actual.clean
      +'
      +
     @@ t/t6300-for-each-ref.sh: test_atom refs/tags/signed-short contents:signature "$s
      +	git cat-file tag refs/tags/signed-short >expected &&
      +	git for-each-ref --format="%(raw)" refs/tags/signed-short >actual &&
      +	sanitize_pgp <expected >expected.clean &&
     ++	echo >>expected.clean &&
      +	sanitize_pgp <actual >actual.clean &&
     -+	echo "" >>expected.clean &&
      +	test_cmp expected.clean actual.clean
      +'
      +
     @@ t/t6300-for-each-ref.sh: test_atom refs/tags/signed-long contents "subject line
      +	git cat-file tag refs/tags/signed-long >expected &&
      +	git for-each-ref --format="%(raw)" refs/tags/signed-long >actual &&
      +	sanitize_pgp <expected >expected.clean &&
     ++	echo >>expected.clean &&
      +	sanitize_pgp <actual >actual.clean &&
     -+	echo "" >>expected.clean &&
      +	test_cmp expected.clean actual.clean
      +'
      +
     @@ t/t6300-for-each-ref.sh: test_atom refs/mytrees/first contents:body ""
       
      +test_expect_success 'basic atom: refs/mytrees/first raw' '
      +	git cat-file tree refs/mytrees/first >expected &&
     -+	echo "" >>expected &&
     ++	echo >>expected &&
      +	git for-each-ref --format="%(raw)" refs/mytrees/first >actual &&
      +	test_cmp expected actual &&
      +	git cat-file -s refs/mytrees/first >expected &&
     @@ t/t6300-for-each-ref.sh: test_atom refs/myblobs/first contents:body ""
       
      +test_expect_success 'basic atom: refs/myblobs/first raw' '
      +	git cat-file blob refs/myblobs/first >expected &&
     -+	echo "" >>expected &&
     ++	echo >>expected &&
      +	git for-each-ref --format="%(raw)" refs/myblobs/first >actual &&
      +	test_cmp expected actual &&
      +	git cat-file -s refs/myblobs/first >expected &&
     @@ t/t6300-for-each-ref.sh: test_atom refs/myblobs/first contents:body ""
      +	printf "\0 \0a\0 " >blob6 &&
      +	printf "  " >blob7 &&
      +	>blob8 &&
     -+	git hash-object blob1 -w | xargs git update-ref refs/myblobs/blob1 &&
     -+	git hash-object blob2 -w | xargs git update-ref refs/myblobs/blob2 &&
     -+	git hash-object blob3 -w | xargs git update-ref refs/myblobs/blob3 &&
     -+	git hash-object blob4 -w | xargs git update-ref refs/myblobs/blob4 &&
     -+	git hash-object blob5 -w | xargs git update-ref refs/myblobs/blob5 &&
     -+	git hash-object blob6 -w | xargs git update-ref refs/myblobs/blob6 &&
     -+	git hash-object blob7 -w | xargs git update-ref refs/myblobs/blob7 &&
     -+	git hash-object blob8 -w | xargs git update-ref refs/myblobs/blob8
     ++	obj=$(git hash-object -w blob1) &&
     ++        git update-ref refs/myblobs/blob1 "$obj" &&
     ++	obj=$(git hash-object -w blob2) &&
     ++        git update-ref refs/myblobs/blob2 "$obj" &&
     ++	obj=$(git hash-object -w blob3) &&
     ++        git update-ref refs/myblobs/blob3 "$obj" &&
     ++	obj=$(git hash-object -w blob4) &&
     ++        git update-ref refs/myblobs/blob4 "$obj" &&
     ++	obj=$(git hash-object -w blob5) &&
     ++        git update-ref refs/myblobs/blob5 "$obj" &&
     ++	obj=$(git hash-object -w blob6) &&
     ++        git update-ref refs/myblobs/blob6 "$obj" &&
     ++	obj=$(git hash-object -w blob7) &&
     ++        git update-ref refs/myblobs/blob7 "$obj" &&
     ++	obj=$(git hash-object -w blob8) &&
     ++        git update-ref refs/myblobs/blob8 "$obj"
      +'
      +
      +test_expect_success 'Verify sorts with raw' '
     @@ t/t6300-for-each-ref.sh: test_atom refs/myblobs/first contents:body ""
      +		refs/myblobs/ refs/heads/ >actual &&
      +	test_cmp expected actual
      +'
     ++
      +test_expect_success 'validate raw atom with %(if:notequals)' '
      +	cat >expected <<-EOF &&
      +	refs/heads/ambiguous
     @@ t/t6300-for-each-ref.sh: test_atom refs/myblobs/first contents:body ""
      +	test_cmp expected actual
      +'
      +
     -+test_expect_success '%(raw) with --python must failed' '
     ++test_expect_success '%(raw) with --python must fail' '
      +	test_must_fail git for-each-ref --format="%(raw)" --python
      +'
      +
     -+test_expect_success '%(raw) with --tcl must failed' '
     ++test_expect_success '%(raw) with --tcl must fail' '
      +	test_must_fail git for-each-ref --format="%(raw)" --tcl
      +'
      +
     -+test_expect_success '%(raw) with --perl must failed' '
     ++test_expect_success '%(raw) with --perl must fail' '
      +	test_must_fail git for-each-ref --format="%(raw)" --perl
      +'
      +
     -+test_expect_success '%(raw) with --shell must failed' '
     ++test_expect_success '%(raw) with --shell must fail' '
      +	test_must_fail git for-each-ref --format="%(raw)" --shell
      +'
      +
     -+test_expect_success '%(raw) with --shell and --sort=raw must failed' '
     ++test_expect_success '%(raw) with --shell and --sort=raw must fail' '
      +	test_must_fail git for-each-ref --format="%(raw)" --sort=raw --shell
      +'
      +
  -:  ------------ >  3:  b54dbc431e04 [GSOC] ref-filter: --format=%(raw) re-support --perl
  3:  c99d1d070a18 =  4:  9fbbb3c492f5 [GSOC] ref-filter: use non-const ref_format in *_atom_parser()
  4:  5a5b5f78aeea !  5:  39a0d93c7bc1 [GSOC] ref-filter: add %(rest) atom
     @@ ref-filter.c: static struct {
       	 * Please update $__git_ref_fieldlist in git-completion.bash
       	 * when you add new atoms
      @@ ref-filter.c: int verify_ref_format(struct ref_format *format)
     - 		at = parse_ref_filter_atom(format, sp + 2, ep, &err);
       		if (at < 0)
       			die("%s", err.buf);
     + 
      +		if (used_atom[at].atom_type == ATOM_REST)
      +			die("this command reject atom %%(%.*s)", (int)(ep - sp - 2), sp + 2);
      +
     - 		if (format->quote_style && used_atom[at].atom_type == ATOM_RAW &&
     - 		    used_atom[at].u.raw_data.option == RAW_BARE)
     - 			die(_("--format=%.*s cannot be used with"
     + 		if ((format->quote_style == QUOTE_PYTHON ||
     + 		     format->quote_style == QUOTE_SHELL ||
     + 		     format->quote_style == QUOTE_TCL) &&
      @@ ref-filter.c: static int populate_value(struct ref_array_item *ref, struct strbuf *err)
       			v->handler = else_atom_handler;
       			v->s = xstrdup("");
     @@ t/t3203-branch-output.sh: test_expect_success 'git branch --format option' '
       	test_cmp expect actual
       '
       
     -+test_expect_success 'git branch with --format=%(rest) must failed' '
     ++test_expect_success 'git branch with --format=%(rest) must fail' '
      +	test_must_fail git branch --format="%(rest)" >actual
      +'
      +
     @@ t/t6300-for-each-ref.sh: test_expect_success 'basic atom: head contents:trailers
       	test_cmp expect actual.clean
       '
       
     -+test_expect_success 'basic atom: rest must failed' '
     ++test_expect_success 'basic atom: rest must fail' '
      +	test_must_fail git for-each-ref --format="%(rest)" refs/heads/main
      +'
      +
     @@ t/t7004-tag.sh: test_expect_success '--format should list tags as per format giv
       	test_cmp expect actual
       '
       
     -+test_expect_success 'git tag -l with --format="%(rest)" must failed' '
     ++test_expect_success 'git tag -l with --format="%(rest)" must fail' '
      +	test_must_fail git tag -l --format="%(rest)" "v1*"
      +'
      +
     @@ t/t7030-verify-tag.sh: test_expect_success GPG 'verifying tag with --format' '
       	test_cmp expect actual
       '
       
     -+test_expect_success GPG 'verifying tag with --format="%(rest)" must failed' '
     ++test_expect_success GPG 'verifying tag with --format="%(rest)" must fail' '
      +	test_must_fail git verify-tag --format="%(rest)" "fourth-signed"
      +'
      +
  5:  49063372e003 !  6:  35a376db1fc1 [GSOC] ref-filter: teach get_object() return useful value
     @@ Metadata
      Author: ZheNing Hu <adlternative@gmail.com>
      
       ## Commit message ##
     -    [GSOC] ref-filter: teach get_object() return useful value
     +    [GSOC] ref-filter: pass get_object() return value to their callers
      
     -    Let `populate_value()`, `get_ref_atom_value()` and
     -    `format_ref_array_item()` get the return value of `get_object()`
     -    correctly. This can help us later let `cat-file --batch` get the
     -    correct error message and return value of `get_object()`.
     +    Since in the refactor of `git cat-file --batch` later,
     +    oid_object_info_extended() in get_object() will be used to obtain
     +    the info of an object with it's oid. When the object cannot be
     +    obtained in the git repository, `cat-file --batch` expects to output
     +    "<oid> missing" and continue the next oid query instead of letting
     +    Git exit. In other error conditions, Git should exit normally. So we
     +    can achieve this function by passing the return value of get_object().
      
          Mentored-by: Christian Couder <christian.couder@gmail.com>
          Mentored-by: Hariom Verma <hariom18599@gmail.com>
     +    Helped-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
          Signed-off-by: ZheNing Hu <adlternative@gmail.com>
      
       ## ref-filter.c ##
     -@@ ref-filter.c: static char *get_worktree_path(const struct used_atom *atom, const struct ref_ar
     - static int populate_value(struct ref_array_item *ref, struct strbuf *err)
     +@@ ref-filter.c: static int populate_value(struct ref_array_item *ref, struct strbuf *err)
       {
       	struct object *obj;
     --	int i;
     -+	int i, ret = 0;
     + 	int i;
     ++	int ret = 0;
       	struct object_info empty = OBJECT_INFO_INIT;
       
       	CALLOC_ARRAY(ref->value, used_atom_cnt);
     @@ ref-filter.c: static int populate_value(struct ref_array_item *ref, struct strbu
       	oi.oid = ref->objectname;
      -	if (get_object(ref, 0, &obj, &oi, err))
      -		return -1;
     -+	if ((ret = get_object(ref, 0, &obj, &oi, err)))
     ++	ret = get_object(ref, 0, &obj, &oi, err);
     ++	if (ret)
      +		return ret;
       
       	/*
       	 * If there is no atom that wants to know about tagged
     -@@ ref-filter.c: static int populate_value(struct ref_array_item *ref, struct strbuf *err)
     - static int get_ref_atom_value(struct ref_array_item *ref, int atom,
     +@@ ref-filter.c: static int get_ref_atom_value(struct ref_array_item *ref, int atom,
       			      struct atom_value **v, struct strbuf *err)
       {
     -+	int ret = 0;
     -+
       	if (!ref->value) {
      -		if (populate_value(ref, err))
      -			return -1;
     -+		if ((ret = populate_value(ref, err)))
     ++		int ret = populate_value(ref, err);
     ++
     ++		if (ret)
      +			return ret;
       		fill_missing_values(ref->value);
       	}
     @@ ref-filter.c: int format_ref_array_item(struct ref_array_item *info,
       		}
       	}
       	if (*cp) {
     -@@ ref-filter.c: int format_ref_array_item(struct ref_array_item *info,
     - 	}
     - 	strbuf_addbuf(final_buf, &state.stack->output);
     - 	pop_stack_element(&state.stack);
     --	return 0;
     -+	return ret;
     - }
     - 
     - void pretty_print_ref(const char *name, const struct object_id *oid,
  6:  d2f2563eb76a !  7:  8c1d683ec6e9 [GSOC] ref-filter: introduce free_array_item_internal() function
     @@ Metadata
      Author: ZheNing Hu <adlternative@gmail.com>
      
       ## Commit message ##
     -    [GSOC] ref-filter: introduce free_array_item_internal() function
     +    [GSOC] ref-filter: introduce free_ref_array_item_value() function
      
     -    Introduce free_array_item_internal() for freeing ref_array_item value.
     +    When we use ref_array_item which is not dynamically allocated and
     +    want to free the space of its member "value" after the end of use,
     +    free_array_item() does not meet our needs, because it tries to free
     +    ref_array_item itself and its member "symref".
     +
     +    Introduce free_ref_array_item_value() for freeing ref_array_item value.
          It will be called internally by free_array_item(), and it will help
     -    `cat-file --batch` free ref_array_item's memory later.
     +    `cat-file --batch` free ref_array_item's value memory later.
      
     +    Helped-by: Junio C Hamano <gitster@pobox.com>
          Mentored-by: Christian Couder <christian.couder@gmail.com>
          Mentored-by: Hariom Verma <hariom18599@gmail.com>
          Signed-off-by: ZheNing Hu <adlternative@gmail.com>
     @@ ref-filter.c: static int ref_filter_handler(const char *refname, const struct ob
       
      -/*  Free memory allocated for a ref_array_item */
      -static void free_array_item(struct ref_array_item *item)
     -+void free_array_item_internal(struct ref_array_item *item)
     ++void free_ref_array_item_value(struct ref_array_item *item)
       {
      -	free((char *)item->symref);
       	if (item->value) {
     @@ ref-filter.c: static int ref_filter_handler(const char *refname, const struct ob
      +static void free_array_item(struct ref_array_item *item)
      +{
      +	free((char *)item->symref);
     -+	free_array_item_internal(item);
     ++	free_ref_array_item_value(item);
       	free(item);
       }
       
     @@ ref-filter.h: struct ref_format {
       int filter_refs(struct ref_array *array, struct ref_filter *filter, unsigned int type);
       /*  Clear all memory allocated to ref_array */
       void ref_array_clear(struct ref_array *array);
     -+/* Free array item's value */
     -+void free_array_item_internal(struct ref_array_item *item);
     ++/* Free ref_array_item's value */
     ++void free_ref_array_item_value(struct ref_array_item *item);
       /*  Used to verify if the given format is correct and to parse out the used atoms */
       int verify_ref_format(struct ref_format *format);
       /*  Sort the given ref_array as per the ref_sorting provided */
  7:  765337a46ab0 !  8:  bd534a266a40 [GSOC] cat-file: reuse ref-filter logic
     @@ Commit message
          `verify_ref_format()` to check atoms.
          4. Use `has_object_file()` in `batch_one_object()` to check
          whether the input object exists.
     -    5. Use `format_ref_array_item()` in `batch_object_write()` to
     +    5. Let get_object() return 1 and print "<oid> missing" instead
     +    of returning -1 and printing "missing object <oid> for <refname>",
     +    this can help `format_ref_array_item()` just report that the
     +    object is missing without letting Git exit.
     +    6. Use `format_ref_array_item()` in `batch_object_write()` to
          get the formatted data corresponding to the object. If the
          return value of `format_ref_array_item()` is equals to zero,
          use `batch_write()` to print object data; else if the return
          value less than zero, use `die()` to print the error message
          and exit; else return value greater than zero, only print the
          error message, but not exit.
     -    6. Let get_object() return 1 and print "<oid> missing" instead
     -    of returning -1 and printing "missing object <oid> for <refname>",
     -    this can help `format_ref_array_item()` just report that the
     -    object is missing without letting Git exit.
     +    7. Use free_ref_array_item_value() to free ref_array_item's
     +    value.
      
          Most of the atoms in `for-each-ref --format` are now supported,
          such as `%(tree)`, `%(parent)`, `%(author)`, `%(tagger)`, `%(if)`,
     @@ builtin/cat-file.c: static void batch_write(struct batch_options *opt, const voi
      +		printf("%s\n", err.buf);
      +		fflush(stdout);
       	}
     -+	free_array_item_internal(&item);
     ++	free_ref_array_item_value(&item);
      +	strbuf_release(&err);
       }
       
     @@ builtin/cat-file.c: int cmd_cat_file(int argc, const char **argv, const char *pr
      
       ## ref-filter.c ##
      @@ ref-filter.c: int verify_ref_format(struct ref_format *format)
     - 		at = parse_ref_filter_atom(format, sp + 2, ep, &err);
       		if (at < 0)
       			die("%s", err.buf);
     + 
      -		if (used_atom[at].atom_type == ATOM_REST)
      -			die("this command reject atom %%(%.*s)", (int)(ep - sp - 2), sp + 2);
      +		if ((!format->cat_file_mode && used_atom[at].atom_type == ATOM_REST) ||
     @@ ref-filter.c: int verify_ref_format(struct ref_format *format)
      +					       used_atom[at].atom_type == ATOM_WORKTREEPATH)))
      +			die(_("this command reject atom %%(%.*s)"), (int)(ep - sp - 2), sp + 2);
       
     - 		if (format->quote_style && used_atom[at].atom_type == ATOM_RAW &&
     - 		    used_atom[at].u.raw_data.option == RAW_BARE)
     + 		if ((format->quote_style == QUOTE_PYTHON ||
     + 		     format->quote_style == QUOTE_SHELL ||
      @@ ref-filter.c: static int get_object(struct ref_array_item *ref, int deref, struct object **obj
       	}
       	if (oid_object_info_extended(the_repository, &oi->oid, &oi->info,
     @@ t/t1006-cat-file.sh: test_expect_success 'cat-file --unordered works' '
      +batch_test_atom() {
      +	if test "$3" = "fail"
      +	then
     -+		test_expect_${4:-success} $PREREQ "basic atom: $1 $2 mast failed" "
     ++		test_expect_${4:-success} $PREREQ "basic atom: $1 $2 must fail" "
      +			test_must_fail git cat-file --batch-check='$2' >bad <<-EOF
      +			$1
      +			EOF
  8:  058b304686fd !  9:  b66ab0f2d569 [GSOC] cat-file: reuse err buf in batch_objet_write()
     @@ Metadata
      Author: ZheNing Hu <adlternative@gmail.com>
      
       ## Commit message ##
     -    [GSOC] cat-file: reuse err buf in batch_objet_write()
     +    [GSOC] cat-file: reuse err buf in batch_object_write()
      
          Reuse the `err` buffer in batch_object_write(), as the
          buffer `scratch` does. This will reduce the overhead
     @@ builtin/cat-file.c: static void batch_write(struct batch_options *opt, const voi
      +		printf("%s\n", err->buf);
       		fflush(stdout);
       	}
     - 	free_array_item_internal(&item);
     + 	free_ref_array_item_value(&item);
      -	strbuf_release(&err);
       }
       
  9:  cbf7d51933ea ! 10:  86ac3bcaecea [GSOC] cat-file: re-implement --textconv, --filters options
     @@ builtin/cat-file.c: static int batch_objects(struct batch_options *opt, const st
       	opt->format.format = format.buf;
      +	if (opt->cmdmode == 'c')
      +		opt->format.use_textconv = 1;
     -+	if (opt->cmdmode == 'w')
     ++	else if (opt->cmdmode == 'w')
      +		opt->format.use_filters = 1;
      +
       	if (verify_ref_format(&opt->format))
     @@ ref-filter.c: int verify_ref_format(struct ref_format *format)
      +		use_filters = format->use_filters;
      +		use_textconv = format->use_textconv;
      +
     - 		if (format->quote_style && used_atom[at].atom_type == ATOM_RAW &&
     - 		    used_atom[at].u.raw_data.option == RAW_BARE)
     - 			die(_("--format=%.*s cannot be used with"
     + 		if ((format->quote_style == QUOTE_PYTHON ||
     + 		     format->quote_style == QUOTE_SHELL ||
     + 		     format->quote_style == QUOTE_TCL) &&
      @@ ref-filter.c: static int get_object(struct ref_array_item *ref, int deref, struct object **obj
       {
       	/* parse_object_buffer() will set eaten to 0 if free() will be needed */
     @@ ref-filter.c: static int get_object(struct ref_array_item *ref, int deref, struc
      +		oi->info.typep = NULL;
      +		oi->info.contentp = temp_contentp;
      +
     -+		if (use_textconv) {
     ++		if (use_textconv && !ref->rest)
     ++			return strbuf_addf_ret(err, -1, _("missing path for '%s'"),
     ++					       oid_to_hex(&act_oi.oid));
     ++		if (use_textconv && oi->type == OBJ_BLOB) {
      +			act_oi = *oi;
     -+
     -+			if(!ref->rest)
     -+				return strbuf_addf_ret(err, -1, _("missing path for '%s'"),
     -+						       oid_to_hex(&act_oi.oid));
     -+			if (act_oi.type == OBJ_BLOB) {
     -+				if (textconv_object(the_repository,
     -+						    ref->rest, 0100644, &act_oi.oid,
     -+						    1, (char **)(&act_oi.content), &act_oi.size)) {
     -+					actual_oi = &act_oi;
     -+					goto success;
     -+				}
     ++			if (textconv_object(the_repository,
     ++					    ref->rest, 0100644, &act_oi.oid,
     ++					    1, (char **)(&act_oi.content), &act_oi.size)) {
     ++				actual_oi = &act_oi;
     ++				goto success;
      +			}
      +		}
       	}
     @@ ref-filter.c: static int get_object(struct ref_array_item *ref, int deref, struc
       
       	if (oi->info.contentp) {
      -		*obj = parse_object_buffer(the_repository, &oi->oid, oi->type, oi->size, oi->content, &eaten);
     -+		if (use_filters) {
     -+			if(!ref->rest)
     -+				return strbuf_addf_ret(err, -1, _("missing path for '%s'"),
     -+						       oid_to_hex(&oi->oid));
     -+			if (oi->type == OBJ_BLOB) {
     -+				struct strbuf strbuf = STRBUF_INIT;
     -+				struct checkout_metadata meta;
     -+				act_oi = *oi;
     ++		if (use_filters && !ref->rest)
     ++			return strbuf_addf_ret(err, -1, _("missing path for '%s'"),
     ++					       oid_to_hex(&oi->oid));
     ++		if (use_filters && oi->type == OBJ_BLOB) {
     ++			struct strbuf strbuf = STRBUF_INIT;
     ++			struct checkout_metadata meta;
     ++			act_oi = *oi;
      +
     -+				init_checkout_metadata(&meta, NULL, NULL, &act_oi.oid);
     -+				if (convert_to_working_tree(&the_index, ref->rest, act_oi.content, act_oi.size, &strbuf, &meta)) {
     -+					act_oi.size = strbuf.len;
     -+					act_oi.content = strbuf_detach(&strbuf, NULL);
     -+					actual_oi = &act_oi;
     -+				} else {
     -+					die("could not convert '%s' %s",
     -+					    oid_to_hex(&oi->oid), ref->rest);
     -+				}
     -+			}
     ++			init_checkout_metadata(&meta, NULL, NULL, &act_oi.oid);
     ++			if (!convert_to_working_tree(&the_index, ref->rest, act_oi.content, act_oi.size, &strbuf, &meta))
     ++				die("could not convert '%s' %s",
     ++					oid_to_hex(&oi->oid), ref->rest);
     ++			act_oi.size = strbuf.len;
     ++			act_oi.content = strbuf_detach(&strbuf, NULL);
     ++			actual_oi = &act_oi;
      +		}
      +
      +success:
     @@ ref-filter.h: struct ref_format {
       };
       
      -#define REF_FORMAT_INIT { NULL, NULL, 0, 0, 0, -1 }
     -+#define REF_FORMAT_INIT { NULL, NULL, 0, 0, 0, 0, 0, -1 }
     ++#define REF_FORMAT_INIT { .use_color = -1 }
       
       /*  Macros for checking --merged and --no-merged options */
       #define _OPT_MERGED_NO_MERGED(option, filter, h) \

-- 
gitgitgadget

^ permalink raw reply	[flat|nested] 121+ messages in thread

* [PATCH v3 01/10] [GSOC] ref-filter: add obj-type check in grab contents
  2021-06-19  7:02   ` [PATCH v3 00/10] " ZheNing Hu via GitGitGadget
@ 2021-06-19  7:02     ` ZheNing Hu via GitGitGadget
  2021-06-19  7:02     ` [PATCH v3 02/10] [GSOC] ref-filter: add %(raw) atom ZheNing Hu via GitGitGadget
                       ` (9 subsequent siblings)
  10 siblings, 0 replies; 121+ messages in thread
From: ZheNing Hu via GitGitGadget @ 2021-06-19  7:02 UTC (permalink / raw)
  To: git
  Cc: Junio C Hamano, Christian Couder, Hariom Verma, Bagas Sanjaya,
	Jeff King, Ævar Arnfjörð Bjarmason, ZheNing Hu,
	ZheNing Hu

From: ZheNing Hu <adlternative@gmail.com>

Only tag and commit objects use `grab_sub_body_contents()` to grab
object contents in the current codebase.  We want to teach the
function to also handle blobs and trees to get their raw data,
without parsing a blob (whose contents looks like a commit or a tag)
incorrectly as a commit or a tag.

Skip the block of code that is specific to handling commits and tags
early when the given object is of a wrong type to help later
addition to handle other types of objects in this function.

Mentored-by: Christian Couder <christian.couder@gmail.com>
Mentored-by: Hariom Verma <hariom18599@gmail.com>
Helped-by: Junio C Hamano <gitster@pobox.com>
Signed-off-by: ZheNing Hu <adlternative@gmail.com>
---
 ref-filter.c | 24 +++++++++++++++---------
 1 file changed, 15 insertions(+), 9 deletions(-)

diff --git a/ref-filter.c b/ref-filter.c
index 4db0e40ff4c6..5cee6512fbaf 100644
--- a/ref-filter.c
+++ b/ref-filter.c
@@ -1356,11 +1356,12 @@ static void append_lines(struct strbuf *out, const char *buf, unsigned long size
 }
 
 /* See grab_values */
-static void grab_sub_body_contents(struct atom_value *val, int deref, void *buf)
+static void grab_sub_body_contents(struct atom_value *val, int deref, struct expand_data *data)
 {
 	int i;
 	const char *subpos = NULL, *bodypos = NULL, *sigpos = NULL;
 	size_t sublen = 0, bodylen = 0, nonsiglen = 0, siglen = 0;
+	void *buf = data->content;
 
 	for (i = 0; i < used_atom_cnt; i++) {
 		struct used_atom *atom = &used_atom[i];
@@ -1371,10 +1372,13 @@ static void grab_sub_body_contents(struct atom_value *val, int deref, void *buf)
 			continue;
 		if (deref)
 			name++;
-		if (strcmp(name, "body") &&
-		    !starts_with(name, "subject") &&
-		    !starts_with(name, "trailers") &&
-		    !starts_with(name, "contents"))
+
+		if ((data->type != OBJ_TAG &&
+		     data->type != OBJ_COMMIT) ||
+		    (strcmp(name, "body") &&
+		     !starts_with(name, "subject") &&
+		     !starts_with(name, "trailers") &&
+		     !starts_with(name, "contents")))
 			continue;
 		if (!subpos)
 			find_subpos(buf,
@@ -1438,17 +1442,19 @@ static void fill_missing_values(struct atom_value *val)
  * pointed at by the ref itself; otherwise it is the object the
  * ref (which is a tag) refers to.
  */
-static void grab_values(struct atom_value *val, int deref, struct object *obj, void *buf)
+static void grab_values(struct atom_value *val, int deref, struct object *obj, struct expand_data *data)
 {
+	void *buf = data->content;
+
 	switch (obj->type) {
 	case OBJ_TAG:
 		grab_tag_values(val, deref, obj);
-		grab_sub_body_contents(val, deref, buf);
+		grab_sub_body_contents(val, deref, data);
 		grab_person("tagger", val, deref, buf);
 		break;
 	case OBJ_COMMIT:
 		grab_commit_values(val, deref, obj);
-		grab_sub_body_contents(val, deref, buf);
+		grab_sub_body_contents(val, deref, data);
 		grab_person("author", val, deref, buf);
 		grab_person("committer", val, deref, buf);
 		break;
@@ -1678,7 +1684,7 @@ static int get_object(struct ref_array_item *ref, int deref, struct object **obj
 			return strbuf_addf_ret(err, -1, _("parse_object_buffer failed on %s for %s"),
 					       oid_to_hex(&oi->oid), ref->refname);
 		}
-		grab_values(ref->value, deref, *obj, oi->content);
+		grab_values(ref->value, deref, *obj, oi);
 	}
 
 	grab_common_values(ref->value, deref, oi);
-- 
gitgitgadget


^ permalink raw reply	[flat|nested] 121+ messages in thread

* [PATCH v3 02/10] [GSOC] ref-filter: add %(raw) atom
  2021-06-19  7:02   ` [PATCH v3 00/10] " ZheNing Hu via GitGitGadget
  2021-06-19  7:02     ` [PATCH v3 01/10] [GSOC] ref-filter: add obj-type check in grab contents ZheNing Hu via GitGitGadget
@ 2021-06-19  7:02     ` ZheNing Hu via GitGitGadget
  2021-06-19  7:02     ` [PATCH v3 03/10] [GSOC] ref-filter: --format=%(raw) re-support --perl ZheNing Hu via GitGitGadget
                       ` (8 subsequent siblings)
  10 siblings, 0 replies; 121+ messages in thread
From: ZheNing Hu via GitGitGadget @ 2021-06-19  7:02 UTC (permalink / raw)
  To: git
  Cc: Junio C Hamano, Christian Couder, Hariom Verma, Bagas Sanjaya,
	Jeff King, Ævar Arnfjörð Bjarmason, ZheNing Hu,
	ZheNing Hu

From: ZheNing Hu <adlternative@gmail.com>

Add new formatting option `%(raw)`, which will print the raw
object data without any changes. It will help further to migrate
all cat-file formatting logic from cat-file to ref-filter.

The raw data of blob, tree objects may contain '\0', but most of
the logic in `ref-filter` depends on the output of the atom being
text (specifically, no embedded NULs in it).

E.g. `quote_formatting()` use `strbuf_addstr()` or `*._quote_buf()`
add the data to the buffer. The raw data of a tree object is
`100644 one\0...`, only the `100644 one` will be added to the buffer,
which is incorrect.

Therefore, we need to find a way to record the length of the
atom_value's member `s`. Although strbuf can already record the
string and its length, if we want to replace the type of atom_value's
member `s` with strbuf, many places in ref-filter that are filled
with dynamically allocated mermory in `v->s` are not easy to replace.
At the same time, we need to check if `v->s == NULL` in
populate_value(), and strbuf cannot easily distinguish NULL and empty
strings, but c-style "const char *" can do it. So add a new member in
`struct atom_value`: `s_size`, which can record raw object size, it
can help us add raw object data to the buffer or compare two buffers
which contain raw object data.

Beyond, `--format=%(raw)` cannot be used with `--python`, `--shell`,
`--tcl`, `--perl` because if our binary raw data is passed to a
variable in the host language, the host language may not support
arbitrary binary data in the variables of its string type.

Mentored-by: Christian Couder <christian.couder@gmail.com>
Mentored-by: Hariom Verma <hariom18599@gmail.com>
Helped-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Helped-by: Felipe Contreras <felipe.contreras@gmail.com>
Helped-by: Phillip Wood <phillip.wood@dunelm.org.uk>
Helped-by: Junio C Hamano <gitster@pobox.com>
Based-on-patch-by: Olga Telezhnaya <olyatelezhnaya@gmail.com>
Signed-off-by: ZheNing Hu <adlternative@gmail.com>
---
 Documentation/git-for-each-ref.txt |   9 ++
 ref-filter.c                       | 139 +++++++++++++++----
 t/t6300-for-each-ref.sh            | 216 +++++++++++++++++++++++++++++
 3 files changed, 337 insertions(+), 27 deletions(-)

diff --git a/Documentation/git-for-each-ref.txt b/Documentation/git-for-each-ref.txt
index 2ae2478de706..7f1f0a1ca3b6 100644
--- a/Documentation/git-for-each-ref.txt
+++ b/Documentation/git-for-each-ref.txt
@@ -235,6 +235,15 @@ and `date` to extract the named component.  For email fields (`authoremail`,
 without angle brackets, and `:localpart` to get the part before the `@` symbol
 out of the trimmed email.
 
+The raw data in an object is `raw`.
+
+raw:size::
+	The raw data size of the object.
+
+Note that `--format=%(raw)` can not be used with `--python`, `--shell`, `--tcl`,
+`--perl` because the host language may not support arbitrary binary data in the
+variables of its string type.
+
 The message in a commit or a tag object is `contents`, from which
 `contents:<part>` can be used to extract various parts out of:
 
diff --git a/ref-filter.c b/ref-filter.c
index 5cee6512fbaf..7822be903071 100644
--- a/ref-filter.c
+++ b/ref-filter.c
@@ -144,6 +144,7 @@ enum atom_type {
 	ATOM_BODY,
 	ATOM_TRAILERS,
 	ATOM_CONTENTS,
+	ATOM_RAW,
 	ATOM_UPSTREAM,
 	ATOM_PUSH,
 	ATOM_SYMREF,
@@ -189,6 +190,9 @@ static struct used_atom {
 			struct process_trailer_options trailer_opts;
 			unsigned int nlines;
 		} contents;
+		struct {
+			enum { RAW_BARE, RAW_LENGTH } option;
+		} raw_data;
 		struct {
 			cmp_status cmp_status;
 			const char *str;
@@ -426,6 +430,18 @@ static int contents_atom_parser(const struct ref_format *format, struct used_ato
 	return 0;
 }
 
+static int raw_atom_parser(const struct ref_format *format, struct used_atom *atom,
+				const char *arg, struct strbuf *err)
+{
+	if (!arg)
+		atom->u.raw_data.option = RAW_BARE;
+	else if (!strcmp(arg, "size"))
+		atom->u.raw_data.option = RAW_LENGTH;
+	else
+		return strbuf_addf_ret(err, -1, _("unrecognized %%(raw) argument: %s"), arg);
+	return 0;
+}
+
 static int oid_atom_parser(const struct ref_format *format, struct used_atom *atom,
 			   const char *arg, struct strbuf *err)
 {
@@ -586,6 +602,7 @@ static struct {
 	[ATOM_BODY] = { "body", SOURCE_OBJ, FIELD_STR, body_atom_parser },
 	[ATOM_TRAILERS] = { "trailers", SOURCE_OBJ, FIELD_STR, trailers_atom_parser },
 	[ATOM_CONTENTS] = { "contents", SOURCE_OBJ, FIELD_STR, contents_atom_parser },
+	[ATOM_RAW] = { "raw", SOURCE_OBJ, FIELD_STR, raw_atom_parser },
 	[ATOM_UPSTREAM] = { "upstream", SOURCE_NONE, FIELD_STR, remote_ref_atom_parser },
 	[ATOM_PUSH] = { "push", SOURCE_NONE, FIELD_STR, remote_ref_atom_parser },
 	[ATOM_SYMREF] = { "symref", SOURCE_NONE, FIELD_STR, refname_atom_parser },
@@ -620,12 +637,15 @@ struct ref_formatting_state {
 
 struct atom_value {
 	const char *s;
+	size_t s_size;
 	int (*handler)(struct atom_value *atomv, struct ref_formatting_state *state,
 		       struct strbuf *err);
 	uintmax_t value; /* used for sorting when not FIELD_STR */
 	struct used_atom *atom;
 };
 
+#define ATOM_VALUE_S_SIZE_INIT (-1)
+
 /*
  * Used to parse format string and sort specifiers
  */
@@ -644,13 +664,6 @@ static int parse_ref_filter_atom(const struct ref_format *format,
 		return strbuf_addf_ret(err, -1, _("malformed field name: %.*s"),
 				       (int)(ep-atom), atom);
 
-	/* Do we have the atom already used elsewhere? */
-	for (i = 0; i < used_atom_cnt; i++) {
-		int len = strlen(used_atom[i].name);
-		if (len == ep - atom && !memcmp(used_atom[i].name, atom, len))
-			return i;
-	}
-
 	/*
 	 * If the atom name has a colon, strip it and everything after
 	 * it off - it specifies the format for this entry, and
@@ -660,6 +673,13 @@ static int parse_ref_filter_atom(const struct ref_format *format,
 	arg = memchr(sp, ':', ep - sp);
 	atom_len = (arg ? arg : ep) - sp;
 
+	/* Do we have the atom already used elsewhere? */
+	for (i = 0; i < used_atom_cnt; i++) {
+		int len = strlen(used_atom[i].name);
+		if (len == ep - atom && !memcmp(used_atom[i].name, atom, len))
+			return i;
+	}
+
 	/* Is the atom a valid one? */
 	for (i = 0; i < ARRAY_SIZE(valid_atom); i++) {
 		int len = strlen(valid_atom[i].name);
@@ -709,11 +729,14 @@ static int parse_ref_filter_atom(const struct ref_format *format,
 	return at;
 }
 
-static void quote_formatting(struct strbuf *s, const char *str, int quote_style)
+static void quote_formatting(struct strbuf *s, const char *str, size_t len, int quote_style)
 {
 	switch (quote_style) {
 	case QUOTE_NONE:
-		strbuf_addstr(s, str);
+		if (len != ATOM_VALUE_S_SIZE_INIT)
+			strbuf_add(s, str, len);
+		else
+			strbuf_addstr(s, str);
 		break;
 	case QUOTE_SHELL:
 		sq_quote_buf(s, str);
@@ -740,9 +763,12 @@ static int append_atom(struct atom_value *v, struct ref_formatting_state *state,
 	 * encountered.
 	 */
 	if (!state->stack->prev)
-		quote_formatting(&state->stack->output, v->s, state->quote_style);
+		quote_formatting(&state->stack->output, v->s, v->s_size, state->quote_style);
 	else
-		strbuf_addstr(&state->stack->output, v->s);
+		if (v->s_size != ATOM_VALUE_S_SIZE_INIT)
+			strbuf_add(&state->stack->output, v->s, v->s_size);
+		else
+			strbuf_addstr(&state->stack->output, v->s);
 	return 0;
 }
 
@@ -842,21 +868,23 @@ static int if_atom_handler(struct atom_value *atomv, struct ref_formatting_state
 	return 0;
 }
 
-static int is_empty(const char *s)
+static int is_empty(struct strbuf *buf)
 {
-	while (*s != '\0') {
-		if (!isspace(*s))
-			return 0;
-		s++;
-	}
-	return 1;
-}
+	const char *cur = buf->buf;
+	const char *end = buf->buf + buf->len;
+
+	while (cur != end && (isspace(*cur)))
+		cur++;
+
+	return cur == end;
+ }
 
 static int then_atom_handler(struct atom_value *atomv, struct ref_formatting_state *state,
 			     struct strbuf *err)
 {
 	struct ref_formatting_stack *cur = state->stack;
 	struct if_then_else *if_then_else = NULL;
+	size_t str_len = 0;
 
 	if (cur->at_end == if_then_else_handler)
 		if_then_else = (struct if_then_else *)cur->at_end_data;
@@ -867,18 +895,22 @@ static int then_atom_handler(struct atom_value *atomv, struct ref_formatting_sta
 	if (if_then_else->else_atom_seen)
 		return strbuf_addf_ret(err, -1, _("format: %%(then) atom used after %%(else)"));
 	if_then_else->then_atom_seen = 1;
+	if (if_then_else->str)
+		str_len = strlen(if_then_else->str);
 	/*
 	 * If the 'equals' or 'notequals' attribute is used then
 	 * perform the required comparison. If not, only non-empty
 	 * strings satisfy the 'if' condition.
 	 */
 	if (if_then_else->cmp_status == COMPARE_EQUAL) {
-		if (!strcmp(if_then_else->str, cur->output.buf))
+		if (str_len == cur->output.len &&
+		    !memcmp(if_then_else->str, cur->output.buf, cur->output.len))
 			if_then_else->condition_satisfied = 1;
 	} else if (if_then_else->cmp_status == COMPARE_UNEQUAL) {
-		if (strcmp(if_then_else->str, cur->output.buf))
+		if (str_len != cur->output.len ||
+		    memcmp(if_then_else->str, cur->output.buf, cur->output.len))
 			if_then_else->condition_satisfied = 1;
-	} else if (cur->output.len && !is_empty(cur->output.buf))
+	} else if (cur->output.len && !is_empty(&cur->output))
 		if_then_else->condition_satisfied = 1;
 	strbuf_reset(&cur->output);
 	return 0;
@@ -924,7 +956,7 @@ static int end_atom_handler(struct atom_value *atomv, struct ref_formatting_stat
 	 * only on the topmost supporting atom.
 	 */
 	if (!current->prev->prev) {
-		quote_formatting(&s, current->output.buf, state->quote_style);
+		quote_formatting(&s, current->output.buf, current->output.len, state->quote_style);
 		strbuf_swap(&current->output, &s);
 	}
 	strbuf_release(&s);
@@ -974,6 +1006,10 @@ int verify_ref_format(struct ref_format *format)
 		at = parse_ref_filter_atom(format, sp + 2, ep, &err);
 		if (at < 0)
 			die("%s", err.buf);
+		if (format->quote_style && used_atom[at].atom_type == ATOM_RAW &&
+		    used_atom[at].u.raw_data.option == RAW_BARE)
+			die(_("--format=%.*s cannot be used with"
+			      "--python, --shell, --tcl, --perl"), (int)(ep - sp - 2), sp + 2);
 		cp = ep + 1;
 
 		if (skip_prefix(used_atom[at].name, "color:", &color))
@@ -1362,17 +1398,29 @@ static void grab_sub_body_contents(struct atom_value *val, int deref, struct exp
 	const char *subpos = NULL, *bodypos = NULL, *sigpos = NULL;
 	size_t sublen = 0, bodylen = 0, nonsiglen = 0, siglen = 0;
 	void *buf = data->content;
+	unsigned long buf_size = data->size;
 
 	for (i = 0; i < used_atom_cnt; i++) {
 		struct used_atom *atom = &used_atom[i];
 		const char *name = atom->name;
 		struct atom_value *v = &val[i];
+		enum atom_type atom_type = atom->atom_type;
 
 		if (!!deref != (*name == '*'))
 			continue;
 		if (deref)
 			name++;
 
+		if (atom_type == ATOM_RAW) {
+			if (atom->u.raw_data.option == RAW_BARE) {
+				v->s = xmemdupz(buf, buf_size);
+				v->s_size = buf_size;
+			} else if (atom->u.raw_data.option == RAW_LENGTH) {
+				v->s = xstrfmt("%"PRIuMAX, (uintmax_t)buf_size);
+			}
+			continue;
+		}
+
 		if ((data->type != OBJ_TAG &&
 		     data->type != OBJ_COMMIT) ||
 		    (strcmp(name, "body") &&
@@ -1460,9 +1508,11 @@ static void grab_values(struct atom_value *val, int deref, struct object *obj, s
 		break;
 	case OBJ_TREE:
 		/* grab_tree_values(val, deref, obj, buf, sz); */
+		grab_sub_body_contents(val, deref, data);
 		break;
 	case OBJ_BLOB:
 		/* grab_blob_values(val, deref, obj, buf, sz); */
+		grab_sub_body_contents(val, deref, data);
 		break;
 	default:
 		die("Eh?  Object of type %d?", obj->type);
@@ -1766,6 +1816,7 @@ static int populate_value(struct ref_array_item *ref, struct strbuf *err)
 		const char *refname;
 		struct branch *branch = NULL;
 
+		v->s_size = ATOM_VALUE_S_SIZE_INIT;
 		v->handler = append_atom;
 		v->atom = atom;
 
@@ -2369,6 +2420,19 @@ static int compare_detached_head(struct ref_array_item *a, struct ref_array_item
 	return 0;
 }
 
+static int memcasecmp(const void *vs1, const void *vs2, size_t n)
+{
+	const char *s1 = vs1, *s2 = vs2;
+	const char *end = s1 + n;
+
+	for (; s1 < end; s1++, s2++) {
+		int diff = tolower(*s1) - tolower(*s2);
+		if (diff)
+			return diff;
+	}
+	return 0;
+}
+
 static int cmp_ref_sorting(struct ref_sorting *s, struct ref_array_item *a, struct ref_array_item *b)
 {
 	struct atom_value *va, *vb;
@@ -2389,10 +2453,30 @@ static int cmp_ref_sorting(struct ref_sorting *s, struct ref_array_item *a, stru
 	} else if (s->sort_flags & REF_SORTING_VERSION) {
 		cmp = versioncmp(va->s, vb->s);
 	} else if (cmp_type == FIELD_STR) {
-		int (*cmp_fn)(const char *, const char *);
-		cmp_fn = s->sort_flags & REF_SORTING_ICASE
-			? strcasecmp : strcmp;
-		cmp = cmp_fn(va->s, vb->s);
+		if (va->s_size == ATOM_VALUE_S_SIZE_INIT &&
+		    vb->s_size == ATOM_VALUE_S_SIZE_INIT) {
+			int (*cmp_fn)(const char *, const char *);
+			cmp_fn = s->sort_flags & REF_SORTING_ICASE
+				? strcasecmp : strcmp;
+			cmp = cmp_fn(va->s, vb->s);
+		} else {
+			size_t a_size = va->s_size == ATOM_VALUE_S_SIZE_INIT ?
+					strlen(va->s) : va->s_size;
+			size_t b_size = vb->s_size == ATOM_VALUE_S_SIZE_INIT ?
+					strlen(vb->s) : vb->s_size;
+			int (*cmp_fn)(const void *, const void *, size_t);
+			cmp_fn = s->sort_flags & REF_SORTING_ICASE
+				? memcasecmp : memcmp;
+
+			cmp = cmp_fn(va->s, vb->s, b_size > a_size ?
+				     a_size : b_size);
+			if (!cmp) {
+				if (a_size > b_size)
+					cmp = 1;
+				else if (a_size < b_size)
+					cmp = -1;
+			}
+		}
 	} else {
 		if (va->value < vb->value)
 			cmp = -1;
@@ -2492,6 +2576,7 @@ int format_ref_array_item(struct ref_array_item *info,
 	}
 	if (format->need_color_reset_at_eol) {
 		struct atom_value resetv;
+		resetv.s_size = ATOM_VALUE_S_SIZE_INIT;
 		resetv.s = GIT_COLOR_RESET;
 		if (append_atom(&resetv, &state, error_buf)) {
 			pop_stack_element(&state.stack);
diff --git a/t/t6300-for-each-ref.sh b/t/t6300-for-each-ref.sh
index 9e0214076b4d..9c5379e2f56f 100755
--- a/t/t6300-for-each-ref.sh
+++ b/t/t6300-for-each-ref.sh
@@ -130,6 +130,8 @@ test_atom head parent:short=10 ''
 test_atom head numparent 0
 test_atom head object ''
 test_atom head type ''
+test_atom head raw "$(git cat-file commit refs/heads/main)
+"
 test_atom head '*objectname' ''
 test_atom head '*objecttype' ''
 test_atom head author 'A U Thor <author@example.com> 1151968724 +0200'
@@ -221,6 +223,15 @@ test_atom tag contents 'Tagging at 1151968727
 '
 test_atom tag HEAD ' '
 
+test_expect_success 'basic atom: refs/tags/testtag *raw' '
+	git cat-file commit refs/tags/testtag^{} >expected &&
+	git for-each-ref --format="%(*raw)" refs/tags/testtag >actual &&
+	sanitize_pgp <expected >expected.clean &&
+	echo >>expected.clean &&
+	sanitize_pgp <actual >actual.clean &&
+	test_cmp expected.clean actual.clean
+'
+
 test_expect_success 'Check invalid atoms names are errors' '
 	test_must_fail git for-each-ref --format="%(INVALID)" refs/heads
 '
@@ -686,6 +697,15 @@ test_atom refs/tags/signed-empty contents:body ''
 test_atom refs/tags/signed-empty contents:signature "$sig"
 test_atom refs/tags/signed-empty contents "$sig"
 
+test_expect_success GPG 'basic atom: refs/tags/signed-empty raw' '
+	git cat-file tag refs/tags/signed-empty >expected &&
+	git for-each-ref --format="%(raw)" refs/tags/signed-empty >actual &&
+	sanitize_pgp <expected >expected.clean &&
+	echo >>expected.clean &&
+	sanitize_pgp <actual >actual.clean &&
+	test_cmp expected.clean actual.clean
+'
+
 test_atom refs/tags/signed-short subject 'subject line'
 test_atom refs/tags/signed-short subject:sanitize 'subject-line'
 test_atom refs/tags/signed-short contents:subject 'subject line'
@@ -695,6 +715,15 @@ test_atom refs/tags/signed-short contents:signature "$sig"
 test_atom refs/tags/signed-short contents "subject line
 $sig"
 
+test_expect_success GPG 'basic atom: refs/tags/signed-short raw' '
+	git cat-file tag refs/tags/signed-short >expected &&
+	git for-each-ref --format="%(raw)" refs/tags/signed-short >actual &&
+	sanitize_pgp <expected >expected.clean &&
+	echo >>expected.clean &&
+	sanitize_pgp <actual >actual.clean &&
+	test_cmp expected.clean actual.clean
+'
+
 test_atom refs/tags/signed-long subject 'subject line'
 test_atom refs/tags/signed-long subject:sanitize 'subject-line'
 test_atom refs/tags/signed-long contents:subject 'subject line'
@@ -708,6 +737,15 @@ test_atom refs/tags/signed-long contents "subject line
 body contents
 $sig"
 
+test_expect_success GPG 'basic atom: refs/tags/signed-long raw' '
+	git cat-file tag refs/tags/signed-long >expected &&
+	git for-each-ref --format="%(raw)" refs/tags/signed-long >actual &&
+	sanitize_pgp <expected >expected.clean &&
+	echo >>expected.clean &&
+	sanitize_pgp <actual >actual.clean &&
+	test_cmp expected.clean actual.clean
+'
+
 test_expect_success 'set up refs pointing to tree and blob' '
 	git update-ref refs/mytrees/first refs/heads/main^{tree} &&
 	git update-ref refs/myblobs/first refs/heads/main:one
@@ -720,6 +758,16 @@ test_atom refs/mytrees/first contents:body ""
 test_atom refs/mytrees/first contents:signature ""
 test_atom refs/mytrees/first contents ""
 
+test_expect_success 'basic atom: refs/mytrees/first raw' '
+	git cat-file tree refs/mytrees/first >expected &&
+	echo >>expected &&
+	git for-each-ref --format="%(raw)" refs/mytrees/first >actual &&
+	test_cmp expected actual &&
+	git cat-file -s refs/mytrees/first >expected &&
+	git for-each-ref --format="%(raw:size)" refs/mytrees/first >actual &&
+	test_cmp expected actual
+'
+
 test_atom refs/myblobs/first subject ""
 test_atom refs/myblobs/first contents:subject ""
 test_atom refs/myblobs/first body ""
@@ -727,6 +775,174 @@ test_atom refs/myblobs/first contents:body ""
 test_atom refs/myblobs/first contents:signature ""
 test_atom refs/myblobs/first contents ""
 
+test_expect_success 'basic atom: refs/myblobs/first raw' '
+	git cat-file blob refs/myblobs/first >expected &&
+	echo >>expected &&
+	git for-each-ref --format="%(raw)" refs/myblobs/first >actual &&
+	test_cmp expected actual &&
+	git cat-file -s refs/myblobs/first >expected &&
+	git for-each-ref --format="%(raw:size)" refs/myblobs/first >actual &&
+	test_cmp expected actual
+'
+
+test_expect_success 'set up refs pointing to binary blob' '
+	printf "a\0b\0c" >blob1 &&
+	printf "a\0c\0b" >blob2 &&
+	printf "\0a\0b\0c" >blob3 &&
+	printf "abc" >blob4 &&
+	printf "\0 \0 \0 " >blob5 &&
+	printf "\0 \0a\0 " >blob6 &&
+	printf "  " >blob7 &&
+	>blob8 &&
+	obj=$(git hash-object -w blob1) &&
+        git update-ref refs/myblobs/blob1 "$obj" &&
+	obj=$(git hash-object -w blob2) &&
+        git update-ref refs/myblobs/blob2 "$obj" &&
+	obj=$(git hash-object -w blob3) &&
+        git update-ref refs/myblobs/blob3 "$obj" &&
+	obj=$(git hash-object -w blob4) &&
+        git update-ref refs/myblobs/blob4 "$obj" &&
+	obj=$(git hash-object -w blob5) &&
+        git update-ref refs/myblobs/blob5 "$obj" &&
+	obj=$(git hash-object -w blob6) &&
+        git update-ref refs/myblobs/blob6 "$obj" &&
+	obj=$(git hash-object -w blob7) &&
+        git update-ref refs/myblobs/blob7 "$obj" &&
+	obj=$(git hash-object -w blob8) &&
+        git update-ref refs/myblobs/blob8 "$obj"
+'
+
+test_expect_success 'Verify sorts with raw' '
+	cat >expected <<-EOF &&
+	refs/myblobs/blob8
+	refs/myblobs/blob5
+	refs/myblobs/blob6
+	refs/myblobs/blob3
+	refs/myblobs/blob7
+	refs/mytrees/first
+	refs/myblobs/first
+	refs/myblobs/blob1
+	refs/myblobs/blob2
+	refs/myblobs/blob4
+	refs/heads/main
+	EOF
+	git for-each-ref --format="%(refname)" --sort=raw \
+		refs/heads/main refs/myblobs/ refs/mytrees/first >actual &&
+	test_cmp expected actual
+'
+
+test_expect_success 'Verify sorts with raw:size' '
+	cat >expected <<-EOF &&
+	refs/myblobs/blob8
+	refs/myblobs/first
+	refs/myblobs/blob7
+	refs/heads/main
+	refs/myblobs/blob4
+	refs/myblobs/blob1
+	refs/myblobs/blob2
+	refs/myblobs/blob3
+	refs/myblobs/blob5
+	refs/myblobs/blob6
+	refs/mytrees/first
+	EOF
+	git for-each-ref --format="%(refname)" --sort=raw:size \
+		refs/heads/main refs/myblobs/ refs/mytrees/first >actual &&
+	test_cmp expected actual
+'
+
+test_expect_success 'validate raw atom with %(if:equals)' '
+	cat >expected <<-EOF &&
+	not equals
+	not equals
+	not equals
+	not equals
+	not equals
+	not equals
+	refs/myblobs/blob4
+	not equals
+	not equals
+	not equals
+	not equals
+	not equals
+	EOF
+	git for-each-ref --format="%(if:equals=abc)%(raw)%(then)%(refname)%(else)not equals%(end)" \
+		refs/myblobs/ refs/heads/ >actual &&
+	test_cmp expected actual
+'
+
+test_expect_success 'validate raw atom with %(if:notequals)' '
+	cat >expected <<-EOF &&
+	refs/heads/ambiguous
+	refs/heads/main
+	refs/heads/newtag
+	refs/myblobs/blob1
+	refs/myblobs/blob2
+	refs/myblobs/blob3
+	equals
+	refs/myblobs/blob5
+	refs/myblobs/blob6
+	refs/myblobs/blob7
+	refs/myblobs/blob8
+	refs/myblobs/first
+	EOF
+	git for-each-ref --format="%(if:notequals=abc)%(raw)%(then)%(refname)%(else)equals%(end)" \
+		refs/myblobs/ refs/heads/ >actual &&
+	test_cmp expected actual
+'
+
+test_expect_success 'empty raw refs with %(if)' '
+	cat >expected <<-EOF &&
+	refs/myblobs/blob1 not empty
+	refs/myblobs/blob2 not empty
+	refs/myblobs/blob3 not empty
+	refs/myblobs/blob4 not empty
+	refs/myblobs/blob5 not empty
+	refs/myblobs/blob6 not empty
+	refs/myblobs/blob7 empty
+	refs/myblobs/blob8 empty
+	refs/myblobs/first not empty
+	EOF
+	git for-each-ref --format="%(refname) %(if)%(raw)%(then)not empty%(else)empty%(end)" \
+		refs/myblobs/ >actual &&
+	test_cmp expected actual
+'
+
+test_expect_success '%(raw) with --python must fail' '
+	test_must_fail git for-each-ref --format="%(raw)" --python
+'
+
+test_expect_success '%(raw) with --tcl must fail' '
+	test_must_fail git for-each-ref --format="%(raw)" --tcl
+'
+
+test_expect_success '%(raw) with --perl must fail' '
+	test_must_fail git for-each-ref --format="%(raw)" --perl
+'
+
+test_expect_success '%(raw) with --shell must fail' '
+	test_must_fail git for-each-ref --format="%(raw)" --shell
+'
+
+test_expect_success '%(raw) with --shell and --sort=raw must fail' '
+	test_must_fail git for-each-ref --format="%(raw)" --sort=raw --shell
+'
+
+test_expect_success '%(raw:size) with --shell' '
+	git for-each-ref --format="%(raw:size)" | while read line
+	do
+		echo "'\''$line'\''" >>expect
+	done &&
+	git for-each-ref --format="%(raw:size)" --shell >actual &&
+	test_cmp expect actual
+'
+
+test_expect_success 'for-each-ref --format compare with cat-file --batch' '
+	git rev-parse refs/mytrees/first | git cat-file --batch >expected &&
+	git for-each-ref --format="%(objectname) %(objecttype) %(objectsize)
+%(raw)" refs/mytrees/first >actual &&
+	test_cmp expected actual
+'
+
 test_expect_success 'set up multiple-sort tags' '
 	for when in 100000 200000
 	do
-- 
gitgitgadget


^ permalink raw reply	[flat|nested] 121+ messages in thread

* [PATCH v3 03/10] [GSOC] ref-filter: --format=%(raw) re-support --perl
  2021-06-19  7:02   ` [PATCH v3 00/10] " ZheNing Hu via GitGitGadget
  2021-06-19  7:02     ` [PATCH v3 01/10] [GSOC] ref-filter: add obj-type check in grab contents ZheNing Hu via GitGitGadget
  2021-06-19  7:02     ` [PATCH v3 02/10] [GSOC] ref-filter: add %(raw) atom ZheNing Hu via GitGitGadget
@ 2021-06-19  7:02     ` ZheNing Hu via GitGitGadget
  2021-06-19  7:02     ` [PATCH v3 04/10] [GSOC] ref-filter: use non-const ref_format in *_atom_parser() ZheNing Hu via GitGitGadget
                       ` (7 subsequent siblings)
  10 siblings, 0 replies; 121+ messages in thread
From: ZheNing Hu via GitGitGadget @ 2021-06-19  7:02 UTC (permalink / raw)
  To: git
  Cc: Junio C Hamano, Christian Couder, Hariom Verma, Bagas Sanjaya,
	Jeff King, Ævar Arnfjörð Bjarmason, ZheNing Hu,
	ZheNing Hu

From: ZheNing Hu <adlternative@gmail.com>

Because the perl language can handle binary data correctly,
add the function perl_quote_buf_with_len(), which can specify
the length of the data and prevent the data from being truncated
at '\0' to help `--format="%(raw)"` re-support `--perl`.

Helped-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: ZheNing Hu <adlternative@gmail.com>
---
 Documentation/git-for-each-ref.txt |  2 +-
 quote.c                            | 17 +++++++++++++++++
 quote.h                            |  1 +
 ref-filter.c                       | 15 +++++++++++----
 t/t6300-for-each-ref.sh            | 19 +++++++++++++++++--
 5 files changed, 47 insertions(+), 7 deletions(-)

diff --git a/Documentation/git-for-each-ref.txt b/Documentation/git-for-each-ref.txt
index 7f1f0a1ca3b6..ea9b438c16f6 100644
--- a/Documentation/git-for-each-ref.txt
+++ b/Documentation/git-for-each-ref.txt
@@ -241,7 +241,7 @@ raw:size::
 	The raw data size of the object.
 
 Note that `--format=%(raw)` can not be used with `--python`, `--shell`, `--tcl`,
-`--perl` because the host language may not support arbitrary binary data in the
+because the host language may not support arbitrary binary data in the
 variables of its string type.
 
 The message in a commit or a tag object is `contents`, from which
diff --git a/quote.c b/quote.c
index 8a3a5e39eb12..26719d21d1e7 100644
--- a/quote.c
+++ b/quote.c
@@ -471,6 +471,23 @@ void perl_quote_buf(struct strbuf *sb, const char *src)
 	strbuf_addch(sb, sq);
 }
 
+void perl_quote_buf_with_len(struct strbuf *sb, const char *src, size_t len)
+{
+	const char sq = '\'';
+	const char bq = '\\';
+	const char *c = src;
+	const char *end = src + len;
+
+	strbuf_addch(sb, sq);
+	while (c != end) {
+		if (*c == sq || *c == bq)
+			strbuf_addch(sb, bq);
+		strbuf_addch(sb, *c);
+		c++;
+	}
+	strbuf_addch(sb, sq);
+}
+
 void python_quote_buf(struct strbuf *sb, const char *src)
 {
 	const char sq = '\'';
diff --git a/quote.h b/quote.h
index 768cc6338e27..0fe69e264b0e 100644
--- a/quote.h
+++ b/quote.h
@@ -94,6 +94,7 @@ char *quote_path(const char *in, const char *prefix, struct strbuf *out, unsigne
 
 /* quoting as a string literal for other languages */
 void perl_quote_buf(struct strbuf *sb, const char *src);
+void perl_quote_buf_with_len(struct strbuf *sb, const char *src, size_t len);
 void python_quote_buf(struct strbuf *sb, const char *src);
 void tcl_quote_buf(struct strbuf *sb, const char *src);
 void basic_regex_quote_buf(struct strbuf *sb, const char *src);
diff --git a/ref-filter.c b/ref-filter.c
index 7822be903071..797b20ffa612 100644
--- a/ref-filter.c
+++ b/ref-filter.c
@@ -742,7 +742,10 @@ static void quote_formatting(struct strbuf *s, const char *str, size_t len, int
 		sq_quote_buf(s, str);
 		break;
 	case QUOTE_PERL:
-		perl_quote_buf(s, str);
+		if (len != ATOM_VALUE_S_SIZE_INIT)
+			perl_quote_buf_with_len(s, str, len);
+		else
+			perl_quote_buf(s, str);
 		break;
 	case QUOTE_PYTHON:
 		python_quote_buf(s, str);
@@ -1006,10 +1009,14 @@ int verify_ref_format(struct ref_format *format)
 		at = parse_ref_filter_atom(format, sp + 2, ep, &err);
 		if (at < 0)
 			die("%s", err.buf);
-		if (format->quote_style && used_atom[at].atom_type == ATOM_RAW &&
-		    used_atom[at].u.raw_data.option == RAW_BARE)
+
+		if ((format->quote_style == QUOTE_PYTHON ||
+		     format->quote_style == QUOTE_SHELL ||
+		     format->quote_style == QUOTE_TCL) &&
+		     used_atom[at].atom_type == ATOM_RAW &&
+		     used_atom[at].u.raw_data.option == RAW_BARE)
 			die(_("--format=%.*s cannot be used with"
-			      "--python, --shell, --tcl, --perl"), (int)(ep - sp - 2), sp + 2);
+			      "--python, --shell, --tcl"), (int)(ep - sp - 2), sp + 2);
 		cp = ep + 1;
 
 		if (skip_prefix(used_atom[at].name, "color:", &color))
diff --git a/t/t6300-for-each-ref.sh b/t/t6300-for-each-ref.sh
index 9c5379e2f56f..5556063c347d 100755
--- a/t/t6300-for-each-ref.sh
+++ b/t/t6300-for-each-ref.sh
@@ -915,8 +915,23 @@ test_expect_success '%(raw) with --tcl must fail' '
 	test_must_fail git for-each-ref --format="%(raw)" --tcl
 '
 
-test_expect_success '%(raw) with --perl must fail' '
-	test_must_fail git for-each-ref --format="%(raw)" --perl
+test_expect_success '%(raw) with --perl' '
+	git for-each-ref --format="\$name= %(raw);
+print \"\$name\"" refs/myblobs/blob1 --perl | perl > actual &&
+	cmp blob1 actual &&
+	git for-each-ref --format="\$name= %(raw);
+print \"\$name\"" refs/myblobs/blob3 --perl | perl > actual &&
+	cmp blob3 actual &&
+	git for-each-ref --format="\$name= %(raw);
+print \"\$name\"" refs/myblobs/blob8 --perl | perl > actual &&
+	cmp blob8 actual &&
+	git for-each-ref --format="\$name= %(raw);
+print \"\$name\"" refs/myblobs/first --perl | perl > actual &&
+	cmp one actual &&
+	git cat-file tree refs/mytrees/first > expected &&
+	git for-each-ref --format="\$name= %(raw);
+print \"\$name\"" refs/mytrees/first --perl | perl > actual &&
+	cmp expected actual
 '
 
 test_expect_success '%(raw) with --shell must fail' '
-- 
gitgitgadget


^ permalink raw reply	[flat|nested] 121+ messages in thread

* [PATCH v3 04/10] [GSOC] ref-filter: use non-const ref_format in *_atom_parser()
  2021-06-19  7:02   ` [PATCH v3 00/10] " ZheNing Hu via GitGitGadget
                       ` (2 preceding siblings ...)
  2021-06-19  7:02     ` [PATCH v3 03/10] [GSOC] ref-filter: --format=%(raw) re-support --perl ZheNing Hu via GitGitGadget
@ 2021-06-19  7:02     ` ZheNing Hu via GitGitGadget
  2021-06-19  7:02     ` [PATCH v3 05/10] [GSOC] ref-filter: add %(rest) atom ZheNing Hu via GitGitGadget
                       ` (6 subsequent siblings)
  10 siblings, 0 replies; 121+ messages in thread
From: ZheNing Hu via GitGitGadget @ 2021-06-19  7:02 UTC (permalink / raw)
  To: git
  Cc: Junio C Hamano, Christian Couder, Hariom Verma, Bagas Sanjaya,
	Jeff King, Ævar Arnfjörð Bjarmason, ZheNing Hu,
	ZheNing Hu

From: ZheNing Hu <adlternative@gmail.com>

Use non-const ref_format in *_atom_parser(), which can help us
modify the members of ref_format in *_atom_parser().

Mentored-by: Christian Couder <christian.couder@gmail.com>
Mentored-by: Hariom Verma <hariom18599@gmail.com>
Signed-off-by: ZheNing Hu <adlternative@gmail.com>
---
 builtin/tag.c |  2 +-
 ref-filter.c  | 44 ++++++++++++++++++++++----------------------
 ref-filter.h  |  4 ++--
 3 files changed, 25 insertions(+), 25 deletions(-)

diff --git a/builtin/tag.c b/builtin/tag.c
index 82fcfc098242..452558ec9575 100644
--- a/builtin/tag.c
+++ b/builtin/tag.c
@@ -146,7 +146,7 @@ static int verify_tag(const char *name, const char *ref,
 		      const struct object_id *oid, void *cb_data)
 {
 	int flags;
-	const struct ref_format *format = cb_data;
+	struct ref_format *format = cb_data;
 	flags = GPG_VERIFY_VERBOSE;
 
 	if (format->format)
diff --git a/ref-filter.c b/ref-filter.c
index 797b20ffa612..d01a0266fb89 100644
--- a/ref-filter.c
+++ b/ref-filter.c
@@ -226,7 +226,7 @@ static int strbuf_addf_ret(struct strbuf *sb, int ret, const char *fmt, ...)
 	return ret;
 }
 
-static int color_atom_parser(const struct ref_format *format, struct used_atom *atom,
+static int color_atom_parser(struct ref_format *format, struct used_atom *atom,
 			     const char *color_value, struct strbuf *err)
 {
 	if (!color_value)
@@ -264,7 +264,7 @@ static int refname_atom_parser_internal(struct refname_atom *atom, const char *a
 	return 0;
 }
 
-static int remote_ref_atom_parser(const struct ref_format *format, struct used_atom *atom,
+static int remote_ref_atom_parser(struct ref_format *format, struct used_atom *atom,
 				  const char *arg, struct strbuf *err)
 {
 	struct string_list params = STRING_LIST_INIT_DUP;
@@ -311,7 +311,7 @@ static int remote_ref_atom_parser(const struct ref_format *format, struct used_a
 	return 0;
 }
 
-static int objecttype_atom_parser(const struct ref_format *format, struct used_atom *atom,
+static int objecttype_atom_parser(struct ref_format *format, struct used_atom *atom,
 				  const char *arg, struct strbuf *err)
 {
 	if (arg)
@@ -323,7 +323,7 @@ static int objecttype_atom_parser(const struct ref_format *format, struct used_a
 	return 0;
 }
 
-static int objectsize_atom_parser(const struct ref_format *format, struct used_atom *atom,
+static int objectsize_atom_parser(struct ref_format *format, struct used_atom *atom,
 				  const char *arg, struct strbuf *err)
 {
 	if (!arg) {
@@ -343,7 +343,7 @@ static int objectsize_atom_parser(const struct ref_format *format, struct used_a
 	return 0;
 }
 
-static int deltabase_atom_parser(const struct ref_format *format, struct used_atom *atom,
+static int deltabase_atom_parser(struct ref_format *format, struct used_atom *atom,
 				 const char *arg, struct strbuf *err)
 {
 	if (arg)
@@ -355,7 +355,7 @@ static int deltabase_atom_parser(const struct ref_format *format, struct used_at
 	return 0;
 }
 
-static int body_atom_parser(const struct ref_format *format, struct used_atom *atom,
+static int body_atom_parser(struct ref_format *format, struct used_atom *atom,
 			    const char *arg, struct strbuf *err)
 {
 	if (arg)
@@ -364,7 +364,7 @@ static int body_atom_parser(const struct ref_format *format, struct used_atom *a
 	return 0;
 }
 
-static int subject_atom_parser(const struct ref_format *format, struct used_atom *atom,
+static int subject_atom_parser(struct ref_format *format, struct used_atom *atom,
 			       const char *arg, struct strbuf *err)
 {
 	if (!arg)
@@ -376,7 +376,7 @@ static int subject_atom_parser(const struct ref_format *format, struct used_atom
 	return 0;
 }
 
-static int trailers_atom_parser(const struct ref_format *format, struct used_atom *atom,
+static int trailers_atom_parser(struct ref_format *format, struct used_atom *atom,
 				const char *arg, struct strbuf *err)
 {
 	atom->u.contents.trailer_opts.no_divider = 1;
@@ -402,7 +402,7 @@ static int trailers_atom_parser(const struct ref_format *format, struct used_ato
 	return 0;
 }
 
-static int contents_atom_parser(const struct ref_format *format, struct used_atom *atom,
+static int contents_atom_parser(struct ref_format *format, struct used_atom *atom,
 				const char *arg, struct strbuf *err)
 {
 	if (!arg)
@@ -430,7 +430,7 @@ static int contents_atom_parser(const struct ref_format *format, struct used_ato
 	return 0;
 }
 
-static int raw_atom_parser(const struct ref_format *format, struct used_atom *atom,
+static int raw_atom_parser(struct ref_format *format, struct used_atom *atom,
 				const char *arg, struct strbuf *err)
 {
 	if (!arg)
@@ -442,7 +442,7 @@ static int raw_atom_parser(const struct ref_format *format, struct used_atom *at
 	return 0;
 }
 
-static int oid_atom_parser(const struct ref_format *format, struct used_atom *atom,
+static int oid_atom_parser(struct ref_format *format, struct used_atom *atom,
 			   const char *arg, struct strbuf *err)
 {
 	if (!arg)
@@ -461,7 +461,7 @@ static int oid_atom_parser(const struct ref_format *format, struct used_atom *at
 	return 0;
 }
 
-static int person_email_atom_parser(const struct ref_format *format, struct used_atom *atom,
+static int person_email_atom_parser(struct ref_format *format, struct used_atom *atom,
 				    const char *arg, struct strbuf *err)
 {
 	if (!arg)
@@ -475,7 +475,7 @@ static int person_email_atom_parser(const struct ref_format *format, struct used
 	return 0;
 }
 
-static int refname_atom_parser(const struct ref_format *format, struct used_atom *atom,
+static int refname_atom_parser(struct ref_format *format, struct used_atom *atom,
 			       const char *arg, struct strbuf *err)
 {
 	return refname_atom_parser_internal(&atom->u.refname, arg, atom->name, err);
@@ -492,7 +492,7 @@ static align_type parse_align_position(const char *s)
 	return -1;
 }
 
-static int align_atom_parser(const struct ref_format *format, struct used_atom *atom,
+static int align_atom_parser(struct ref_format *format, struct used_atom *atom,
 			     const char *arg, struct strbuf *err)
 {
 	struct align *align = &atom->u.align;
@@ -544,7 +544,7 @@ static int align_atom_parser(const struct ref_format *format, struct used_atom *
 	return 0;
 }
 
-static int if_atom_parser(const struct ref_format *format, struct used_atom *atom,
+static int if_atom_parser(struct ref_format *format, struct used_atom *atom,
 			  const char *arg, struct strbuf *err)
 {
 	if (!arg) {
@@ -559,7 +559,7 @@ static int if_atom_parser(const struct ref_format *format, struct used_atom *ato
 	return 0;
 }
 
-static int head_atom_parser(const struct ref_format *format, struct used_atom *atom,
+static int head_atom_parser(struct ref_format *format, struct used_atom *atom,
 			    const char *arg, struct strbuf *unused_err)
 {
 	atom->u.head = resolve_refdup("HEAD", RESOLVE_REF_READING, NULL, NULL);
@@ -570,7 +570,7 @@ static struct {
 	const char *name;
 	info_source source;
 	cmp_type cmp_type;
-	int (*parser)(const struct ref_format *format, struct used_atom *atom,
+	int (*parser)(struct ref_format *format, struct used_atom *atom,
 		      const char *arg, struct strbuf *err);
 } valid_atom[] = {
 	[ATOM_REFNAME] = { "refname", SOURCE_NONE, FIELD_STR, refname_atom_parser },
@@ -649,7 +649,7 @@ struct atom_value {
 /*
  * Used to parse format string and sort specifiers
  */
-static int parse_ref_filter_atom(const struct ref_format *format,
+static int parse_ref_filter_atom(struct ref_format *format,
 				 const char *atom, const char *ep,
 				 struct strbuf *err)
 {
@@ -2553,9 +2553,9 @@ static void append_literal(const char *cp, const char *ep, struct ref_formatting
 }
 
 int format_ref_array_item(struct ref_array_item *info,
-			   const struct ref_format *format,
-			   struct strbuf *final_buf,
-			   struct strbuf *error_buf)
+			  struct ref_format *format,
+			  struct strbuf *final_buf,
+			  struct strbuf *error_buf)
 {
 	const char *cp, *sp, *ep;
 	struct ref_formatting_state state = REF_FORMATTING_STATE_INIT;
@@ -2600,7 +2600,7 @@ int format_ref_array_item(struct ref_array_item *info,
 }
 
 void pretty_print_ref(const char *name, const struct object_id *oid,
-		      const struct ref_format *format)
+		      struct ref_format *format)
 {
 	struct ref_array_item *ref_item;
 	struct strbuf output = STRBUF_INIT;
diff --git a/ref-filter.h b/ref-filter.h
index baf72a718965..74fb423fc89f 100644
--- a/ref-filter.h
+++ b/ref-filter.h
@@ -116,7 +116,7 @@ void ref_array_sort(struct ref_sorting *sort, struct ref_array *array);
 void ref_sorting_set_sort_flags_all(struct ref_sorting *sorting, unsigned int mask, int on);
 /*  Based on the given format and quote_style, fill the strbuf */
 int format_ref_array_item(struct ref_array_item *info,
-			  const struct ref_format *format,
+			  struct ref_format *format,
 			  struct strbuf *final_buf,
 			  struct strbuf *error_buf);
 /*  Parse a single sort specifier and add it to the list */
@@ -137,7 +137,7 @@ void setup_ref_filter_porcelain_msg(void);
  * name must be a fully qualified refname.
  */
 void pretty_print_ref(const char *name, const struct object_id *oid,
-		      const struct ref_format *format);
+		      struct ref_format *format);
 
 /*
  * Push a single ref onto the array; this can be used to construct your own
-- 
gitgitgadget


^ permalink raw reply	[flat|nested] 121+ messages in thread

* [PATCH v3 05/10] [GSOC] ref-filter: add %(rest) atom
  2021-06-19  7:02   ` [PATCH v3 00/10] " ZheNing Hu via GitGitGadget
                       ` (3 preceding siblings ...)
  2021-06-19  7:02     ` [PATCH v3 04/10] [GSOC] ref-filter: use non-const ref_format in *_atom_parser() ZheNing Hu via GitGitGadget
@ 2021-06-19  7:02     ` ZheNing Hu via GitGitGadget
  2021-06-19  7:02     ` [PATCH v3 06/10] [GSOC] ref-filter: pass get_object() return value to their callers ZheNing Hu via GitGitGadget
                       ` (5 subsequent siblings)
  10 siblings, 0 replies; 121+ messages in thread
From: ZheNing Hu via GitGitGadget @ 2021-06-19  7:02 UTC (permalink / raw)
  To: git
  Cc: Junio C Hamano, Christian Couder, Hariom Verma, Bagas Sanjaya,
	Jeff King, Ævar Arnfjörð Bjarmason, ZheNing Hu,
	ZheNing Hu

From: ZheNing Hu <adlternative@gmail.com>

In order to let "cat-file --batch=%(rest)" use the ref-filter
interface, add %(rest) atom for ref-filter. "git for-each-ref",
"git branch", "git tag" and "git verify-tag" will reject %(rest)
by default.

Mentored-by: Christian Couder <christian.couder@gmail.com>
Mentored-by: Hariom Verma <hariom18599@gmail.com>
Signed-off-by: ZheNing Hu <adlternative@gmail.com>
---
 ref-filter.c             | 21 +++++++++++++++++++++
 ref-filter.h             |  5 ++++-
 t/t3203-branch-output.sh |  4 ++++
 t/t6300-for-each-ref.sh  |  4 ++++
 t/t7004-tag.sh           |  4 ++++
 t/t7030-verify-tag.sh    |  4 ++++
 6 files changed, 41 insertions(+), 1 deletion(-)

diff --git a/ref-filter.c b/ref-filter.c
index d01a0266fb89..10c78de9cfa4 100644
--- a/ref-filter.c
+++ b/ref-filter.c
@@ -157,6 +157,7 @@ enum atom_type {
 	ATOM_IF,
 	ATOM_THEN,
 	ATOM_ELSE,
+	ATOM_REST,
 };
 
 /*
@@ -559,6 +560,15 @@ static int if_atom_parser(struct ref_format *format, struct used_atom *atom,
 	return 0;
 }
 
+static int rest_atom_parser(struct ref_format *format, struct used_atom *atom,
+			    const char *arg, struct strbuf *err)
+{
+	if (arg)
+		return strbuf_addf_ret(err, -1, _("%%(rest) does not take arguments"));
+	format->use_rest = 1;
+	return 0;
+}
+
 static int head_atom_parser(struct ref_format *format, struct used_atom *atom,
 			    const char *arg, struct strbuf *unused_err)
 {
@@ -615,6 +625,7 @@ static struct {
 	[ATOM_IF] = { "if", SOURCE_NONE, FIELD_STR, if_atom_parser },
 	[ATOM_THEN] = { "then", SOURCE_NONE },
 	[ATOM_ELSE] = { "else", SOURCE_NONE },
+	[ATOM_REST] = { "rest", SOURCE_NONE, FIELD_STR, rest_atom_parser },
 	/*
 	 * Please update $__git_ref_fieldlist in git-completion.bash
 	 * when you add new atoms
@@ -1010,6 +1021,9 @@ int verify_ref_format(struct ref_format *format)
 		if (at < 0)
 			die("%s", err.buf);
 
+		if (used_atom[at].atom_type == ATOM_REST)
+			die("this command reject atom %%(%.*s)", (int)(ep - sp - 2), sp + 2);
+
 		if ((format->quote_style == QUOTE_PYTHON ||
 		     format->quote_style == QUOTE_SHELL ||
 		     format->quote_style == QUOTE_TCL) &&
@@ -1927,6 +1941,12 @@ static int populate_value(struct ref_array_item *ref, struct strbuf *err)
 			v->handler = else_atom_handler;
 			v->s = xstrdup("");
 			continue;
+		} else if (atom_type == ATOM_REST) {
+			if (ref->rest)
+				v->s = xstrdup(ref->rest);
+			else
+				v->s = xstrdup("");
+			continue;
 		} else
 			continue;
 
@@ -2144,6 +2164,7 @@ static struct ref_array_item *new_ref_array_item(const char *refname,
 
 	FLEX_ALLOC_STR(ref, refname, refname);
 	oidcpy(&ref->objectname, oid);
+	ref->rest = NULL;
 
 	return ref;
 }
diff --git a/ref-filter.h b/ref-filter.h
index 74fb423fc89f..9dc07476a584 100644
--- a/ref-filter.h
+++ b/ref-filter.h
@@ -38,6 +38,7 @@ struct ref_sorting {
 
 struct ref_array_item {
 	struct object_id objectname;
+	const char *rest;
 	int flag;
 	unsigned int kind;
 	const char *symref;
@@ -76,14 +77,16 @@ struct ref_format {
 	 * verify_ref_format() afterwards to finalize.
 	 */
 	const char *format;
+	const char *rest;
 	int quote_style;
+	int use_rest;
 	int use_color;
 
 	/* Internal state to ref-filter */
 	int need_color_reset_at_eol;
 };
 
-#define REF_FORMAT_INIT { NULL, 0, -1 }
+#define REF_FORMAT_INIT { NULL, NULL, 0, 0, -1 }
 
 /*  Macros for checking --merged and --no-merged options */
 #define _OPT_MERGED_NO_MERGED(option, filter, h) \
diff --git a/t/t3203-branch-output.sh b/t/t3203-branch-output.sh
index 5325b9f67a00..6e94c6db7b5a 100755
--- a/t/t3203-branch-output.sh
+++ b/t/t3203-branch-output.sh
@@ -340,6 +340,10 @@ test_expect_success 'git branch --format option' '
 	test_cmp expect actual
 '
 
+test_expect_success 'git branch with --format=%(rest) must fail' '
+	test_must_fail git branch --format="%(rest)" >actual
+'
+
 test_expect_success 'worktree colors correct' '
 	cat >expect <<-EOF &&
 	* <GREEN>(HEAD detached from fromtag)<RESET>
diff --git a/t/t6300-for-each-ref.sh b/t/t6300-for-each-ref.sh
index 5556063c347d..82c0ad2cb115 100755
--- a/t/t6300-for-each-ref.sh
+++ b/t/t6300-for-each-ref.sh
@@ -1211,6 +1211,10 @@ test_expect_success 'basic atom: head contents:trailers' '
 	test_cmp expect actual.clean
 '
 
+test_expect_success 'basic atom: rest must fail' '
+	test_must_fail git for-each-ref --format="%(rest)" refs/heads/main
+'
+
 test_expect_success 'trailer parsing not fooled by --- line' '
 	git commit --allow-empty -F - <<-\EOF &&
 	this is the subject
diff --git a/t/t7004-tag.sh b/t/t7004-tag.sh
index 2f72c5c6883e..082be85dffc7 100755
--- a/t/t7004-tag.sh
+++ b/t/t7004-tag.sh
@@ -1998,6 +1998,10 @@ test_expect_success '--format should list tags as per format given' '
 	test_cmp expect actual
 '
 
+test_expect_success 'git tag -l with --format="%(rest)" must fail' '
+	test_must_fail git tag -l --format="%(rest)" "v1*"
+'
+
 test_expect_success "set up color tests" '
 	echo "<RED>v1.0<RESET>" >expect.color &&
 	echo "v1.0" >expect.bare &&
diff --git a/t/t7030-verify-tag.sh b/t/t7030-verify-tag.sh
index 3cefde9602bf..10faa645157e 100755
--- a/t/t7030-verify-tag.sh
+++ b/t/t7030-verify-tag.sh
@@ -194,6 +194,10 @@ test_expect_success GPG 'verifying tag with --format' '
 	test_cmp expect actual
 '
 
+test_expect_success GPG 'verifying tag with --format="%(rest)" must fail' '
+	test_must_fail git verify-tag --format="%(rest)" "fourth-signed"
+'
+
 test_expect_success GPG 'verifying a forged tag with --format should fail silently' '
 	test_must_fail git verify-tag --format="tagname : %(tag)" $(cat forged1.tag) >actual-forged &&
 	test_must_be_empty actual-forged
-- 
gitgitgadget


^ permalink raw reply	[flat|nested] 121+ messages in thread

* [PATCH v3 06/10] [GSOC] ref-filter: pass get_object() return value to their callers
  2021-06-19  7:02   ` [PATCH v3 00/10] " ZheNing Hu via GitGitGadget
                       ` (4 preceding siblings ...)
  2021-06-19  7:02     ` [PATCH v3 05/10] [GSOC] ref-filter: add %(rest) atom ZheNing Hu via GitGitGadget
@ 2021-06-19  7:02     ` ZheNing Hu via GitGitGadget
  2021-06-19  7:02     ` [PATCH v3 07/10] [GSOC] ref-filter: introduce free_ref_array_item_value() function ZheNing Hu via GitGitGadget
                       ` (4 subsequent siblings)
  10 siblings, 0 replies; 121+ messages in thread
From: ZheNing Hu via GitGitGadget @ 2021-06-19  7:02 UTC (permalink / raw)
  To: git
  Cc: Junio C Hamano, Christian Couder, Hariom Verma, Bagas Sanjaya,
	Jeff King, Ævar Arnfjörð Bjarmason, ZheNing Hu,
	ZheNing Hu

From: ZheNing Hu <adlternative@gmail.com>

Since in the refactor of `git cat-file --batch` later,
oid_object_info_extended() in get_object() will be used to obtain
the info of an object with it's oid. When the object cannot be
obtained in the git repository, `cat-file --batch` expects to output
"<oid> missing" and continue the next oid query instead of letting
Git exit. In other error conditions, Git should exit normally. So we
can achieve this function by passing the return value of get_object().

Mentored-by: Christian Couder <christian.couder@gmail.com>
Mentored-by: Hariom Verma <hariom18599@gmail.com>
Helped-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: ZheNing Hu <adlternative@gmail.com>
---
 ref-filter.c | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/ref-filter.c b/ref-filter.c
index 10c78de9cfa4..58def6ccd33a 100644
--- a/ref-filter.c
+++ b/ref-filter.c
@@ -1816,6 +1816,7 @@ static int populate_value(struct ref_array_item *ref, struct strbuf *err)
 {
 	struct object *obj;
 	int i;
+	int ret = 0;
 	struct object_info empty = OBJECT_INFO_INIT;
 
 	CALLOC_ARRAY(ref->value, used_atom_cnt);
@@ -1972,8 +1973,9 @@ static int populate_value(struct ref_array_item *ref, struct strbuf *err)
 
 
 	oi.oid = ref->objectname;
-	if (get_object(ref, 0, &obj, &oi, err))
-		return -1;
+	ret = get_object(ref, 0, &obj, &oi, err);
+	if (ret)
+		return ret;
 
 	/*
 	 * If there is no atom that wants to know about tagged
@@ -2005,8 +2007,10 @@ static int get_ref_atom_value(struct ref_array_item *ref, int atom,
 			      struct atom_value **v, struct strbuf *err)
 {
 	if (!ref->value) {
-		if (populate_value(ref, err))
-			return -1;
+		int ret = populate_value(ref, err);
+
+		if (ret)
+			return ret;
 		fill_missing_values(ref->value);
 	}
 	*v = &ref->value[atom];
@@ -2580,6 +2584,7 @@ int format_ref_array_item(struct ref_array_item *info,
 {
 	const char *cp, *sp, *ep;
 	struct ref_formatting_state state = REF_FORMATTING_STATE_INIT;
+	int ret = 0;
 
 	state.quote_style = format->quote_style;
 	push_stack_element(&state.stack);
@@ -2592,10 +2597,10 @@ int format_ref_array_item(struct ref_array_item *info,
 		if (cp < sp)
 			append_literal(cp, sp, &state);
 		pos = parse_ref_filter_atom(format, sp + 2, ep, error_buf);
-		if (pos < 0 || get_ref_atom_value(info, pos, &atomv, error_buf) ||
+		if (pos < 0 || (ret = get_ref_atom_value(info, pos, &atomv, error_buf)) ||
 		    atomv->handler(atomv, &state, error_buf)) {
 			pop_stack_element(&state.stack);
-			return -1;
+			return ret ? ret : -1;
 		}
 	}
 	if (*cp) {
-- 
gitgitgadget


^ permalink raw reply	[flat|nested] 121+ messages in thread

* [PATCH v3 07/10] [GSOC] ref-filter: introduce free_ref_array_item_value() function
  2021-06-19  7:02   ` [PATCH v3 00/10] " ZheNing Hu via GitGitGadget
                       ` (5 preceding siblings ...)
  2021-06-19  7:02     ` [PATCH v3 06/10] [GSOC] ref-filter: pass get_object() return value to their callers ZheNing Hu via GitGitGadget
@ 2021-06-19  7:02     ` ZheNing Hu via GitGitGadget
  2021-06-19  7:02     ` [PATCH v3 08/10] [GSOC] cat-file: reuse ref-filter logic ZheNing Hu via GitGitGadget
                       ` (3 subsequent siblings)
  10 siblings, 0 replies; 121+ messages in thread
From: ZheNing Hu via GitGitGadget @ 2021-06-19  7:02 UTC (permalink / raw)
  To: git
  Cc: Junio C Hamano, Christian Couder, Hariom Verma, Bagas Sanjaya,
	Jeff King, Ævar Arnfjörð Bjarmason, ZheNing Hu,
	ZheNing Hu

From: ZheNing Hu <adlternative@gmail.com>

When we use ref_array_item which is not dynamically allocated and
want to free the space of its member "value" after the end of use,
free_array_item() does not meet our needs, because it tries to free
ref_array_item itself and its member "symref".

Introduce free_ref_array_item_value() for freeing ref_array_item value.
It will be called internally by free_array_item(), and it will help
`cat-file --batch` free ref_array_item's value memory later.

Helped-by: Junio C Hamano <gitster@pobox.com>
Mentored-by: Christian Couder <christian.couder@gmail.com>
Mentored-by: Hariom Verma <hariom18599@gmail.com>
Signed-off-by: ZheNing Hu <adlternative@gmail.com>
---
 ref-filter.c | 11 ++++++++---
 ref-filter.h |  2 ++
 2 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/ref-filter.c b/ref-filter.c
index 58def6ccd33a..22315d4809dc 100644
--- a/ref-filter.c
+++ b/ref-filter.c
@@ -2291,16 +2291,21 @@ static int ref_filter_handler(const char *refname, const struct object_id *oid,
 	return 0;
 }
 
-/*  Free memory allocated for a ref_array_item */
-static void free_array_item(struct ref_array_item *item)
+void free_ref_array_item_value(struct ref_array_item *item)
 {
-	free((char *)item->symref);
 	if (item->value) {
 		int i;
 		for (i = 0; i < used_atom_cnt; i++)
 			free((char *)item->value[i].s);
 		free(item->value);
 	}
+}
+
+/*  Free memory allocated for a ref_array_item */
+static void free_array_item(struct ref_array_item *item)
+{
+	free((char *)item->symref);
+	free_ref_array_item_value(item);
 	free(item);
 }
 
diff --git a/ref-filter.h b/ref-filter.h
index 9dc07476a584..76f9af7b4676 100644
--- a/ref-filter.h
+++ b/ref-filter.h
@@ -111,6 +111,8 @@ struct ref_format {
 int filter_refs(struct ref_array *array, struct ref_filter *filter, unsigned int type);
 /*  Clear all memory allocated to ref_array */
 void ref_array_clear(struct ref_array *array);
+/* Free ref_array_item's value */
+void free_ref_array_item_value(struct ref_array_item *item);
 /*  Used to verify if the given format is correct and to parse out the used atoms */
 int verify_ref_format(struct ref_format *format);
 /*  Sort the given ref_array as per the ref_sorting provided */
-- 
gitgitgadget


^ permalink raw reply	[flat|nested] 121+ messages in thread

* [PATCH v3 08/10] [GSOC] cat-file: reuse ref-filter logic
  2021-06-19  7:02   ` [PATCH v3 00/10] " ZheNing Hu via GitGitGadget
                       ` (6 preceding siblings ...)
  2021-06-19  7:02     ` [PATCH v3 07/10] [GSOC] ref-filter: introduce free_ref_array_item_value() function ZheNing Hu via GitGitGadget
@ 2021-06-19  7:02     ` ZheNing Hu via GitGitGadget
  2021-06-21  5:55       ` Christian Couder
  2021-06-19  7:02     ` [PATCH v3 09/10] [GSOC] cat-file: reuse err buf in batch_object_write() ZheNing Hu via GitGitGadget
                       ` (2 subsequent siblings)
  10 siblings, 1 reply; 121+ messages in thread
From: ZheNing Hu via GitGitGadget @ 2021-06-19  7:02 UTC (permalink / raw)
  To: git
  Cc: Junio C Hamano, Christian Couder, Hariom Verma, Bagas Sanjaya,
	Jeff King, Ævar Arnfjörð Bjarmason, ZheNing Hu,
	ZheNing Hu

From: ZheNing Hu <adlternative@gmail.com>

In order to let cat-file use ref-filter logic, the following
methods are used:

1. Add `cat_file_mode` member in struct `ref_format`, this can
help us reject atoms in verify_ref_format() which cat-file
cannot use, e.g. `%(refname)`, `%(push)`, `%(upstream)`...
2. Change the type of member `format` in struct `batch_options`
to `ref_format`, We can add format data in it.
3. Let `batch_objects()` add atoms to format, and use
`verify_ref_format()` to check atoms.
4. Use `has_object_file()` in `batch_one_object()` to check
whether the input object exists.
5. Let get_object() return 1 and print "<oid> missing" instead
of returning -1 and printing "missing object <oid> for <refname>",
this can help `format_ref_array_item()` just report that the
object is missing without letting Git exit.
6. Use `format_ref_array_item()` in `batch_object_write()` to
get the formatted data corresponding to the object. If the
return value of `format_ref_array_item()` is equals to zero,
use `batch_write()` to print object data; else if the return
value less than zero, use `die()` to print the error message
and exit; else return value greater than zero, only print the
error message, but not exit.
7. Use free_ref_array_item_value() to free ref_array_item's
value.

Most of the atoms in `for-each-ref --format` are now supported,
such as `%(tree)`, `%(parent)`, `%(author)`, `%(tagger)`, `%(if)`,
`%(then)`, `%(else)`, `%(end)`. But these atoms will be rejected:
`%(refname)`, `%(symref)`, `%(upstream)`, `%(push)`, `%(worktreepath)`,
`%(flag)`, `%(HEAD)`, because our objects don't have refname.

Mentored-by: Christian Couder <christian.couder@gmail.com>
Mentored-by: Hariom Verma <hariom18599@gmail.com>
Signed-off-by: ZheNing Hu <adlternative@gmail.com>
---
 Documentation/git-cat-file.txt |   6 +
 builtin/cat-file.c             | 248 +++++++-------------------------
 ref-filter.c                   |  15 +-
 ref-filter.h                   |   3 +-
 t/t1006-cat-file.sh            | 252 +++++++++++++++++++++++++++++++++
 t/t6301-for-each-ref-errors.sh |   2 +-
 6 files changed, 323 insertions(+), 203 deletions(-)

diff --git a/Documentation/git-cat-file.txt b/Documentation/git-cat-file.txt
index 4eb0421b3fd9..ef8ab952b2fa 100644
--- a/Documentation/git-cat-file.txt
+++ b/Documentation/git-cat-file.txt
@@ -226,6 +226,12 @@ newline. The available atoms are:
 	after that first run of whitespace (i.e., the "rest" of the
 	line) are output in place of the `%(rest)` atom.
 
+Note that most of the atoms in `for-each-ref --format` are now supported,
+such as `%(tree)`, `%(parent)`, `%(author)`, `%(tagger)`, `%(if)`,
+`%(then)`, `%(else)`, `%(end)`. But these atoms will be rejected:
+`%(refname)`, `%(symref)`, `%(upstream)`, `%(push)`, `%(worktreepath)`,
+`%(flag)`, `%(HEAD)`. See linkgit:git-for-each-ref[1].
+
 If no format is specified, the default format is `%(objectname)
 %(objecttype) %(objectsize)`.
 
diff --git a/builtin/cat-file.c b/builtin/cat-file.c
index 5ebf13359e83..cbedb8e1c471 100644
--- a/builtin/cat-file.c
+++ b/builtin/cat-file.c
@@ -16,6 +16,7 @@
 #include "packfile.h"
 #include "object-store.h"
 #include "promisor-remote.h"
+#include "ref-filter.h"
 
 struct batch_options {
 	int enabled;
@@ -25,7 +26,7 @@ struct batch_options {
 	int all_objects;
 	int unordered;
 	int cmdmode; /* may be 'w' or 'c' for --filters or --textconv */
-	const char *format;
+	struct ref_format format;
 };
 
 static const char *force_path;
@@ -195,99 +196,10 @@ static int cat_one_file(int opt, const char *exp_type, const char *obj_name,
 
 struct expand_data {
 	struct object_id oid;
-	enum object_type type;
-	unsigned long size;
-	off_t disk_size;
 	const char *rest;
-	struct object_id delta_base_oid;
-
-	/*
-	 * If mark_query is true, we do not expand anything, but rather
-	 * just mark the object_info with items we wish to query.
-	 */
-	int mark_query;
-
-	/*
-	 * Whether to split the input on whitespace before feeding it to
-	 * get_sha1; this is decided during the mark_query phase based on
-	 * whether we have a %(rest) token in our format.
-	 */
 	int split_on_whitespace;
-
-	/*
-	 * After a mark_query run, this object_info is set up to be
-	 * passed to oid_object_info_extended. It will point to the data
-	 * elements above, so you can retrieve the response from there.
-	 */
-	struct object_info info;
-
-	/*
-	 * This flag will be true if the requested batch format and options
-	 * don't require us to call oid_object_info, which can then be
-	 * optimized out.
-	 */
-	unsigned skip_object_info : 1;
 };
 
-static int is_atom(const char *atom, const char *s, int slen)
-{
-	int alen = strlen(atom);
-	return alen == slen && !memcmp(atom, s, alen);
-}
-
-static void expand_atom(struct strbuf *sb, const char *atom, int len,
-			void *vdata)
-{
-	struct expand_data *data = vdata;
-
-	if (is_atom("objectname", atom, len)) {
-		if (!data->mark_query)
-			strbuf_addstr(sb, oid_to_hex(&data->oid));
-	} else if (is_atom("objecttype", atom, len)) {
-		if (data->mark_query)
-			data->info.typep = &data->type;
-		else
-			strbuf_addstr(sb, type_name(data->type));
-	} else if (is_atom("objectsize", atom, len)) {
-		if (data->mark_query)
-			data->info.sizep = &data->size;
-		else
-			strbuf_addf(sb, "%"PRIuMAX , (uintmax_t)data->size);
-	} else if (is_atom("objectsize:disk", atom, len)) {
-		if (data->mark_query)
-			data->info.disk_sizep = &data->disk_size;
-		else
-			strbuf_addf(sb, "%"PRIuMAX, (uintmax_t)data->disk_size);
-	} else if (is_atom("rest", atom, len)) {
-		if (data->mark_query)
-			data->split_on_whitespace = 1;
-		else if (data->rest)
-			strbuf_addstr(sb, data->rest);
-	} else if (is_atom("deltabase", atom, len)) {
-		if (data->mark_query)
-			data->info.delta_base_oid = &data->delta_base_oid;
-		else
-			strbuf_addstr(sb,
-				      oid_to_hex(&data->delta_base_oid));
-	} else
-		die("unknown format element: %.*s", len, atom);
-}
-
-static size_t expand_format(struct strbuf *sb, const char *start, void *data)
-{
-	const char *end;
-
-	if (*start != '(')
-		return 0;
-	end = strchr(start + 1, ')');
-	if (!end)
-		die("format element '%s' does not end in ')'", start);
-
-	expand_atom(sb, start + 1, end - start - 1, data);
-
-	return end - start + 1;
-}
-
 static void batch_write(struct batch_options *opt, const void *data, int len)
 {
 	if (opt->buffer_output) {
@@ -297,87 +209,31 @@ static void batch_write(struct batch_options *opt, const void *data, int len)
 		write_or_die(1, data, len);
 }
 
-static void print_object_or_die(struct batch_options *opt, struct expand_data *data)
-{
-	const struct object_id *oid = &data->oid;
-
-	assert(data->info.typep);
-
-	if (data->type == OBJ_BLOB) {
-		if (opt->buffer_output)
-			fflush(stdout);
-		if (opt->cmdmode) {
-			char *contents;
-			unsigned long size;
-
-			if (!data->rest)
-				die("missing path for '%s'", oid_to_hex(oid));
-
-			if (opt->cmdmode == 'w') {
-				if (filter_object(data->rest, 0100644, oid,
-						  &contents, &size))
-					die("could not convert '%s' %s",
-					    oid_to_hex(oid), data->rest);
-			} else if (opt->cmdmode == 'c') {
-				enum object_type type;
-				if (!textconv_object(the_repository,
-						     data->rest, 0100644, oid,
-						     1, &contents, &size))
-					contents = read_object_file(oid,
-								    &type,
-								    &size);
-				if (!contents)
-					die("could not convert '%s' %s",
-					    oid_to_hex(oid), data->rest);
-			} else
-				BUG("invalid cmdmode: %c", opt->cmdmode);
-			batch_write(opt, contents, size);
-			free(contents);
-		} else {
-			stream_blob(oid);
-		}
-	}
-	else {
-		enum object_type type;
-		unsigned long size;
-		void *contents;
-
-		contents = read_object_file(oid, &type, &size);
-		if (!contents)
-			die("object %s disappeared", oid_to_hex(oid));
-		if (type != data->type)
-			die("object %s changed type!?", oid_to_hex(oid));
-		if (data->info.sizep && size != data->size)
-			die("object %s changed size!?", oid_to_hex(oid));
-
-		batch_write(opt, contents, size);
-		free(contents);
-	}
-}
 
 static void batch_object_write(const char *obj_name,
 			       struct strbuf *scratch,
 			       struct batch_options *opt,
 			       struct expand_data *data)
 {
-	if (!data->skip_object_info &&
-	    oid_object_info_extended(the_repository, &data->oid, &data->info,
-				     OBJECT_INFO_LOOKUP_REPLACE) < 0) {
-		printf("%s missing\n",
-		       obj_name ? obj_name : oid_to_hex(&data->oid));
-		fflush(stdout);
-		return;
-	}
+	int ret = 0;
+	struct strbuf err = STRBUF_INIT;
+	struct ref_array_item item = { data->oid, data->rest };
 
 	strbuf_reset(scratch);
-	strbuf_expand(scratch, opt->format, expand_format, data);
-	strbuf_addch(scratch, '\n');
-	batch_write(opt, scratch->buf, scratch->len);
 
-	if (opt->print_contents) {
-		print_object_or_die(opt, data);
-		batch_write(opt, "\n", 1);
+	ret = format_ref_array_item(&item, &opt->format, scratch, &err);
+	if (!ret) {
+		strbuf_addch(scratch, '\n');
+		batch_write(opt, scratch->buf, scratch->len);
+	} else if (ret < 0) {
+		die("%s\n", err.buf);
+	} else {
+		/* when ret > 0 , don't call die and print the err to stdout*/
+		printf("%s\n", err.buf);
+		fflush(stdout);
 	}
+	free_ref_array_item_value(&item);
+	strbuf_release(&err);
 }
 
 static void batch_one_object(const char *obj_name,
@@ -428,6 +284,13 @@ static void batch_one_object(const char *obj_name,
 		return;
 	}
 
+	if (!has_object_file(&data->oid)) {
+		printf("%s missing\n",
+		       obj_name ? obj_name : oid_to_hex(&data->oid));
+		fflush(stdout);
+		return;
+	}
+
 	batch_object_write(obj_name, scratch, opt, data);
 }
 
@@ -488,42 +351,34 @@ static int batch_unordered_packed(const struct object_id *oid,
 	return batch_unordered_object(oid, data);
 }
 
-static int batch_objects(struct batch_options *opt)
+static const char * const cat_file_usage[] = {
+	N_("git cat-file (-t [--allow-unknown-type] | -s [--allow-unknown-type] | -e | -p | <type> | --textconv | --filters) [--path=<path>] <object>"),
+	N_("git cat-file (--batch[=<format>] | --batch-check[=<format>]) [--follow-symlinks] [--textconv | --filters]"),
+	NULL
+};
+
+static int batch_objects(struct batch_options *opt, const struct option *options)
 {
 	struct strbuf input = STRBUF_INIT;
 	struct strbuf output = STRBUF_INIT;
+	struct strbuf format = STRBUF_INIT;
 	struct expand_data data;
 	int save_warning;
 	int retval = 0;
 
-	if (!opt->format)
-		opt->format = "%(objectname) %(objecttype) %(objectsize)";
-
-	/*
-	 * Expand once with our special mark_query flag, which will prime the
-	 * object_info to be handed to oid_object_info_extended for each
-	 * object.
-	 */
 	memset(&data, 0, sizeof(data));
-	data.mark_query = 1;
-	strbuf_expand(&output, opt->format, expand_format, &data);
-	data.mark_query = 0;
-	strbuf_release(&output);
-	if (opt->cmdmode)
-		data.split_on_whitespace = 1;
-
-	if (opt->all_objects) {
-		struct object_info empty = OBJECT_INFO_INIT;
-		if (!memcmp(&data.info, &empty, sizeof(empty)))
-			data.skip_object_info = 1;
-	}
-
-	/*
-	 * If we are printing out the object, then always fill in the type,
-	 * since we will want to decide whether or not to stream.
-	 */
+	if (!opt->format.format)
+		strbuf_addstr(&format, "%(objectname) %(objecttype) %(objectsize)");
+	else
+		strbuf_addstr(&format, opt->format.format);
 	if (opt->print_contents)
-		data.info.typep = &data.type;
+		strbuf_addstr(&format, "\n%(raw)");
+	opt->format.format = format.buf;
+	if (verify_ref_format(&opt->format))
+		usage_with_options(cat_file_usage, options);
+
+	if (opt->cmdmode || opt->format.use_rest)
+		data.split_on_whitespace = 1;
 
 	if (opt->all_objects) {
 		struct object_cb_data cb;
@@ -556,6 +411,7 @@ static int batch_objects(struct batch_options *opt)
 			oid_array_clear(&sa);
 		}
 
+		strbuf_release(&format);
 		strbuf_release(&output);
 		return 0;
 	}
@@ -588,18 +444,13 @@ static int batch_objects(struct batch_options *opt)
 		batch_one_object(input.buf, &output, opt, &data);
 	}
 
+	strbuf_release(&format);
 	strbuf_release(&input);
 	strbuf_release(&output);
 	warn_on_object_refname_ambiguity = save_warning;
 	return retval;
 }
 
-static const char * const cat_file_usage[] = {
-	N_("git cat-file (-t [--allow-unknown-type] | -s [--allow-unknown-type] | -e | -p | <type> | --textconv | --filters) [--path=<path>] <object>"),
-	N_("git cat-file (--batch[=<format>] | --batch-check[=<format>]) [--follow-symlinks] [--textconv | --filters]"),
-	NULL
-};
-
 static int git_cat_file_config(const char *var, const char *value, void *cb)
 {
 	if (userdiff_config(var, value) < 0)
@@ -622,7 +473,7 @@ static int batch_option_callback(const struct option *opt,
 
 	bo->enabled = 1;
 	bo->print_contents = !strcmp(opt->long_name, "batch");
-	bo->format = arg;
+	bo->format.format = arg;
 
 	return 0;
 }
@@ -631,7 +482,9 @@ int cmd_cat_file(int argc, const char **argv, const char *prefix)
 {
 	int opt = 0;
 	const char *exp_type = NULL, *obj_name = NULL;
-	struct batch_options batch = {0};
+	struct batch_options batch = {
+		.format = REF_FORMAT_INIT
+	};
 	int unknown_type = 0;
 
 	const struct option options[] = {
@@ -670,6 +523,7 @@ int cmd_cat_file(int argc, const char **argv, const char *prefix)
 	git_config(git_cat_file_config, NULL);
 
 	batch.buffer_output = -1;
+	batch.format.cat_file_mode = 1;
 	argc = parse_options(argc, argv, prefix, options, cat_file_usage, 0);
 
 	if (opt) {
@@ -713,7 +567,7 @@ int cmd_cat_file(int argc, const char **argv, const char *prefix)
 		batch.buffer_output = batch.all_objects;
 
 	if (batch.enabled)
-		return batch_objects(&batch);
+		return batch_objects(&batch, options);
 
 	if (unknown_type && opt != 't' && opt != 's')
 		die("git cat-file --allow-unknown-type: use with -s or -t");
diff --git a/ref-filter.c b/ref-filter.c
index 22315d4809dc..181d99c92735 100644
--- a/ref-filter.c
+++ b/ref-filter.c
@@ -1021,8 +1021,15 @@ int verify_ref_format(struct ref_format *format)
 		if (at < 0)
 			die("%s", err.buf);
 
-		if (used_atom[at].atom_type == ATOM_REST)
-			die("this command reject atom %%(%.*s)", (int)(ep - sp - 2), sp + 2);
+		if ((!format->cat_file_mode && used_atom[at].atom_type == ATOM_REST) ||
+		    (format->cat_file_mode && (used_atom[at].atom_type == ATOM_FLAG ||
+					       used_atom[at].atom_type == ATOM_HEAD ||
+					       used_atom[at].atom_type == ATOM_PUSH ||
+					       used_atom[at].atom_type == ATOM_REFNAME ||
+					       used_atom[at].atom_type == ATOM_SYMREF ||
+					       used_atom[at].atom_type == ATOM_UPSTREAM ||
+					       used_atom[at].atom_type == ATOM_WORKTREEPATH)))
+			die(_("this command reject atom %%(%.*s)"), (int)(ep - sp - 2), sp + 2);
 
 		if ((format->quote_style == QUOTE_PYTHON ||
 		     format->quote_style == QUOTE_SHELL ||
@@ -1742,8 +1749,8 @@ static int get_object(struct ref_array_item *ref, int deref, struct object **obj
 	}
 	if (oid_object_info_extended(the_repository, &oi->oid, &oi->info,
 				     OBJECT_INFO_LOOKUP_REPLACE))
-		return strbuf_addf_ret(err, -1, _("missing object %s for %s"),
-				       oid_to_hex(&oi->oid), ref->refname);
+		return strbuf_addf_ret(err, 1, _("%s missing"),
+				       oid_to_hex(&oi->oid));
 	if (oi->info.disk_sizep && oi->disk_size < 0)
 		BUG("Object size is less than zero.");
 
diff --git a/ref-filter.h b/ref-filter.h
index 76f9af7b4676..e5fe26e3fdf9 100644
--- a/ref-filter.h
+++ b/ref-filter.h
@@ -78,6 +78,7 @@ struct ref_format {
 	 */
 	const char *format;
 	const char *rest;
+	int cat_file_mode;
 	int quote_style;
 	int use_rest;
 	int use_color;
@@ -86,7 +87,7 @@ struct ref_format {
 	int need_color_reset_at_eol;
 };
 
-#define REF_FORMAT_INIT { NULL, NULL, 0, 0, -1 }
+#define REF_FORMAT_INIT { NULL, NULL, 0, 0, 0, -1 }
 
 /*  Macros for checking --merged and --no-merged options */
 #define _OPT_MERGED_NO_MERGED(option, filter, h) \
diff --git a/t/t1006-cat-file.sh b/t/t1006-cat-file.sh
index 5d2dc99b74ad..c028e6548fed 100755
--- a/t/t1006-cat-file.sh
+++ b/t/t1006-cat-file.sh
@@ -586,4 +586,256 @@ test_expect_success 'cat-file --unordered works' '
 	test_cmp expect actual
 '
 
+. "$TEST_DIRECTORY"/lib-gpg.sh
+. "$TEST_DIRECTORY"/lib-terminal.sh
+
+test_expect_success 'cat-file --batch|--batch-check setup' '
+	echo 1>blob1 &&
+	printf "a\0b\0\c" >blob2 &&
+	git add blob1 blob2 &&
+	git commit -m "Commit Message" &&
+	git branch -M main &&
+	git tag -a -m "v0.0.0" testtag &&
+	git update-ref refs/myblobs/blob1 HEAD:blob1 &&
+	git update-ref refs/myblobs/blob2 HEAD:blob2 &&
+	git update-ref refs/mytrees/tree1 HEAD^{tree}
+'
+
+batch_test_atom() {
+	if test "$3" = "fail"
+	then
+		test_expect_${4:-success} $PREREQ "basic atom: $1 $2 must fail" "
+			test_must_fail git cat-file --batch-check='$2' >bad <<-EOF
+			$1
+			EOF
+		"
+	else
+		test_expect_${4:-success} $PREREQ "basic atom: $1 $2" "
+			git for-each-ref --format='$2' $1 >expected &&
+			git cat-file --batch-check='$2' >actual <<-EOF &&
+			$1
+			EOF
+			sanitize_pgp <actual >actual.clean &&
+			cmp expected actual.clean
+		"
+	fi
+}
+
+batch_test_atom refs/heads/main '%(refname)' fail
+batch_test_atom refs/heads/main '%(refname:)' fail
+batch_test_atom refs/heads/main '%(refname:short)' fail
+batch_test_atom refs/heads/main '%(refname:lstrip=1)' fail
+batch_test_atom refs/heads/main '%(refname:lstrip=2)' fail
+batch_test_atom refs/heads/main '%(refname:lstrip=-1)' fail
+batch_test_atom refs/heads/main '%(refname:lstrip=-2)' fail
+batch_test_atom refs/heads/main '%(refname:rstrip=1)' fail
+batch_test_atom refs/heads/main '%(refname:rstrip=2)' fail
+batch_test_atom refs/heads/main '%(refname:rstrip=-1)' fail
+batch_test_atom refs/heads/main '%(refname:rstrip=-2)' fail
+batch_test_atom refs/heads/main '%(refname:strip=1)' fail
+batch_test_atom refs/heads/main '%(refname:strip=2)' fail
+batch_test_atom refs/heads/main '%(refname:strip=-1)' fail
+batch_test_atom refs/heads/main '%(refname:strip=-2)' fail
+batch_test_atom refs/heads/main '%(upstream)' fail
+batch_test_atom refs/heads/main '%(upstream:short)' fail
+batch_test_atom refs/heads/main '%(upstream:lstrip=2)' fail
+batch_test_atom refs/heads/main '%(upstream:lstrip=-2)' fail
+batch_test_atom refs/heads/main '%(upstream:rstrip=2)' fail
+batch_test_atom refs/heads/main '%(upstream:rstrip=-2)' fail
+batch_test_atom refs/heads/main '%(upstream:strip=2)' fail
+batch_test_atom refs/heads/main '%(upstream:strip=-2)' fail
+batch_test_atom refs/heads/main '%(push)' fail
+batch_test_atom refs/heads/main '%(push:short)' fail
+batch_test_atom refs/heads/main '%(push:lstrip=1)' fail
+batch_test_atom refs/heads/main '%(push:lstrip=-1)' fail
+batch_test_atom refs/heads/main '%(push:rstrip=1)' fail
+batch_test_atom refs/heads/main '%(push:rstrip=-1)' fail
+batch_test_atom refs/heads/main '%(push:strip=1)' fail
+batch_test_atom refs/heads/main '%(push:strip=-1)' fail
+batch_test_atom refs/heads/main '%(objecttype)'
+batch_test_atom refs/heads/main '%(objectsize)'
+batch_test_atom refs/heads/main '%(objectsize:disk)'
+batch_test_atom refs/heads/main '%(deltabase)'
+batch_test_atom refs/heads/main '%(objectname)'
+batch_test_atom refs/heads/main '%(objectname:short)'
+batch_test_atom refs/heads/main '%(objectname:short=1)'
+batch_test_atom refs/heads/main '%(objectname:short=10)'
+batch_test_atom refs/heads/main '%(tree)'
+batch_test_atom refs/heads/main '%(tree:short)'
+batch_test_atom refs/heads/main '%(tree:short=1)'
+batch_test_atom refs/heads/main '%(tree:short=10)'
+batch_test_atom refs/heads/main '%(parent)'
+batch_test_atom refs/heads/main '%(parent:short)'
+batch_test_atom refs/heads/main '%(parent:short=1)'
+batch_test_atom refs/heads/main '%(parent:short=10)'
+batch_test_atom refs/heads/main '%(numparent)'
+batch_test_atom refs/heads/main '%(object)'
+batch_test_atom refs/heads/main '%(type)'
+batch_test_atom refs/heads/main '%(raw)'
+batch_test_atom refs/heads/main '%(*objectname)'
+batch_test_atom refs/heads/main '%(*objecttype)'
+batch_test_atom refs/heads/main '%(author)'
+batch_test_atom refs/heads/main '%(authorname)'
+batch_test_atom refs/heads/main '%(authoremail)'
+batch_test_atom refs/heads/main '%(authoremail:trim)'
+batch_test_atom refs/heads/main '%(authoremail:localpart)'
+batch_test_atom refs/heads/main '%(authordate)'
+batch_test_atom refs/heads/main '%(committer)'
+batch_test_atom refs/heads/main '%(committername)'
+batch_test_atom refs/heads/main '%(committeremail)'
+batch_test_atom refs/heads/main '%(committeremail:trim)'
+batch_test_atom refs/heads/main '%(committeremail:localpart)'
+batch_test_atom refs/heads/main '%(committerdate)'
+batch_test_atom refs/heads/main '%(tag)'
+batch_test_atom refs/heads/main '%(tagger)'
+batch_test_atom refs/heads/main '%(taggername)'
+batch_test_atom refs/heads/main '%(taggeremail)'
+batch_test_atom refs/heads/main '%(taggeremail:trim)'
+batch_test_atom refs/heads/main '%(taggeremail:localpart)'
+batch_test_atom refs/heads/main '%(taggerdate)'
+batch_test_atom refs/heads/main '%(creator)'
+batch_test_atom refs/heads/main '%(creatordate)'
+batch_test_atom refs/heads/main '%(subject)'
+batch_test_atom refs/heads/main '%(subject:sanitize)'
+batch_test_atom refs/heads/main '%(contents:subject)'
+batch_test_atom refs/heads/main '%(body)'
+batch_test_atom refs/heads/main '%(contents:body)'
+batch_test_atom refs/heads/main '%(contents:signature)'
+batch_test_atom refs/heads/main '%(contents)'
+batch_test_atom refs/heads/main '%(HEAD)' fail
+batch_test_atom refs/heads/main '%(upstream:track)' fail
+batch_test_atom refs/heads/main '%(upstream:trackshort)' fail
+batch_test_atom refs/heads/main '%(upstream:track,nobracket)' fail
+batch_test_atom refs/heads/main '%(upstream:nobracket,track)' fail
+batch_test_atom refs/heads/main '%(push:track)' fail
+batch_test_atom refs/heads/main '%(push:trackshort)' fail
+batch_test_atom refs/heads/main '%(worktreepath)' fail
+batch_test_atom refs/heads/main '%(symref)' fail
+batch_test_atom refs/heads/main '%(flag)' fail
+
+batch_test_atom refs/tags/testtag '%(refname)' fail
+batch_test_atom refs/tags/testtag '%(refname:short)' fail
+batch_test_atom refs/tags/testtag '%(upstream)' fail
+batch_test_atom refs/tags/testtag '%(push)' fail
+batch_test_atom refs/tags/testtag '%(objecttype)'
+batch_test_atom refs/tags/testtag '%(objectsize)'
+batch_test_atom refs/tags/testtag '%(objectsize:disk)'
+batch_test_atom refs/tags/testtag '%(*objectsize:disk)'
+batch_test_atom refs/tags/testtag '%(deltabase)'
+batch_test_atom refs/tags/testtag '%(*deltabase)'
+batch_test_atom refs/tags/testtag '%(objectname)'
+batch_test_atom refs/tags/testtag '%(objectname:short)'
+batch_test_atom refs/tags/testtag '%(tree)'
+batch_test_atom refs/tags/testtag '%(tree:short)'
+batch_test_atom refs/tags/testtag '%(tree:short=1)'
+batch_test_atom refs/tags/testtag '%(tree:short=10)'
+batch_test_atom refs/tags/testtag '%(parent)'
+batch_test_atom refs/tags/testtag '%(parent:short)'
+batch_test_atom refs/tags/testtag '%(parent:short=1)'
+batch_test_atom refs/tags/testtag '%(parent:short=10)'
+batch_test_atom refs/tags/testtag '%(numparent)'
+batch_test_atom refs/tags/testtag '%(object)'
+batch_test_atom refs/tags/testtag '%(type)'
+batch_test_atom refs/tags/testtag '%(*objectname)'
+batch_test_atom refs/tags/testtag '%(*objecttype)'
+batch_test_atom refs/tags/testtag '%(author)'
+batch_test_atom refs/tags/testtag '%(authorname)'
+batch_test_atom refs/tags/testtag '%(authoremail)'
+batch_test_atom refs/tags/testtag '%(authoremail:trim)'
+batch_test_atom refs/tags/testtag '%(authoremail:localpart)'
+batch_test_atom refs/tags/testtag '%(authordate)'
+batch_test_atom refs/tags/testtag '%(committer)'
+batch_test_atom refs/tags/testtag '%(committername)'
+batch_test_atom refs/tags/testtag '%(committeremail)'
+batch_test_atom refs/tags/testtag '%(committeremail:trim)'
+batch_test_atom refs/tags/testtag '%(committeremail:localpart)'
+batch_test_atom refs/tags/testtag '%(committerdate)'
+batch_test_atom refs/tags/testtag '%(tag)'
+batch_test_atom refs/tags/testtag '%(tagger)'
+batch_test_atom refs/tags/testtag '%(taggername)'
+batch_test_atom refs/tags/testtag '%(taggeremail)'
+batch_test_atom refs/tags/testtag '%(taggeremail:trim)'
+batch_test_atom refs/tags/testtag '%(taggeremail:localpart)'
+batch_test_atom refs/tags/testtag '%(taggerdate)'
+batch_test_atom refs/tags/testtag '%(creator)'
+batch_test_atom refs/tags/testtag '%(creatordate)'
+batch_test_atom refs/tags/testtag '%(subject)'
+batch_test_atom refs/tags/testtag '%(subject:sanitize)'
+batch_test_atom refs/tags/testtag '%(contents:subject)'
+batch_test_atom refs/tags/testtag '%(body)'
+batch_test_atom refs/tags/testtag '%(contents:body)'
+batch_test_atom refs/tags/testtag '%(contents:signature)'
+batch_test_atom refs/tags/testtag '%(contents)'
+batch_test_atom refs/tags/testtag '%(HEAD)' fail
+
+batch_test_atom refs/myblobs/blob1 '%(refname)' fail
+batch_test_atom refs/myblobs/blob1 '%(upstream)' fail
+batch_test_atom refs/myblobs/blob1 '%(push)' fail
+batch_test_atom refs/myblobs/blob1 '%(HEAD)' fail
+
+batch_test_atom refs/myblobs/blob1 '%(objectname)'
+batch_test_atom refs/myblobs/blob1 '%(objecttype)'
+batch_test_atom refs/myblobs/blob1 '%(objectsize)'
+batch_test_atom refs/myblobs/blob1 '%(objectsize:disk)'
+batch_test_atom refs/myblobs/blob1 '%(deltabase)'
+
+batch_test_atom refs/myblobs/blob1 '%(contents)'
+batch_test_atom refs/myblobs/blob2 '%(contents)'
+
+batch_test_atom refs/myblobs/blob1 '%(raw)'
+batch_test_atom refs/myblobs/blob2 '%(raw)'
+batch_test_atom refs/mytrees/tree1 '%(raw)'
+
+batch_test_atom refs/myblobs/blob1 '%(raw:size)'
+batch_test_atom refs/myblobs/blob2 '%(raw:size)'
+batch_test_atom refs/mytrees/tree1 '%(raw:size)'
+
+batch_test_atom refs/myblobs/blob1 '%(if:equals=blob)%(objecttype)%(then)commit%(else)not commit%(end)'
+batch_test_atom refs/myblobs/blob2 '%(if:equals=blob)%(objecttype)%(then)commit%(else)not commit%(end)'
+batch_test_atom refs/mytrees/tree1 '%(if:equals=tree)%(objecttype)%(then)tree%(else)not tree%(end)'
+
+batch_test_atom refs/heads/main '%(align:60) objectname is %(objectname)%(end)|%(objectname)'
+batch_test_atom refs/heads/main '%(align:left,60) objectname is %(objectname)%(end)|%(objectname)'
+batch_test_atom refs/heads/main '%(align:middle,60) objectname is %(objectname)%(end)|%(objectname)'
+batch_test_atom refs/heads/main '%(align:60,right) objectname is %(objectname)%(end)|%(objectname)'
+
+batch_test_atom refs/heads/main 'VALID'
+batch_test_atom refs/heads/main '%(INVALID)' fail
+batch_test_atom refs/heads/main '%(authordate:INVALID)' fail
+
+test_expect_success 'cat-file refs/heads/main refs/tags/testtag %(rest)' '
+	cat >expected <<-EOF &&
+	123 commit 123
+	456 tag 456
+	EOF
+	git cat-file --batch-check="%(rest) %(objecttype) %(rest)" >actual <<-EOF &&
+	refs/heads/main 123
+	refs/tags/testtag 456
+	EOF
+	test_cmp expected actual
+'
+
+batch_test_atom refs/heads/main '%(objectname) %(objecttype) %(objectsize)
+%(raw)'
+batch_test_atom refs/tags/testtag '%(objectname) %(objecttype) %(objectsize)
+%(raw)'
+batch_test_atom refs/myblobs/blob1 '%(objectname) %(objecttype) %(objectsize)
+%(raw)'
+batch_test_atom refs/myblobs/blob2 '%(objectname) %(objecttype) %(objectsize)
+%(raw)'
+
+
+test_expect_success 'cat-file --batch equals to --batch-check with atoms' '
+	git cat-file --batch-check="%(objectname) %(objecttype) %(objectsize)
+%(raw)" >expected <<-EOF &&
+	refs/heads/main
+	refs/tags/testtag
+	EOF
+	git cat-file --batch >actual <<-EOF &&
+	refs/heads/main
+	refs/tags/testtag
+	EOF
+	cmp expected actual
+'
+
 test_done
diff --git a/t/t6301-for-each-ref-errors.sh b/t/t6301-for-each-ref-errors.sh
index 40edf9dab534..3553f84a00c1 100755
--- a/t/t6301-for-each-ref-errors.sh
+++ b/t/t6301-for-each-ref-errors.sh
@@ -41,7 +41,7 @@ test_expect_success 'Missing objects are reported correctly' '
 	r=refs/heads/missing &&
 	echo $MISSING >.git/$r &&
 	test_when_finished "rm -f .git/$r" &&
-	echo "fatal: missing object $MISSING for $r" >missing-err &&
+	echo "fatal: $MISSING missing" >missing-err &&
 	test_must_fail git for-each-ref 2>err &&
 	test_cmp missing-err err &&
 	(
-- 
gitgitgadget


^ permalink raw reply	[flat|nested] 121+ messages in thread

* [PATCH v3 09/10] [GSOC] cat-file: reuse err buf in batch_object_write()
  2021-06-19  7:02   ` [PATCH v3 00/10] " ZheNing Hu via GitGitGadget
                       ` (7 preceding siblings ...)
  2021-06-19  7:02     ` [PATCH v3 08/10] [GSOC] cat-file: reuse ref-filter logic ZheNing Hu via GitGitGadget
@ 2021-06-19  7:02     ` ZheNing Hu via GitGitGadget
  2021-06-19  7:03     ` [PATCH v3 10/10] [GSOC] cat-file: re-implement --textconv, --filters options ZheNing Hu via GitGitGadget
  2021-06-22  3:20     ` [PATCH v4 00/14] [GSOC][RFC] cat-file: reuse ref-filter logic ZheNing Hu via GitGitGadget
  10 siblings, 0 replies; 121+ messages in thread
From: ZheNing Hu via GitGitGadget @ 2021-06-19  7:02 UTC (permalink / raw)
  To: git
  Cc: Junio C Hamano, Christian Couder, Hariom Verma, Bagas Sanjaya,
	Jeff King, Ævar Arnfjörð Bjarmason, ZheNing Hu,
	ZheNing Hu

From: ZheNing Hu <adlternative@gmail.com>

Reuse the `err` buffer in batch_object_write(), as the
buffer `scratch` does. This will reduce the overhead
of multiple allocations of memory of the err buffer.

Mentored-by: Christian Couder <christian.couder@gmail.com>
Mentored-by: Hariom Verma <hariom18599@gmail.com>
Signed-off-by: ZheNing Hu <adlternative@gmail.com>
---
 builtin/cat-file.c | 22 ++++++++++++++--------
 1 file changed, 14 insertions(+), 8 deletions(-)

diff --git a/builtin/cat-file.c b/builtin/cat-file.c
index cbedb8e1c471..b5204493bd56 100644
--- a/builtin/cat-file.c
+++ b/builtin/cat-file.c
@@ -212,32 +212,33 @@ static void batch_write(struct batch_options *opt, const void *data, int len)
 
 static void batch_object_write(const char *obj_name,
 			       struct strbuf *scratch,
+			       struct strbuf *err,
 			       struct batch_options *opt,
 			       struct expand_data *data)
 {
 	int ret = 0;
-	struct strbuf err = STRBUF_INIT;
 	struct ref_array_item item = { data->oid, data->rest };
 
 	strbuf_reset(scratch);
+	strbuf_reset(err);
 
-	ret = format_ref_array_item(&item, &opt->format, scratch, &err);
+	ret = format_ref_array_item(&item, &opt->format, scratch, err);
 	if (!ret) {
 		strbuf_addch(scratch, '\n');
 		batch_write(opt, scratch->buf, scratch->len);
 	} else if (ret < 0) {
-		die("%s\n", err.buf);
+		die("%s\n", err->buf);
 	} else {
 		/* when ret > 0 , don't call die and print the err to stdout*/
-		printf("%s\n", err.buf);
+		printf("%s\n", err->buf);
 		fflush(stdout);
 	}
 	free_ref_array_item_value(&item);
-	strbuf_release(&err);
 }
 
 static void batch_one_object(const char *obj_name,
 			     struct strbuf *scratch,
+			     struct strbuf *err,
 			     struct batch_options *opt,
 			     struct expand_data *data)
 {
@@ -291,7 +292,7 @@ static void batch_one_object(const char *obj_name,
 		return;
 	}
 
-	batch_object_write(obj_name, scratch, opt, data);
+	batch_object_write(obj_name, scratch, err, opt, data);
 }
 
 struct object_cb_data {
@@ -299,13 +300,14 @@ struct object_cb_data {
 	struct expand_data *expand;
 	struct oidset *seen;
 	struct strbuf *scratch;
+	struct strbuf *err;
 };
 
 static int batch_object_cb(const struct object_id *oid, void *vdata)
 {
 	struct object_cb_data *data = vdata;
 	oidcpy(&data->expand->oid, oid);
-	batch_object_write(NULL, data->scratch, data->opt, data->expand);
+	batch_object_write(NULL, data->scratch, data->err, data->opt, data->expand);
 	return 0;
 }
 
@@ -361,6 +363,7 @@ static int batch_objects(struct batch_options *opt, const struct option *options
 {
 	struct strbuf input = STRBUF_INIT;
 	struct strbuf output = STRBUF_INIT;
+	struct strbuf err = STRBUF_INIT;
 	struct strbuf format = STRBUF_INIT;
 	struct expand_data data;
 	int save_warning;
@@ -389,6 +392,7 @@ static int batch_objects(struct batch_options *opt, const struct option *options
 		cb.opt = opt;
 		cb.expand = &data;
 		cb.scratch = &output;
+		cb.err = &err;
 
 		if (opt->unordered) {
 			struct oidset seen = OIDSET_INIT;
@@ -413,6 +417,7 @@ static int batch_objects(struct batch_options *opt, const struct option *options
 
 		strbuf_release(&format);
 		strbuf_release(&output);
+		strbuf_release(&err);
 		return 0;
 	}
 
@@ -441,12 +446,13 @@ static int batch_objects(struct batch_options *opt, const struct option *options
 			data.rest = p;
 		}
 
-		batch_one_object(input.buf, &output, opt, &data);
+		batch_one_object(input.buf, &output, &err, opt, &data);
 	}
 
 	strbuf_release(&format);
 	strbuf_release(&input);
 	strbuf_release(&output);
+	strbuf_release(&err);
 	warn_on_object_refname_ambiguity = save_warning;
 	return retval;
 }
-- 
gitgitgadget


^ permalink raw reply	[flat|nested] 121+ messages in thread

* [PATCH v3 10/10] [GSOC] cat-file: re-implement --textconv, --filters options
  2021-06-19  7:02   ` [PATCH v3 00/10] " ZheNing Hu via GitGitGadget
                       ` (8 preceding siblings ...)
  2021-06-19  7:02     ` [PATCH v3 09/10] [GSOC] cat-file: reuse err buf in batch_object_write() ZheNing Hu via GitGitGadget
@ 2021-06-19  7:03     ` ZheNing Hu via GitGitGadget
  2021-06-22  3:20     ` [PATCH v4 00/14] [GSOC][RFC] cat-file: reuse ref-filter logic ZheNing Hu via GitGitGadget
  10 siblings, 0 replies; 121+ messages in thread
From: ZheNing Hu via GitGitGadget @ 2021-06-19  7:03 UTC (permalink / raw)
  To: git
  Cc: Junio C Hamano, Christian Couder, Hariom Verma, Bagas Sanjaya,
	Jeff King, Ævar Arnfjörð Bjarmason, ZheNing Hu,
	ZheNing Hu

From: ZheNing Hu <adlternative@gmail.com>

After cat-file reuses the ref-filter logic, we re-implement the
functions of --textconv and --filters options.

Add members `use_textconv` and `use_filters` in struct `ref_format`,
and use global variables `use_filters` and `use_textconv` in
`ref-filter.c`, so that we can filter the content of the object
in get_object(). Use `actual_oi` to record the real expand_data:
it may point to the original `oi` or the `act_oi` processed by
`textconv_object()` or `convert_to_working_tree()`. `grab_values()`
will grab the contents of `actual_oi` and `grab_common_values()`
to grab the contents of origin `oi`, this ensures that `%(objectsize)`
still uses the size of the unfiltered data.

In `get_object()`, we made an optimization: Firstly, get the size and
type of the object instead of directly getting the object data.
If using --textconv, after successfully obtaining the filtered object
data, an extra oid_object_info_extended() will be skipped, which can
reduce the cost of object data copy; If using --filter, the data of
the object first will be getted first, and then convert_to_working_tree()
will be used to get the filtered object data.

Mentored-by: Christian Couder <christian.couder@gmail.com>
Mentored-by: Hariom Verma <hariom18599@gmail.com>
Signed-off-by: ZheNing Hu <adlternative@gmail.com>
---
 builtin/cat-file.c |  5 ++++
 ref-filter.c       | 59 ++++++++++++++++++++++++++++++++++++++++++++--
 ref-filter.h       |  4 +++-
 3 files changed, 65 insertions(+), 3 deletions(-)

diff --git a/builtin/cat-file.c b/builtin/cat-file.c
index b5204493bd56..a78cc076aa59 100644
--- a/builtin/cat-file.c
+++ b/builtin/cat-file.c
@@ -377,6 +377,11 @@ static int batch_objects(struct batch_options *opt, const struct option *options
 	if (opt->print_contents)
 		strbuf_addstr(&format, "\n%(raw)");
 	opt->format.format = format.buf;
+	if (opt->cmdmode == 'c')
+		opt->format.use_textconv = 1;
+	else if (opt->cmdmode == 'w')
+		opt->format.use_filters = 1;
+
 	if (verify_ref_format(&opt->format))
 		usage_with_options(cat_file_usage, options);
 
diff --git a/ref-filter.c b/ref-filter.c
index 181d99c92735..99b87742b0fd 100644
--- a/ref-filter.c
+++ b/ref-filter.c
@@ -1,3 +1,4 @@
+#define USE_THE_INDEX_COMPATIBILITY_MACROS
 #include "builtin.h"
 #include "cache.h"
 #include "parse-options.h"
@@ -84,6 +85,9 @@ static struct expand_data {
 	struct object_info info;
 } oi, oi_deref;
 
+int use_filters;
+int use_textconv;
+
 struct ref_to_worktree_entry {
 	struct hashmap_entry ent;
 	struct worktree *wt; /* key is wt->head_ref */
@@ -1031,6 +1035,9 @@ int verify_ref_format(struct ref_format *format)
 					       used_atom[at].atom_type == ATOM_WORKTREEPATH)))
 			die(_("this command reject atom %%(%.*s)"), (int)(ep - sp - 2), sp + 2);
 
+		use_filters = format->use_filters;
+		use_textconv = format->use_textconv;
+
 		if ((format->quote_style == QUOTE_PYTHON ||
 		     format->quote_style == QUOTE_SHELL ||
 		     format->quote_style == QUOTE_TCL) &&
@@ -1742,10 +1749,38 @@ static int get_object(struct ref_array_item *ref, int deref, struct object **obj
 {
 	/* parse_object_buffer() will set eaten to 0 if free() will be needed */
 	int eaten = 1;
+	struct expand_data *actual_oi = oi;
+	struct expand_data act_oi = {0};
+
 	if (oi->info.contentp) {
 		/* We need to know that to use parse_object_buffer properly */
+		void **temp_contentp = oi->info.contentp;
+		oi->info.contentp = NULL;
 		oi->info.sizep = &oi->size;
 		oi->info.typep = &oi->type;
+
+		/* get the type and size */
+		if (oid_object_info_extended(the_repository, &oi->oid, &oi->info,
+					OBJECT_INFO_LOOKUP_REPLACE))
+			return strbuf_addf_ret(err, 1, _("%s missing"),
+					       oid_to_hex(&oi->oid));
+
+		oi->info.sizep = NULL;
+		oi->info.typep = NULL;
+		oi->info.contentp = temp_contentp;
+
+		if (use_textconv && !ref->rest)
+			return strbuf_addf_ret(err, -1, _("missing path for '%s'"),
+					       oid_to_hex(&act_oi.oid));
+		if (use_textconv && oi->type == OBJ_BLOB) {
+			act_oi = *oi;
+			if (textconv_object(the_repository,
+					    ref->rest, 0100644, &act_oi.oid,
+					    1, (char **)(&act_oi.content), &act_oi.size)) {
+				actual_oi = &act_oi;
+				goto success;
+			}
+		}
 	}
 	if (oid_object_info_extended(the_repository, &oi->oid, &oi->info,
 				     OBJECT_INFO_LOOKUP_REPLACE))
@@ -1755,19 +1790,39 @@ static int get_object(struct ref_array_item *ref, int deref, struct object **obj
 		BUG("Object size is less than zero.");
 
 	if (oi->info.contentp) {
-		*obj = parse_object_buffer(the_repository, &oi->oid, oi->type, oi->size, oi->content, &eaten);
+		if (use_filters && !ref->rest)
+			return strbuf_addf_ret(err, -1, _("missing path for '%s'"),
+					       oid_to_hex(&oi->oid));
+		if (use_filters && oi->type == OBJ_BLOB) {
+			struct strbuf strbuf = STRBUF_INIT;
+			struct checkout_metadata meta;
+			act_oi = *oi;
+
+			init_checkout_metadata(&meta, NULL, NULL, &act_oi.oid);
+			if (!convert_to_working_tree(&the_index, ref->rest, act_oi.content, act_oi.size, &strbuf, &meta))
+				die("could not convert '%s' %s",
+					oid_to_hex(&oi->oid), ref->rest);
+			act_oi.size = strbuf.len;
+			act_oi.content = strbuf_detach(&strbuf, NULL);
+			actual_oi = &act_oi;
+		}
+
+success:
+		*obj = parse_object_buffer(the_repository, &actual_oi->oid, actual_oi->type, actual_oi->size, actual_oi->content, &eaten);
 		if (!*obj) {
 			if (!eaten)
 				free(oi->content);
 			return strbuf_addf_ret(err, -1, _("parse_object_buffer failed on %s for %s"),
 					       oid_to_hex(&oi->oid), ref->refname);
 		}
-		grab_values(ref->value, deref, *obj, oi);
+		grab_values(ref->value, deref, *obj, actual_oi);
 	}
 
 	grab_common_values(ref->value, deref, oi);
 	if (!eaten)
 		free(oi->content);
+	if (actual_oi != oi)
+		free(actual_oi->content);
 	return 0;
 }
 
diff --git a/ref-filter.h b/ref-filter.h
index e5fe26e3fdf9..497e3e93632f 100644
--- a/ref-filter.h
+++ b/ref-filter.h
@@ -80,6 +80,8 @@ struct ref_format {
 	const char *rest;
 	int cat_file_mode;
 	int quote_style;
+	int use_textconv;
+	int use_filters;
 	int use_rest;
 	int use_color;
 
@@ -87,7 +89,7 @@ struct ref_format {
 	int need_color_reset_at_eol;
 };
 
-#define REF_FORMAT_INIT { NULL, NULL, 0, 0, 0, -1 }
+#define REF_FORMAT_INIT { .use_color = -1 }
 
 /*  Macros for checking --merged and --no-merged options */
 #define _OPT_MERGED_NO_MERGED(option, filter, h) \
-- 
gitgitgadget

^ permalink raw reply	[flat|nested] 121+ messages in thread

* Re: [PATCH v3 08/10] [GSOC] cat-file: reuse ref-filter logic
  2021-06-19  7:02     ` [PATCH v3 08/10] [GSOC] cat-file: reuse ref-filter logic ZheNing Hu via GitGitGadget
@ 2021-06-21  5:55       ` Christian Couder
  2021-06-21 13:05         ` ZheNing Hu
  0 siblings, 1 reply; 121+ messages in thread
From: Christian Couder @ 2021-06-21  5:55 UTC (permalink / raw)
  To: ZheNing Hu via GitGitGadget
  Cc: git, Junio C Hamano, Hariom Verma, Bagas Sanjaya, Jeff King,
	Ævar Arnfjörð Bjarmason, ZheNing Hu

On Sat, Jun 19, 2021 at 9:03 AM ZheNing Hu via GitGitGadget
<gitgitgadget@gmail.com> wrote:
>
> From: ZheNing Hu <adlternative@gmail.com>
>
> In order to let cat-file use ref-filter logic, the following
> methods are used:

Maybe: s/the following methods are used/let's do the following/

> 1. Add `cat_file_mode` member in struct `ref_format`, this can
> help us reject atoms in verify_ref_format() which cat-file
> cannot use, e.g. `%(refname)`, `%(push)`, `%(upstream)`...
> 2. Change the type of member `format` in struct `batch_options`
> to `ref_format`, We can add format data in it.

Not sure what "We can add format data in it." means.

> 3. Let `batch_objects()` add atoms to format, and use
> `verify_ref_format()` to check atoms.
> 4. Use `has_object_file()` in `batch_one_object()` to check
> whether the input object exists.
> 5. Let get_object() return 1 and print "<oid> missing" instead
> of returning -1 and printing "missing object <oid> for <refname>",
> this can help `format_ref_array_item()` just report that the
> object is missing without letting Git exit.
> 6. Use `format_ref_array_item()` in `batch_object_write()` to
> get the formatted data corresponding to the object. If the
> return value of `format_ref_array_item()` is equals to zero,
> use `batch_write()` to print object data; else if the return
> value less than zero, use `die()` to print the error message
> and exit; else return value greater than zero, only print the

s/else return value greater/else if the return value is greater/

> error message, but not exit.

s/not exit/don't exit/

> 7. Use free_ref_array_item_value() to free ref_array_item's
> value.

That looks like a lot of changes in a single commit. I wonder if this
commit could be split.

> Most of the atoms in `for-each-ref --format` are now supported,
> such as `%(tree)`, `%(parent)`, `%(author)`, `%(tagger)`, `%(if)`,
> `%(then)`, `%(else)`, `%(end)`. But these atoms will be rejected:
> `%(refname)`, `%(symref)`, `%(upstream)`, `%(push)`, `%(worktreepath)`,
> `%(flag)`, `%(HEAD)`, because our objects don't have refname.

s/refname/a refname/

It might be worth talking a bit about possible performance changes.

[...]

> +       ret = format_ref_array_item(&item, &opt->format, scratch, &err);
> +       if (!ret) {
> +               strbuf_addch(scratch, '\n');
> +               batch_write(opt, scratch->buf, scratch->len);
> +       } else if (ret < 0) {
> +               die("%s\n", err.buf);

This if (ret < 0) could be checked first.

> +       } else {
> +               /* when ret > 0 , don't call die and print the err to stdout*/

I think it would be more helpful to tell what ret > 0 means, rather
than what we do below (which can easily be seen).

> +               printf("%s\n", err.buf);
> +               fflush(stdout);
>         }

For example:

       if (ret < 0) {
               die("%s\n", err.buf);
       if (ret) {
               /* ret > 0 means ... */
               printf("%s\n", err.buf);
               fflush(stdout);
       } else {
               strbuf_addch(scratch, '\n');
               batch_write(opt, scratch->buf, scratch->len);
       }

> +       free_ref_array_item_value(&item);
> +       strbuf_release(&err);
>  }

[...]

> +static int batch_objects(struct batch_options *opt, const struct option *options)

It's unfortunate that one argument is called "opt" and the other one
"options". I wonder if the first one could be called "batch" as it
seems to be called this way somewhere else.

> +       if (!opt->format.format)
> +               strbuf_addstr(&format, "%(objectname) %(objecttype) %(objectsize)");
> +       else
> +               strbuf_addstr(&format, opt->format.format);

If there is no reason for the condition to be (!X) over just (X), I
think the latter is a bit better.

>         if (opt->print_contents)
> -               data.info.typep = &data.type;
> +               strbuf_addstr(&format, "\n%(raw)");
> +       opt->format.format = format.buf;

I wonder if this should be:

      opt->format.format = strbuf_detach(&format, NULL);

> +       if (verify_ref_format(&opt->format))
> +               usage_with_options(cat_file_usage, options);

[...]

> @@ -86,7 +87,7 @@ struct ref_format {
>         int need_color_reset_at_eol;
>  };
>
> -#define REF_FORMAT_INIT { NULL, NULL, 0, 0, -1 }
> +#define REF_FORMAT_INIT { NULL, NULL, 0, 0, 0, -1 }

Maybe this can already be changed to a designated initializer, like
Ævar suggested recently.

[...]

> +test_expect_success 'cat-file refs/heads/main refs/tags/testtag %(rest)' '

If this test is about checking that %(rest) works with both a branch
and a tag, it might be better to say it more explicitly.

> +       cat >expected <<-EOF &&
> +       123 commit 123
> +       456 tag 456
> +       EOF
> +       git cat-file --batch-check="%(rest) %(objecttype) %(rest)" >actual <<-EOF &&
> +       refs/heads/main 123
> +       refs/tags/testtag 456
> +       EOF
> +       test_cmp expected actual
> +'

^ permalink raw reply	[flat|nested] 121+ messages in thread

* Re: [PATCH v3 08/10] [GSOC] cat-file: reuse ref-filter logic
  2021-06-21  5:55       ` Christian Couder
@ 2021-06-21 13:05         ` ZheNing Hu
  0 siblings, 0 replies; 121+ messages in thread
From: ZheNing Hu @ 2021-06-21 13:05 UTC (permalink / raw)
  To: Christian Couder
  Cc: ZheNing Hu via GitGitGadget, git, Junio C Hamano, Hariom Verma,
	Bagas Sanjaya, Jeff King, Ævar Arnfjörð Bjarmason

Christian Couder <christian.couder@gmail.com> 于2021年6月21日周一 下午1:55写道:
>
> > 1. Add `cat_file_mode` member in struct `ref_format`, this can
> > help us reject atoms in verify_ref_format() which cat-file
> > cannot use, e.g. `%(refname)`, `%(push)`, `%(upstream)`...
> > 2. Change the type of member `format` in struct `batch_options`
> > to `ref_format`, We can add format data in it.
>
> Not sure what "We can add format data in it." means.

Well, there is something wrong with the expression here. What I want
to express is that we can fill its member "format" with the atoms like
"%(objectname) %(refname)", and then pass it to the ref-filter.

> > 7. Use free_ref_array_item_value() to free ref_array_item's
> > value.
>
> That looks like a lot of changes in a single commit. I wonder if this
> commit could be split.
>

Yeah, But I don’t know if I should take it apart step by step, If taken apart,
those intermediate commits are likely to fail the test.

> > Most of the atoms in `for-each-ref --format` are now supported,
> > such as `%(tree)`, `%(parent)`, `%(author)`, `%(tagger)`, `%(if)`,
> > `%(then)`, `%(else)`, `%(end)`. But these atoms will be rejected:
> > `%(refname)`, `%(symref)`, `%(upstream)`, `%(push)`, `%(worktreepath)`,
> > `%(flag)`, `%(HEAD)`, because our objects don't have refname.
>
> s/refname/a refname/
>
> It might be worth talking a bit about possible performance changes.
>

Makes sense. The performance has indeed deteriorated.

> [...]
>
> > +       ret = format_ref_array_item(&item, &opt->format, scratch, &err);
> > +       if (!ret) {
> > +               strbuf_addch(scratch, '\n');
> > +               batch_write(opt, scratch->buf, scratch->len);
> > +       } else if (ret < 0) {
> > +               die("%s\n", err.buf);
>
> This if (ret < 0) could be checked first.

Yes, it is better to put error checking first.

>
> > +       } else {
> > +               /* when ret > 0 , don't call die and print the err to stdout*/
>
> I think it would be more helpful to tell what ret > 0 means, rather
> than what we do below (which can easily be seen).
>

Ah, There is indeed only one situation for ret > 0 for the time being:
Show "<oid> missing" without exiting Git.

> > +               printf("%s\n", err.buf);
> > +               fflush(stdout);
> >         }
>
> For example:
>
>        if (ret < 0) {
>                die("%s\n", err.buf);
>        if (ret) {
>                /* ret > 0 means ... */
>                printf("%s\n", err.buf);
>                fflush(stdout);
>        } else {
>                strbuf_addch(scratch, '\n');
>                batch_write(opt, scratch->buf, scratch->len);
>        }
>

Yes, this might be better.

> > +       free_ref_array_item_value(&item);
> > +       strbuf_release(&err);
> >  }
>
> [...]
>
> > +static int batch_objects(struct batch_options *opt, const struct option *options)
>
> It's unfortunate that one argument is called "opt" and the other one
> "options". I wonder if the first one could be called "batch" as it
> seems to be called this way somewhere else.
>

OK.

> > +       if (!opt->format.format)
> > +               strbuf_addstr(&format, "%(objectname) %(objecttype) %(objectsize)");
> > +       else
> > +               strbuf_addstr(&format, opt->format.format);
>
> If there is no reason for the condition to be (!X) over just (X), I
> think the latter is a bit better.
>

Although I think it doesn’t matter which one to use first,
I will still follow your suggestions.

> >         if (opt->print_contents)
> > -               data.info.typep = &data.type;
> > +               strbuf_addstr(&format, "\n%(raw)");
> > +       opt->format.format = format.buf;
>
> I wonder if this should be:
>
>       opt->format.format = strbuf_detach(&format, NULL);
>

No. here our opt->format.format will not be changed, it would
be better for us to use `strbuf_release(&format)` for resource
recovery. (strbuf_detach() will forced to let us free opt->format.format)

> > +       if (verify_ref_format(&opt->format))
> > +               usage_with_options(cat_file_usage, options);
>
> [...]
>
> > @@ -86,7 +87,7 @@ struct ref_format {
> >         int need_color_reset_at_eol;
> >  };
> >
> > -#define REF_FORMAT_INIT { NULL, NULL, 0, 0, -1 }
> > +#define REF_FORMAT_INIT { NULL, NULL, 0, 0, 0, -1 }
>
> Maybe this can already be changed to a designated initializer, like
> Ævar suggested recently.
>

I agree.

> [...]
>
> > +test_expect_success 'cat-file refs/heads/main refs/tags/testtag %(rest)' '
>
> If this test is about checking that %(rest) works with both a branch
> and a tag, it might be better to say it more explicitly.
>

OK.

Thanks.
--
ZheNing Hu

^ permalink raw reply	[flat|nested] 121+ messages in thread

* [PATCH v4 00/14] [GSOC][RFC] cat-file: reuse ref-filter logic
  2021-06-19  7:02   ` [PATCH v3 00/10] " ZheNing Hu via GitGitGadget
                       ` (9 preceding siblings ...)
  2021-06-19  7:03     ` [PATCH v3 10/10] [GSOC] cat-file: re-implement --textconv, --filters options ZheNing Hu via GitGitGadget
@ 2021-06-22  3:20     ` ZheNing Hu via GitGitGadget
  2021-06-22  3:20       ` [PATCH v4 01/14] [GSOC] ref-filter: add obj-type check in grab contents ZheNing Hu via GitGitGadget
                         ` (14 more replies)
  10 siblings, 15 replies; 121+ messages in thread
From: ZheNing Hu via GitGitGadget @ 2021-06-22  3:20 UTC (permalink / raw)
  To: git
  Cc: Junio C Hamano, Christian Couder, Hariom Verma, Bagas Sanjaya,
	Jeff King, Ævar Arnfjörð Bjarmason, ZheNing Hu

This patch series make cat-file reuse ref-filter logic.

Change from last version:

 1. At the suggestion of Christian Couder, split "[GSOC] cat-file: reuse
    ref-filter logic" into multiple small commits: "[GSOC] cat-file: add
    has_object_file() check", "[GSOC] ref-filter: modify the error message
    and value in get_object", "[GSOC] ref-filter: add cat_file_mode in
    struct ref_format".
 2. Change batch_objects parameter name from "opt" to "batch".
 3. Modify test subject.
 4. Begin to use the explicit initialization of REF_FORMAT_INIT in the
    design of %(rest).
 5. Fix some grammatical errors and code style issues.

ZheNing Hu (14):
  [GSOC] ref-filter: add obj-type check in grab contents
  [GSOC] ref-filter: add %(raw) atom
  [GSOC] ref-filter: --format=%(raw) re-support --perl
  [GSOC] ref-filter: use non-const ref_format in *_atom_parser()
  [GSOC] ref-filter: add %(rest) atom
  [GSOC] ref-filter: pass get_object() return value to their callers
  [GSOC] ref-filter: introduce free_ref_array_item_value() function
  [GSOC] ref-filter: add cat_file_mode in struct ref_format
  [GSOC] ref-filter: modify the error message and value in get_object
  [GSOC] cat-file: add has_object_file() check
  [GSOC] cat-file: change batch_objects parameter name
  [GSOC] cat-file: reuse ref-filter logic
  [GSOC] cat-file: reuse err buf in batch_object_write()
  [GSOC] cat-file: re-implement --textconv, --filters options

 Documentation/git-cat-file.txt     |   6 +
 Documentation/git-for-each-ref.txt |   9 +
 builtin/cat-file.c                 | 277 +++++++-----------------
 builtin/tag.c                      |   2 +-
 quote.c                            |  17 ++
 quote.h                            |   1 +
 ref-filter.c                       | 331 +++++++++++++++++++++++------
 ref-filter.h                       |  14 +-
 t/t1006-cat-file.sh                | 252 ++++++++++++++++++++++
 t/t3203-branch-output.sh           |   4 +
 t/t6300-for-each-ref.sh            | 235 ++++++++++++++++++++
 t/t6301-for-each-ref-errors.sh     |   2 +-
 t/t7004-tag.sh                     |   4 +
 t/t7030-verify-tag.sh              |   4 +
 14 files changed, 879 insertions(+), 279 deletions(-)


base-commit: 1197f1a46360d3ae96bd9c15908a3a6f8e562207
Published-As: https://github.com/gitgitgadget/git/releases/tag/pr-980%2Fadlternative%2Fcat-file-batch-refactor-v4
Fetch-It-Via: git fetch https://github.com/gitgitgadget/git pr-980/adlternative/cat-file-batch-refactor-v4
Pull-Request: https://github.com/gitgitgadget/git/pull/980

Range-diff vs v3:

  1:  f72ad9cc5e8b =  1:  f72ad9cc5e8b [GSOC] ref-filter: add obj-type check in grab contents
  2:  ab497d66c116 =  2:  ab497d66c116 [GSOC] ref-filter: add %(raw) atom
  3:  b54dbc431e04 =  3:  b54dbc431e04 [GSOC] ref-filter: --format=%(raw) re-support --perl
  4:  9fbbb3c492f5 =  4:  9fbbb3c492f5 [GSOC] ref-filter: use non-const ref_format in *_atom_parser()
  5:  39a0d93c7bc1 !  5:  08aa44e5e57b [GSOC] ref-filter: add %(rest) atom
     @@ ref-filter.h: struct ref_format {
       };
       
      -#define REF_FORMAT_INIT { NULL, 0, -1 }
     -+#define REF_FORMAT_INIT { NULL, NULL, 0, 0, -1 }
     ++#define REF_FORMAT_INIT { .use_color = -1 }
       
       /*  Macros for checking --merged and --no-merged options */
       #define _OPT_MERGED_NO_MERGED(option, filter, h) \
  6:  35a376db1fc1 =  6:  05682bccf9f9 [GSOC] ref-filter: pass get_object() return value to their callers
  7:  8c1d683ec6e9 =  7:  06db6cd6f1f9 [GSOC] ref-filter: introduce free_ref_array_item_value() function
  -:  ------------ >  8:  b0d9e139935f [GSOC] ref-filter: add cat_file_mode in struct ref_format
  -:  ------------ >  9:  db7dd8b042c2 [GSOC] ref-filter: modify the error message and value in get_object
  -:  ------------ > 10:  6b577969734e [GSOC] cat-file: add has_object_file() check
  -:  ------------ > 11:  069aa203666a [GSOC] cat-file: change batch_objects parameter name
  8:  bd534a266a40 ! 12:  258ec0a46c56 [GSOC] cat-file: reuse ref-filter logic
     @@ Metadata
       ## Commit message ##
          [GSOC] cat-file: reuse ref-filter logic
      
     -    In order to let cat-file use ref-filter logic, the following
     -    methods are used:
     +    In order to let cat-file use ref-filter logic, let's do the
     +    following:
      
     -    1. Add `cat_file_mode` member in struct `ref_format`, this can
     -    help us reject atoms in verify_ref_format() which cat-file
     -    cannot use, e.g. `%(refname)`, `%(push)`, `%(upstream)`...
     -    2. Change the type of member `format` in struct `batch_options`
     -    to `ref_format`, We can add format data in it.
     -    3. Let `batch_objects()` add atoms to format, and use
     +    1. Change the type of member `format` in struct `batch_options`
     +    to `ref_format`, we will pass it to ref-filter later.
     +    2. Let `batch_objects()` add atoms to format, and use
          `verify_ref_format()` to check atoms.
     -    4. Use `has_object_file()` in `batch_one_object()` to check
     -    whether the input object exists.
     -    5. Let get_object() return 1 and print "<oid> missing" instead
     -    of returning -1 and printing "missing object <oid> for <refname>",
     -    this can help `format_ref_array_item()` just report that the
     -    object is missing without letting Git exit.
     -    6. Use `format_ref_array_item()` in `batch_object_write()` to
     +    3. Use `format_ref_array_item()` in `batch_object_write()` to
          get the formatted data corresponding to the object. If the
          return value of `format_ref_array_item()` is equals to zero,
          use `batch_write()` to print object data; else if the return
     -    value less than zero, use `die()` to print the error message
     -    and exit; else return value greater than zero, only print the
     -    error message, but not exit.
     -    7. Use free_ref_array_item_value() to free ref_array_item's
     +    value is less than zero, use `die()` to print the error message
     +    and exit; else if return value is greater than zero, only print
     +    the error message, but don't exit.
     +    4. Use free_ref_array_item_value() to free ref_array_item's
          value.
      
          Most of the atoms in `for-each-ref --format` are now supported,
          such as `%(tree)`, `%(parent)`, `%(author)`, `%(tagger)`, `%(if)`,
          `%(then)`, `%(else)`, `%(end)`. But these atoms will be rejected:
          `%(refname)`, `%(symref)`, `%(upstream)`, `%(push)`, `%(worktreepath)`,
     -    `%(flag)`, `%(HEAD)`, because our objects don't have refname.
     +    `%(flag)`, `%(HEAD)`, because our objects don't have a refname.
     +
     +    The performance for `git cat-file --batch-all-objects
     +    --batch-check` on the Git repository itself with performance
     +    testing tool `hyperfine` changes from 669.4 ms ±  31.1 ms to
     +    1.134 s ±  0.063 s.
     +
     +    The performance for `git cat-file --batch-all-objects --batch
     +    >/dev/null` on the Git repository itself with performance testing
     +    tool `time` change from "27.37s user 0.29s system 98% cpu 28.089
     +    total" to "33.69s user 1.54s system 87% cpu 40.258 total".
      
          Mentored-by: Christian Couder <christian.couder@gmail.com>
          Mentored-by: Hariom Verma <hariom18599@gmail.com>
     @@ builtin/cat-file.c: static void batch_write(struct batch_options *opt, const voi
      -		print_object_or_die(opt, data);
      -		batch_write(opt, "\n", 1);
      +	ret = format_ref_array_item(&item, &opt->format, scratch, &err);
     -+	if (!ret) {
     -+		strbuf_addch(scratch, '\n');
     -+		batch_write(opt, scratch->buf, scratch->len);
     -+	} else if (ret < 0) {
     ++	if (ret < 0) {
      +		die("%s\n", err.buf);
     -+	} else {
     -+		/* when ret > 0 , don't call die and print the err to stdout*/
     ++	} if (ret) {
     ++		/* ret > 0 means when the object corresponding to oid
     ++		 * cannot be found in format_ref_array_item(), we only print
     ++		 * the error message.
     ++		 */
      +		printf("%s\n", err.buf);
      +		fflush(stdout);
     ++	} else {
     ++		strbuf_addch(scratch, '\n');
     ++		batch_write(opt, scratch->buf, scratch->len);
       	}
      +	free_ref_array_item_value(&item);
      +	strbuf_release(&err);
       }
       
       static void batch_one_object(const char *obj_name,
     -@@ builtin/cat-file.c: static void batch_one_object(const char *obj_name,
     - 		return;
     - 	}
     - 
     -+	if (!has_object_file(&data->oid)) {
     -+		printf("%s missing\n",
     -+		       obj_name ? obj_name : oid_to_hex(&data->oid));
     -+		fflush(stdout);
     -+		return;
     -+	}
     -+
     - 	batch_object_write(obj_name, scratch, opt, data);
     - }
     - 
      @@ builtin/cat-file.c: static int batch_unordered_packed(const struct object_id *oid,
       	return batch_unordered_object(oid, data);
       }
       
     --static int batch_objects(struct batch_options *opt)
     +-static int batch_objects(struct batch_options *batch)
      +static const char * const cat_file_usage[] = {
      +	N_("git cat-file (-t [--allow-unknown-type] | -s [--allow-unknown-type] | -e | -p | <type> | --textconv | --filters) [--path=<path>] <object>"),
      +	N_("git cat-file (--batch[=<format>] | --batch-check[=<format>]) [--follow-symlinks] [--textconv | --filters]"),
      +	NULL
      +};
      +
     -+static int batch_objects(struct batch_options *opt, const struct option *options)
     ++static int batch_objects(struct batch_options *batch, const struct option *options)
       {
       	struct strbuf input = STRBUF_INIT;
       	struct strbuf output = STRBUF_INIT;
     @@ builtin/cat-file.c: static int batch_unordered_packed(const struct object_id *oi
       	int save_warning;
       	int retval = 0;
       
     --	if (!opt->format)
     --		opt->format = "%(objectname) %(objecttype) %(objectsize)";
     +-	if (!batch->format)
     +-		batch->format = "%(objectname) %(objecttype) %(objectsize)";
      -
      -	/*
      -	 * Expand once with our special mark_query flag, which will prime the
     @@ builtin/cat-file.c: static int batch_unordered_packed(const struct object_id *oi
      -	 */
       	memset(&data, 0, sizeof(data));
      -	data.mark_query = 1;
     --	strbuf_expand(&output, opt->format, expand_format, &data);
     +-	strbuf_expand(&output, batch->format, expand_format, &data);
      -	data.mark_query = 0;
      -	strbuf_release(&output);
     --	if (opt->cmdmode)
     +-	if (batch->cmdmode)
      -		data.split_on_whitespace = 1;
      -
     --	if (opt->all_objects) {
     +-	if (batch->all_objects) {
      -		struct object_info empty = OBJECT_INFO_INIT;
      -		if (!memcmp(&data.info, &empty, sizeof(empty)))
      -			data.skip_object_info = 1;
     @@ builtin/cat-file.c: static int batch_unordered_packed(const struct object_id *oi
      -	 * If we are printing out the object, then always fill in the type,
      -	 * since we will want to decide whether or not to stream.
      -	 */
     -+	if (!opt->format.format)
     -+		strbuf_addstr(&format, "%(objectname) %(objecttype) %(objectsize)");
     ++	if (batch->format.format)
     ++		strbuf_addstr(&format, batch->format.format);
      +	else
     -+		strbuf_addstr(&format, opt->format.format);
     - 	if (opt->print_contents)
     ++		strbuf_addstr(&format, "%(objectname) %(objecttype) %(objectsize)");
     + 	if (batch->print_contents)
      -		data.info.typep = &data.type;
      +		strbuf_addstr(&format, "\n%(raw)");
     -+	opt->format.format = format.buf;
     -+	if (verify_ref_format(&opt->format))
     ++	batch->format.format = format.buf;
     ++	if (verify_ref_format(&batch->format))
      +		usage_with_options(cat_file_usage, options);
      +
     -+	if (opt->cmdmode || opt->format.use_rest)
     ++	if (batch->cmdmode || batch->format.use_rest)
      +		data.split_on_whitespace = 1;
       
     - 	if (opt->all_objects) {
     + 	if (batch->all_objects) {
       		struct object_cb_data cb;
     -@@ builtin/cat-file.c: static int batch_objects(struct batch_options *opt)
     +@@ builtin/cat-file.c: static int batch_objects(struct batch_options *batch)
       			oid_array_clear(&sa);
       		}
       
     @@ builtin/cat-file.c: static int batch_objects(struct batch_options *opt)
       		strbuf_release(&output);
       		return 0;
       	}
     -@@ builtin/cat-file.c: static int batch_objects(struct batch_options *opt)
     - 		batch_one_object(input.buf, &output, opt, &data);
     +@@ builtin/cat-file.c: static int batch_objects(struct batch_options *batch)
     + 		batch_one_object(input.buf, &output, batch, &data);
       	}
       
      +	strbuf_release(&format);
     @@ builtin/cat-file.c: int cmd_cat_file(int argc, const char **argv, const char *pr
       	if (unknown_type && opt != 't' && opt != 's')
       		die("git cat-file --allow-unknown-type: use with -s or -t");
      
     - ## ref-filter.c ##
     -@@ ref-filter.c: int verify_ref_format(struct ref_format *format)
     - 		if (at < 0)
     - 			die("%s", err.buf);
     - 
     --		if (used_atom[at].atom_type == ATOM_REST)
     --			die("this command reject atom %%(%.*s)", (int)(ep - sp - 2), sp + 2);
     -+		if ((!format->cat_file_mode && used_atom[at].atom_type == ATOM_REST) ||
     -+		    (format->cat_file_mode && (used_atom[at].atom_type == ATOM_FLAG ||
     -+					       used_atom[at].atom_type == ATOM_HEAD ||
     -+					       used_atom[at].atom_type == ATOM_PUSH ||
     -+					       used_atom[at].atom_type == ATOM_REFNAME ||
     -+					       used_atom[at].atom_type == ATOM_SYMREF ||
     -+					       used_atom[at].atom_type == ATOM_UPSTREAM ||
     -+					       used_atom[at].atom_type == ATOM_WORKTREEPATH)))
     -+			die(_("this command reject atom %%(%.*s)"), (int)(ep - sp - 2), sp + 2);
     - 
     - 		if ((format->quote_style == QUOTE_PYTHON ||
     - 		     format->quote_style == QUOTE_SHELL ||
     -@@ ref-filter.c: static int get_object(struct ref_array_item *ref, int deref, struct object **obj
     - 	}
     - 	if (oid_object_info_extended(the_repository, &oi->oid, &oi->info,
     - 				     OBJECT_INFO_LOOKUP_REPLACE))
     --		return strbuf_addf_ret(err, -1, _("missing object %s for %s"),
     --				       oid_to_hex(&oi->oid), ref->refname);
     -+		return strbuf_addf_ret(err, 1, _("%s missing"),
     -+				       oid_to_hex(&oi->oid));
     - 	if (oi->info.disk_sizep && oi->disk_size < 0)
     - 		BUG("Object size is less than zero.");
     - 
     -
     - ## ref-filter.h ##
     -@@ ref-filter.h: struct ref_format {
     - 	 */
     - 	const char *format;
     - 	const char *rest;
     -+	int cat_file_mode;
     - 	int quote_style;
     - 	int use_rest;
     - 	int use_color;
     -@@ ref-filter.h: struct ref_format {
     - 	int need_color_reset_at_eol;
     - };
     - 
     --#define REF_FORMAT_INIT { NULL, NULL, 0, 0, -1 }
     -+#define REF_FORMAT_INIT { NULL, NULL, 0, 0, 0, -1 }
     - 
     - /*  Macros for checking --merged and --no-merged options */
     - #define _OPT_MERGED_NO_MERGED(option, filter, h) \
     -
       ## t/t1006-cat-file.sh ##
      @@ t/t1006-cat-file.sh: test_expect_success 'cat-file --unordered works' '
       	test_cmp expect actual
     @@ t/t1006-cat-file.sh: test_expect_success 'cat-file --unordered works' '
      +batch_test_atom refs/heads/main '%(INVALID)' fail
      +batch_test_atom refs/heads/main '%(authordate:INVALID)' fail
      +
     -+test_expect_success 'cat-file refs/heads/main refs/tags/testtag %(rest)' '
     ++test_expect_success '%(rest) works with both a branch and a tag' '
      +	cat >expected <<-EOF &&
      +	123 commit 123
      +	456 tag 456
     @@ t/t1006-cat-file.sh: test_expect_success 'cat-file --unordered works' '
      +'
      +
       test_done
     -
     - ## t/t6301-for-each-ref-errors.sh ##
     -@@ t/t6301-for-each-ref-errors.sh: test_expect_success 'Missing objects are reported correctly' '
     - 	r=refs/heads/missing &&
     - 	echo $MISSING >.git/$r &&
     - 	test_when_finished "rm -f .git/$r" &&
     --	echo "fatal: missing object $MISSING for $r" >missing-err &&
     -+	echo "fatal: $MISSING missing" >missing-err &&
     - 	test_must_fail git for-each-ref 2>err &&
     - 	test_cmp missing-err err &&
     - 	(
  9:  b66ab0f2d569 ! 13:  bda6aae9a6c9 [GSOC] cat-file: reuse err buf in batch_object_write()
     @@ builtin/cat-file.c: static void batch_write(struct batch_options *opt, const voi
       
      -	ret = format_ref_array_item(&item, &opt->format, scratch, &err);
      +	ret = format_ref_array_item(&item, &opt->format, scratch, err);
     - 	if (!ret) {
     - 		strbuf_addch(scratch, '\n');
     - 		batch_write(opt, scratch->buf, scratch->len);
     - 	} else if (ret < 0) {
     + 	if (ret < 0) {
      -		die("%s\n", err.buf);
      +		die("%s\n", err->buf);
     - 	} else {
     - 		/* when ret > 0 , don't call die and print the err to stdout*/
     + 	} if (ret) {
     + 		/* ret > 0 means when the object corresponding to oid
     + 		 * cannot be found in format_ref_array_item(), we only print
     + 		 * the error message.
     + 		 */
      -		printf("%s\n", err.buf);
      +		printf("%s\n", err->buf);
       		fflush(stdout);
     + 	} else {
     + 		strbuf_addch(scratch, '\n');
     + 		batch_write(opt, scratch->buf, scratch->len);
       	}
       	free_ref_array_item_value(&item);
      -	strbuf_release(&err);
     @@ builtin/cat-file.c: struct object_cb_data {
       	return 0;
       }
       
     -@@ builtin/cat-file.c: static int batch_objects(struct batch_options *opt, const struct option *options
     +@@ builtin/cat-file.c: static int batch_objects(struct batch_options *batch, const struct option *optio
       {
       	struct strbuf input = STRBUF_INIT;
       	struct strbuf output = STRBUF_INIT;
     @@ builtin/cat-file.c: static int batch_objects(struct batch_options *opt, const st
       	struct strbuf format = STRBUF_INIT;
       	struct expand_data data;
       	int save_warning;
     -@@ builtin/cat-file.c: static int batch_objects(struct batch_options *opt, const struct option *options
     - 		cb.opt = opt;
     +@@ builtin/cat-file.c: static int batch_objects(struct batch_options *batch, const struct option *optio
     + 		cb.opt = batch;
       		cb.expand = &data;
       		cb.scratch = &output;
      +		cb.err = &err;
       
     - 		if (opt->unordered) {
     + 		if (batch->unordered) {
       			struct oidset seen = OIDSET_INIT;
     -@@ builtin/cat-file.c: static int batch_objects(struct batch_options *opt, const struct option *options
     +@@ builtin/cat-file.c: static int batch_objects(struct batch_options *batch, const struct option *optio
       
       		strbuf_release(&format);
       		strbuf_release(&output);
     @@ builtin/cat-file.c: static int batch_objects(struct batch_options *opt, const st
       		return 0;
       	}
       
     -@@ builtin/cat-file.c: static int batch_objects(struct batch_options *opt, const struct option *options
     +@@ builtin/cat-file.c: static int batch_objects(struct batch_options *batch, const struct option *optio
       			data.rest = p;
       		}
       
     --		batch_one_object(input.buf, &output, opt, &data);
     -+		batch_one_object(input.buf, &output, &err, opt, &data);
     +-		batch_one_object(input.buf, &output, batch, &data);
     ++		batch_one_object(input.buf, &output, &err, batch, &data);
       	}
       
       	strbuf_release(&format);
 10:  86ac3bcaecea ! 14:  d1114a2bd743 [GSOC] cat-file: re-implement --textconv, --filters options
     @@ Commit message
          Signed-off-by: ZheNing Hu <adlternative@gmail.com>
      
       ## builtin/cat-file.c ##
     -@@ builtin/cat-file.c: static int batch_objects(struct batch_options *opt, const struct option *options
     - 	if (opt->print_contents)
     +@@ builtin/cat-file.c: static int batch_objects(struct batch_options *batch, const struct option *optio
     + 	if (batch->print_contents)
       		strbuf_addstr(&format, "\n%(raw)");
     - 	opt->format.format = format.buf;
     -+	if (opt->cmdmode == 'c')
     -+		opt->format.use_textconv = 1;
     -+	else if (opt->cmdmode == 'w')
     -+		opt->format.use_filters = 1;
     + 	batch->format.format = format.buf;
      +
     - 	if (verify_ref_format(&opt->format))
     ++	if (batch->cmdmode == 'c')
     ++		batch->format.use_textconv = 1;
     ++	else if (batch->cmdmode == 'w')
     ++		batch->format.use_filters = 1;
     ++
     + 	if (verify_ref_format(&batch->format))
       		usage_with_options(cat_file_usage, options);
       
      
     @@ ref-filter.h: struct ref_format {
       	int use_rest;
       	int use_color;
       
     -@@ ref-filter.h: struct ref_format {
     - 	int need_color_reset_at_eol;
     - };
     - 
     --#define REF_FORMAT_INIT { NULL, NULL, 0, 0, 0, -1 }
     -+#define REF_FORMAT_INIT { .use_color = -1 }
     - 
     - /*  Macros for checking --merged and --no-merged options */
     - #define _OPT_MERGED_NO_MERGED(option, filter, h) \

-- 
gitgitgadget

^ permalink raw reply	[flat|nested] 121+ messages in thread

* [PATCH v4 01/14] [GSOC] ref-filter: add obj-type check in grab contents
  2021-06-22  3:20     ` [PATCH v4 00/14] [GSOC][RFC] cat-file: reuse ref-filter logic ZheNing Hu via GitGitGadget
@ 2021-06-22  3:20       ` ZheNing Hu via GitGitGadget
  2021-06-22  3:20       ` [PATCH v4 02/14] [GSOC] ref-filter: add %(raw) atom ZheNing Hu via GitGitGadget
                         ` (13 subsequent siblings)
  14 siblings, 0 replies; 121+ messages in thread
From: ZheNing Hu via GitGitGadget @ 2021-06-22  3:20 UTC (permalink / raw)
  To: git
  Cc: Junio C Hamano, Christian Couder, Hariom Verma, Bagas Sanjaya,
	Jeff King, Ævar Arnfjörð Bjarmason, ZheNing Hu,
	ZheNing Hu

From: ZheNing Hu <adlternative@gmail.com>

Only tag and commit objects use `grab_sub_body_contents()` to grab
object contents in the current codebase.  We want to teach the
function to also handle blobs and trees to get their raw data,
without parsing a blob (whose contents looks like a commit or a tag)
incorrectly as a commit or a tag.

Skip the block of code that is specific to handling commits and tags
early when the given object is of a wrong type to help later
addition to handle other types of objects in this function.

Mentored-by: Christian Couder <christian.couder@gmail.com>
Mentored-by: Hariom Verma <hariom18599@gmail.com>
Helped-by: Junio C Hamano <gitster@pobox.com>
Signed-off-by: ZheNing Hu <adlternative@gmail.com>
---
 ref-filter.c | 24 +++++++++++++++---------
 1 file changed, 15 insertions(+), 9 deletions(-)

diff --git a/ref-filter.c b/ref-filter.c
index 4db0e40ff4c6..5cee6512fbaf 100644
--- a/ref-filter.c
+++ b/ref-filter.c
@@ -1356,11 +1356,12 @@ static void append_lines(struct strbuf *out, const char *buf, unsigned long size
 }
 
 /* See grab_values */
-static void grab_sub_body_contents(struct atom_value *val, int deref, void *buf)
+static void grab_sub_body_contents(struct atom_value *val, int deref, struct expand_data *data)
 {
 	int i;
 	const char *subpos = NULL, *bodypos = NULL, *sigpos = NULL;
 	size_t sublen = 0, bodylen = 0, nonsiglen = 0, siglen = 0;
+	void *buf = data->content;
 
 	for (i = 0; i < used_atom_cnt; i++) {
 		struct used_atom *atom = &used_atom[i];
@@ -1371,10 +1372,13 @@ static void grab_sub_body_contents(struct atom_value *val, int deref, void *buf)
 			continue;
 		if (deref)
 			name++;
-		if (strcmp(name, "body") &&
-		    !starts_with(name, "subject") &&
-		    !starts_with(name, "trailers") &&
-		    !starts_with(name, "contents"))
+
+		if ((data->type != OBJ_TAG &&
+		     data->type != OBJ_COMMIT) ||
+		    (strcmp(name, "body") &&
+		     !starts_with(name, "subject") &&
+		     !starts_with(name, "trailers") &&
+		     !starts_with(name, "contents")))
 			continue;
 		if (!subpos)
 			find_subpos(buf,
@@ -1438,17 +1442,19 @@ static void fill_missing_values(struct atom_value *val)
  * pointed at by the ref itself; otherwise it is the object the
  * ref (which is a tag) refers to.
  */
-static void grab_values(struct atom_value *val, int deref, struct object *obj, void *buf)
+static void grab_values(struct atom_value *val, int deref, struct object *obj, struct expand_data *data)
 {
+	void *buf = data->content;
+
 	switch (obj->type) {
 	case OBJ_TAG:
 		grab_tag_values(val, deref, obj);
-		grab_sub_body_contents(val, deref, buf);
+		grab_sub_body_contents(val, deref, data);
 		grab_person("tagger", val, deref, buf);
 		break;
 	case OBJ_COMMIT:
 		grab_commit_values(val, deref, obj);
-		grab_sub_body_contents(val, deref, buf);
+		grab_sub_body_contents(val, deref, data);
 		grab_person("author", val, deref, buf);
 		grab_person("committer", val, deref, buf);
 		break;
@@ -1678,7 +1684,7 @@ static int get_object(struct ref_array_item *ref, int deref, struct object **obj
 			return strbuf_addf_ret(err, -1, _("parse_object_buffer failed on %s for %s"),
 					       oid_to_hex(&oi->oid), ref->refname);
 		}
-		grab_values(ref->value, deref, *obj, oi->content);
+		grab_values(ref->value, deref, *obj, oi);
 	}
 
 	grab_common_values(ref->value, deref, oi);
-- 
gitgitgadget


^ permalink raw reply	[flat|nested] 121+ messages in thread

* [PATCH v4 02/14] [GSOC] ref-filter: add %(raw) atom
  2021-06-22  3:20     ` [PATCH v4 00/14] [GSOC][RFC] cat-file: reuse ref-filter logic ZheNing Hu via GitGitGadget
  2021-06-22  3:20       ` [PATCH v4 01/14] [GSOC] ref-filter: add obj-type check in grab contents ZheNing Hu via GitGitGadget
@ 2021-06-22  3:20       ` ZheNing Hu via GitGitGadget
  2021-06-24  4:14         ` Bagas Sanjaya
  2021-06-22  3:20       ` [PATCH v4 03/14] [GSOC] ref-filter: --format=%(raw) re-support --perl ZheNing Hu via GitGitGadget
                         ` (12 subsequent siblings)
  14 siblings, 1 reply; 121+ messages in thread
From: ZheNing Hu via GitGitGadget @ 2021-06-22  3:20 UTC (permalink / raw)
  To: git
  Cc: Junio C Hamano, Christian Couder, Hariom Verma, Bagas Sanjaya,
	Jeff King, Ævar Arnfjörð Bjarmason, ZheNing Hu,
	ZheNing Hu

From: ZheNing Hu <adlternative@gmail.com>

Add new formatting option `%(raw)`, which will print the raw
object data without any changes. It will help further to migrate
all cat-file formatting logic from cat-file to ref-filter.

The raw data of blob, tree objects may contain '\0', but most of
the logic in `ref-filter` depends on the output of the atom being
text (specifically, no embedded NULs in it).

E.g. `quote_formatting()` use `strbuf_addstr()` or `*._quote_buf()`
add the data to the buffer. The raw data of a tree object is
`100644 one\0...`, only the `100644 one` will be added to the buffer,
which is incorrect.

Therefore, we need to find a way to record the length of the
atom_value's member `s`. Although strbuf can already record the
string and its length, if we want to replace the type of atom_value's
member `s` with strbuf, many places in ref-filter that are filled
with dynamically allocated mermory in `v->s` are not easy to replace.
At the same time, we need to check if `v->s == NULL` in
populate_value(), and strbuf cannot easily distinguish NULL and empty
strings, but c-style "const char *" can do it. So add a new member in
`struct atom_value`: `s_size`, which can record raw object size, it
can help us add raw object data to the buffer or compare two buffers
which contain raw object data.

Beyond, `--format=%(raw)` cannot be used with `--python`, `--shell`,
`--tcl`, `--perl` because if our binary raw data is passed to a
variable in the host language, the host language may not support
arbitrary binary data in the variables of its string type.

Mentored-by: Christian Couder <christian.couder@gmail.com>
Mentored-by: Hariom Verma <hariom18599@gmail.com>
Helped-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Helped-by: Felipe Contreras <felipe.contreras@gmail.com>
Helped-by: Phillip Wood <phillip.wood@dunelm.org.uk>
Helped-by: Junio C Hamano <gitster@pobox.com>
Based-on-patch-by: Olga Telezhnaya <olyatelezhnaya@gmail.com>
Signed-off-by: ZheNing Hu <adlternative@gmail.com>
---
 Documentation/git-for-each-ref.txt |   9 ++
 ref-filter.c                       | 139 +++++++++++++++----
 t/t6300-for-each-ref.sh            | 216 +++++++++++++++++++++++++++++
 3 files changed, 337 insertions(+), 27 deletions(-)

diff --git a/Documentation/git-for-each-ref.txt b/Documentation/git-for-each-ref.txt
index 2ae2478de706..7f1f0a1ca3b6 100644
--- a/Documentation/git-for-each-ref.txt
+++ b/Documentation/git-for-each-ref.txt
@@ -235,6 +235,15 @@ and `date` to extract the named component.  For email fields (`authoremail`,
 without angle brackets, and `:localpart` to get the part before the `@` symbol
 out of the trimmed email.
 
+The raw data in an object is `raw`.
+
+raw:size::
+	The raw data size of the object.
+
+Note that `--format=%(raw)` can not be used with `--python`, `--shell`, `--tcl`,
+`--perl` because the host language may not support arbitrary binary data in the
+variables of its string type.
+
 The message in a commit or a tag object is `contents`, from which
 `contents:<part>` can be used to extract various parts out of:
 
diff --git a/ref-filter.c b/ref-filter.c
index 5cee6512fbaf..7822be903071 100644
--- a/ref-filter.c
+++ b/ref-filter.c
@@ -144,6 +144,7 @@ enum atom_type {
 	ATOM_BODY,
 	ATOM_TRAILERS,
 	ATOM_CONTENTS,
+	ATOM_RAW,
 	ATOM_UPSTREAM,
 	ATOM_PUSH,
 	ATOM_SYMREF,
@@ -189,6 +190,9 @@ static struct used_atom {
 			struct process_trailer_options trailer_opts;
 			unsigned int nlines;
 		} contents;
+		struct {
+			enum { RAW_BARE, RAW_LENGTH } option;
+		} raw_data;
 		struct {
 			cmp_status cmp_status;
 			const char *str;
@@ -426,6 +430,18 @@ static int contents_atom_parser(const struct ref_format *format, struct used_ato
 	return 0;
 }
 
+static int raw_atom_parser(const struct ref_format *format, struct used_atom *atom,
+				const char *arg, struct strbuf *err)
+{
+	if (!arg)
+		atom->u.raw_data.option = RAW_BARE;
+	else if (!strcmp(arg, "size"))
+		atom->u.raw_data.option = RAW_LENGTH;
+	else
+		return strbuf_addf_ret(err, -1, _("unrecognized %%(raw) argument: %s"), arg);
+	return 0;
+}
+
 static int oid_atom_parser(const struct ref_format *format, struct used_atom *atom,
 			   const char *arg, struct strbuf *err)
 {
@@ -586,6 +602,7 @@ static struct {
 	[ATOM_BODY] = { "body", SOURCE_OBJ, FIELD_STR, body_atom_parser },
 	[ATOM_TRAILERS] = { "trailers", SOURCE_OBJ, FIELD_STR, trailers_atom_parser },
 	[ATOM_CONTENTS] = { "contents", SOURCE_OBJ, FIELD_STR, contents_atom_parser },
+	[ATOM_RAW] = { "raw", SOURCE_OBJ, FIELD_STR, raw_atom_parser },
 	[ATOM_UPSTREAM] = { "upstream", SOURCE_NONE, FIELD_STR, remote_ref_atom_parser },
 	[ATOM_PUSH] = { "push", SOURCE_NONE, FIELD_STR, remote_ref_atom_parser },
 	[ATOM_SYMREF] = { "symref", SOURCE_NONE, FIELD_STR, refname_atom_parser },
@@ -620,12 +637,15 @@ struct ref_formatting_state {
 
 struct atom_value {
 	const char *s;
+	size_t s_size;
 	int (*handler)(struct atom_value *atomv, struct ref_formatting_state *state,
 		       struct strbuf *err);
 	uintmax_t value; /* used for sorting when not FIELD_STR */
 	struct used_atom *atom;
 };
 
+#define ATOM_VALUE_S_SIZE_INIT (-1)
+
 /*
  * Used to parse format string and sort specifiers
  */
@@ -644,13 +664,6 @@ static int parse_ref_filter_atom(const struct ref_format *format,
 		return strbuf_addf_ret(err, -1, _("malformed field name: %.*s"),
 				       (int)(ep-atom), atom);
 
-	/* Do we have the atom already used elsewhere? */
-	for (i = 0; i < used_atom_cnt; i++) {
-		int len = strlen(used_atom[i].name);
-		if (len == ep - atom && !memcmp(used_atom[i].name, atom, len))
-			return i;
-	}
-
 	/*
 	 * If the atom name has a colon, strip it and everything after
 	 * it off - it specifies the format for this entry, and
@@ -660,6 +673,13 @@ static int parse_ref_filter_atom(const struct ref_format *format,
 	arg = memchr(sp, ':', ep - sp);
 	atom_len = (arg ? arg : ep) - sp;
 
+	/* Do we have the atom already used elsewhere? */
+	for (i = 0; i < used_atom_cnt; i++) {
+		int len = strlen(used_atom[i].name);
+		if (len == ep - atom && !memcmp(used_atom[i].name, atom, len))
+			return i;
+	}
+
 	/* Is the atom a valid one? */
 	for (i = 0; i < ARRAY_SIZE(valid_atom); i++) {
 		int len = strlen(valid_atom[i].name);
@@ -709,11 +729,14 @@ static int parse_ref_filter_atom(const struct ref_format *format,
 	return at;
 }
 
-static void quote_formatting(struct strbuf *s, const char *str, int quote_style)
+static void quote_formatting(struct strbuf *s, const char *str, size_t len, int quote_style)
 {
 	switch (quote_style) {
 	case QUOTE_NONE:
-		strbuf_addstr(s, str);
+		if (len != ATOM_VALUE_S_SIZE_INIT)
+			strbuf_add(s, str, len);
+		else
+			strbuf_addstr(s, str);
 		break;
 	case QUOTE_SHELL:
 		sq_quote_buf(s, str);
@@ -740,9 +763,12 @@ static int append_atom(struct atom_value *v, struct ref_formatting_state *state,
 	 * encountered.
 	 */
 	if (!state->stack->prev)
-		quote_formatting(&state->stack->output, v->s, state->quote_style);
+		quote_formatting(&state->stack->output, v->s, v->s_size, state->quote_style);
 	else
-		strbuf_addstr(&state->stack->output, v->s);
+		if (v->s_size != ATOM_VALUE_S_SIZE_INIT)
+			strbuf_add(&state->stack->output, v->s, v->s_size);
+		else
+			strbuf_addstr(&state->stack->output, v->s);
 	return 0;
 }
 
@@ -842,21 +868,23 @@ static int if_atom_handler(struct atom_value *atomv, struct ref_formatting_state
 	return 0;
 }
 
-static int is_empty(const char *s)
+static int is_empty(struct strbuf *buf)
 {
-	while (*s != '\0') {
-		if (!isspace(*s))
-			return 0;
-		s++;
-	}
-	return 1;
-}
+	const char *cur = buf->buf;
+	const char *end = buf->buf + buf->len;
+
+	while (cur != end && (isspace(*cur)))
+		cur++;
+
+	return cur == end;
+ }
 
 static int then_atom_handler(struct atom_value *atomv, struct ref_formatting_state *state,
 			     struct strbuf *err)
 {
 	struct ref_formatting_stack *cur = state->stack;
 	struct if_then_else *if_then_else = NULL;
+	size_t str_len = 0;
 
 	if (cur->at_end == if_then_else_handler)
 		if_then_else = (struct if_then_else *)cur->at_end_data;
@@ -867,18 +895,22 @@ static int then_atom_handler(struct atom_value *atomv, struct ref_formatting_sta
 	if (if_then_else->else_atom_seen)
 		return strbuf_addf_ret(err, -1, _("format: %%(then) atom used after %%(else)"));
 	if_then_else->then_atom_seen = 1;
+	if (if_then_else->str)
+		str_len = strlen(if_then_else->str);
 	/*
 	 * If the 'equals' or 'notequals' attribute is used then
 	 * perform the required comparison. If not, only non-empty
 	 * strings satisfy the 'if' condition.
 	 */
 	if (if_then_else->cmp_status == COMPARE_EQUAL) {
-		if (!strcmp(if_then_else->str, cur->output.buf))
+		if (str_len == cur->output.len &&
+		    !memcmp(if_then_else->str, cur->output.buf, cur->output.len))
 			if_then_else->condition_satisfied = 1;
 	} else if (if_then_else->cmp_status == COMPARE_UNEQUAL) {
-		if (strcmp(if_then_else->str, cur->output.buf))
+		if (str_len != cur->output.len ||
+		    memcmp(if_then_else->str, cur->output.buf, cur->output.len))
 			if_then_else->condition_satisfied = 1;
-	} else if (cur->output.len && !is_empty(cur->output.buf))
+	} else if (cur->output.len && !is_empty(&cur->output))
 		if_then_else->condition_satisfied = 1;
 	strbuf_reset(&cur->output);
 	return 0;
@@ -924,7 +956,7 @@ static int end_atom_handler(struct atom_value *atomv, struct ref_formatting_stat
 	 * only on the topmost supporting atom.
 	 */
 	if (!current->prev->prev) {
-		quote_formatting(&s, current->output.buf, state->quote_style);
+		quote_formatting(&s, current->output.buf, current->output.len, state->quote_style);
 		strbuf_swap(&current->output, &s);
 	}
 	strbuf_release(&s);
@@ -974,6 +1006,10 @@ int verify_ref_format(struct ref_format *format)
 		at = parse_ref_filter_atom(format, sp + 2, ep, &err);
 		if (at < 0)
 			die("%s", err.buf);
+		if (format->quote_style && used_atom[at].atom_type == ATOM_RAW &&
+		    used_atom[at].u.raw_data.option == RAW_BARE)
+			die(_("--format=%.*s cannot be used with"
+			      "--python, --shell, --tcl, --perl"), (int)(ep - sp - 2), sp + 2);
 		cp = ep + 1;
 
 		if (skip_prefix(used_atom[at].name, "color:", &color))
@@ -1362,17 +1398,29 @@ static void grab_sub_body_contents(struct atom_value *val, int deref, struct exp
 	const char *subpos = NULL, *bodypos = NULL, *sigpos = NULL;
 	size_t sublen = 0, bodylen = 0, nonsiglen = 0, siglen = 0;
 	void *buf = data->content;
+	unsigned long buf_size = data->size;
 
 	for (i = 0; i < used_atom_cnt; i++) {
 		struct used_atom *atom = &used_atom[i];
 		const char *name = atom->name;
 		struct atom_value *v = &val[i];
+		enum atom_type atom_type = atom->atom_type;
 
 		if (!!deref != (*name == '*'))
 			continue;
 		if (deref)
 			name++;
 
+		if (atom_type == ATOM_RAW) {
+			if (atom->u.raw_data.option == RAW_BARE) {
+				v->s = xmemdupz(buf, buf_size);
+				v->s_size = buf_size;
+			} else if (atom->u.raw_data.option == RAW_LENGTH) {
+				v->s = xstrfmt("%"PRIuMAX, (uintmax_t)buf_size);
+			}
+			continue;
+		}
+
 		if ((data->type != OBJ_TAG &&
 		     data->type != OBJ_COMMIT) ||
 		    (strcmp(name, "body") &&
@@ -1460,9 +1508,11 @@ static void grab_values(struct atom_value *val, int deref, struct object *obj, s
 		break;
 	case OBJ_TREE:
 		/* grab_tree_values(val, deref, obj, buf, sz); */
+		grab_sub_body_contents(val, deref, data);
 		break;
 	case OBJ_BLOB:
 		/* grab_blob_values(val, deref, obj, buf, sz); */
+		grab_sub_body_contents(val, deref, data);
 		break;
 	default:
 		die("Eh?  Object of type %d?", obj->type);
@@ -1766,6 +1816,7 @@ static int populate_value(struct ref_array_item *ref, struct strbuf *err)
 		const char *refname;
 		struct branch *branch = NULL;
 
+		v->s_size = ATOM_VALUE_S_SIZE_INIT;
 		v->handler = append_atom;
 		v->atom = atom;
 
@@ -2369,6 +2420,19 @@ static int compare_detached_head(struct ref_array_item *a, struct ref_array_item
 	return 0;
 }
 
+static int memcasecmp(const void *vs1, const void *vs2, size_t n)
+{
+	const char *s1 = vs1, *s2 = vs2;
+	const char *end = s1 + n;
+
+	for (; s1 < end; s1++, s2++) {
+		int diff = tolower(*s1) - tolower(*s2);
+		if (diff)
+			return diff;
+	}
+	return 0;
+}
+
 static int cmp_ref_sorting(struct ref_sorting *s, struct ref_array_item *a, struct ref_array_item *b)
 {
 	struct atom_value *va, *vb;
@@ -2389,10 +2453,30 @@ static int cmp_ref_sorting(struct ref_sorting *s, struct ref_array_item *a, stru
 	} else if (s->sort_flags & REF_SORTING_VERSION) {
 		cmp = versioncmp(va->s, vb->s);
 	} else if (cmp_type == FIELD_STR) {
-		int (*cmp_fn)(const char *, const char *);
-		cmp_fn = s->sort_flags & REF_SORTING_ICASE
-			? strcasecmp : strcmp;
-		cmp = cmp_fn(va->s, vb->s);
+		if (va->s_size == ATOM_VALUE_S_SIZE_INIT &&
+		    vb->s_size == ATOM_VALUE_S_SIZE_INIT) {
+			int (*cmp_fn)(const char *, const char *);
+			cmp_fn = s->sort_flags & REF_SORTING_ICASE
+				? strcasecmp : strcmp;
+			cmp = cmp_fn(va->s, vb->s);
+		} else {
+			size_t a_size = va->s_size == ATOM_VALUE_S_SIZE_INIT ?
+					strlen(va->s) : va->s_size;
+			size_t b_size = vb->s_size == ATOM_VALUE_S_SIZE_INIT ?
+					strlen(vb->s) : vb->s_size;
+			int (*cmp_fn)(const void *, const void *, size_t);
+			cmp_fn = s->sort_flags & REF_SORTING_ICASE
+				? memcasecmp : memcmp;
+
+			cmp = cmp_fn(va->s, vb->s, b_size > a_size ?
+				     a_size : b_size);
+			if (!cmp) {
+				if (a_size > b_size)
+					cmp = 1;
+				else if (a_size < b_size)
+					cmp = -1;
+			}
+		}
 	} else {
 		if (va->value < vb->value)
 			cmp = -1;
@@ -2492,6 +2576,7 @@ int format_ref_array_item(struct ref_array_item *info,
 	}
 	if (format->need_color_reset_at_eol) {
 		struct atom_value resetv;
+		resetv.s_size = ATOM_VALUE_S_SIZE_INIT;
 		resetv.s = GIT_COLOR_RESET;
 		if (append_atom(&resetv, &state, error_buf)) {
 			pop_stack_element(&state.stack);
diff --git a/t/t6300-for-each-ref.sh b/t/t6300-for-each-ref.sh
index 9e0214076b4d..9c5379e2f56f 100755
--- a/t/t6300-for-each-ref.sh
+++ b/t/t6300-for-each-ref.sh
@@ -130,6 +130,8 @@ test_atom head parent:short=10 ''
 test_atom head numparent 0
 test_atom head object ''
 test_atom head type ''
+test_atom head raw "$(git cat-file commit refs/heads/main)
+"
 test_atom head '*objectname' ''
 test_atom head '*objecttype' ''
 test_atom head author 'A U Thor <author@example.com> 1151968724 +0200'
@@ -221,6 +223,15 @@ test_atom tag contents 'Tagging at 1151968727
 '
 test_atom tag HEAD ' '
 
+test_expect_success 'basic atom: refs/tags/testtag *raw' '
+	git cat-file commit refs/tags/testtag^{} >expected &&
+	git for-each-ref --format="%(*raw)" refs/tags/testtag >actual &&
+	sanitize_pgp <expected >expected.clean &&
+	echo >>expected.clean &&
+	sanitize_pgp <actual >actual.clean &&
+	test_cmp expected.clean actual.clean
+'
+
 test_expect_success 'Check invalid atoms names are errors' '
 	test_must_fail git for-each-ref --format="%(INVALID)" refs/heads
 '
@@ -686,6 +697,15 @@ test_atom refs/tags/signed-empty contents:body ''
 test_atom refs/tags/signed-empty contents:signature "$sig"
 test_atom refs/tags/signed-empty contents "$sig"
 
+test_expect_success GPG 'basic atom: refs/tags/signed-empty raw' '
+	git cat-file tag refs/tags/signed-empty >expected &&
+	git for-each-ref --format="%(raw)" refs/tags/signed-empty >actual &&
+	sanitize_pgp <expected >expected.clean &&
+	echo >>expected.clean &&
+	sanitize_pgp <actual >actual.clean &&
+	test_cmp expected.clean actual.clean
+'
+
 test_atom refs/tags/signed-short subject 'subject line'
 test_atom refs/tags/signed-short subject:sanitize 'subject-line'
 test_atom refs/tags/signed-short contents:subject 'subject line'
@@ -695,6 +715,15 @@ test_atom refs/tags/signed-short contents:signature "$sig"
 test_atom refs/tags/signed-short contents "subject line
 $sig"
 
+test_expect_success GPG 'basic atom: refs/tags/signed-short raw' '
+	git cat-file tag refs/tags/signed-short >expected &&
+	git for-each-ref --format="%(raw)" refs/tags/signed-short >actual &&
+	sanitize_pgp <expected >expected.clean &&
+	echo >>expected.clean &&
+	sanitize_pgp <actual >actual.clean &&
+	test_cmp expected.clean actual.clean
+'
+
 test_atom refs/tags/signed-long subject 'subject line'
 test_atom refs/tags/signed-long subject:sanitize 'subject-line'
 test_atom refs/tags/signed-long contents:subject 'subject line'
@@ -708,6 +737,15 @@ test_atom refs/tags/signed-long contents "subject line
 body contents
 $sig"
 
+test_expect_success GPG 'basic atom: refs/tags/signed-long raw' '
+	git cat-file tag refs/tags/signed-long >expected &&
+	git for-each-ref --format="%(raw)" refs/tags/signed-long >actual &&
+	sanitize_pgp <expected >expected.clean &&
+	echo >>expected.clean &&
+	sanitize_pgp <actual >actual.clean &&
+	test_cmp expected.clean actual.clean
+'
+
 test_expect_success 'set up refs pointing to tree and blob' '
 	git update-ref refs/mytrees/first refs/heads/main^{tree} &&
 	git update-ref refs/myblobs/first refs/heads/main:one
@@ -720,6 +758,16 @@ test_atom refs/mytrees/first contents:body ""
 test_atom refs/mytrees/first contents:signature ""
 test_atom refs/mytrees/first contents ""
 
+test_expect_success 'basic atom: refs/mytrees/first raw' '
+	git cat-file tree refs/mytrees/first >expected &&
+	echo >>expected &&
+	git for-each-ref --format="%(raw)" refs/mytrees/first >actual &&
+	test_cmp expected actual &&
+	git cat-file -s refs/mytrees/first >expected &&
+	git for-each-ref --format="%(raw:size)" refs/mytrees/first >actual &&
+	test_cmp expected actual
+'
+
 test_atom refs/myblobs/first subject ""
 test_atom refs/myblobs/first contents:subject ""
 test_atom refs/myblobs/first body ""
@@ -727,6 +775,174 @@ test_atom refs/myblobs/first contents:body ""
 test_atom refs/myblobs/first contents:signature ""
 test_atom refs/myblobs/first contents ""
 
+test_expect_success 'basic atom: refs/myblobs/first raw' '
+	git cat-file blob refs/myblobs/first >expected &&
+	echo >>expected &&
+	git for-each-ref --format="%(raw)" refs/myblobs/first >actual &&
+	test_cmp expected actual &&
+	git cat-file -s refs/myblobs/first >expected &&
+	git for-each-ref --format="%(raw:size)" refs/myblobs/first >actual &&
+	test_cmp expected actual
+'
+
+test_expect_success 'set up refs pointing to binary blob' '
+	printf "a\0b\0c" >blob1 &&
+	printf "a\0c\0b" >blob2 &&
+	printf "\0a\0b\0c" >blob3 &&
+	printf "abc" >blob4 &&
+	printf "\0 \0 \0 " >blob5 &&
+	printf "\0 \0a\0 " >blob6 &&
+	printf "  " >blob7 &&
+	>blob8 &&
+	obj=$(git hash-object -w blob1) &&
+        git update-ref refs/myblobs/blob1 "$obj" &&
+	obj=$(git hash-object -w blob2) &&
+        git update-ref refs/myblobs/blob2 "$obj" &&
+	obj=$(git hash-object -w blob3) &&
+        git update-ref refs/myblobs/blob3 "$obj" &&
+	obj=$(git hash-object -w blob4) &&
+        git update-ref refs/myblobs/blob4 "$obj" &&
+	obj=$(git hash-object -w blob5) &&
+        git update-ref refs/myblobs/blob5 "$obj" &&
+	obj=$(git hash-object -w blob6) &&
+        git update-ref refs/myblobs/blob6 "$obj" &&
+	obj=$(git hash-object -w blob7) &&
+        git update-ref refs/myblobs/blob7 "$obj" &&
+	obj=$(git hash-object -w blob8) &&
+        git update-ref refs/myblobs/blob8 "$obj"
+'
+
+test_expect_success 'Verify sorts with raw' '
+	cat >expected <<-EOF &&
+	refs/myblobs/blob8
+	refs/myblobs/blob5
+	refs/myblobs/blob6
+	refs/myblobs/blob3
+	refs/myblobs/blob7
+	refs/mytrees/first
+	refs/myblobs/first
+	refs/myblobs/blob1
+	refs/myblobs/blob2
+	refs/myblobs/blob4
+	refs/heads/main
+	EOF
+	git for-each-ref --format="%(refname)" --sort=raw \
+		refs/heads/main refs/myblobs/ refs/mytrees/first >actual &&
+	test_cmp expected actual
+'
+
+test_expect_success 'Verify sorts with raw:size' '
+	cat >expected <<-EOF &&
+	refs/myblobs/blob8
+	refs/myblobs/first
+	refs/myblobs/blob7
+	refs/heads/main
+	refs/myblobs/blob4
+	refs/myblobs/blob1
+	refs/myblobs/blob2
+	refs/myblobs/blob3
+	refs/myblobs/blob5
+	refs/myblobs/blob6
+	refs/mytrees/first
+	EOF
+	git for-each-ref --format="%(refname)" --sort=raw:size \
+		refs/heads/main refs/myblobs/ refs/mytrees/first >actual &&
+	test_cmp expected actual
+'
+
+test_expect_success 'validate raw atom with %(if:equals)' '
+	cat >expected <<-EOF &&
+	not equals
+	not equals
+	not equals
+	not equals
+	not equals
+	not equals
+	refs/myblobs/blob4
+	not equals
+	not equals
+	not equals
+	not equals
+	not equals
+	EOF
+	git for-each-ref --format="%(if:equals=abc)%(raw)%(then)%(refname)%(else)not equals%(end)" \
+		refs/myblobs/ refs/heads/ >actual &&
+	test_cmp expected actual
+'
+
+test_expect_success 'validate raw atom with %(if:notequals)' '
+	cat >expected <<-EOF &&
+	refs/heads/ambiguous
+	refs/heads/main
+	refs/heads/newtag
+	refs/myblobs/blob1
+	refs/myblobs/blob2
+	refs/myblobs/blob3
+	equals
+	refs/myblobs/blob5
+	refs/myblobs/blob6
+	refs/myblobs/blob7
+	refs/myblobs/blob8
+	refs/myblobs/first
+	EOF
+	git for-each-ref --format="%(if:notequals=abc)%(raw)%(then)%(refname)%(else)equals%(end)" \
+		refs/myblobs/ refs/heads/ >actual &&
+	test_cmp expected actual
+'
+
+test_expect_success 'empty raw refs with %(if)' '
+	cat >expected <<-EOF &&
+	refs/myblobs/blob1 not empty
+	refs/myblobs/blob2 not empty
+	refs/myblobs/blob3 not empty
+	refs/myblobs/blob4 not empty
+	refs/myblobs/blob5 not empty
+	refs/myblobs/blob6 not empty
+	refs/myblobs/blob7 empty
+	refs/myblobs/blob8 empty
+	refs/myblobs/first not empty
+	EOF
+	git for-each-ref --format="%(refname) %(if)%(raw)%(then)not empty%(else)empty%(end)" \
+		refs/myblobs/ >actual &&
+	test_cmp expected actual
+'
+
+test_expect_success '%(raw) with --python must fail' '
+	test_must_fail git for-each-ref --format="%(raw)" --python
+'
+
+test_expect_success '%(raw) with --tcl must fail' '
+	test_must_fail git for-each-ref --format="%(raw)" --tcl
+'
+
+test_expect_success '%(raw) with --perl must fail' '
+	test_must_fail git for-each-ref --format="%(raw)" --perl
+'
+
+test_expect_success '%(raw) with --shell must fail' '
+	test_must_fail git for-each-ref --format="%(raw)" --shell
+'
+
+test_expect_success '%(raw) with --shell and --sort=raw must fail' '
+	test_must_fail git for-each-ref --format="%(raw)" --sort=raw --shell
+'
+
+test_expect_success '%(raw:size) with --shell' '
+	git for-each-ref --format="%(raw:size)" | while read line
+	do
+		echo "'\''$line'\''" >>expect
+	done &&
+	git for-each-ref --format="%(raw:size)" --shell >actual &&
+	test_cmp expect actual
+'
+
+test_expect_success 'for-each-ref --format compare with cat-file --batch' '
+	git rev-parse refs/mytrees/first | git cat-file --batch >expected &&
+	git for-each-ref --format="%(objectname) %(objecttype) %(objectsize)
+%(raw)" refs/mytrees/first >actual &&
+	test_cmp expected actual
+'
+
 test_expect_success 'set up multiple-sort tags' '
 	for when in 100000 200000
 	do
-- 
gitgitgadget


^ permalink raw reply	[flat|nested] 121+ messages in thread

* [PATCH v4 03/14] [GSOC] ref-filter: --format=%(raw) re-support --perl
  2021-06-22  3:20     ` [PATCH v4 00/14] [GSOC][RFC] cat-file: reuse ref-filter logic ZheNing Hu via GitGitGadget
  2021-06-22  3:20       ` [PATCH v4 01/14] [GSOC] ref-filter: add obj-type check in grab contents ZheNing Hu via GitGitGadget
  2021-06-22  3:20       ` [PATCH v4 02/14] [GSOC] ref-filter: add %(raw) atom ZheNing Hu via GitGitGadget
@ 2021-06-22  3:20       ` ZheNing Hu via GitGitGadget
  2021-06-22  3:20       ` [PATCH v4 04/14] [GSOC] ref-filter: use non-const ref_format in *_atom_parser() ZheNing Hu via GitGitGadget
                         ` (11 subsequent siblings)
  14 siblings, 0 replies; 121+ messages in thread
From: ZheNing Hu via GitGitGadget @ 2021-06-22  3:20 UTC (permalink / raw)
  To: git
  Cc: Junio C Hamano, Christian Couder, Hariom Verma, Bagas Sanjaya,
	Jeff King, Ævar Arnfjörð Bjarmason, ZheNing Hu,
	ZheNing Hu

From: ZheNing Hu <adlternative@gmail.com>

Because the perl language can handle binary data correctly,
add the function perl_quote_buf_with_len(), which can specify
the length of the data and prevent the data from being truncated
at '\0' to help `--format="%(raw)"` re-support `--perl`.

Helped-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: ZheNing Hu <adlternative@gmail.com>
---
 Documentation/git-for-each-ref.txt |  2 +-
 quote.c                            | 17 +++++++++++++++++
 quote.h                            |  1 +
 ref-filter.c                       | 15 +++++++++++----
 t/t6300-for-each-ref.sh            | 19 +++++++++++++++++--
 5 files changed, 47 insertions(+), 7 deletions(-)

diff --git a/Documentation/git-for-each-ref.txt b/Documentation/git-for-each-ref.txt
index 7f1f0a1ca3b6..ea9b438c16f6 100644
--- a/Documentation/git-for-each-ref.txt
+++ b/Documentation/git-for-each-ref.txt
@@ -241,7 +241,7 @@ raw:size::
 	The raw data size of the object.
 
 Note that `--format=%(raw)` can not be used with `--python`, `--shell`, `--tcl`,
-`--perl` because the host language may not support arbitrary binary data in the
+because the host language may not support arbitrary binary data in the
 variables of its string type.
 
 The message in a commit or a tag object is `contents`, from which
diff --git a/quote.c b/quote.c
index 8a3a5e39eb12..26719d21d1e7 100644
--- a/quote.c
+++ b/quote.c
@@ -471,6 +471,23 @@ void perl_quote_buf(struct strbuf *sb, const char *src)
 	strbuf_addch(sb, sq);
 }
 
+void perl_quote_buf_with_len(struct strbuf *sb, const char *src, size_t len)
+{
+	const char sq = '\'';
+	const char bq = '\\';
+	const char *c = src;
+	const char *end = src + len;
+
+	strbuf_addch(sb, sq);
+	while (c != end) {
+		if (*c == sq || *c == bq)
+			strbuf_addch(sb, bq);
+		strbuf_addch(sb, *c);
+		c++;
+	}
+	strbuf_addch(sb, sq);
+}
+
 void python_quote_buf(struct strbuf *sb, const char *src)
 {
 	const char sq = '\'';
diff --git a/quote.h b/quote.h
index 768cc6338e27..0fe69e264b0e 100644
--- a/quote.h
+++ b/quote.h
@@ -94,6 +94,7 @@ char *quote_path(const char *in, const char *prefix, struct strbuf *out, unsigne
 
 /* quoting as a string literal for other languages */
 void perl_quote_buf(struct strbuf *sb, const char *src);
+void perl_quote_buf_with_len(struct strbuf *sb, const char *src, size_t len);
 void python_quote_buf(struct strbuf *sb, const char *src);
 void tcl_quote_buf(struct strbuf *sb, const char *src);
 void basic_regex_quote_buf(struct strbuf *sb, const char *src);
diff --git a/ref-filter.c b/ref-filter.c
index 7822be903071..797b20ffa612 100644
--- a/ref-filter.c
+++ b/ref-filter.c
@@ -742,7 +742,10 @@ static void quote_formatting(struct strbuf *s, const char *str, size_t len, int
 		sq_quote_buf(s, str);
 		break;
 	case QUOTE_PERL:
-		perl_quote_buf(s, str);
+		if (len != ATOM_VALUE_S_SIZE_INIT)
+			perl_quote_buf_with_len(s, str, len);
+		else
+			perl_quote_buf(s, str);
 		break;
 	case QUOTE_PYTHON:
 		python_quote_buf(s, str);
@@ -1006,10 +1009,14 @@ int verify_ref_format(struct ref_format *format)
 		at = parse_ref_filter_atom(format, sp + 2, ep, &err);
 		if (at < 0)
 			die("%s", err.buf);
-		if (format->quote_style && used_atom[at].atom_type == ATOM_RAW &&
-		    used_atom[at].u.raw_data.option == RAW_BARE)
+
+		if ((format->quote_style == QUOTE_PYTHON ||
+		     format->quote_style == QUOTE_SHELL ||
+		     format->quote_style == QUOTE_TCL) &&
+		     used_atom[at].atom_type == ATOM_RAW &&
+		     used_atom[at].u.raw_data.option == RAW_BARE)
 			die(_("--format=%.*s cannot be used with"
-			      "--python, --shell, --tcl, --perl"), (int)(ep - sp - 2), sp + 2);
+			      "--python, --shell, --tcl"), (int)(ep - sp - 2), sp + 2);
 		cp = ep + 1;
 
 		if (skip_prefix(used_atom[at].name, "color:", &color))
diff --git a/t/t6300-for-each-ref.sh b/t/t6300-for-each-ref.sh
index 9c5379e2f56f..5556063c347d 100755
--- a/t/t6300-for-each-ref.sh
+++ b/t/t6300-for-each-ref.sh
@@ -915,8 +915,23 @@ test_expect_success '%(raw) with --tcl must fail' '
 	test_must_fail git for-each-ref --format="%(raw)" --tcl
 '
 
-test_expect_success '%(raw) with --perl must fail' '
-	test_must_fail git for-each-ref --format="%(raw)" --perl
+test_expect_success '%(raw) with --perl' '
+	git for-each-ref --format="\$name= %(raw);
+print \"\$name\"" refs/myblobs/blob1 --perl | perl > actual &&
+	cmp blob1 actual &&
+	git for-each-ref --format="\$name= %(raw);
+print \"\$name\"" refs/myblobs/blob3 --perl | perl > actual &&
+	cmp blob3 actual &&
+	git for-each-ref --format="\$name= %(raw);
+print \"\$name\"" refs/myblobs/blob8 --perl | perl > actual &&
+	cmp blob8 actual &&
+	git for-each-ref --format="\$name= %(raw);
+print \"\$name\"" refs/myblobs/first --perl | perl > actual &&
+	cmp one actual &&
+	git cat-file tree refs/mytrees/first > expected &&
+	git for-each-ref --format="\$name= %(raw);
+print \"\$name\"" refs/mytrees/first --perl | perl > actual &&
+	cmp expected actual
 '
 
 test_expect_success '%(raw) with --shell must fail' '
-- 
gitgitgadget


^ permalink raw reply	[flat|nested] 121+ messages in thread

* [PATCH v4 04/14] [GSOC] ref-filter: use non-const ref_format in *_atom_parser()
  2021-06-22  3:20     ` [PATCH v4 00/14] [GSOC][RFC] cat-file: reuse ref-filter logic ZheNing Hu via GitGitGadget
                         ` (2 preceding siblings ...)
  2021-06-22  3:20       ` [PATCH v4 03/14] [GSOC] ref-filter: --format=%(raw) re-support --perl ZheNing Hu via GitGitGadget
@ 2021-06-22  3:20       ` ZheNing Hu via GitGitGadget
  2021-06-22  3:20       ` [PATCH v4 05/14] [GSOC] ref-filter: add %(rest) atom ZheNing Hu via GitGitGadget
                         ` (10 subsequent siblings)
  14 siblings, 0 replies; 121+ messages in thread
From: ZheNing Hu via GitGitGadget @ 2021-06-22  3:20 UTC (permalink / raw)
  To: git
  Cc: Junio C Hamano, Christian Couder, Hariom Verma, Bagas Sanjaya,
	Jeff King, Ævar Arnfjörð Bjarmason, ZheNing Hu,
	ZheNing Hu

From: ZheNing Hu <adlternative@gmail.com>

Use non-const ref_format in *_atom_parser(), which can help us
modify the members of ref_format in *_atom_parser().

Mentored-by: Christian Couder <christian.couder@gmail.com>
Mentored-by: Hariom Verma <hariom18599@gmail.com>
Signed-off-by: ZheNing Hu <adlternative@gmail.com>
---
 builtin/tag.c |  2 +-
 ref-filter.c  | 44 ++++++++++++++++++++++----------------------
 ref-filter.h  |  4 ++--
 3 files changed, 25 insertions(+), 25 deletions(-)

diff --git a/builtin/tag.c b/builtin/tag.c
index 82fcfc098242..452558ec9575 100644
--- a/builtin/tag.c
+++ b/builtin/tag.c
@@ -146,7 +146,7 @@ static int verify_tag(const char *name, const char *ref,
 		      const struct object_id *oid, void *cb_data)
 {
 	int flags;
-	const struct ref_format *format = cb_data;
+	struct ref_format *format = cb_data;
 	flags = GPG_VERIFY_VERBOSE;
 
 	if (format->format)
diff --git a/ref-filter.c b/ref-filter.c
index 797b20ffa612..d01a0266fb89 100644
--- a/ref-filter.c
+++ b/ref-filter.c
@@ -226,7 +226,7 @@ static int strbuf_addf_ret(struct strbuf *sb, int ret, const char *fmt, ...)
 	return ret;
 }
 
-static int color_atom_parser(const struct ref_format *format, struct used_atom *atom,
+static int color_atom_parser(struct ref_format *format, struct used_atom *atom,
 			     const char *color_value, struct strbuf *err)
 {
 	if (!color_value)
@@ -264,7 +264,7 @@ static int refname_atom_parser_internal(struct refname_atom *atom, const char *a
 	return 0;
 }
 
-static int remote_ref_atom_parser(const struct ref_format *format, struct used_atom *atom,
+static int remote_ref_atom_parser(struct ref_format *format, struct used_atom *atom,
 				  const char *arg, struct strbuf *err)
 {
 	struct string_list params = STRING_LIST_INIT_DUP;
@@ -311,7 +311,7 @@ static int remote_ref_atom_parser(const struct ref_format *format, struct used_a
 	return 0;
 }
 
-static int objecttype_atom_parser(const struct ref_format *format, struct used_atom *atom,
+static int objecttype_atom_parser(struct ref_format *format, struct used_atom *atom,
 				  const char *arg, struct strbuf *err)
 {
 	if (arg)
@@ -323,7 +323,7 @@ static int objecttype_atom_parser(const struct ref_format *format, struct used_a
 	return 0;
 }
 
-static int objectsize_atom_parser(const struct ref_format *format, struct used_atom *atom,
+static int objectsize_atom_parser(struct ref_format *format, struct used_atom *atom,
 				  const char *arg, struct strbuf *err)
 {
 	if (!arg) {
@@ -343,7 +343,7 @@ static int objectsize_atom_parser(const struct ref_format *format, struct used_a
 	return 0;
 }
 
-static int deltabase_atom_parser(const struct ref_format *format, struct used_atom *atom,
+static int deltabase_atom_parser(struct ref_format *format, struct used_atom *atom,
 				 const char *arg, struct strbuf *err)
 {
 	if (arg)
@@ -355,7 +355,7 @@ static int deltabase_atom_parser(const struct ref_format *format, struct used_at
 	return 0;
 }
 
-static int body_atom_parser(const struct ref_format *format, struct used_atom *atom,
+static int body_atom_parser(struct ref_format *format, struct used_atom *atom,
 			    const char *arg, struct strbuf *err)
 {
 	if (arg)
@@ -364,7 +364,7 @@ static int body_atom_parser(const struct ref_format *format, struct used_atom *a
 	return 0;
 }
 
-static int subject_atom_parser(const struct ref_format *format, struct used_atom *atom,
+static int subject_atom_parser(struct ref_format *format, struct used_atom *atom,
 			       const char *arg, struct strbuf *err)
 {
 	if (!arg)
@@ -376,7 +376,7 @@ static int subject_atom_parser(const struct ref_format *format, struct used_atom
 	return 0;
 }
 
-static int trailers_atom_parser(const struct ref_format *format, struct used_atom *atom,
+static int trailers_atom_parser(struct ref_format *format, struct used_atom *atom,
 				const char *arg, struct strbuf *err)
 {
 	atom->u.contents.trailer_opts.no_divider = 1;
@@ -402,7 +402,7 @@ static int trailers_atom_parser(const struct ref_format *format, struct used_ato
 	return 0;
 }
 
-static int contents_atom_parser(const struct ref_format *format, struct used_atom *atom,
+static int contents_atom_parser(struct ref_format *format, struct used_atom *atom,
 				const char *arg, struct strbuf *err)
 {
 	if (!arg)
@@ -430,7 +430,7 @@ static int contents_atom_parser(const struct ref_format *format, struct used_ato
 	return 0;
 }
 
-static int raw_atom_parser(const struct ref_format *format, struct used_atom *atom,
+static int raw_atom_parser(struct ref_format *format, struct used_atom *atom,
 				const char *arg, struct strbuf *err)
 {
 	if (!arg)
@@ -442,7 +442,7 @@ static int raw_atom_parser(const struct ref_format *format, struct used_atom *at
 	return 0;
 }
 
-static int oid_atom_parser(const struct ref_format *format, struct used_atom *atom,
+static int oid_atom_parser(struct ref_format *format, struct used_atom *atom,
 			   const char *arg, struct strbuf *err)
 {
 	if (!arg)
@@ -461,7 +461,7 @@ static int oid_atom_parser(const struct ref_format *format, struct used_atom *at
 	return 0;
 }
 
-static int person_email_atom_parser(const struct ref_format *format, struct used_atom *atom,
+static int person_email_atom_parser(struct ref_format *format, struct used_atom *atom,
 				    const char *arg, struct strbuf *err)
 {
 	if (!arg)
@@ -475,7 +475,7 @@ static int person_email_atom_parser(const struct ref_format *format, struct used
 	return 0;
 }
 
-static int refname_atom_parser(const struct ref_format *format, struct used_atom *atom,
+static int refname_atom_parser(struct ref_format *format, struct used_atom *atom,
 			       const char *arg, struct strbuf *err)
 {
 	return refname_atom_parser_internal(&atom->u.refname, arg, atom->name, err);
@@ -492,7 +492,7 @@ static align_type parse_align_position(const char *s)
 	return -1;
 }
 
-static int align_atom_parser(const struct ref_format *format, struct used_atom *atom,
+static int align_atom_parser(struct ref_format *format, struct used_atom *atom,
 			     const char *arg, struct strbuf *err)
 {
 	struct align *align = &atom->u.align;
@@ -544,7 +544,7 @@ static int align_atom_parser(const struct ref_format *format, struct used_atom *
 	return 0;
 }
 
-static int if_atom_parser(const struct ref_format *format, struct used_atom *atom,
+static int if_atom_parser(struct ref_format *format, struct used_atom *atom,
 			  const char *arg, struct strbuf *err)
 {
 	if (!arg) {
@@ -559,7 +559,7 @@ static int if_atom_parser(const struct ref_format *format, struct used_atom *ato
 	return 0;
 }
 
-static int head_atom_parser(const struct ref_format *format, struct used_atom *atom,
+static int head_atom_parser(struct ref_format *format, struct used_atom *atom,
 			    const char *arg, struct strbuf *unused_err)
 {
 	atom->u.head = resolve_refdup("HEAD", RESOLVE_REF_READING, NULL, NULL);
@@ -570,7 +570,7 @@ static struct {
 	const char *name;
 	info_source source;
 	cmp_type cmp_type;
-	int (*parser)(const struct ref_format *format, struct used_atom *atom,
+	int (*parser)(struct ref_format *format, struct used_atom *atom,
 		      const char *arg, struct strbuf *err);
 } valid_atom[] = {
 	[ATOM_REFNAME] = { "refname", SOURCE_NONE, FIELD_STR, refname_atom_parser },
@@ -649,7 +649,7 @@ struct atom_value {
 /*
  * Used to parse format string and sort specifiers
  */
-static int parse_ref_filter_atom(const struct ref_format *format,
+static int parse_ref_filter_atom(struct ref_format *format,
 				 const char *atom, const char *ep,
 				 struct strbuf *err)
 {
@@ -2553,9 +2553,9 @@ static void append_literal(const char *cp, const char *ep, struct ref_formatting
 }
 
 int format_ref_array_item(struct ref_array_item *info,
-			   const struct ref_format *format,
-			   struct strbuf *final_buf,
-			   struct strbuf *error_buf)
+			  struct ref_format *format,
+			  struct strbuf *final_buf,
+			  struct strbuf *error_buf)
 {
 	const char *cp, *sp, *ep;
 	struct ref_formatting_state state = REF_FORMATTING_STATE_INIT;
@@ -2600,7 +2600,7 @@ int format_ref_array_item(struct ref_array_item *info,
 }
 
 void pretty_print_ref(const char *name, const struct object_id *oid,
-		      const struct ref_format *format)
+		      struct ref_format *format)
 {
 	struct ref_array_item *ref_item;
 	struct strbuf output = STRBUF_INIT;
diff --git a/ref-filter.h b/ref-filter.h
index baf72a718965..74fb423fc89f 100644
--- a/ref-filter.h
+++ b/ref-filter.h
@@ -116,7 +116,7 @@ void ref_array_sort(struct ref_sorting *sort, struct ref_array *array);
 void ref_sorting_set_sort_flags_all(struct ref_sorting *sorting, unsigned int mask, int on);
 /*  Based on the given format and quote_style, fill the strbuf */
 int format_ref_array_item(struct ref_array_item *info,
-			  const struct ref_format *format,
+			  struct ref_format *format,
 			  struct strbuf *final_buf,
 			  struct strbuf *error_buf);
 /*  Parse a single sort specifier and add it to the list */
@@ -137,7 +137,7 @@ void setup_ref_filter_porcelain_msg(void);
  * name must be a fully qualified refname.
  */
 void pretty_print_ref(const char *name, const struct object_id *oid,
-		      const struct ref_format *format);
+		      struct ref_format *format);
 
 /*
  * Push a single ref onto the array; this can be used to construct your own
-- 
gitgitgadget


^ permalink raw reply	[flat|nested] 121+ messages in thread

* [PATCH v4 05/14] [GSOC] ref-filter: add %(rest) atom
  2021-06-22  3:20     ` [PATCH v4 00/14] [GSOC][RFC] cat-file: reuse ref-filter logic ZheNing Hu via GitGitGadget
                         ` (3 preceding siblings ...)
  2021-06-22  3:20       ` [PATCH v4 04/14] [GSOC] ref-filter: use non-const ref_format in *_atom_parser() ZheNing Hu via GitGitGadget
@ 2021-06-22  3:20       ` ZheNing Hu via GitGitGadget
  2021-06-22  3:20       ` [PATCH v4 06/14] [GSOC] ref-filter: pass get_object() return value to their callers ZheNing Hu via GitGitGadget
                         ` (9 subsequent siblings)
  14 siblings, 0 replies; 121+ messages in thread
From: ZheNing Hu via GitGitGadget @ 2021-06-22  3:20 UTC (permalink / raw)
  To: git
  Cc: Junio C Hamano, Christian Couder, Hariom Verma, Bagas Sanjaya,
	Jeff King, Ævar Arnfjörð Bjarmason, ZheNing Hu,
	ZheNing Hu

From: ZheNing Hu <adlternative@gmail.com>

In order to let "cat-file --batch=%(rest)" use the ref-filter
interface, add %(rest) atom for ref-filter. "git for-each-ref",
"git branch", "git tag" and "git verify-tag" will reject %(rest)
by default.

Mentored-by: Christian Couder <christian.couder@gmail.com>
Mentored-by: Hariom Verma <hariom18599@gmail.com>
Signed-off-by: ZheNing Hu <adlternative@gmail.com>
---
 ref-filter.c             | 21 +++++++++++++++++++++
 ref-filter.h             |  5 ++++-
 t/t3203-branch-output.sh |  4 ++++
 t/t6300-for-each-ref.sh  |  4 ++++
 t/t7004-tag.sh           |  4 ++++
 t/t7030-verify-tag.sh    |  4 ++++
 6 files changed, 41 insertions(+), 1 deletion(-)

diff --git a/ref-filter.c b/ref-filter.c
index d01a0266fb89..10c78de9cfa4 100644
--- a/ref-filter.c
+++ b/ref-filter.c
@@ -157,6 +157,7 @@ enum atom_type {
 	ATOM_IF,
 	ATOM_THEN,
 	ATOM_ELSE,
+	ATOM_REST,
 };
 
 /*
@@ -559,6 +560,15 @@ static int if_atom_parser(struct ref_format *format, struct used_atom *atom,
 	return 0;
 }
 
+static int rest_atom_parser(struct ref_format *format, struct used_atom *atom,
+			    const char *arg, struct strbuf *err)
+{
+	if (arg)
+		return strbuf_addf_ret(err, -1, _("%%(rest) does not take arguments"));
+	format->use_rest = 1;
+	return 0;
+}
+
 static int head_atom_parser(struct ref_format *format, struct used_atom *atom,
 			    const char *arg, struct strbuf *unused_err)
 {
@@ -615,6 +625,7 @@ static struct {
 	[ATOM_IF] = { "if", SOURCE_NONE, FIELD_STR, if_atom_parser },
 	[ATOM_THEN] = { "then", SOURCE_NONE },
 	[ATOM_ELSE] = { "else", SOURCE_NONE },
+	[ATOM_REST] = { "rest", SOURCE_NONE, FIELD_STR, rest_atom_parser },
 	/*
 	 * Please update $__git_ref_fieldlist in git-completion.bash
 	 * when you add new atoms
@@ -1010,6 +1021,9 @@ int verify_ref_format(struct ref_format *format)
 		if (at < 0)
 			die("%s", err.buf);
 
+		if (used_atom[at].atom_type == ATOM_REST)
+			die("this command reject atom %%(%.*s)", (int)(ep - sp - 2), sp + 2);
+
 		if ((format->quote_style == QUOTE_PYTHON ||
 		     format->quote_style == QUOTE_SHELL ||
 		     format->quote_style == QUOTE_TCL) &&
@@ -1927,6 +1941,12 @@ static int populate_value(struct ref_array_item *ref, struct strbuf *err)
 			v->handler = else_atom_handler;
 			v->s = xstrdup("");
 			continue;
+		} else if (atom_type == ATOM_REST) {
+			if (ref->rest)
+				v->s = xstrdup(ref->rest);
+			else
+				v->s = xstrdup("");
+			continue;
 		} else
 			continue;
 
@@ -2144,6 +2164,7 @@ static struct ref_array_item *new_ref_array_item(const char *refname,
 
 	FLEX_ALLOC_STR(ref, refname, refname);
 	oidcpy(&ref->objectname, oid);
+	ref->rest = NULL;
 
 	return ref;
 }
diff --git a/ref-filter.h b/ref-filter.h
index 74fb423fc89f..c15dee8d6b95 100644
--- a/ref-filter.h
+++ b/ref-filter.h
@@ -38,6 +38,7 @@ struct ref_sorting {
 
 struct ref_array_item {
 	struct object_id objectname;
+	const char *rest;
 	int flag;
 	unsigned int kind;
 	const char *symref;
@@ -76,14 +77,16 @@ struct ref_format {
 	 * verify_ref_format() afterwards to finalize.
 	 */
 	const char *format;
+	const char *rest;
 	int quote_style;
+	int use_rest;
 	int use_color;
 
 	/* Internal state to ref-filter */
 	int need_color_reset_at_eol;
 };
 
-#define REF_FORMAT_INIT { NULL, 0, -1 }
+#define REF_FORMAT_INIT { .use_color = -1 }
 
 /*  Macros for checking --merged and --no-merged options */
 #define _OPT_MERGED_NO_MERGED(option, filter, h) \
diff --git a/t/t3203-branch-output.sh b/t/t3203-branch-output.sh
index 5325b9f67a00..6e94c6db7b5a 100755
--- a/t/t3203-branch-output.sh
+++ b/t/t3203-branch-output.sh
@@ -340,6 +340,10 @@ test_expect_success 'git branch --format option' '
 	test_cmp expect actual
 '
 
+test_expect_success 'git branch with --format=%(rest) must fail' '
+	test_must_fail git branch --format="%(rest)" >actual
+'
+
 test_expect_success 'worktree colors correct' '
 	cat >expect <<-EOF &&
 	* <GREEN>(HEAD detached from fromtag)<RESET>
diff --git a/t/t6300-for-each-ref.sh b/t/t6300-for-each-ref.sh
index 5556063c347d..82c0ad2cb115 100755
--- a/t/t6300-for-each-ref.sh
+++ b/t/t6300-for-each-ref.sh
@@ -1211,6 +1211,10 @@ test_expect_success 'basic atom: head contents:trailers' '
 	test_cmp expect actual.clean
 '
 
+test_expect_success 'basic atom: rest must fail' '
+	test_must_fail git for-each-ref --format="%(rest)" refs/heads/main
+'
+
 test_expect_success 'trailer parsing not fooled by --- line' '
 	git commit --allow-empty -F - <<-\EOF &&
 	this is the subject
diff --git a/t/t7004-tag.sh b/t/t7004-tag.sh
index 2f72c5c6883e..082be85dffc7 100755
--- a/t/t7004-tag.sh
+++ b/t/t7004-tag.sh
@@ -1998,6 +1998,10 @@ test_expect_success '--format should list tags as per format given' '
 	test_cmp expect actual
 '
 
+test_expect_success 'git tag -l with --format="%(rest)" must fail' '
+	test_must_fail git tag -l --format="%(rest)" "v1*"
+'
+
 test_expect_success "set up color tests" '
 	echo "<RED>v1.0<RESET>" >expect.color &&
 	echo "v1.0" >expect.bare &&
diff --git a/t/t7030-verify-tag.sh b/t/t7030-verify-tag.sh
index 3cefde9602bf..10faa645157e 100755
--- a/t/t7030-verify-tag.sh
+++ b/t/t7030-verify-tag.sh
@@ -194,6 +194,10 @@ test_expect_success GPG 'verifying tag with --format' '
 	test_cmp expect actual
 '
 
+test_expect_success GPG 'verifying tag with --format="%(rest)" must fail' '
+	test_must_fail git verify-tag --format="%(rest)" "fourth-signed"
+'
+
 test_expect_success GPG 'verifying a forged tag with --format should fail silently' '
 	test_must_fail git verify-tag --format="tagname : %(tag)" $(cat forged1.tag) >actual-forged &&
 	test_must_be_empty actual-forged
-- 
gitgitgadget


^ permalink raw reply	[flat|nested] 121+ messages in thread

* [PATCH v4 06/14] [GSOC] ref-filter: pass get_object() return value to their callers
  2021-06-22  3:20     ` [PATCH v4 00/14] [GSOC][RFC] cat-file: reuse ref-filter logic ZheNing Hu via GitGitGadget
                         ` (4 preceding siblings ...)
  2021-06-22  3:20       ` [PATCH v4 05/14] [GSOC] ref-filter: add %(rest) atom ZheNing Hu via GitGitGadget
@ 2021-06-22  3:20       ` ZheNing Hu via GitGitGadget
  2021-06-24  4:02         ` Bagas Sanjaya
  2021-06-22  3:20       ` [PATCH v4 07/14] [GSOC] ref-filter: introduce free_ref_array_item_value() function ZheNing Hu via GitGitGadget
                         ` (8 subsequent siblings)
  14 siblings, 1 reply; 121+ messages in thread
From: ZheNing Hu via GitGitGadget @ 2021-06-22  3:20 UTC (permalink / raw)
  To: git
  Cc: Junio C Hamano, Christian Couder, Hariom Verma, Bagas Sanjaya,
	Jeff King, Ævar Arnfjörð Bjarmason, ZheNing Hu,
	ZheNing Hu

From: ZheNing Hu <adlternative@gmail.com>

Since in the refactor of `git cat-file --batch` later,
oid_object_info_extended() in get_object() will be used to obtain
the info of an object with it's oid. When the object cannot be
obtained in the git repository, `cat-file --batch` expects to output
"<oid> missing" and continue the next oid query instead of letting
Git exit. In other error conditions, Git should exit normally. So we
can achieve this function by passing the return value of get_object().

Mentored-by: Christian Couder <christian.couder@gmail.com>
Mentored-by: Hariom Verma <hariom18599@gmail.com>
Helped-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: ZheNing Hu <adlternative@gmail.com>
---
 ref-filter.c | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/ref-filter.c b/ref-filter.c
index 10c78de9cfa4..58def6ccd33a 100644
--- a/ref-filter.c
+++ b/ref-filter.c
@@ -1816,6 +1816,7 @@ static int populate_value(struct ref_array_item *ref, struct strbuf *err)
 {
 	struct object *obj;
 	int i;
+	int ret = 0;
 	struct object_info empty = OBJECT_INFO_INIT;
 
 	CALLOC_ARRAY(ref->value, used_atom_cnt);
@@ -1972,8 +1973,9 @@ static int populate_value(struct ref_array_item *ref, struct strbuf *err)
 
 
 	oi.oid = ref->objectname;
-	if (get_object(ref, 0, &obj, &oi, err))
-		return -1;
+	ret = get_object(ref, 0, &obj, &oi, err);
+	if (ret)
+		return ret;
 
 	/*
 	 * If there is no atom that wants to know about tagged
@@ -2005,8 +2007,10 @@ static int get_ref_atom_value(struct ref_array_item *ref, int atom,
 			      struct atom_value **v, struct strbuf *err)
 {
 	if (!ref->value) {
-		if (populate_value(ref, err))
-			return -1;
+		int ret = populate_value(ref, err);
+
+		if (ret)
+			return ret;
 		fill_missing_values(ref->value);
 	}
 	*v = &ref->value[atom];
@@ -2580,6 +2584,7 @@ int format_ref_array_item(struct ref_array_item *info,
 {
 	const char *cp, *sp, *ep;
 	struct ref_formatting_state state = REF_FORMATTING_STATE_INIT;
+	int ret = 0;
 
 	state.quote_style = format->quote_style;
 	push_stack_element(&state.stack);
@@ -2592,10 +2597,10 @@ int format_ref_array_item(struct ref_array_item *info,
 		if (cp < sp)
 			append_literal(cp, sp, &state);
 		pos = parse_ref_filter_atom(format, sp + 2, ep, error_buf);
-		if (pos < 0 || get_ref_atom_value(info, pos, &atomv, error_buf) ||
+		if (pos < 0 || (ret = get_ref_atom_value(info, pos, &atomv, error_buf)) ||
 		    atomv->handler(atomv, &state, error_buf)) {
 			pop_stack_element(&state.stack);
-			return -1;
+			return ret ? ret : -1;
 		}
 	}
 	if (*cp) {
-- 
gitgitgadget


^ permalink raw reply	[flat|nested] 121+ messages in thread

* [PATCH v4 07/14] [GSOC] ref-filter: introduce free_ref_array_item_value() function
  2021-06-22  3:20     ` [PATCH v4 00/14] [GSOC][RFC] cat-file: reuse ref-filter logic ZheNing Hu via GitGitGadget
                         ` (5 preceding siblings ...)
  2021-06-22  3:20       ` [PATCH v4 06/14] [GSOC] ref-filter: pass get_object() return value to their callers ZheNing Hu via GitGitGadget
@ 2021-06-22  3:20       ` ZheNing Hu via GitGitGadget
  2021-06-22  3:20       ` [PATCH v4 08/14] [GSOC] ref-filter: add cat_file_mode in struct ref_format ZheNing Hu via GitGitGadget
                         ` (7 subsequent siblings)
  14 siblings, 0 replies; 121+ messages in thread
From: ZheNing Hu via GitGitGadget @ 2021-06-22  3:20 UTC (permalink / raw)
  To: git
  Cc: Junio C Hamano, Christian Couder, Hariom Verma, Bagas Sanjaya,
	Jeff King, Ævar Arnfjörð Bjarmason, ZheNing Hu,
	ZheNing Hu

From: ZheNing Hu <adlternative@gmail.com>

When we use ref_array_item which is not dynamically allocated and
want to free the space of its member "value" after the end of use,
free_array_item() does not meet our needs, because it tries to free
ref_array_item itself and its member "symref".

Introduce free_ref_array_item_value() for freeing ref_array_item value.
It will be called internally by free_array_item(), and it will help
`cat-file --batch` free ref_array_item's value memory later.

Helped-by: Junio C Hamano <gitster@pobox.com>
Mentored-by: Christian Couder <christian.couder@gmail.com>
Mentored-by: Hariom Verma <hariom18599@gmail.com>
Signed-off-by: ZheNing Hu <adlternative@gmail.com>
---
 ref-filter.c | 11 ++++++++---
 ref-filter.h |  2 ++
 2 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/ref-filter.c b/ref-filter.c
index 58def6ccd33a..22315d4809dc 100644
--- a/ref-filter.c
+++ b/ref-filter.c
@@ -2291,16 +2291,21 @@ static int ref_filter_handler(const char *refname, const struct object_id *oid,
 	return 0;
 }
 
-/*  Free memory allocated for a ref_array_item */
-static void free_array_item(struct ref_array_item *item)
+void free_ref_array_item_value(struct ref_array_item *item)
 {
-	free((char *)item->symref);
 	if (item->value) {
 		int i;
 		for (i = 0; i < used_atom_cnt; i++)
 			free((char *)item->value[i].s);
 		free(item->value);
 	}
+}
+
+/*  Free memory allocated for a ref_array_item */
+static void free_array_item(struct ref_array_item *item)
+{
+	free((char *)item->symref);
+	free_ref_array_item_value(item);
 	free(item);
 }
 
diff --git a/ref-filter.h b/ref-filter.h
index c15dee8d6b95..44e6dc05ac2f 100644
--- a/ref-filter.h
+++ b/ref-filter.h
@@ -111,6 +111,8 @@ struct ref_format {
 int filter_refs(struct ref_array *array, struct ref_filter *filter, unsigned int type);
 /*  Clear all memory allocated to ref_array */
 void ref_array_clear(struct ref_array *array);
+/* Free ref_array_item's value */
+void free_ref_array_item_value(struct ref_array_item *item);
 /*  Used to verify if the given format is correct and to parse out the used atoms */
 int verify_ref_format(struct ref_format *format);
 /*  Sort the given ref_array as per the ref_sorting provided */
-- 
gitgitgadget


^ permalink raw reply	[flat|nested] 121+ messages in thread

* [PATCH v4 08/14] [GSOC] ref-filter: add cat_file_mode in struct ref_format
  2021-06-22  3:20     ` [PATCH v4 00/14] [GSOC][RFC] cat-file: reuse ref-filter logic ZheNing Hu via GitGitGadget
                         ` (6 preceding siblings ...)
  2021-06-22  3:20       ` [PATCH v4 07/14] [GSOC] ref-filter: introduce free_ref_array_item_value() function ZheNing Hu via GitGitGadget
@ 2021-06-22  3:20       ` ZheNing Hu via GitGitGadget
  2021-06-22  3:20       ` [PATCH v4 09/14] [GSOC] ref-filter: modify the error message and value in get_object ZheNing Hu via GitGitGadget
                         ` (6 subsequent siblings)
  14 siblings, 0 replies; 121+ messages in thread
From: ZheNing Hu via GitGitGadget @ 2021-06-22  3:20 UTC (permalink / raw)
  To: git
  Cc: Junio C Hamano, Christian Couder, Hariom Verma, Bagas Sanjaya,
	Jeff King, Ævar Arnfjörð Bjarmason, ZheNing Hu,
	ZheNing Hu

From: ZheNing Hu <adlternative@gmail.com>

Add `cat_file_mode` member in struct `ref_format`, when
`cat-file --batch` use ref-filter logic later, it can help
us reject atoms in verify_ref_format() which cat-file cannot
use, e.g. `%(refname)`, `%(push)`, `%(upstream)`...

Mentored-by: Christian Couder <christian.couder@gmail.com>
Mentored-by: Hariom Verma <hariom18599@gmail.com>
Signed-off-by: ZheNing Hu <adlternative@gmail.com>
---
 ref-filter.c | 11 +++++++++--
 ref-filter.h |  1 +
 2 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/ref-filter.c b/ref-filter.c
index 22315d4809dc..f21f41df0d88 100644
--- a/ref-filter.c
+++ b/ref-filter.c
@@ -1021,8 +1021,15 @@ int verify_ref_format(struct ref_format *format)
 		if (at < 0)
 			die("%s", err.buf);
 
-		if (used_atom[at].atom_type == ATOM_REST)
-			die("this command reject atom %%(%.*s)", (int)(ep - sp - 2), sp + 2);
+		if ((!format->cat_file_mode && used_atom[at].atom_type == ATOM_REST) ||
+		    (format->cat_file_mode && (used_atom[at].atom_type == ATOM_FLAG ||
+					       used_atom[at].atom_type == ATOM_HEAD ||
+					       used_atom[at].atom_type == ATOM_PUSH ||
+					       used_atom[at].atom_type == ATOM_REFNAME ||
+					       used_atom[at].atom_type == ATOM_SYMREF ||
+					       used_atom[at].atom_type == ATOM_UPSTREAM ||
+					       used_atom[at].atom_type == ATOM_WORKTREEPATH)))
+			die(_("this command reject atom %%(%.*s)"), (int)(ep - sp - 2), sp + 2);
 
 		if ((format->quote_style == QUOTE_PYTHON ||
 		     format->quote_style == QUOTE_SHELL ||
diff --git a/ref-filter.h b/ref-filter.h
index 44e6dc05ac2f..053980a6a426 100644
--- a/ref-filter.h
+++ b/ref-filter.h
@@ -78,6 +78,7 @@ struct ref_format {
 	 */
 	const char *format;
 	const char *rest;
+	int cat_file_mode;
 	int quote_style;
 	int use_rest;
 	int use_color;
-- 
gitgitgadget


^ permalink raw reply	[flat|nested] 121+ messages in thread

* [PATCH v4 09/14] [GSOC] ref-filter: modify the error message and value in get_object
  2021-06-22  3:20     ` [PATCH v4 00/14] [GSOC][RFC] cat-file: reuse ref-filter logic ZheNing Hu via GitGitGadget
                         ` (7 preceding siblings ...)
  2021-06-22  3:20       ` [PATCH v4 08/14] [GSOC] ref-filter: add cat_file_mode in struct ref_format ZheNing Hu via GitGitGadget
@ 2021-06-22  3:20       ` ZheNing Hu via GitGitGadget
  2021-06-22  3:20       ` [PATCH v4 10/14] [GSOC] cat-file: add has_object_file() check ZheNing Hu via GitGitGadget
                         ` (5 subsequent siblings)
  14 siblings, 0 replies; 121+ messages in thread
From: ZheNing Hu via GitGitGadget @ 2021-06-22  3:20 UTC (permalink / raw)
  To: git
  Cc: Junio C Hamano, Christian Couder, Hariom Verma, Bagas Sanjaya,
	Jeff King, Ævar Arnfjörð Bjarmason, ZheNing Hu,
	ZheNing Hu

From: ZheNing Hu <adlternative@gmail.com>

Let get_object() return 1 and print "<oid> missing" instead
of returning -1 and printing "missing object <oid> for <refname>"
if oid_object_info_extended() unable to find the data corresponding
to oid. When `cat-file --batch` use ref-filter logic later it can
help `format_ref_array_item()` just report that the object is missing
without letting Git exit.

Mentored-by: Christian Couder <christian.couder@gmail.com>
Mentored-by: Hariom Verma <hariom18599@gmail.com>
Signed-off-by: ZheNing Hu <adlternative@gmail.com>
---
 ref-filter.c                   | 4 ++--
 t/t6301-for-each-ref-errors.sh | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/ref-filter.c b/ref-filter.c
index f21f41df0d88..181d99c92735 100644
--- a/ref-filter.c
+++ b/ref-filter.c
@@ -1749,8 +1749,8 @@ static int get_object(struct ref_array_item *ref, int deref, struct object **obj
 	}
 	if (oid_object_info_extended(the_repository, &oi->oid, &oi->info,
 				     OBJECT_INFO_LOOKUP_REPLACE))
-		return strbuf_addf_ret(err, -1, _("missing object %s for %s"),
-				       oid_to_hex(&oi->oid), ref->refname);
+		return strbuf_addf_ret(err, 1, _("%s missing"),
+				       oid_to_hex(&oi->oid));
 	if (oi->info.disk_sizep && oi->disk_size < 0)
 		BUG("Object size is less than zero.");
 
diff --git a/t/t6301-for-each-ref-errors.sh b/t/t6301-for-each-ref-errors.sh
index 40edf9dab534..3553f84a00c1 100755
--- a/t/t6301-for-each-ref-errors.sh
+++ b/t/t6301-for-each-ref-errors.sh
@@ -41,7 +41,7 @@ test_expect_success 'Missing objects are reported correctly' '
 	r=refs/heads/missing &&
 	echo $MISSING >.git/$r &&
 	test_when_finished "rm -f .git/$r" &&
-	echo "fatal: missing object $MISSING for $r" >missing-err &&
+	echo "fatal: $MISSING missing" >missing-err &&
 	test_must_fail git for-each-ref 2>err &&
 	test_cmp missing-err err &&
 	(
-- 
gitgitgadget


^ permalink raw reply	[flat|nested] 121+ messages in thread

* [PATCH v4 10/14] [GSOC] cat-file: add has_object_file() check
  2021-06-22  3:20     ` [PATCH v4 00/14] [GSOC][RFC] cat-file: reuse ref-filter logic ZheNing Hu via GitGitGadget
                         ` (8 preceding siblings ...)
  2021-06-22  3:20       ` [PATCH v4 09/14] [GSOC] ref-filter: modify the error message and value in get_object ZheNing Hu via GitGitGadget
@ 2021-06-22  3:20       ` ZheNing Hu via GitGitGadget
  2021-06-22  3:20       ` [PATCH v4 11/14] [GSOC] cat-file: change batch_objects parameter name ZheNing Hu via GitGitGadget
                         ` (4 subsequent siblings)
  14 siblings, 0 replies; 121+ messages in thread
From: ZheNing Hu via GitGitGadget @ 2021-06-22  3:20 UTC (permalink / raw)
  To: git
  Cc: Junio C Hamano, Christian Couder, Hariom Verma, Bagas Sanjaya,
	Jeff King, Ævar Arnfjörð Bjarmason, ZheNing Hu,
	ZheNing Hu

From: ZheNing Hu <adlternative@gmail.com>

Use `has_object_file()` in `batch_one_object()` to check
whether the input object exists. This can help us reject
the missing oid when we let `cat-file --batch` use ref-filter
logic later.

Mentored-by: Christian Couder <christian.couder@gmail.com>
Mentored-by: Hariom Verma <hariom18599@gmail.com>
Signed-off-by: ZheNing Hu <adlternative@gmail.com>
---
 builtin/cat-file.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/builtin/cat-file.c b/builtin/cat-file.c
index 5ebf13359e83..9fd3c04ff20b 100644
--- a/builtin/cat-file.c
+++ b/builtin/cat-file.c
@@ -428,6 +428,13 @@ static void batch_one_object(const char *obj_name,
 		return;
 	}
 
+	if (!has_object_file(&data->oid)) {
+		printf("%s missing\n",
+		       obj_name ? obj_name : oid_to_hex(&data->oid));
+		fflush(stdout);
+		return;
+	}
+
 	batch_object_write(obj_name, scratch, opt, data);
 }
 
-- 
gitgitgadget


^ permalink raw reply	[flat|nested] 121+ messages in thread

* [PATCH v4 11/14] [GSOC] cat-file: change batch_objects parameter name
  2021-06-22  3:20     ` [PATCH v4 00/14] [GSOC][RFC] cat-file: reuse ref-filter logic ZheNing Hu via GitGitGadget
                         ` (9 preceding siblings ...)
  2021-06-22  3:20       ` [PATCH v4 10/14] [GSOC] cat-file: add has_object_file() check ZheNing Hu via GitGitGadget
@ 2021-06-22  3:20       ` ZheNing Hu via GitGitGadget
  2021-06-24  4:07         ` Bagas Sanjaya
  2021-06-22  3:20       ` [PATCH v4 12/14] [GSOC] cat-file: reuse ref-filter logic ZheNing Hu via GitGitGadget
                         ` (3 subsequent siblings)
  14 siblings, 1 reply; 121+ messages in thread
From: ZheNing Hu via GitGitGadget @ 2021-06-22  3:20 UTC (permalink / raw)
  To: git
  Cc: Junio C Hamano, Christian Couder, Hariom Verma, Bagas Sanjaya,
	Jeff King, Ævar Arnfjörð Bjarmason, ZheNing Hu,
	ZheNing Hu

From: ZheNing Hu <adlternative@gmail.com>

Since cat-file reuses ref-filter logic later will add the
formal parameter "const struct option *options" to
batch_objects(), the two synonymous parameters of "opt"
and "options" may confuse readers, so change batch_options
parameter of batch_objects() from "opt" to "batch".

Mentored-by: Christian Couder <christian.couder@gmail.com>
Mentored-by: Hariom Verma <hariom18599@gmail.com>
Signed-off-by: ZheNing Hu <adlternative@gmail.com>
---
 builtin/cat-file.c | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/builtin/cat-file.c b/builtin/cat-file.c
index 9fd3c04ff20b..cd84c39df968 100644
--- a/builtin/cat-file.c
+++ b/builtin/cat-file.c
@@ -495,7 +495,7 @@ static int batch_unordered_packed(const struct object_id *oid,
 	return batch_unordered_object(oid, data);
 }
 
-static int batch_objects(struct batch_options *opt)
+static int batch_objects(struct batch_options *batch)
 {
 	struct strbuf input = STRBUF_INIT;
 	struct strbuf output = STRBUF_INIT;
@@ -503,8 +503,8 @@ static int batch_objects(struct batch_options *opt)
 	int save_warning;
 	int retval = 0;
 
-	if (!opt->format)
-		opt->format = "%(objectname) %(objecttype) %(objectsize)";
+	if (!batch->format)
+		batch->format = "%(objectname) %(objecttype) %(objectsize)";
 
 	/*
 	 * Expand once with our special mark_query flag, which will prime the
@@ -513,13 +513,13 @@ static int batch_objects(struct batch_options *opt)
 	 */
 	memset(&data, 0, sizeof(data));
 	data.mark_query = 1;
-	strbuf_expand(&output, opt->format, expand_format, &data);
+	strbuf_expand(&output, batch->format, expand_format, &data);
 	data.mark_query = 0;
 	strbuf_release(&output);
-	if (opt->cmdmode)
+	if (batch->cmdmode)
 		data.split_on_whitespace = 1;
 
-	if (opt->all_objects) {
+	if (batch->all_objects) {
 		struct object_info empty = OBJECT_INFO_INIT;
 		if (!memcmp(&data.info, &empty, sizeof(empty)))
 			data.skip_object_info = 1;
@@ -529,20 +529,20 @@ static int batch_objects(struct batch_options *opt)
 	 * If we are printing out the object, then always fill in the type,
 	 * since we will want to decide whether or not to stream.
 	 */
-	if (opt->print_contents)
+	if (batch->print_contents)
 		data.info.typep = &data.type;
 
-	if (opt->all_objects) {
+	if (batch->all_objects) {
 		struct object_cb_data cb;
 
 		if (has_promisor_remote())
 			warning("This repository uses promisor remotes. Some objects may not be loaded.");
 
-		cb.opt = opt;
+		cb.opt = batch;
 		cb.expand = &data;
 		cb.scratch = &output;
 
-		if (opt->unordered) {
+		if (batch->unordered) {
 			struct oidset seen = OIDSET_INIT;
 
 			cb.seen = &seen;
@@ -592,7 +592,7 @@ static int batch_objects(struct batch_options *opt)
 			data.rest = p;
 		}
 
-		batch_one_object(input.buf, &output, opt, &data);
+		batch_one_object(input.buf, &output, batch, &data);
 	}
 
 	strbuf_release(&input);
-- 
gitgitgadget


^ permalink raw reply	[flat|nested] 121+ messages in thread

* [PATCH v4 12/14] [GSOC] cat-file: reuse ref-filter logic
  2021-06-22  3:20     ` [PATCH v4 00/14] [GSOC][RFC] cat-file: reuse ref-filter logic ZheNing Hu via GitGitGadget
                         ` (10 preceding siblings ...)
  2021-06-22  3:20       ` [PATCH v4 11/14] [GSOC] cat-file: change batch_objects parameter name ZheNing Hu via GitGitGadget
@ 2021-06-22  3:20       ` ZheNing Hu via GitGitGadget
  2021-06-22  3:20       ` [PATCH v4 13/14] [GSOC] cat-file: reuse err buf in batch_object_write() ZheNing Hu via GitGitGadget
                         ` (2 subsequent siblings)
  14 siblings, 0 replies; 121+ messages in thread
From: ZheNing Hu via GitGitGadget @ 2021-06-22  3:20 UTC (permalink / raw)
  To: git
  Cc: Junio C Hamano, Christian Couder, Hariom Verma, Bagas Sanjaya,
	Jeff King, Ævar Arnfjörð Bjarmason, ZheNing Hu,
	ZheNing Hu

From: ZheNing Hu <adlternative@gmail.com>

In order to let cat-file use ref-filter logic, let's do the
following:

1. Change the type of member `format` in struct `batch_options`
to `ref_format`, we will pass it to ref-filter later.
2. Let `batch_objects()` add atoms to format, and use
`verify_ref_format()` to check atoms.
3. Use `format_ref_array_item()` in `batch_object_write()` to
get the formatted data corresponding to the object. If the
return value of `format_ref_array_item()` is equals to zero,
use `batch_write()` to print object data; else if the return
value is less than zero, use `die()` to print the error message
and exit; else if return value is greater than zero, only print
the error message, but don't exit.
4. Use free_ref_array_item_value() to free ref_array_item's
value.

Most of the atoms in `for-each-ref --format` are now supported,
such as `%(tree)`, `%(parent)`, `%(author)`, `%(tagger)`, `%(if)`,
`%(then)`, `%(else)`, `%(end)`. But these atoms will be rejected:
`%(refname)`, `%(symref)`, `%(upstream)`, `%(push)`, `%(worktreepath)`,
`%(flag)`, `%(HEAD)`, because our objects don't have a refname.

The performance for `git cat-file --batch-all-objects
--batch-check` on the Git repository itself with performance
testing tool `hyperfine` changes from 669.4 ms ±  31.1 ms to
1.134 s ±  0.063 s.

The performance for `git cat-file --batch-all-objects --batch
>/dev/null` on the Git repository itself with performance testing
tool `time` change from "27.37s user 0.29s system 98% cpu 28.089
total" to "33.69s user 1.54s system 87% cpu 40.258 total".

Mentored-by: Christian Couder <christian.couder@gmail.com>
Mentored-by: Hariom Verma <hariom18599@gmail.com>
Signed-off-by: ZheNing Hu <adlternative@gmail.com>
---
 Documentation/git-cat-file.txt |   6 +
 builtin/cat-file.c             | 244 ++++++-------------------------
 t/t1006-cat-file.sh            | 252 +++++++++++++++++++++++++++++++++
 3 files changed, 305 insertions(+), 197 deletions(-)

diff --git a/Documentation/git-cat-file.txt b/Documentation/git-cat-file.txt
index 4eb0421b3fd9..ef8ab952b2fa 100644
--- a/Documentation/git-cat-file.txt
+++ b/Documentation/git-cat-file.txt
@@ -226,6 +226,12 @@ newline. The available atoms are:
 	after that first run of whitespace (i.e., the "rest" of the
 	line) are output in place of the `%(rest)` atom.
 
+Note that most of the atoms in `for-each-ref --format` are now supported,
+such as `%(tree)`, `%(parent)`, `%(author)`, `%(tagger)`, `%(if)`,
+`%(then)`, `%(else)`, `%(end)`. But these atoms will be rejected:
+`%(refname)`, `%(symref)`, `%(upstream)`, `%(push)`, `%(worktreepath)`,
+`%(flag)`, `%(HEAD)`. See linkgit:git-for-each-ref[1].
+
 If no format is specified, the default format is `%(objectname)
 %(objecttype) %(objectsize)`.
 
diff --git a/builtin/cat-file.c b/builtin/cat-file.c
index cd84c39df968..0e7ad038e5fb 100644
--- a/builtin/cat-file.c
+++ b/builtin/cat-file.c
@@ -16,6 +16,7 @@
 #include "packfile.h"
 #include "object-store.h"
 #include "promisor-remote.h"
+#include "ref-filter.h"
 
 struct batch_options {
 	int enabled;
@@ -25,7 +26,7 @@ struct batch_options {
 	int all_objects;
 	int unordered;
 	int cmdmode; /* may be 'w' or 'c' for --filters or --textconv */
-	const char *format;
+	struct ref_format format;
 };
 
 static const char *force_path;
@@ -195,99 +196,10 @@ static int cat_one_file(int opt, const char *exp_type, const char *obj_name,
 
 struct expand_data {
 	struct object_id oid;
-	enum object_type type;
-	unsigned long size;
-	off_t disk_size;
 	const char *rest;
-	struct object_id delta_base_oid;
-
-	/*
-	 * If mark_query is true, we do not expand anything, but rather
-	 * just mark the object_info with items we wish to query.
-	 */
-	int mark_query;
-
-	/*
-	 * Whether to split the input on whitespace before feeding it to
-	 * get_sha1; this is decided during the mark_query phase based on
-	 * whether we have a %(rest) token in our format.
-	 */
 	int split_on_whitespace;
-
-	/*
-	 * After a mark_query run, this object_info is set up to be
-	 * passed to oid_object_info_extended. It will point to the data
-	 * elements above, so you can retrieve the response from there.
-	 */
-	struct object_info info;
-
-	/*
-	 * This flag will be true if the requested batch format and options
-	 * don't require us to call oid_object_info, which can then be
-	 * optimized out.
-	 */
-	unsigned skip_object_info : 1;
 };
 
-static int is_atom(const char *atom, const char *s, int slen)
-{
-	int alen = strlen(atom);
-	return alen == slen && !memcmp(atom, s, alen);
-}
-
-static void expand_atom(struct strbuf *sb, const char *atom, int len,
-			void *vdata)
-{
-	struct expand_data *data = vdata;
-
-	if (is_atom("objectname", atom, len)) {
-		if (!data->mark_query)
-			strbuf_addstr(sb, oid_to_hex(&data->oid));
-	} else if (is_atom("objecttype", atom, len)) {
-		if (data->mark_query)
-			data->info.typep = &data->type;
-		else
-			strbuf_addstr(sb, type_name(data->type));
-	} else if (is_atom("objectsize", atom, len)) {
-		if (data->mark_query)
-			data->info.sizep = &data->size;
-		else
-			strbuf_addf(sb, "%"PRIuMAX , (uintmax_t)data->size);
-	} else if (is_atom("objectsize:disk", atom, len)) {
-		if (data->mark_query)
-			data->info.disk_sizep = &data->disk_size;
-		else
-			strbuf_addf(sb, "%"PRIuMAX, (uintmax_t)data->disk_size);
-	} else if (is_atom("rest", atom, len)) {
-		if (data->mark_query)
-			data->split_on_whitespace = 1;
-		else if (data->rest)
-			strbuf_addstr(sb, data->rest);
-	} else if (is_atom("deltabase", atom, len)) {
-		if (data->mark_query)
-			data->info.delta_base_oid = &data->delta_base_oid;
-		else
-			strbuf_addstr(sb,
-				      oid_to_hex(&data->delta_base_oid));
-	} else
-		die("unknown format element: %.*s", len, atom);
-}
-
-static size_t expand_format(struct strbuf *sb, const char *start, void *data)
-{
-	const char *end;
-
-	if (*start != '(')
-		return 0;
-	end = strchr(start + 1, ')');
-	if (!end)
-		die("format element '%s' does not end in ')'", start);
-
-	expand_atom(sb, start + 1, end - start - 1, data);
-
-	return end - start + 1;
-}
-
 static void batch_write(struct batch_options *opt, const void *data, int len)
 {
 	if (opt->buffer_output) {
@@ -297,87 +209,34 @@ static void batch_write(struct batch_options *opt, const void *data, int len)
 		write_or_die(1, data, len);
 }
 
-static void print_object_or_die(struct batch_options *opt, struct expand_data *data)
-{
-	const struct object_id *oid = &data->oid;
-
-	assert(data->info.typep);
-
-	if (data->type == OBJ_BLOB) {
-		if (opt->buffer_output)
-			fflush(stdout);
-		if (opt->cmdmode) {
-			char *contents;
-			unsigned long size;
-
-			if (!data->rest)
-				die("missing path for '%s'", oid_to_hex(oid));
-
-			if (opt->cmdmode == 'w') {
-				if (filter_object(data->rest, 0100644, oid,
-						  &contents, &size))
-					die("could not convert '%s' %s",
-					    oid_to_hex(oid), data->rest);
-			} else if (opt->cmdmode == 'c') {
-				enum object_type type;
-				if (!textconv_object(the_repository,
-						     data->rest, 0100644, oid,
-						     1, &contents, &size))
-					contents = read_object_file(oid,
-								    &type,
-								    &size);
-				if (!contents)
-					die("could not convert '%s' %s",
-					    oid_to_hex(oid), data->rest);
-			} else
-				BUG("invalid cmdmode: %c", opt->cmdmode);
-			batch_write(opt, contents, size);
-			free(contents);
-		} else {
-			stream_blob(oid);
-		}
-	}
-	else {
-		enum object_type type;
-		unsigned long size;
-		void *contents;
-
-		contents = read_object_file(oid, &type, &size);
-		if (!contents)
-			die("object %s disappeared", oid_to_hex(oid));
-		if (type != data->type)
-			die("object %s changed type!?", oid_to_hex(oid));
-		if (data->info.sizep && size != data->size)
-			die("object %s changed size!?", oid_to_hex(oid));
-
-		batch_write(opt, contents, size);
-		free(contents);
-	}
-}
 
 static void batch_object_write(const char *obj_name,
 			       struct strbuf *scratch,
 			       struct batch_options *opt,
 			       struct expand_data *data)
 {
-	if (!data->skip_object_info &&
-	    oid_object_info_extended(the_repository, &data->oid, &data->info,
-				     OBJECT_INFO_LOOKUP_REPLACE) < 0) {
-		printf("%s missing\n",
-		       obj_name ? obj_name : oid_to_hex(&data->oid));
-		fflush(stdout);
-		return;
-	}
+	int ret = 0;
+	struct strbuf err = STRBUF_INIT;
+	struct ref_array_item item = { data->oid, data->rest };
 
 	strbuf_reset(scratch);
-	strbuf_expand(scratch, opt->format, expand_format, data);
-	strbuf_addch(scratch, '\n');
-	batch_write(opt, scratch->buf, scratch->len);
 
-	if (opt->print_contents) {
-		print_object_or_die(opt, data);
-		batch_write(opt, "\n", 1);
+	ret = format_ref_array_item(&item, &opt->format, scratch, &err);
+	if (ret < 0) {
+		die("%s\n", err.buf);
+	} if (ret) {
+		/* ret > 0 means when the object corresponding to oid
+		 * cannot be found in format_ref_array_item(), we only print
+		 * the error message.
+		 */
+		printf("%s\n", err.buf);
+		fflush(stdout);
+	} else {
+		strbuf_addch(scratch, '\n');
+		batch_write(opt, scratch->buf, scratch->len);
 	}
+	free_ref_array_item_value(&item);
+	strbuf_release(&err);
 }
 
 static void batch_one_object(const char *obj_name,
@@ -495,42 +354,34 @@ static int batch_unordered_packed(const struct object_id *oid,
 	return batch_unordered_object(oid, data);
 }
 
-static int batch_objects(struct batch_options *batch)
+static const char * const cat_file_usage[] = {
+	N_("git cat-file (-t [--allow-unknown-type] | -s [--allow-unknown-type] | -e | -p | <type> | --textconv | --filters) [--path=<path>] <object>"),
+	N_("git cat-file (--batch[=<format>] | --batch-check[=<format>]) [--follow-symlinks] [--textconv | --filters]"),
+	NULL
+};
+
+static int batch_objects(struct batch_options *batch, const struct option *options)
 {
 	struct strbuf input = STRBUF_INIT;
 	struct strbuf output = STRBUF_INIT;
+	struct strbuf format = STRBUF_INIT;
 	struct expand_data data;
 	int save_warning;
 	int retval = 0;
 
-	if (!batch->format)
-		batch->format = "%(objectname) %(objecttype) %(objectsize)";
-
-	/*
-	 * Expand once with our special mark_query flag, which will prime the
-	 * object_info to be handed to oid_object_info_extended for each
-	 * object.
-	 */
 	memset(&data, 0, sizeof(data));
-	data.mark_query = 1;
-	strbuf_expand(&output, batch->format, expand_format, &data);
-	data.mark_query = 0;
-	strbuf_release(&output);
-	if (batch->cmdmode)
-		data.split_on_whitespace = 1;
-
-	if (batch->all_objects) {
-		struct object_info empty = OBJECT_INFO_INIT;
-		if (!memcmp(&data.info, &empty, sizeof(empty)))
-			data.skip_object_info = 1;
-	}
-
-	/*
-	 * If we are printing out the object, then always fill in the type,
-	 * since we will want to decide whether or not to stream.
-	 */
+	if (batch->format.format)
+		strbuf_addstr(&format, batch->format.format);
+	else
+		strbuf_addstr(&format, "%(objectname) %(objecttype) %(objectsize)");
 	if (batch->print_contents)
-		data.info.typep = &data.type;
+		strbuf_addstr(&format, "\n%(raw)");
+	batch->format.format = format.buf;
+	if (verify_ref_format(&batch->format))
+		usage_with_options(cat_file_usage, options);
+
+	if (batch->cmdmode || batch->format.use_rest)
+		data.split_on_whitespace = 1;
 
 	if (batch->all_objects) {
 		struct object_cb_data cb;
@@ -563,6 +414,7 @@ static int batch_objects(struct batch_options *batch)
 			oid_array_clear(&sa);
 		}
 
+		strbuf_release(&format);
 		strbuf_release(&output);
 		return 0;
 	}
@@ -595,18 +447,13 @@ static int batch_objects(struct batch_options *batch)
 		batch_one_object(input.buf, &output, batch, &data);
 	}
 
+	strbuf_release(&format);
 	strbuf_release(&input);
 	strbuf_release(&output);
 	warn_on_object_refname_ambiguity = save_warning;
 	return retval;
 }
 
-static const char * const cat_file_usage[] = {
-	N_("git cat-file (-t [--allow-unknown-type] | -s [--allow-unknown-type] | -e | -p | <type> | --textconv | --filters) [--path=<path>] <object>"),
-	N_("git cat-file (--batch[=<format>] | --batch-check[=<format>]) [--follow-symlinks] [--textconv | --filters]"),
-	NULL
-};
-
 static int git_cat_file_config(const char *var, const char *value, void *cb)
 {
 	if (userdiff_config(var, value) < 0)
@@ -629,7 +476,7 @@ static int batch_option_callback(const struct option *opt,
 
 	bo->enabled = 1;
 	bo->print_contents = !strcmp(opt->long_name, "batch");
-	bo->format = arg;
+	bo->format.format = arg;
 
 	return 0;
 }
@@ -638,7 +485,9 @@ int cmd_cat_file(int argc, const char **argv, const char *prefix)
 {
 	int opt = 0;
 	const char *exp_type = NULL, *obj_name = NULL;
-	struct batch_options batch = {0};
+	struct batch_options batch = {
+		.format = REF_FORMAT_INIT
+	};
 	int unknown_type = 0;
 
 	const struct option options[] = {
@@ -677,6 +526,7 @@ int cmd_cat_file(int argc, const char **argv, const char *prefix)
 	git_config(git_cat_file_config, NULL);
 
 	batch.buffer_output = -1;
+	batch.format.cat_file_mode = 1;
 	argc = parse_options(argc, argv, prefix, options, cat_file_usage, 0);
 
 	if (opt) {
@@ -720,7 +570,7 @@ int cmd_cat_file(int argc, const char **argv, const char *prefix)
 		batch.buffer_output = batch.all_objects;
 
 	if (batch.enabled)
-		return batch_objects(&batch);
+		return batch_objects(&batch, options);
 
 	if (unknown_type && opt != 't' && opt != 's')
 		die("git cat-file --allow-unknown-type: use with -s or -t");
diff --git a/t/t1006-cat-file.sh b/t/t1006-cat-file.sh
index 5d2dc99b74ad..69eb627774d9 100755
--- a/t/t1006-cat-file.sh
+++ b/t/t1006-cat-file.sh
@@ -586,4 +586,256 @@ test_expect_success 'cat-file --unordered works' '
 	test_cmp expect actual
 '
 
+. "$TEST_DIRECTORY"/lib-gpg.sh
+. "$TEST_DIRECTORY"/lib-terminal.sh
+
+test_expect_success 'cat-file --batch|--batch-check setup' '
+	echo 1>blob1 &&
+	printf "a\0b\0\c" >blob2 &&
+	git add blob1 blob2 &&
+	git commit -m "Commit Message" &&
+	git branch -M main &&
+	git tag -a -m "v0.0.0" testtag &&
+	git update-ref refs/myblobs/blob1 HEAD:blob1 &&
+	git update-ref refs/myblobs/blob2 HEAD:blob2 &&
+	git update-ref refs/mytrees/tree1 HEAD^{tree}
+'
+
+batch_test_atom() {
+	if test "$3" = "fail"
+	then
+		test_expect_${4:-success} $PREREQ "basic atom: $1 $2 must fail" "
+			test_must_fail git cat-file --batch-check='$2' >bad <<-EOF
+			$1
+			EOF
+		"
+	else
+		test_expect_${4:-success} $PREREQ "basic atom: $1 $2" "
+			git for-each-ref --format='$2' $1 >expected &&
+			git cat-file --batch-check='$2' >actual <<-EOF &&
+			$1
+			EOF
+			sanitize_pgp <actual >actual.clean &&
+			cmp expected actual.clean
+		"
+	fi
+}
+
+batch_test_atom refs/heads/main '%(refname)' fail
+batch_test_atom refs/heads/main '%(refname:)' fail
+batch_test_atom refs/heads/main '%(refname:short)' fail
+batch_test_atom refs/heads/main '%(refname:lstrip=1)' fail
+batch_test_atom refs/heads/main '%(refname:lstrip=2)' fail
+batch_test_atom refs/heads/main '%(refname:lstrip=-1)' fail
+batch_test_atom refs/heads/main '%(refname:lstrip=-2)' fail
+batch_test_atom refs/heads/main '%(refname:rstrip=1)' fail
+batch_test_atom refs/heads/main '%(refname:rstrip=2)' fail
+batch_test_atom refs/heads/main '%(refname:rstrip=-1)' fail
+batch_test_atom refs/heads/main '%(refname:rstrip=-2)' fail
+batch_test_atom refs/heads/main '%(refname:strip=1)' fail
+batch_test_atom refs/heads/main '%(refname:strip=2)' fail
+batch_test_atom refs/heads/main '%(refname:strip=-1)' fail
+batch_test_atom refs/heads/main '%(refname:strip=-2)' fail
+batch_test_atom refs/heads/main '%(upstream)' fail
+batch_test_atom refs/heads/main '%(upstream:short)' fail
+batch_test_atom refs/heads/main '%(upstream:lstrip=2)' fail
+batch_test_atom refs/heads/main '%(upstream:lstrip=-2)' fail
+batch_test_atom refs/heads/main '%(upstream:rstrip=2)' fail
+batch_test_atom refs/heads/main '%(upstream:rstrip=-2)' fail
+batch_test_atom refs/heads/main '%(upstream:strip=2)' fail
+batch_test_atom refs/heads/main '%(upstream:strip=-2)' fail
+batch_test_atom refs/heads/main '%(push)' fail
+batch_test_atom refs/heads/main '%(push:short)' fail
+batch_test_atom refs/heads/main '%(push:lstrip=1)' fail
+batch_test_atom refs/heads/main '%(push:lstrip=-1)' fail
+batch_test_atom refs/heads/main '%(push:rstrip=1)' fail
+batch_test_atom refs/heads/main '%(push:rstrip=-1)' fail
+batch_test_atom refs/heads/main '%(push:strip=1)' fail
+batch_test_atom refs/heads/main '%(push:strip=-1)' fail
+batch_test_atom refs/heads/main '%(objecttype)'
+batch_test_atom refs/heads/main '%(objectsize)'
+batch_test_atom refs/heads/main '%(objectsize:disk)'
+batch_test_atom refs/heads/main '%(deltabase)'
+batch_test_atom refs/heads/main '%(objectname)'
+batch_test_atom refs/heads/main '%(objectname:short)'
+batch_test_atom refs/heads/main '%(objectname:short=1)'
+batch_test_atom refs/heads/main '%(objectname:short=10)'
+batch_test_atom refs/heads/main '%(tree)'
+batch_test_atom refs/heads/main '%(tree:short)'
+batch_test_atom refs/heads/main '%(tree:short=1)'
+batch_test_atom refs/heads/main '%(tree:short=10)'
+batch_test_atom refs/heads/main '%(parent)'
+batch_test_atom refs/heads/main '%(parent:short)'
+batch_test_atom refs/heads/main '%(parent:short=1)'
+batch_test_atom refs/heads/main '%(parent:short=10)'
+batch_test_atom refs/heads/main '%(numparent)'
+batch_test_atom refs/heads/main '%(object)'
+batch_test_atom refs/heads/main '%(type)'
+batch_test_atom refs/heads/main '%(raw)'
+batch_test_atom refs/heads/main '%(*objectname)'
+batch_test_atom refs/heads/main '%(*objecttype)'
+batch_test_atom refs/heads/main '%(author)'
+batch_test_atom refs/heads/main '%(authorname)'
+batch_test_atom refs/heads/main '%(authoremail)'
+batch_test_atom refs/heads/main '%(authoremail:trim)'
+batch_test_atom refs/heads/main '%(authoremail:localpart)'
+batch_test_atom refs/heads/main '%(authordate)'
+batch_test_atom refs/heads/main '%(committer)'
+batch_test_atom refs/heads/main '%(committername)'
+batch_test_atom refs/heads/main '%(committeremail)'
+batch_test_atom refs/heads/main '%(committeremail:trim)'
+batch_test_atom refs/heads/main '%(committeremail:localpart)'
+batch_test_atom refs/heads/main '%(committerdate)'
+batch_test_atom refs/heads/main '%(tag)'
+batch_test_atom refs/heads/main '%(tagger)'
+batch_test_atom refs/heads/main '%(taggername)'
+batch_test_atom refs/heads/main '%(taggeremail)'
+batch_test_atom refs/heads/main '%(taggeremail:trim)'
+batch_test_atom refs/heads/main '%(taggeremail:localpart)'
+batch_test_atom refs/heads/main '%(taggerdate)'
+batch_test_atom refs/heads/main '%(creator)'
+batch_test_atom refs/heads/main '%(creatordate)'
+batch_test_atom refs/heads/main '%(subject)'
+batch_test_atom refs/heads/main '%(subject:sanitize)'
+batch_test_atom refs/heads/main '%(contents:subject)'
+batch_test_atom refs/heads/main '%(body)'
+batch_test_atom refs/heads/main '%(contents:body)'
+batch_test_atom refs/heads/main '%(contents:signature)'
+batch_test_atom refs/heads/main '%(contents)'
+batch_test_atom refs/heads/main '%(HEAD)' fail
+batch_test_atom refs/heads/main '%(upstream:track)' fail
+batch_test_atom refs/heads/main '%(upstream:trackshort)' fail
+batch_test_atom refs/heads/main '%(upstream:track,nobracket)' fail
+batch_test_atom refs/heads/main '%(upstream:nobracket,track)' fail
+batch_test_atom refs/heads/main '%(push:track)' fail
+batch_test_atom refs/heads/main '%(push:trackshort)' fail
+batch_test_atom refs/heads/main '%(worktreepath)' fail
+batch_test_atom refs/heads/main '%(symref)' fail
+batch_test_atom refs/heads/main '%(flag)' fail
+
+batch_test_atom refs/tags/testtag '%(refname)' fail
+batch_test_atom refs/tags/testtag '%(refname:short)' fail
+batch_test_atom refs/tags/testtag '%(upstream)' fail
+batch_test_atom refs/tags/testtag '%(push)' fail
+batch_test_atom refs/tags/testtag '%(objecttype)'
+batch_test_atom refs/tags/testtag '%(objectsize)'
+batch_test_atom refs/tags/testtag '%(objectsize:disk)'
+batch_test_atom refs/tags/testtag '%(*objectsize:disk)'
+batch_test_atom refs/tags/testtag '%(deltabase)'
+batch_test_atom refs/tags/testtag '%(*deltabase)'
+batch_test_atom refs/tags/testtag '%(objectname)'
+batch_test_atom refs/tags/testtag '%(objectname:short)'
+batch_test_atom refs/tags/testtag '%(tree)'
+batch_test_atom refs/tags/testtag '%(tree:short)'
+batch_test_atom refs/tags/testtag '%(tree:short=1)'
+batch_test_atom refs/tags/testtag '%(tree:short=10)'
+batch_test_atom refs/tags/testtag '%(parent)'
+batch_test_atom refs/tags/testtag '%(parent:short)'
+batch_test_atom refs/tags/testtag '%(parent:short=1)'
+batch_test_atom refs/tags/testtag '%(parent:short=10)'
+batch_test_atom refs/tags/testtag '%(numparent)'
+batch_test_atom refs/tags/testtag '%(object)'
+batch_test_atom refs/tags/testtag '%(type)'
+batch_test_atom refs/tags/testtag '%(*objectname)'
+batch_test_atom refs/tags/testtag '%(*objecttype)'
+batch_test_atom refs/tags/testtag '%(author)'
+batch_test_atom refs/tags/testtag '%(authorname)'
+batch_test_atom refs/tags/testtag '%(authoremail)'
+batch_test_atom refs/tags/testtag '%(authoremail:trim)'
+batch_test_atom refs/tags/testtag '%(authoremail:localpart)'
+batch_test_atom refs/tags/testtag '%(authordate)'
+batch_test_atom refs/tags/testtag '%(committer)'
+batch_test_atom refs/tags/testtag '%(committername)'
+batch_test_atom refs/tags/testtag '%(committeremail)'
+batch_test_atom refs/tags/testtag '%(committeremail:trim)'
+batch_test_atom refs/tags/testtag '%(committeremail:localpart)'
+batch_test_atom refs/tags/testtag '%(committerdate)'
+batch_test_atom refs/tags/testtag '%(tag)'
+batch_test_atom refs/tags/testtag '%(tagger)'
+batch_test_atom refs/tags/testtag '%(taggername)'
+batch_test_atom refs/tags/testtag '%(taggeremail)'
+batch_test_atom refs/tags/testtag '%(taggeremail:trim)'
+batch_test_atom refs/tags/testtag '%(taggeremail:localpart)'
+batch_test_atom refs/tags/testtag '%(taggerdate)'
+batch_test_atom refs/tags/testtag '%(creator)'
+batch_test_atom refs/tags/testtag '%(creatordate)'
+batch_test_atom refs/tags/testtag '%(subject)'
+batch_test_atom refs/tags/testtag '%(subject:sanitize)'
+batch_test_atom refs/tags/testtag '%(contents:subject)'
+batch_test_atom refs/tags/testtag '%(body)'
+batch_test_atom refs/tags/testtag '%(contents:body)'
+batch_test_atom refs/tags/testtag '%(contents:signature)'
+batch_test_atom refs/tags/testtag '%(contents)'
+batch_test_atom refs/tags/testtag '%(HEAD)' fail
+
+batch_test_atom refs/myblobs/blob1 '%(refname)' fail
+batch_test_atom refs/myblobs/blob1 '%(upstream)' fail
+batch_test_atom refs/myblobs/blob1 '%(push)' fail
+batch_test_atom refs/myblobs/blob1 '%(HEAD)' fail
+
+batch_test_atom refs/myblobs/blob1 '%(objectname)'
+batch_test_atom refs/myblobs/blob1 '%(objecttype)'
+batch_test_atom refs/myblobs/blob1 '%(objectsize)'
+batch_test_atom refs/myblobs/blob1 '%(objectsize:disk)'
+batch_test_atom refs/myblobs/blob1 '%(deltabase)'
+
+batch_test_atom refs/myblobs/blob1 '%(contents)'
+batch_test_atom refs/myblobs/blob2 '%(contents)'
+
+batch_test_atom refs/myblobs/blob1 '%(raw)'
+batch_test_atom refs/myblobs/blob2 '%(raw)'
+batch_test_atom refs/mytrees/tree1 '%(raw)'
+
+batch_test_atom refs/myblobs/blob1 '%(raw:size)'
+batch_test_atom refs/myblobs/blob2 '%(raw:size)'
+batch_test_atom refs/mytrees/tree1 '%(raw:size)'
+
+batch_test_atom refs/myblobs/blob1 '%(if:equals=blob)%(objecttype)%(then)commit%(else)not commit%(end)'
+batch_test_atom refs/myblobs/blob2 '%(if:equals=blob)%(objecttype)%(then)commit%(else)not commit%(end)'
+batch_test_atom refs/mytrees/tree1 '%(if:equals=tree)%(objecttype)%(then)tree%(else)not tree%(end)'
+
+batch_test_atom refs/heads/main '%(align:60) objectname is %(objectname)%(end)|%(objectname)'
+batch_test_atom refs/heads/main '%(align:left,60) objectname is %(objectname)%(end)|%(objectname)'
+batch_test_atom refs/heads/main '%(align:middle,60) objectname is %(objectname)%(end)|%(objectname)'
+batch_test_atom refs/heads/main '%(align:60,right) objectname is %(objectname)%(end)|%(objectname)'
+
+batch_test_atom refs/heads/main 'VALID'
+batch_test_atom refs/heads/main '%(INVALID)' fail
+batch_test_atom refs/heads/main '%(authordate:INVALID)' fail
+
+test_expect_success '%(rest) works with both a branch and a tag' '
+	cat >expected <<-EOF &&
+	123 commit 123
+	456 tag 456
+	EOF
+	git cat-file --batch-check="%(rest) %(objecttype) %(rest)" >actual <<-EOF &&
+	refs/heads/main 123
+	refs/tags/testtag 456
+	EOF
+	test_cmp expected actual
+'
+
+batch_test_atom refs/heads/main '%(objectname) %(objecttype) %(objectsize)
+%(raw)'
+batch_test_atom refs/tags/testtag '%(objectname) %(objecttype) %(objectsize)
+%(raw)'
+batch_test_atom refs/myblobs/blob1 '%(objectname) %(objecttype) %(objectsize)
+%(raw)'
+batch_test_atom refs/myblobs/blob2 '%(objectname) %(objecttype) %(objectsize)
+%(raw)'
+
+
+test_expect_success 'cat-file --batch equals to --batch-check with atoms' '
+	git cat-file --batch-check="%(objectname) %(objecttype) %(objectsize)
+%(raw)" >expected <<-EOF &&
+	refs/heads/main
+	refs/tags/testtag
+	EOF
+	git cat-file --batch >actual <<-EOF &&
+	refs/heads/main
+	refs/tags/testtag
+	EOF
+	cmp expected actual
+'
+
 test_done
-- 
gitgitgadget


^ permalink raw reply	[flat|nested] 121+ messages in thread

* [PATCH v4 13/14] [GSOC] cat-file: reuse err buf in batch_object_write()
  2021-06-22  3:20     ` [PATCH v4 00/14] [GSOC][RFC] cat-file: reuse ref-filter logic ZheNing Hu via GitGitGadget
                         ` (11 preceding siblings ...)
  2021-06-22  3:20       ` [PATCH v4 12/14] [GSOC] cat-file: reuse ref-filter logic ZheNing Hu via GitGitGadget
@ 2021-06-22  3:20       ` ZheNing Hu via GitGitGadget
  2021-06-22  3:20       ` [PATCH v4 14/14] [GSOC] cat-file: re-implement --textconv, --filters options ZheNing Hu via GitGitGadget
  2021-06-25 16:02       ` [PATCH v5 00/15] [GSOC][RFC] cat-file: reuse ref-filter logic ZheNing Hu via GitGitGadget
  14 siblings, 0 replies; 121+ messages in thread
From: ZheNing Hu via GitGitGadget @ 2021-06-22  3:20 UTC (permalink / raw)
  To: git
  Cc: Junio C Hamano, Christian Couder, Hariom Verma, Bagas Sanjaya,
	Jeff King, Ævar Arnfjörð Bjarmason, ZheNing Hu,
	ZheNing Hu

From: ZheNing Hu <adlternative@gmail.com>

Reuse the `err` buffer in batch_object_write(), as the
buffer `scratch` does. This will reduce the overhead
of multiple allocations of memory of the err buffer.

Mentored-by: Christian Couder <christian.couder@gmail.com>
Mentored-by: Hariom Verma <hariom18599@gmail.com>
Signed-off-by: ZheNing Hu <adlternative@gmail.com>
---
 builtin/cat-file.c | 22 ++++++++++++++--------
 1 file changed, 14 insertions(+), 8 deletions(-)

diff --git a/builtin/cat-file.c b/builtin/cat-file.c
index 0e7ad038e5fb..27403326e7a7 100644
--- a/builtin/cat-file.c
+++ b/builtin/cat-file.c
@@ -212,35 +212,36 @@ static void batch_write(struct batch_options *opt, const void *data, int len)
 
 static void batch_object_write(const char *obj_name,
 			       struct strbuf *scratch,
+			       struct strbuf *err,
 			       struct batch_options *opt,
 			       struct expand_data *data)
 {
 	int ret = 0;
-	struct strbuf err = STRBUF_INIT;
 	struct ref_array_item item = { data->oid, data->rest };
 
 	strbuf_reset(scratch);
+	strbuf_reset(err);
 
-	ret = format_ref_array_item(&item, &opt->format, scratch, &err);
+	ret = format_ref_array_item(&item, &opt->format, scratch, err);
 	if (ret < 0) {
-		die("%s\n", err.buf);
+		die("%s\n", err->buf);
 	} if (ret) {
 		/* ret > 0 means when the object corresponding to oid
 		 * cannot be found in format_ref_array_item(), we only print
 		 * the error message.
 		 */
-		printf("%s\n", err.buf);
+		printf("%s\n", err->buf);
 		fflush(stdout);
 	} else {
 		strbuf_addch(scratch, '\n');
 		batch_write(opt, scratch->buf, scratch->len);
 	}
 	free_ref_array_item_value(&item);
-	strbuf_release(&err);
 }
 
 static void batch_one_object(const char *obj_name,
 			     struct strbuf *scratch,
+			     struct strbuf *err,
 			     struct batch_options *opt,
 			     struct expand_data *data)
 {
@@ -294,7 +295,7 @@ static void batch_one_object(const char *obj_name,
 		return;
 	}
 
-	batch_object_write(obj_name, scratch, opt, data);
+	batch_object_write(obj_name, scratch, err, opt, data);
 }
 
 struct object_cb_data {
@@ -302,13 +303,14 @@ struct object_cb_data {
 	struct expand_data *expand;
 	struct oidset *seen;
 	struct strbuf *scratch;
+	struct strbuf *err;
 };
 
 static int batch_object_cb(const struct object_id *oid, void *vdata)
 {
 	struct object_cb_data *data = vdata;
 	oidcpy(&data->expand->oid, oid);
-	batch_object_write(NULL, data->scratch, data->opt, data->expand);
+	batch_object_write(NULL, data->scratch, data->err, data->opt, data->expand);
 	return 0;
 }
 
@@ -364,6 +366,7 @@ static int batch_objects(struct batch_options *batch, const struct option *optio
 {
 	struct strbuf input = STRBUF_INIT;
 	struct strbuf output = STRBUF_INIT;
+	struct strbuf err = STRBUF_INIT;
 	struct strbuf format = STRBUF_INIT;
 	struct expand_data data;
 	int save_warning;
@@ -392,6 +395,7 @@ static int batch_objects(struct batch_options *batch, const struct option *optio
 		cb.opt = batch;
 		cb.expand = &data;
 		cb.scratch = &output;
+		cb.err = &err;
 
 		if (batch->unordered) {
 			struct oidset seen = OIDSET_INIT;
@@ -416,6 +420,7 @@ static int batch_objects(struct batch_options *batch, const struct option *optio
 
 		strbuf_release(&format);
 		strbuf_release(&output);
+		strbuf_release(&err);
 		return 0;
 	}
 
@@ -444,12 +449,13 @@ static int batch_objects(struct batch_options *batch, const struct option *optio
 			data.rest = p;
 		}
 
-		batch_one_object(input.buf, &output, batch, &data);
+		batch_one_object(input.buf, &output, &err, batch, &data);
 	}
 
 	strbuf_release(&format);
 	strbuf_release(&input);
 	strbuf_release(&output);
+	strbuf_release(&err);
 	warn_on_object_refname_ambiguity = save_warning;
 	return retval;
 }
-- 
gitgitgadget


^ permalink raw reply	[flat|nested] 121+ messages in thread

* [PATCH v4 14/14] [GSOC] cat-file: re-implement --textconv, --filters options
  2021-06-22  3:20     ` [PATCH v4 00/14] [GSOC][RFC] cat-file: reuse ref-filter logic ZheNing Hu via GitGitGadget
                         ` (12 preceding siblings ...)
  2021-06-22  3:20       ` [PATCH v4 13/14] [GSOC] cat-file: reuse err buf in batch_object_write() ZheNing Hu via GitGitGadget
@ 2021-06-22  3:20       ` ZheNing Hu via GitGitGadget
  2021-06-25 16:02       ` [PATCH v5 00/15] [GSOC][RFC] cat-file: reuse ref-filter logic ZheNing Hu via GitGitGadget
  14 siblings, 0 replies; 121+ messages in thread
From: ZheNing Hu via GitGitGadget @ 2021-06-22  3:20 UTC (permalink / raw)
  To: git
  Cc: Junio C Hamano, Christian Couder, Hariom Verma, Bagas Sanjaya,
	Jeff King, Ævar Arnfjörð Bjarmason, ZheNing Hu,
	ZheNing Hu

From: ZheNing Hu <adlternative@gmail.com>

After cat-file reuses the ref-filter logic, we re-implement the
functions of --textconv and --filters options.

Add members `use_textconv` and `use_filters` in struct `ref_format`,
and use global variables `use_filters` and `use_textconv` in
`ref-filter.c`, so that we can filter the content of the object
in get_object(). Use `actual_oi` to record the real expand_data:
it may point to the original `oi` or the `act_oi` processed by
`textconv_object()` or `convert_to_working_tree()`. `grab_values()`
will grab the contents of `actual_oi` and `grab_common_values()`
to grab the contents of origin `oi`, this ensures that `%(objectsize)`
still uses the size of the unfiltered data.

In `get_object()`, we made an optimization: Firstly, get the size and
type of the object instead of directly getting the object data.
If using --textconv, after successfully obtaining the filtered object
data, an extra oid_object_info_extended() will be skipped, which can
reduce the cost of object data copy; If using --filter, the data of
the object first will be getted first, and then convert_to_working_tree()
will be used to get the filtered object data.

Mentored-by: Christian Couder <christian.couder@gmail.com>
Mentored-by: Hariom Verma <hariom18599@gmail.com>
Signed-off-by: ZheNing Hu <adlternative@gmail.com>
---
 builtin/cat-file.c |  6 +++++
 ref-filter.c       | 59 ++++++++++++++++++++++++++++++++++++++++++++--
 ref-filter.h       |  2 ++
 3 files changed, 65 insertions(+), 2 deletions(-)

diff --git a/builtin/cat-file.c b/builtin/cat-file.c
index 27403326e7a7..f3140d927f7d 100644
--- a/builtin/cat-file.c
+++ b/builtin/cat-file.c
@@ -380,6 +380,12 @@ static int batch_objects(struct batch_options *batch, const struct option *optio
 	if (batch->print_contents)
 		strbuf_addstr(&format, "\n%(raw)");
 	batch->format.format = format.buf;
+
+	if (batch->cmdmode == 'c')
+		batch->format.use_textconv = 1;
+	else if (batch->cmdmode == 'w')
+		batch->format.use_filters = 1;
+
 	if (verify_ref_format(&batch->format))
 		usage_with_options(cat_file_usage, options);
 
diff --git a/ref-filter.c b/ref-filter.c
index 181d99c92735..99b87742b0fd 100644
--- a/ref-filter.c
+++ b/ref-filter.c
@@ -1,3 +1,4 @@
+#define USE_THE_INDEX_COMPATIBILITY_MACROS
 #include "builtin.h"
 #include "cache.h"
 #include "parse-options.h"
@@ -84,6 +85,9 @@ static struct expand_data {
 	struct object_info info;
 } oi, oi_deref;
 
+int use_filters;
+int use_textconv;
+
 struct ref_to_worktree_entry {
 	struct hashmap_entry ent;
 	struct worktree *wt; /* key is wt->head_ref */
@@ -1031,6 +1035,9 @@ int verify_ref_format(struct ref_format *format)
 					       used_atom[at].atom_type == ATOM_WORKTREEPATH)))
 			die(_("this command reject atom %%(%.*s)"), (int)(ep - sp - 2), sp + 2);
 
+		use_filters = format->use_filters;
+		use_textconv = format->use_textconv;
+
 		if ((format->quote_style == QUOTE_PYTHON ||
 		     format->quote_style == QUOTE_SHELL ||
 		     format->quote_style == QUOTE_TCL) &&
@@ -1742,10 +1749,38 @@ static int get_object(struct ref_array_item *ref, int deref, struct object **obj
 {
 	/* parse_object_buffer() will set eaten to 0 if free() will be needed */
 	int eaten = 1;
+	struct expand_data *actual_oi = oi;
+	struct expand_data act_oi = {0};
+
 	if (oi->info.contentp) {
 		/* We need to know that to use parse_object_buffer properly */
+		void **temp_contentp = oi->info.contentp;
+		oi->info.contentp = NULL;
 		oi->info.sizep = &oi->size;
 		oi->info.typep = &oi->type;
+
+		/* get the type and size */
+		if (oid_object_info_extended(the_repository, &oi->oid, &oi->info,
+					OBJECT_INFO_LOOKUP_REPLACE))
+			return strbuf_addf_ret(err, 1, _("%s missing"),
+					       oid_to_hex(&oi->oid));
+
+		oi->info.sizep = NULL;
+		oi->info.typep = NULL;
+		oi->info.contentp = temp_contentp;
+
+		if (use_textconv && !ref->rest)
+			return strbuf_addf_ret(err, -1, _("missing path for '%s'"),
+					       oid_to_hex(&act_oi.oid));
+		if (use_textconv && oi->type == OBJ_BLOB) {
+			act_oi = *oi;
+			if (textconv_object(the_repository,
+					    ref->rest, 0100644, &act_oi.oid,
+					    1, (char **)(&act_oi.content), &act_oi.size)) {
+				actual_oi = &act_oi;
+				goto success;
+			}
+		}
 	}
 	if (oid_object_info_extended(the_repository, &oi->oid, &oi->info,
 				     OBJECT_INFO_LOOKUP_REPLACE))
@@ -1755,19 +1790,39 @@ static int get_object(struct ref_array_item *ref, int deref, struct object **obj
 		BUG("Object size is less than zero.");
 
 	if (oi->info.contentp) {
-		*obj = parse_object_buffer(the_repository, &oi->oid, oi->type, oi->size, oi->content, &eaten);
+		if (use_filters && !ref->rest)
+			return strbuf_addf_ret(err, -1, _("missing path for '%s'"),
+					       oid_to_hex(&oi->oid));
+		if (use_filters && oi->type == OBJ_BLOB) {
+			struct strbuf strbuf = STRBUF_INIT;
+			struct checkout_metadata meta;
+			act_oi = *oi;
+
+			init_checkout_metadata(&meta, NULL, NULL, &act_oi.oid);
+			if (!convert_to_working_tree(&the_index, ref->rest, act_oi.content, act_oi.size, &strbuf, &meta))
+				die("could not convert '%s' %s",
+					oid_to_hex(&oi->oid), ref->rest);
+			act_oi.size = strbuf.len;
+			act_oi.content = strbuf_detach(&strbuf, NULL);
+			actual_oi = &act_oi;
+		}
+
+success:
+		*obj = parse_object_buffer(the_repository, &actual_oi->oid, actual_oi->type, actual_oi->size, actual_oi->content, &eaten);
 		if (!*obj) {
 			if (!eaten)
 				free(oi->content);
 			return strbuf_addf_ret(err, -1, _("parse_object_buffer failed on %s for %s"),
 					       oid_to_hex(&oi->oid), ref->refname);
 		}
-		grab_values(ref->value, deref, *obj, oi);
+		grab_values(ref->value, deref, *obj, actual_oi);
 	}
 
 	grab_common_values(ref->value, deref, oi);
 	if (!eaten)
 		free(oi->content);
+	if (actual_oi != oi)
+		free(actual_oi->content);
 	return 0;
 }
 
diff --git a/ref-filter.h b/ref-filter.h
index 053980a6a426..497e3e93632f 100644
--- a/ref-filter.h
+++ b/ref-filter.h
@@ -80,6 +80,8 @@ struct ref_format {
 	const char *rest;
 	int cat_file_mode;
 	int quote_style;
+	int use_textconv;
+	int use_filters;
 	int use_rest;
 	int use_color;
 
-- 
gitgitgadget

^ permalink raw reply	[flat|nested] 121+ messages in thread

* Re: [PATCH v4 06/14] [GSOC] ref-filter: pass get_object() return value to their callers
  2021-06-22  3:20       ` [PATCH v4 06/14] [GSOC] ref-filter: pass get_object() return value to their callers ZheNing Hu via GitGitGadget
@ 2021-06-24  4:02         ` Bagas Sanjaya
  0 siblings, 0 replies; 121+ messages in thread
From: Bagas Sanjaya @ 2021-06-24  4:02 UTC (permalink / raw)
  To: ZheNing Hu via GitGitGadget, git
  Cc: Junio C Hamano, Christian Couder, Hariom Verma, Jeff King,
	Ævar Arnfjörð Bjarmason, ZheNing Hu

On 22/06/21 10.20, ZheNing Hu via GitGitGadget wrote:
> From: ZheNing Hu <adlternative@gmail.com>
> 
> Since in the refactor of `git cat-file --batch` later,
> oid_object_info_extended() in get_object() will be used to obtain
> the info of an object with it's oid. When the object cannot be
> obtained in the git repository, `cat-file --batch` expects to output
> "<oid> missing" and continue the next oid query instead of letting
> Git exit. In other error conditions, Git should exit normally. So we
> can achieve this function by passing the return value of get_object().
> 

s/Since/Because/

-- 
An old man doll... just what I always wanted! - Clara

^ permalink raw reply	[flat|nested] 121+ messages in thread

* Re: [PATCH v4 11/14] [GSOC] cat-file: change batch_objects parameter name
  2021-06-22  3:20       ` [PATCH v4 11/14] [GSOC] cat-file: change batch_objects parameter name ZheNing Hu via GitGitGadget
@ 2021-06-24  4:07         ` Bagas Sanjaya
  0 siblings, 0 replies; 121+ messages in thread
From: Bagas Sanjaya @ 2021-06-24  4:07 UTC (permalink / raw)
  To: ZheNing Hu via GitGitGadget, git
  Cc: Junio C Hamano, Christian Couder, Hariom Verma, Jeff King,
	Ævar Arnfjörð Bjarmason, ZheNing Hu

On 22/06/21 10.20, ZheNing Hu via GitGitGadget wrote:
> From: ZheNing Hu <adlternative@gmail.com>
> 
> Since cat-file reuses ref-filter logic later will add the
> formal parameter "const struct option *options" to
> batch_objects(), the two synonymous parameters of "opt"
> and "options" may confuse readers, so change batch_options
> parameter of batch_objects() from "opt" to "batch".
> 

Better say "Because later cat-file reuses ref-filter logic that will add 
parameter ... ".

-- 
An old man doll... just what I always wanted! - Clara

^ permalink raw reply	[flat|nested] 121+ messages in thread

* Re: [PATCH v4 02/14] [GSOC] ref-filter: add %(raw) atom
  2021-06-22  3:20       ` [PATCH v4 02/14] [GSOC] ref-filter: add %(raw) atom ZheNing Hu via GitGitGadget
@ 2021-06-24  4:14         ` Bagas Sanjaya
  2021-06-24  8:23           ` ZheNing Hu
  0 siblings, 1 reply; 121+ messages in thread
From: Bagas Sanjaya @ 2021-06-24  4:14 UTC (permalink / raw)
  To: ZheNing Hu via GitGitGadget, git
  Cc: Junio C Hamano, Christian Couder, Hariom Verma, Jeff King,
	Ævar Arnfjörð Bjarmason, ZheNing Hu

On 22/06/21 10.20, ZheNing Hu via GitGitGadget wrote:
> Beyond, `--format=%(raw)` cannot be used with `--python`, `--shell`,
> `--tcl`, `--perl` because if our binary raw data is passed to a
> variable in the host language, the host language may not support
> arbitrary binary data in the variables of its string type.

Better say:

"Note that `--format=%(raw)` cannot be used with `--python`, `--shell`, 
`--tcl`, and `--perl` because if the binary raw data is passed to a 
variable in such languages, these may not support arbitrary binary data 
in their string variable type."

-- 
An old man doll... just what I always wanted! - Clara

^ permalink raw reply	[flat|nested] 121+ messages in thread

* Re: [PATCH v4 02/14] [GSOC] ref-filter: add %(raw) atom
  2021-06-24  4:14         ` Bagas Sanjaya
@ 2021-06-24  8:23           ` ZheNing Hu
  0 siblings, 0 replies; 121+ messages in thread
From: ZheNing Hu @ 2021-06-24  8:23 UTC (permalink / raw)
  To: Bagas Sanjaya
  Cc: ZheNing Hu via GitGitGadget, Git List, Junio C Hamano,
	Christian Couder, Hariom Verma, Jeff King,
	Ævar Arnfjörð Bjarmason

Bagas Sanjaya <bagasdotme@gmail.com> 于2021年6月24日周四 下午12:14写道:
>
> On 22/06/21 10.20, ZheNing Hu via GitGitGadget wrote:
> > Beyond, `--format=%(raw)` cannot be used with `--python`, `--shell`,
> > `--tcl`, `--perl` because if our binary raw data is passed to a
> > variable in the host language, the host language may not support
> > arbitrary binary data in the variables of its string type.
>
> Better say:
>
> "Note that `--format=%(raw)` cannot be used with `--python`, `--shell`,
> `--tcl`, and `--perl` because if the binary raw data is passed to a
> variable in such languages, these may not support arbitrary binary data
> in their string variable type."
>

Thanks, Bagas Sanjaya, I will change all of them.


> --
> An old man doll... just what I always wanted! - Clara

--
ZheNing Hu

^ permalink raw reply	[flat|nested] 121+ messages in thread

* [PATCH v5 00/15] [GSOC][RFC] cat-file: reuse ref-filter logic
  2021-06-22  3:20     ` [PATCH v4 00/14] [GSOC][RFC] cat-file: reuse ref-filter logic ZheNing Hu via GitGitGadget
                         ` (13 preceding siblings ...)
  2021-06-22  3:20       ` [PATCH v4 14/14] [GSOC] cat-file: re-implement --textconv, --filters options ZheNing Hu via GitGitGadget
@ 2021-06-25 16:02       ` ZheNing Hu via GitGitGadget
  2021-06-25 16:02         ` [PATCH v5 01/15] [GSOC] ref-filter: add obj-type check in grab contents ZheNing Hu via GitGitGadget
                           ` (15 more replies)
  14 siblings, 16 replies; 121+ messages in thread
From: ZheNing Hu via GitGitGadget @ 2021-06-25 16:02 UTC (permalink / raw)
  To: git
  Cc: Junio C Hamano, Christian Couder, Hariom Verma, Bagas Sanjaya,
	Jeff King, Ævar Arnfjörð Bjarmason, ZheNing Hu

This patch series make cat-file reuse ref-filter logic.

Change from last version:

 1. At the suggestion of Bagas Sanjaya, modified the expression of submitted
    information.
 2. Remove grab_oid() function in ref-filter to reduce repeated checks.

ZheNing Hu (15):
  [GSOC] ref-filter: add obj-type check in grab contents
  [GSOC] ref-filter: add %(raw) atom
  [GSOC] ref-filter: --format=%(raw) re-support --perl
  [GSOC] ref-filter: use non-const ref_format in *_atom_parser()
  [GSOC] ref-filter: add %(rest) atom
  [GSOC] ref-filter: pass get_object() return value to their callers
  [GSOC] ref-filter: introduce free_ref_array_item_value() function
  [GSOC] ref-filter: add cat_file_mode in struct ref_format
  [GSOC] ref-filter: modify the error message and value in get_object
  [GSOC] cat-file: add has_object_file() check
  [GSOC] cat-file: change batch_objects parameter name
  [GSOC] cat-file: reuse ref-filter logic
  [GSOC] cat-file: reuse err buf in batch_object_write()
  [GSOC] cat-file: re-implement --textconv, --filters options
  [GSOC] ref-filter: remove grab_oid() function

 Documentation/git-cat-file.txt     |   6 +
 Documentation/git-for-each-ref.txt |   9 +
 builtin/cat-file.c                 | 277 ++++++----------------
 builtin/tag.c                      |   2 +-
 quote.c                            |  17 ++
 quote.h                            |   1 +
 ref-filter.c                       | 357 ++++++++++++++++++++++-------
 ref-filter.h                       |  14 +-
 t/t1006-cat-file.sh                | 252 ++++++++++++++++++++
 t/t3203-branch-output.sh           |   4 +
 t/t6300-for-each-ref.sh            | 235 +++++++++++++++++++
 t/t6301-for-each-ref-errors.sh     |   2 +-
 t/t7004-tag.sh                     |   4 +
 t/t7030-verify-tag.sh              |   4 +
 14 files changed, 888 insertions(+), 296 deletions(-)


base-commit: 1197f1a46360d3ae96bd9c15908a3a6f8e562207
Published-As: https://github.com/gitgitgadget/git/releases/tag/pr-980%2Fadlternative%2Fcat-file-batch-refactor-v5
Fetch-It-Via: git fetch https://github.com/gitgitgadget/git pr-980/adlternative/cat-file-batch-refactor-v5
Pull-Request: https://github.com/gitgitgadget/git/pull/980

Range-diff vs v4:

  1:  f72ad9cc5e8 =  1:  f72ad9cc5e8 [GSOC] ref-filter: add obj-type check in grab contents
  2:  ab497d66c11 !  2:  4e473838b9d [GSOC] ref-filter: add %(raw) atom
     @@ Commit message
          can help us add raw object data to the buffer or compare two buffers
          which contain raw object data.
      
     -    Beyond, `--format=%(raw)` cannot be used with `--python`, `--shell`,
     -    `--tcl`, `--perl` because if our binary raw data is passed to a
     -    variable in the host language, the host language may not support
     -    arbitrary binary data in the variables of its string type.
     +    Note that `--format=%(raw)` cannot be used with `--python`, `--shell`,
     +    `--tcl`, and `--perl` because if the binary raw data is passed to a
     +    variable in such languages, these may not support arbitrary binary data
     +    in their string variable type.
      
          Mentored-by: Christian Couder <christian.couder@gmail.com>
          Mentored-by: Hariom Verma <hariom18599@gmail.com>
  3:  b54dbc431e0 =  3:  765cf08a108 [GSOC] ref-filter: --format=%(raw) re-support --perl
  4:  9fbbb3c492f =  4:  d2aeafd0ef3 [GSOC] ref-filter: use non-const ref_format in *_atom_parser()
  5:  08aa44e5e57 =  5:  1ca3a42f041 [GSOC] ref-filter: add %(rest) atom
  6:  05682bccf9f !  6:  67f1a3cca9a [GSOC] ref-filter: pass get_object() return value to their callers
     @@ Metadata
       ## Commit message ##
          [GSOC] ref-filter: pass get_object() return value to their callers
      
     -    Since in the refactor of `git cat-file --batch` later,
     +    Because in the refactor of `git cat-file --batch` later,
          oid_object_info_extended() in get_object() will be used to obtain
          the info of an object with it's oid. When the object cannot be
          obtained in the git repository, `cat-file --batch` expects to output
  7:  06db6cd6f1f =  7:  2a48a48e81c [GSOC] ref-filter: introduce free_ref_array_item_value() function
  8:  b0d9e139935 =  8:  be55005be75 [GSOC] ref-filter: add cat_file_mode in struct ref_format
  9:  db7dd8b042c =  9:  937f88b7837 [GSOC] ref-filter: modify the error message and value in get_object
 10:  6b577969734 = 10:  45657499c55 [GSOC] cat-file: add has_object_file() check
 11:  069aa203666 ! 11:  bf5c0a017ad [GSOC] cat-file: change batch_objects parameter name
     @@ Metadata
       ## Commit message ##
          [GSOC] cat-file: change batch_objects parameter name
      
     -    Since cat-file reuses ref-filter logic later will add the
     -    formal parameter "const struct option *options" to
     -    batch_objects(), the two synonymous parameters of "opt"
     -    and "options" may confuse readers, so change batch_options
     -    parameter of batch_objects() from "opt" to "batch".
     +    Because later cat-file reuses ref-filter logic that will add
     +    parameter "const struct option *options" to batch_objects(),
     +    the two synonymous parameters of "opt" and "options" may
     +    confuse readers, so change batch_options parameter of
     +    batch_objects() from "opt" to "batch".
      
          Mentored-by: Christian Couder <christian.couder@gmail.com>
          Mentored-by: Hariom Verma <hariom18599@gmail.com>
 12:  258ec0a46c5 = 12:  370101ba65f [GSOC] cat-file: reuse ref-filter logic
 13:  bda6aae9a6c = 13:  69eef47065d [GSOC] cat-file: reuse err buf in batch_object_write()
 14:  d1114a2bd74 = 14:  a7ac037a946 [GSOC] cat-file: re-implement --textconv, --filters options
  -:  ----------- > 15:  843de8864a9 [GSOC] ref-filter: remove grab_oid() function

-- 
gitgitgadget

^ permalink raw reply	[flat|nested] 121+ messages in thread

* [PATCH v5 01/15] [GSOC] ref-filter: add obj-type check in grab contents
  2021-06-25 16:02       ` [PATCH v5 00/15] [GSOC][RFC] cat-file: reuse ref-filter logic ZheNing Hu via GitGitGadget
@ 2021-06-25 16:02         ` ZheNing Hu via GitGitGadget
  2021-06-25 16:02         ` [PATCH v5 02/15] [GSOC] ref-filter: add %(raw) atom ZheNing Hu via GitGitGadget
                           ` (14 subsequent siblings)
  15 siblings, 0 replies; 121+ messages in thread
From: ZheNing Hu via GitGitGadget @ 2021-06-25 16:02 UTC (permalink / raw)
  To: git
  Cc: Junio C Hamano, Christian Couder, Hariom Verma, Bagas Sanjaya,
	Jeff King, Ævar Arnfjörð Bjarmason, ZheNing Hu,
	ZheNing Hu

From: ZheNing Hu <adlternative@gmail.com>

Only tag and commit objects use `grab_sub_body_contents()` to grab
object contents in the current codebase.  We want to teach the
function to also handle blobs and trees to get their raw data,
without parsing a blob (whose contents looks like a commit or a tag)
incorrectly as a commit or a tag.

Skip the block of code that is specific to handling commits and tags
early when the given object is of a wrong type to help later
addition to handle other types of objects in this function.

Mentored-by: Christian Couder <christian.couder@gmail.com>
Mentored-by: Hariom Verma <hariom18599@gmail.com>
Helped-by: Junio C Hamano <gitster@pobox.com>
Signed-off-by: ZheNing Hu <adlternative@gmail.com>
---
 ref-filter.c | 24 +++++++++++++++---------
 1 file changed, 15 insertions(+), 9 deletions(-)

diff --git a/ref-filter.c b/ref-filter.c
index 4db0e40ff4c..5cee6512fba 100644
--- a/ref-filter.c
+++ b/ref-filter.c
@@ -1356,11 +1356,12 @@ static void append_lines(struct strbuf *out, const char *buf, unsigned long size
 }
 
 /* See grab_values */
-static void grab_sub_body_contents(struct atom_value *val, int deref, void *buf)
+static void grab_sub_body_contents(struct atom_value *val, int deref, struct expand_data *data)
 {
 	int i;
 	const char *subpos = NULL, *bodypos = NULL, *sigpos = NULL;
 	size_t sublen = 0, bodylen = 0, nonsiglen = 0, siglen = 0;
+	void *buf = data->content;
 
 	for (i = 0; i < used_atom_cnt; i++) {
 		struct used_atom *atom = &used_atom[i];
@@ -1371,10 +1372,13 @@ static void grab_sub_body_contents(struct atom_value *val, int deref, void *buf)
 			continue;
 		if (deref)
 			name++;
-		if (strcmp(name, "body") &&
-		    !starts_with(name, "subject") &&
-		    !starts_with(name, "trailers") &&
-		    !starts_with(name, "contents"))
+
+		if ((data->type != OBJ_TAG &&
+		     data->type != OBJ_COMMIT) ||
+		    (strcmp(name, "body") &&
+		     !starts_with(name, "subject") &&
+		     !starts_with(name, "trailers") &&
+		     !starts_with(name, "contents")))
 			continue;
 		if (!subpos)
 			find_subpos(buf,
@@ -1438,17 +1442,19 @@ static void fill_missing_values(struct atom_value *val)
  * pointed at by the ref itself; otherwise it is the object the
  * ref (which is a tag) refers to.
  */
-static void grab_values(struct atom_value *val, int deref, struct object *obj, void *buf)
+static void grab_values(struct atom_value *val, int deref, struct object *obj, struct expand_data *data)
 {
+	void *buf = data->content;
+
 	switch (obj->type) {
 	case OBJ_TAG:
 		grab_tag_values(val, deref, obj);
-		grab_sub_body_contents(val, deref, buf);
+		grab_sub_body_contents(val, deref, data);
 		grab_person("tagger", val, deref, buf);
 		break;
 	case OBJ_COMMIT:
 		grab_commit_values(val, deref, obj);
-		grab_sub_body_contents(val, deref, buf);
+		grab_sub_body_contents(val, deref, data);
 		grab_person("author", val, deref, buf);
 		grab_person("committer", val, deref, buf);
 		break;
@@ -1678,7 +1684,7 @@ static int get_object(struct ref_array_item *ref, int deref, struct object **obj
 			return strbuf_addf_ret(err, -1, _("parse_object_buffer failed on %s for %s"),
 					       oid_to_hex(&oi->oid), ref->refname);
 		}
-		grab_values(ref->value, deref, *obj, oi->content);
+		grab_values(ref->value, deref, *obj, oi);
 	}
 
 	grab_common_values(ref->value, deref, oi);
-- 
gitgitgadget


^ permalink raw reply	[flat|nested] 121+ messages in thread

* [PATCH v5 02/15] [GSOC] ref-filter: add %(raw) atom
  2021-06-25 16:02       ` [PATCH v5 00/15] [GSOC][RFC] cat-file: reuse ref-filter logic ZheNing Hu via GitGitGadget
  2021-06-25 16:02         ` [PATCH v5 01/15] [GSOC] ref-filter: add obj-type check in grab contents ZheNing Hu via GitGitGadget
@ 2021-06-25 16:02         ` ZheNing Hu via GitGitGadget
  2021-06-26  0:42           ` Bagas Sanjaya
  2021-06-25 16:02         ` [PATCH v5 03/15] [GSOC] ref-filter: --format=%(raw) re-support --perl ZheNing Hu via GitGitGadget
                           ` (13 subsequent siblings)
  15 siblings, 1 reply; 121+ messages in thread
From: ZheNing Hu via GitGitGadget @ 2021-06-25 16:02 UTC (permalink / raw)
  To: git
  Cc: Junio C Hamano, Christian Couder, Hariom Verma, Bagas Sanjaya,
	Jeff King, Ævar Arnfjörð Bjarmason, ZheNing Hu,
	ZheNing Hu

From: ZheNing Hu <adlternative@gmail.com>

Add new formatting option `%(raw)`, which will print the raw
object data without any changes. It will help further to migrate
all cat-file formatting logic from cat-file to ref-filter.

The raw data of blob, tree objects may contain '\0', but most of
the logic in `ref-filter` depends on the output of the atom being
text (specifically, no embedded NULs in it).

E.g. `quote_formatting()` use `strbuf_addstr()` or `*._quote_buf()`
add the data to the buffer. The raw data of a tree object is
`100644 one\0...`, only the `100644 one` will be added to the buffer,
which is incorrect.

Therefore, we need to find a way to record the length of the
atom_value's member `s`. Although strbuf can already record the
string and its length, if we want to replace the type of atom_value's
member `s` with strbuf, many places in ref-filter that are filled
with dynamically allocated mermory in `v->s` are not easy to replace.
At the same time, we need to check if `v->s == NULL` in
populate_value(), and strbuf cannot easily distinguish NULL and empty
strings, but c-style "const char *" can do it. So add a new member in
`struct atom_value`: `s_size`, which can record raw object size, it
can help us add raw object data to the buffer or compare two buffers
which contain raw object data.

Note that `--format=%(raw)` cannot be used with `--python`, `--shell`,
`--tcl`, and `--perl` because if the binary raw data is passed to a
variable in such languages, these may not support arbitrary binary data
in their string variable type.

Mentored-by: Christian Couder <christian.couder@gmail.com>
Mentored-by: Hariom Verma <hariom18599@gmail.com>
Helped-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Helped-by: Felipe Contreras <felipe.contreras@gmail.com>
Helped-by: Phillip Wood <phillip.wood@dunelm.org.uk>
Helped-by: Junio C Hamano <gitster@pobox.com>
Based-on-patch-by: Olga Telezhnaya <olyatelezhnaya@gmail.com>
Signed-off-by: ZheNing Hu <adlternative@gmail.com>
---
 Documentation/git-for-each-ref.txt |   9 ++
 ref-filter.c                       | 139 +++++++++++++++----
 t/t6300-for-each-ref.sh            | 216 +++++++++++++++++++++++++++++
 3 files changed, 337 insertions(+), 27 deletions(-)

diff --git a/Documentation/git-for-each-ref.txt b/Documentation/git-for-each-ref.txt
index 2ae2478de70..7f1f0a1ca3b 100644
--- a/Documentation/git-for-each-ref.txt
+++ b/Documentation/git-for-each-ref.txt
@@ -235,6 +235,15 @@ and `date` to extract the named component.  For email fields (`authoremail`,
 without angle brackets, and `:localpart` to get the part before the `@` symbol
 out of the trimmed email.
 
+The raw data in an object is `raw`.
+
+raw:size::
+	The raw data size of the object.
+
+Note that `--format=%(raw)` can not be used with `--python`, `--shell`, `--tcl`,
+`--perl` because the host language may not support arbitrary binary data in the
+variables of its string type.
+
 The message in a commit or a tag object is `contents`, from which
 `contents:<part>` can be used to extract various parts out of:
 
diff --git a/ref-filter.c b/ref-filter.c
index 5cee6512fba..7822be90307 100644
--- a/ref-filter.c
+++ b/ref-filter.c
@@ -144,6 +144,7 @@ enum atom_type {
 	ATOM_BODY,
 	ATOM_TRAILERS,
 	ATOM_CONTENTS,
+	ATOM_RAW,
 	ATOM_UPSTREAM,
 	ATOM_PUSH,
 	ATOM_SYMREF,
@@ -189,6 +190,9 @@ static struct used_atom {
 			struct process_trailer_options trailer_opts;
 			unsigned int nlines;
 		} contents;
+		struct {
+			enum { RAW_BARE, RAW_LENGTH } option;
+		} raw_data;
 		struct {
 			cmp_status cmp_status;
 			const char *str;
@@ -426,6 +430,18 @@ static int contents_atom_parser(const struct ref_format *format, struct used_ato
 	return 0;
 }
 
+static int raw_atom_parser(const struct ref_format *format, struct used_atom *atom,
+				const char *arg, struct strbuf *err)
+{
+	if (!arg)
+		atom->u.raw_data.option = RAW_BARE;
+	else if (!strcmp(arg, "size"))
+		atom->u.raw_data.option = RAW_LENGTH;
+	else
+		return strbuf_addf_ret(err, -1, _("unrecognized %%(raw) argument: %s"), arg);
+	return 0;
+}
+
 static int oid_atom_parser(const struct ref_format *format, struct used_atom *atom,
 			   const char *arg, struct strbuf *err)
 {
@@ -586,6 +602,7 @@ static struct {
 	[ATOM_BODY] = { "body", SOURCE_OBJ, FIELD_STR, body_atom_parser },
 	[ATOM_TRAILERS] = { "trailers", SOURCE_OBJ, FIELD_STR, trailers_atom_parser },
 	[ATOM_CONTENTS] = { "contents", SOURCE_OBJ, FIELD_STR, contents_atom_parser },
+	[ATOM_RAW] = { "raw", SOURCE_OBJ, FIELD_STR, raw_atom_parser },
 	[ATOM_UPSTREAM] = { "upstream", SOURCE_NONE, FIELD_STR, remote_ref_atom_parser },
 	[ATOM_PUSH] = { "push", SOURCE_NONE, FIELD_STR, remote_ref_atom_parser },
 	[ATOM_SYMREF] = { "symref", SOURCE_NONE, FIELD_STR, refname_atom_parser },
@@ -620,12 +637,15 @@ struct ref_formatting_state {
 
 struct atom_value {
 	const char *s;
+	size_t s_size;
 	int (*handler)(struct atom_value *atomv, struct ref_formatting_state *state,
 		       struct strbuf *err);
 	uintmax_t value; /* used for sorting when not FIELD_STR */
 	struct used_atom *atom;
 };
 
+#define ATOM_VALUE_S_SIZE_INIT (-1)
+
 /*
  * Used to parse format string and sort specifiers
  */
@@ -644,13 +664,6 @@ static int parse_ref_filter_atom(const struct ref_format *format,
 		return strbuf_addf_ret(err, -1, _("malformed field name: %.*s"),
 				       (int)(ep-atom), atom);
 
-	/* Do we have the atom already used elsewhere? */
-	for (i = 0; i < used_atom_cnt; i++) {
-		int len = strlen(used_atom[i].name);
-		if (len == ep - atom && !memcmp(used_atom[i].name, atom, len))
-			return i;
-	}
-
 	/*
 	 * If the atom name has a colon, strip it and everything after
 	 * it off - it specifies the format for this entry, and
@@ -660,6 +673,13 @@ static int parse_ref_filter_atom(const struct ref_format *format,
 	arg = memchr(sp, ':', ep - sp);
 	atom_len = (arg ? arg : ep) - sp;
 
+	/* Do we have the atom already used elsewhere? */
+	for (i = 0; i < used_atom_cnt; i++) {
+		int len = strlen(used_atom[i].name);
+		if (len == ep - atom && !memcmp(used_atom[i].name, atom, len))
+			return i;
+	}
+
 	/* Is the atom a valid one? */
 	for (i = 0; i < ARRAY_SIZE(valid_atom); i++) {
 		int len = strlen(valid_atom[i].name);
@@ -709,11 +729,14 @@ static int parse_ref_filter_atom(const struct ref_format *format,
 	return at;
 }
 
-static void quote_formatting(struct strbuf *s, const char *str, int quote_style)
+static void quote_formatting(struct strbuf *s, const char *str, size_t len, int quote_style)
 {
 	switch (quote_style) {
 	case QUOTE_NONE:
-		strbuf_addstr(s, str);
+		if (len != ATOM_VALUE_S_SIZE_INIT)
+			strbuf_add(s, str, len);
+		else
+			strbuf_addstr(s, str);
 		break;
 	case QUOTE_SHELL:
 		sq_quote_buf(s, str);
@@ -740,9 +763,12 @@ static int append_atom(struct atom_value *v, struct ref_formatting_state *state,
 	 * encountered.
 	 */
 	if (!state->stack->prev)
-		quote_formatting(&state->stack->output, v->s, state->quote_style);
+		quote_formatting(&state->stack->output, v->s, v->s_size, state->quote_style);
 	else
-		strbuf_addstr(&state->stack->output, v->s);
+		if (v->s_size != ATOM_VALUE_S_SIZE_INIT)
+			strbuf_add(&state->stack->output, v->s, v->s_size);
+		else
+			strbuf_addstr(&state->stack->output, v->s);
 	return 0;
 }
 
@@ -842,21 +868,23 @@ static int if_atom_handler(struct atom_value *atomv, struct ref_formatting_state
 	return 0;
 }
 
-static int is_empty(const char *s)
+static int is_empty(struct strbuf *buf)
 {
-	while (*s != '\0') {
-		if (!isspace(*s))
-			return 0;
-		s++;
-	}
-	return 1;
-}
+	const char *cur = buf->buf;
+	const char *end = buf->buf + buf->len;
+
+	while (cur != end && (isspace(*cur)))
+		cur++;
+
+	return cur == end;
+ }
 
 static int then_atom_handler(struct atom_value *atomv, struct ref_formatting_state *state,
 			     struct strbuf *err)
 {
 	struct ref_formatting_stack *cur = state->stack;
 	struct if_then_else *if_then_else = NULL;
+	size_t str_len = 0;
 
 	if (cur->at_end == if_then_else_handler)
 		if_then_else = (struct if_then_else *)cur->at_end_data;
@@ -867,18 +895,22 @@ static int then_atom_handler(struct atom_value *atomv, struct ref_formatting_sta
 	if (if_then_else->else_atom_seen)
 		return strbuf_addf_ret(err, -1, _("format: %%(then) atom used after %%(else)"));
 	if_then_else->then_atom_seen = 1;
+	if (if_then_else->str)
+		str_len = strlen(if_then_else->str);
 	/*
 	 * If the 'equals' or 'notequals' attribute is used then
 	 * perform the required comparison. If not, only non-empty
 	 * strings satisfy the 'if' condition.
 	 */
 	if (if_then_else->cmp_status == COMPARE_EQUAL) {
-		if (!strcmp(if_then_else->str, cur->output.buf))
+		if (str_len == cur->output.len &&
+		    !memcmp(if_then_else->str, cur->output.buf, cur->output.len))
 			if_then_else->condition_satisfied = 1;
 	} else if (if_then_else->cmp_status == COMPARE_UNEQUAL) {
-		if (strcmp(if_then_else->str, cur->output.buf))
+		if (str_len != cur->output.len ||
+		    memcmp(if_then_else->str, cur->output.buf, cur->output.len))
 			if_then_else->condition_satisfied = 1;
-	} else if (cur->output.len && !is_empty(cur->output.buf))
+	} else if (cur->output.len && !is_empty(&cur->output))
 		if_then_else->condition_satisfied = 1;
 	strbuf_reset(&cur->output);
 	return 0;
@@ -924,7 +956,7 @@ static int end_atom_handler(struct atom_value *atomv, struct ref_formatting_stat
 	 * only on the topmost supporting atom.
 	 */
 	if (!current->prev->prev) {
-		quote_formatting(&s, current->output.buf, state->quote_style);
+		quote_formatting(&s, current->output.buf, current->output.len, state->quote_style);
 		strbuf_swap(&current->output, &s);
 	}
 	strbuf_release(&s);
@@ -974,6 +1006,10 @@ int verify_ref_format(struct ref_format *format)
 		at = parse_ref_filter_atom(format, sp + 2, ep, &err);
 		if (at < 0)
 			die("%s", err.buf);
+		if (format->quote_style && used_atom[at].atom_type == ATOM_RAW &&
+		    used_atom[at].u.raw_data.option == RAW_BARE)
+			die(_("--format=%.*s cannot be used with"
+			      "--python, --shell, --tcl, --perl"), (int)(ep - sp - 2), sp + 2);
 		cp = ep + 1;
 
 		if (skip_prefix(used_atom[at].name, "color:", &color))
@@ -1362,17 +1398,29 @@ static void grab_sub_body_contents(struct atom_value *val, int deref, struct exp
 	const char *subpos = NULL, *bodypos = NULL, *sigpos = NULL;
 	size_t sublen = 0, bodylen = 0, nonsiglen = 0, siglen = 0;
 	void *buf = data->content;
+	unsigned long buf_size = data->size;
 
 	for (i = 0; i < used_atom_cnt; i++) {
 		struct used_atom *atom = &used_atom[i];
 		const char *name = atom->name;
 		struct atom_value *v = &val[i];
+		enum atom_type atom_type = atom->atom_type;
 
 		if (!!deref != (*name == '*'))
 			continue;
 		if (deref)
 			name++;
 
+		if (atom_type == ATOM_RAW) {
+			if (atom->u.raw_data.option == RAW_BARE) {
+				v->s = xmemdupz(buf, buf_size);
+				v->s_size = buf_size;
+			} else if (atom->u.raw_data.option == RAW_LENGTH) {
+				v->s = xstrfmt("%"PRIuMAX, (uintmax_t)buf_size);
+			}
+			continue;
+		}
+
 		if ((data->type != OBJ_TAG &&
 		     data->type != OBJ_COMMIT) ||
 		    (strcmp(name, "body") &&
@@ -1460,9 +1508,11 @@ static void grab_values(struct atom_value *val, int deref, struct object *obj, s
 		break;
 	case OBJ_TREE:
 		/* grab_tree_values(val, deref, obj, buf, sz); */
+		grab_sub_body_contents(val, deref, data);
 		break;
 	case OBJ_BLOB:
 		/* grab_blob_values(val, deref, obj, buf, sz); */
+		grab_sub_body_contents(val, deref, data);
 		break;
 	default:
 		die("Eh?  Object of type %d?", obj->type);
@@ -1766,6 +1816,7 @@ static int populate_value(struct ref_array_item *ref, struct strbuf *err)
 		const char *refname;
 		struct branch *branch = NULL;
 
+		v->s_size = ATOM_VALUE_S_SIZE_INIT;
 		v->handler = append_atom;
 		v->atom = atom;
 
@@ -2369,6 +2420,19 @@ static int compare_detached_head(struct ref_array_item *a, struct ref_array_item
 	return 0;
 }
 
+static int memcasecmp(const void *vs1, const void *vs2, size_t n)
+{
+	const char *s1 = vs1, *s2 = vs2;
+	const char *end = s1 + n;
+
+	for (; s1 < end; s1++, s2++) {
+		int diff = tolower(*s1) - tolower(*s2);
+		if (diff)
+			return diff;
+	}
+	return 0;
+}
+
 static int cmp_ref_sorting(struct ref_sorting *s, struct ref_array_item *a, struct ref_array_item *b)
 {
 	struct atom_value *va, *vb;
@@ -2389,10 +2453,30 @@ static int cmp_ref_sorting(struct ref_sorting *s, struct ref_array_item *a, stru
 	} else if (s->sort_flags & REF_SORTING_VERSION) {
 		cmp = versioncmp(va->s, vb->s);
 	} else if (cmp_type == FIELD_STR) {
-		int (*cmp_fn)(const char *, const char *);
-		cmp_fn = s->sort_flags & REF_SORTING_ICASE
-			? strcasecmp : strcmp;
-		cmp = cmp_fn(va->s, vb->s);
+		if (va->s_size == ATOM_VALUE_S_SIZE_INIT &&
+		    vb->s_size == ATOM_VALUE_S_SIZE_INIT) {
+			int (*cmp_fn)(const char *, const char *);
+			cmp_fn = s->sort_flags & REF_SORTING_ICASE
+				? strcasecmp : strcmp;
+			cmp = cmp_fn(va->s, vb->s);
+		} else {
+			size_t a_size = va->s_size == ATOM_VALUE_S_SIZE_INIT ?
+					strlen(va->s) : va->s_size;
+			size_t b_size = vb->s_size == ATOM_VALUE_S_SIZE_INIT ?
+					strlen(vb->s) : vb->s_size;
+			int (*cmp_fn)(const void *, const void *, size_t);
+			cmp_fn = s->sort_flags & REF_SORTING_ICASE
+				? memcasecmp : memcmp;
+
+			cmp = cmp_fn(va->s, vb->s, b_size > a_size ?
+				     a_size : b_size);
+			if (!cmp) {
+				if (a_size > b_size)
+					cmp = 1;
+				else if (a_size < b_size)
+					cmp = -1;
+			}
+		}
 	} else {
 		if (va->value < vb->value)
 			cmp = -1;
@@ -2492,6 +2576,7 @@ int format_ref_array_item(struct ref_array_item *info,
 	}
 	if (format->need_color_reset_at_eol) {
 		struct atom_value resetv;
+		resetv.s_size = ATOM_VALUE_S_SIZE_INIT;
 		resetv.s = GIT_COLOR_RESET;
 		if (append_atom(&resetv, &state, error_buf)) {
 			pop_stack_element(&state.stack);
diff --git a/t/t6300-for-each-ref.sh b/t/t6300-for-each-ref.sh
index 9e0214076b4..9c5379e2f56 100755
--- a/t/t6300-for-each-ref.sh
+++ b/t/t6300-for-each-ref.sh
@@ -130,6 +130,8 @@ test_atom head parent:short=10 ''
 test_atom head numparent 0
 test_atom head object ''
 test_atom head type ''
+test_atom head raw "$(git cat-file commit refs/heads/main)
+"
 test_atom head '*objectname' ''
 test_atom head '*objecttype' ''
 test_atom head author 'A U Thor <author@example.com> 1151968724 +0200'
@@ -221,6 +223,15 @@ test_atom tag contents 'Tagging at 1151968727
 '
 test_atom tag HEAD ' '
 
+test_expect_success 'basic atom: refs/tags/testtag *raw' '
+	git cat-file commit refs/tags/testtag^{} >expected &&
+	git for-each-ref --format="%(*raw)" refs/tags/testtag >actual &&
+	sanitize_pgp <expected >expected.clean &&
+	echo >>expected.clean &&
+	sanitize_pgp <actual >actual.clean &&
+	test_cmp expected.clean actual.clean
+'
+
 test_expect_success 'Check invalid atoms names are errors' '
 	test_must_fail git for-each-ref --format="%(INVALID)" refs/heads
 '
@@ -686,6 +697,15 @@ test_atom refs/tags/signed-empty contents:body ''
 test_atom refs/tags/signed-empty contents:signature "$sig"
 test_atom refs/tags/signed-empty contents "$sig"
 
+test_expect_success GPG 'basic atom: refs/tags/signed-empty raw' '
+	git cat-file tag refs/tags/signed-empty >expected &&
+	git for-each-ref --format="%(raw)" refs/tags/signed-empty >actual &&
+	sanitize_pgp <expected >expected.clean &&
+	echo >>expected.clean &&
+	sanitize_pgp <actual >actual.clean &&
+	test_cmp expected.clean actual.clean
+'
+
 test_atom refs/tags/signed-short subject 'subject line'
 test_atom refs/tags/signed-short subject:sanitize 'subject-line'
 test_atom refs/tags/signed-short contents:subject 'subject line'
@@ -695,6 +715,15 @@ test_atom refs/tags/signed-short contents:signature "$sig"
 test_atom refs/tags/signed-short contents "subject line
 $sig"
 
+test_expect_success GPG 'basic atom: refs/tags/signed-short raw' '
+	git cat-file tag refs/tags/signed-short >expected &&
+	git for-each-ref --format="%(raw)" refs/tags/signed-short >actual &&
+	sanitize_pgp <expected >expected.clean &&
+	echo >>expected.clean &&
+	sanitize_pgp <actual >actual.clean &&
+	test_cmp expected.clean actual.clean
+'
+
 test_atom refs/tags/signed-long subject 'subject line'
 test_atom refs/tags/signed-long subject:sanitize 'subject-line'
 test_atom refs/tags/signed-long contents:subject 'subject line'
@@ -708,6 +737,15 @@ test_atom refs/tags/signed-long contents "subject line
 body contents
 $sig"
 
+test_expect_success GPG 'basic atom: refs/tags/signed-long raw' '
+	git cat-file tag refs/tags/signed-long >expected &&
+	git for-each-ref --format="%(raw)" refs/tags/signed-long >actual &&
+	sanitize_pgp <expected >expected.clean &&
+	echo >>expected.clean &&
+	sanitize_pgp <actual >actual.clean &&
+	test_cmp expected.clean actual.clean
+'
+
 test_expect_success 'set up refs pointing to tree and blob' '
 	git update-ref refs/mytrees/first refs/heads/main^{tree} &&
 	git update-ref refs/myblobs/first refs/heads/main:one
@@ -720,6 +758,16 @@ test_atom refs/mytrees/first contents:body ""
 test_atom refs/mytrees/first contents:signature ""
 test_atom refs/mytrees/first contents ""
 
+test_expect_success 'basic atom: refs/mytrees/first raw' '
+	git cat-file tree refs/mytrees/first >expected &&
+	echo >>expected &&
+	git for-each-ref --format="%(raw)" refs/mytrees/first >actual &&
+	test_cmp expected actual &&
+	git cat-file -s refs/mytrees/first >expected &&
+	git for-each-ref --format="%(raw:size)" refs/mytrees/first >actual &&
+	test_cmp expected actual
+'
+
 test_atom refs/myblobs/first subject ""
 test_atom refs/myblobs/first contents:subject ""
 test_atom refs/myblobs/first body ""
@@ -727,6 +775,174 @@ test_atom refs/myblobs/first contents:body ""
 test_atom refs/myblobs/first contents:signature ""
 test_atom refs/myblobs/first contents ""
 
+test_expect_success 'basic atom: refs/myblobs/first raw' '
+	git cat-file blob refs/myblobs/first >expected &&
+	echo >>expected &&
+	git for-each-ref --format="%(raw)" refs/myblobs/first >actual &&
+	test_cmp expected actual &&
+	git cat-file -s refs/myblobs/first >expected &&
+	git for-each-ref --format="%(raw:size)" refs/myblobs/first >actual &&
+	test_cmp expected actual
+'
+
+test_expect_success 'set up refs pointing to binary blob' '
+	printf "a\0b\0c" >blob1 &&
+	printf "a\0c\0b" >blob2 &&
+	printf "\0a\0b\0c" >blob3 &&
+	printf "abc" >blob4 &&
+	printf "\0 \0 \0 " >blob5 &&
+	printf "\0 \0a\0 " >blob6 &&
+	printf "  " >blob7 &&
+	>blob8 &&
+	obj=$(git hash-object -w blob1) &&
+        git update-ref refs/myblobs/blob1 "$obj" &&
+	obj=$(git hash-object -w blob2) &&
+        git update-ref refs/myblobs/blob2 "$obj" &&
+	obj=$(git hash-object -w blob3) &&
+        git update-ref refs/myblobs/blob3 "$obj" &&
+	obj=$(git hash-object -w blob4) &&
+        git update-ref refs/myblobs/blob4 "$obj" &&
+	obj=$(git hash-object -w blob5) &&
+        git update-ref refs/myblobs/blob5 "$obj" &&
+	obj=$(git hash-object -w blob6) &&
+        git update-ref refs/myblobs/blob6 "$obj" &&
+	obj=$(git hash-object -w blob7) &&
+        git update-ref refs/myblobs/blob7 "$obj" &&
+	obj=$(git hash-object -w blob8) &&
+        git update-ref refs/myblobs/blob8 "$obj"
+'
+
+test_expect_success 'Verify sorts with raw' '
+	cat >expected <<-EOF &&
+	refs/myblobs/blob8
+	refs/myblobs/blob5
+	refs/myblobs/blob6
+	refs/myblobs/blob3
+	refs/myblobs/blob7
+	refs/mytrees/first
+	refs/myblobs/first
+	refs/myblobs/blob1
+	refs/myblobs/blob2
+	refs/myblobs/blob4
+	refs/heads/main
+	EOF
+	git for-each-ref --format="%(refname)" --sort=raw \
+		refs/heads/main refs/myblobs/ refs/mytrees/first >actual &&
+	test_cmp expected actual
+'
+
+test_expect_success 'Verify sorts with raw:size' '
+	cat >expected <<-EOF &&
+	refs/myblobs/blob8
+	refs/myblobs/first
+	refs/myblobs/blob7
+	refs/heads/main
+	refs/myblobs/blob4
+	refs/myblobs/blob1
+	refs/myblobs/blob2
+	refs/myblobs/blob3
+	refs/myblobs/blob5
+	refs/myblobs/blob6
+	refs/mytrees/first
+	EOF
+	git for-each-ref --format="%(refname)" --sort=raw:size \
+		refs/heads/main refs/myblobs/ refs/mytrees/first >actual &&
+	test_cmp expected actual
+'
+
+test_expect_success 'validate raw atom with %(if:equals)' '
+	cat >expected <<-EOF &&
+	not equals
+	not equals
+	not equals
+	not equals
+	not equals
+	not equals
+	refs/myblobs/blob4
+	not equals
+	not equals
+	not equals
+	not equals
+	not equals
+	EOF
+	git for-each-ref --format="%(if:equals=abc)%(raw)%(then)%(refname)%(else)not equals%(end)" \
+		refs/myblobs/ refs/heads/ >actual &&
+	test_cmp expected actual
+'
+
+test_expect_success 'validate raw atom with %(if:notequals)' '
+	cat >expected <<-EOF &&
+	refs/heads/ambiguous
+	refs/heads/main
+	refs/heads/newtag
+	refs/myblobs/blob1
+	refs/myblobs/blob2
+	refs/myblobs/blob3
+	equals
+	refs/myblobs/blob5
+	refs/myblobs/blob6
+	refs/myblobs/blob7
+	refs/myblobs/blob8
+	refs/myblobs/first
+	EOF
+	git for-each-ref --format="%(if:notequals=abc)%(raw)%(then)%(refname)%(else)equals%(end)" \
+		refs/myblobs/ refs/heads/ >actual &&
+	test_cmp expected actual
+'
+
+test_expect_success 'empty raw refs with %(if)' '
+	cat >expected <<-EOF &&
+	refs/myblobs/blob1 not empty
+	refs/myblobs/blob2 not empty
+	refs/myblobs/blob3 not empty
+	refs/myblobs/blob4 not empty
+	refs/myblobs/blob5 not empty
+	refs/myblobs/blob6 not empty
+	refs/myblobs/blob7 empty
+	refs/myblobs/blob8 empty
+	refs/myblobs/first not empty
+	EOF
+	git for-each-ref --format="%(refname) %(if)%(raw)%(then)not empty%(else)empty%(end)" \
+		refs/myblobs/ >actual &&
+	test_cmp expected actual
+'
+
+test_expect_success '%(raw) with --python must fail' '
+	test_must_fail git for-each-ref --format="%(raw)" --python
+'
+
+test_expect_success '%(raw) with --tcl must fail' '
+	test_must_fail git for-each-ref --format="%(raw)" --tcl
+'
+
+test_expect_success '%(raw) with --perl must fail' '
+	test_must_fail git for-each-ref --format="%(raw)" --perl
+'
+
+test_expect_success '%(raw) with --shell must fail' '
+	test_must_fail git for-each-ref --format="%(raw)" --shell
+'
+
+test_expect_success '%(raw) with --shell and --sort=raw must fail' '
+	test_must_fail git for-each-ref --format="%(raw)" --sort=raw --shell
+'
+
+test_expect_success '%(raw:size) with --shell' '
+	git for-each-ref --format="%(raw:size)" | while read line
+	do
+		echo "'\''$line'\''" >>expect
+	done &&
+	git for-each-ref --format="%(raw:size)" --shell >actual &&
+	test_cmp expect actual
+'
+
+test_expect_success 'for-each-ref --format compare with cat-file --batch' '
+	git rev-parse refs/mytrees/first | git cat-file --batch >expected &&
+	git for-each-ref --format="%(objectname) %(objecttype) %(objectsize)
+%(raw)" refs/mytrees/first >actual &&
+	test_cmp expected actual
+'
+
 test_expect_success 'set up multiple-sort tags' '
 	for when in 100000 200000
 	do
-- 
gitgitgadget


^ permalink raw reply	[flat|nested] 121+ messages in thread

* [PATCH v5 03/15] [GSOC] ref-filter: --format=%(raw) re-support --perl
  2021-06-25 16:02       ` [PATCH v5 00/15] [GSOC][RFC] cat-file: reuse ref-filter logic ZheNing Hu via GitGitGadget
  2021-06-25 16:02         ` [PATCH v5 01/15] [GSOC] ref-filter: add obj-type check in grab contents ZheNing Hu via GitGitGadget
  2021-06-25 16:02         ` [PATCH v5 02/15] [GSOC] ref-filter: add %(raw) atom ZheNing Hu via GitGitGadget
@ 2021-06-25 16:02         ` ZheNing Hu via GitGitGadget
  2021-06-25 16:02         ` [PATCH v5 04/15] [GSOC] ref-filter: use non-const ref_format in *_atom_parser() ZheNing Hu via GitGitGadget
                           ` (12 subsequent siblings)
  15 siblings, 0 replies; 121+ messages in thread
From: ZheNing Hu via GitGitGadget @ 2021-06-25 16:02 UTC (permalink / raw)
  To: git
  Cc: Junio C Hamano, Christian Couder, Hariom Verma, Bagas Sanjaya,
	Jeff King, Ævar Arnfjörð Bjarmason, ZheNing Hu,
	ZheNing Hu

From: ZheNing Hu <adlternative@gmail.com>

Because the perl language can handle binary data correctly,
add the function perl_quote_buf_with_len(), which can specify
the length of the data and prevent the data from being truncated
at '\0' to help `--format="%(raw)"` re-support `--perl`.

Helped-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: ZheNing Hu <adlternative@gmail.com>
---
 Documentation/git-for-each-ref.txt |  2 +-
 quote.c                            | 17 +++++++++++++++++
 quote.h                            |  1 +
 ref-filter.c                       | 15 +++++++++++----
 t/t6300-for-each-ref.sh            | 19 +++++++++++++++++--
 5 files changed, 47 insertions(+), 7 deletions(-)

diff --git a/Documentation/git-for-each-ref.txt b/Documentation/git-for-each-ref.txt
index 7f1f0a1ca3b..ea9b438c16f 100644
--- a/Documentation/git-for-each-ref.txt
+++ b/Documentation/git-for-each-ref.txt
@@ -241,7 +241,7 @@ raw:size::
 	The raw data size of the object.
 
 Note that `--format=%(raw)` can not be used with `--python`, `--shell`, `--tcl`,
-`--perl` because the host language may not support arbitrary binary data in the
+because the host language may not support arbitrary binary data in the
 variables of its string type.
 
 The message in a commit or a tag object is `contents`, from which
diff --git a/quote.c b/quote.c
index 8a3a5e39eb1..26719d21d1e 100644
--- a/quote.c
+++ b/quote.c
@@ -471,6 +471,23 @@ void perl_quote_buf(struct strbuf *sb, const char *src)
 	strbuf_addch(sb, sq);
 }
 
+void perl_quote_buf_with_len(struct strbuf *sb, const char *src, size_t len)
+{
+	const char sq = '\'';
+	const char bq = '\\';
+	const char *c = src;
+	const char *end = src + len;
+
+	strbuf_addch(sb, sq);
+	while (c != end) {
+		if (*c == sq || *c == bq)
+			strbuf_addch(sb, bq);
+		strbuf_addch(sb, *c);
+		c++;
+	}
+	strbuf_addch(sb, sq);
+}
+
 void python_quote_buf(struct strbuf *sb, const char *src)
 {
 	const char sq = '\'';
diff --git a/quote.h b/quote.h
index 768cc6338e2..0fe69e264b0 100644
--- a/quote.h
+++ b/quote.h
@@ -94,6 +94,7 @@ char *quote_path(const char *in, const char *prefix, struct strbuf *out, unsigne
 
 /* quoting as a string literal for other languages */
 void perl_quote_buf(struct strbuf *sb, const char *src);
+void perl_quote_buf_with_len(struct strbuf *sb, const char *src, size_t len);
 void python_quote_buf(struct strbuf *sb, const char *src);
 void tcl_quote_buf(struct strbuf *sb, const char *src);
 void basic_regex_quote_buf(struct strbuf *sb, const char *src);
diff --git a/ref-filter.c b/ref-filter.c
index 7822be90307..797b20ffa61 100644
--- a/ref-filter.c
+++ b/ref-filter.c
@@ -742,7 +742,10 @@ static void quote_formatting(struct strbuf *s, const char *str, size_t len, int
 		sq_quote_buf(s, str);
 		break;
 	case QUOTE_PERL:
-		perl_quote_buf(s, str);
+		if (len != ATOM_VALUE_S_SIZE_INIT)
+			perl_quote_buf_with_len(s, str, len);
+		else
+			perl_quote_buf(s, str);
 		break;
 	case QUOTE_PYTHON:
 		python_quote_buf(s, str);
@@ -1006,10 +1009,14 @@ int verify_ref_format(struct ref_format *format)
 		at = parse_ref_filter_atom(format, sp + 2, ep, &err);
 		if (at < 0)
 			die("%s", err.buf);
-		if (format->quote_style && used_atom[at].atom_type == ATOM_RAW &&
-		    used_atom[at].u.raw_data.option == RAW_BARE)
+
+		if ((format->quote_style == QUOTE_PYTHON ||
+		     format->quote_style == QUOTE_SHELL ||
+		     format->quote_style == QUOTE_TCL) &&
+		     used_atom[at].atom_type == ATOM_RAW &&
+		     used_atom[at].u.raw_data.option == RAW_BARE)
 			die(_("--format=%.*s cannot be used with"
-			      "--python, --shell, --tcl, --perl"), (int)(ep - sp - 2), sp + 2);
+			      "--python, --shell, --tcl"), (int)(ep - sp - 2), sp + 2);
 		cp = ep + 1;
 
 		if (skip_prefix(used_atom[at].name, "color:", &color))
diff --git a/t/t6300-for-each-ref.sh b/t/t6300-for-each-ref.sh
index 9c5379e2f56..5556063c347 100755
--- a/t/t6300-for-each-ref.sh
+++ b/t/t6300-for-each-ref.sh
@@ -915,8 +915,23 @@ test_expect_success '%(raw) with --tcl must fail' '
 	test_must_fail git for-each-ref --format="%(raw)" --tcl
 '
 
-test_expect_success '%(raw) with --perl must fail' '
-	test_must_fail git for-each-ref --format="%(raw)" --perl
+test_expect_success '%(raw) with --perl' '
+	git for-each-ref --format="\$name= %(raw);
+print \"\$name\"" refs/myblobs/blob1 --perl | perl > actual &&
+	cmp blob1 actual &&
+	git for-each-ref --format="\$name= %(raw);
+print \"\$name\"" refs/myblobs/blob3 --perl | perl > actual &&
+	cmp blob3 actual &&
+	git for-each-ref --format="\$name= %(raw);
+print \"\$name\"" refs/myblobs/blob8 --perl | perl > actual &&
+	cmp blob8 actual &&
+	git for-each-ref --format="\$name= %(raw);
+print \"\$name\"" refs/myblobs/first --perl | perl > actual &&
+	cmp one actual &&
+	git cat-file tree refs/mytrees/first > expected &&
+	git for-each-ref --format="\$name= %(raw);
+print \"\$name\"" refs/mytrees/first --perl | perl > actual &&
+	cmp expected actual
 '
 
 test_expect_success '%(raw) with --shell must fail' '
-- 
gitgitgadget


^ permalink raw reply	[flat|nested] 121+ messages in thread

* [PATCH v5 04/15] [GSOC] ref-filter: use non-const ref_format in *_atom_parser()
  2021-06-25 16:02       ` [PATCH v5 00/15] [GSOC][RFC] cat-file: reuse ref-filter logic ZheNing Hu via GitGitGadget
                           ` (2 preceding siblings ...)
  2021-06-25 16:02         ` [PATCH v5 03/15] [GSOC] ref-filter: --format=%(raw) re-support --perl ZheNing Hu via GitGitGadget
@ 2021-06-25 16:02         ` ZheNing Hu via GitGitGadget
  2021-06-25 16:02         ` [PATCH v5 05/15] [GSOC] ref-filter: add %(rest) atom ZheNing Hu via GitGitGadget
                           ` (11 subsequent siblings)
  15 siblings, 0 replies; 121+ messages in thread
From: ZheNing Hu via GitGitGadget @ 2021-06-25 16:02 UTC (permalink / raw)
  To: git
  Cc: Junio C Hamano, Christian Couder, Hariom Verma, Bagas Sanjaya,
	Jeff King, Ævar Arnfjörð Bjarmason, ZheNing Hu,
	ZheNing Hu

From: ZheNing Hu <adlternative@gmail.com>

Use non-const ref_format in *_atom_parser(), which can help us
modify the members of ref_format in *_atom_parser().

Mentored-by: Christian Couder <christian.couder@gmail.com>
Mentored-by: Hariom Verma <hariom18599@gmail.com>
Signed-off-by: ZheNing Hu <adlternative@gmail.com>
---
 builtin/tag.c |  2 +-
 ref-filter.c  | 44 ++++++++++++++++++++++----------------------
 ref-filter.h  |  4 ++--
 3 files changed, 25 insertions(+), 25 deletions(-)

diff --git a/builtin/tag.c b/builtin/tag.c
index 82fcfc09824..452558ec957 100644
--- a/builtin/tag.c
+++ b/builtin/tag.c
@@ -146,7 +146,7 @@ static int verify_tag(const char *name, const char *ref,
 		      const struct object_id *oid, void *cb_data)
 {
 	int flags;
-	const struct ref_format *format = cb_data;
+	struct ref_format *format = cb_data;
 	flags = GPG_VERIFY_VERBOSE;
 
 	if (format->format)
diff --git a/ref-filter.c b/ref-filter.c
index 797b20ffa61..d01a0266fb8 100644
--- a/ref-filter.c
+++ b/ref-filter.c
@@ -226,7 +226,7 @@ static int strbuf_addf_ret(struct strbuf *sb, int ret, const char *fmt, ...)
 	return ret;
 }
 
-static int color_atom_parser(const struct ref_format *format, struct used_atom *atom,
+static int color_atom_parser(struct ref_format *format, struct used_atom *atom,
 			     const char *color_value, struct strbuf *err)
 {
 	if (!color_value)
@@ -264,7 +264,7 @@ static int refname_atom_parser_internal(struct refname_atom *atom, const char *a
 	return 0;
 }
 
-static int remote_ref_atom_parser(const struct ref_format *format, struct used_atom *atom,
+static int remote_ref_atom_parser(struct ref_format *format, struct used_atom *atom,
 				  const char *arg, struct strbuf *err)
 {
 	struct string_list params = STRING_LIST_INIT_DUP;
@@ -311,7 +311,7 @@ static int remote_ref_atom_parser(const struct ref_format *format, struct used_a
 	return 0;
 }
 
-static int objecttype_atom_parser(const struct ref_format *format, struct used_atom *atom,
+static int objecttype_atom_parser(struct ref_format *format, struct used_atom *atom,
 				  const char *arg, struct strbuf *err)
 {
 	if (arg)
@@ -323,7 +323,7 @@ static int objecttype_atom_parser(const struct ref_format *format, struct used_a
 	return 0;
 }
 
-static int objectsize_atom_parser(const struct ref_format *format, struct used_atom *atom,
+static int objectsize_atom_parser(struct ref_format *format, struct used_atom *atom,
 				  const char *arg, struct strbuf *err)
 {
 	if (!arg) {
@@ -343,7 +343,7 @@ static int objectsize_atom_parser(const struct ref_format *format, struct used_a
 	return 0;
 }
 
-static int deltabase_atom_parser(const struct ref_format *format, struct used_atom *atom,
+static int deltabase_atom_parser(struct ref_format *format, struct used_atom *atom,
 				 const char *arg, struct strbuf *err)
 {
 	if (arg)
@@ -355,7 +355,7 @@ static int deltabase_atom_parser(const struct ref_format *format, struct used_at
 	return 0;
 }
 
-static int body_atom_parser(const struct ref_format *format, struct used_atom *atom,
+static int body_atom_parser(struct ref_format *format, struct used_atom *atom,
 			    const char *arg, struct strbuf *err)
 {
 	if (arg)
@@ -364,7 +364,7 @@ static int body_atom_parser(const struct ref_format *format, struct used_atom *a
 	return 0;
 }
 
-static int subject_atom_parser(const struct ref_format *format, struct used_atom *atom,
+static int subject_atom_parser(struct ref_format *format, struct used_atom *atom,
 			       const char *arg, struct strbuf *err)
 {
 	if (!arg)
@@ -376,7 +376,7 @@ static int subject_atom_parser(const struct ref_format *format, struct used_atom
 	return 0;
 }
 
-static int trailers_atom_parser(const struct ref_format *format, struct used_atom *atom,
+static int trailers_atom_parser(struct ref_format *format, struct used_atom *atom,
 				const char *arg, struct strbuf *err)
 {
 	atom->u.contents.trailer_opts.no_divider = 1;
@@ -402,7 +402,7 @@ static int trailers_atom_parser(const struct ref_format *format, struct used_ato
 	return 0;
 }
 
-static int contents_atom_parser(const struct ref_format *format, struct used_atom *atom,
+static int contents_atom_parser(struct ref_format *format, struct used_atom *atom,
 				const char *arg, struct strbuf *err)
 {
 	if (!arg)
@@ -430,7 +430,7 @@ static int contents_atom_parser(const struct ref_format *format, struct used_ato
 	return 0;
 }
 
-static int raw_atom_parser(const struct ref_format *format, struct used_atom *atom,
+static int raw_atom_parser(struct ref_format *format, struct used_atom *atom,
 				const char *arg, struct strbuf *err)
 {
 	if (!arg)
@@ -442,7 +442,7 @@ static int raw_atom_parser(const struct ref_format *format, struct used_atom *at
 	return 0;
 }
 
-static int oid_atom_parser(const struct ref_format *format, struct used_atom *atom,
+static int oid_atom_parser(struct ref_format *format, struct used_atom *atom,
 			   const char *arg, struct strbuf *err)
 {
 	if (!arg)
@@ -461,7 +461,7 @@ static int oid_atom_parser(const struct ref_format *format, struct used_atom *at
 	return 0;
 }
 
-static int person_email_atom_parser(const struct ref_format *format, struct used_atom *atom,
+static int person_email_atom_parser(struct ref_format *format, struct used_atom *atom,
 				    const char *arg, struct strbuf *err)
 {
 	if (!arg)
@@ -475,7 +475,7 @@ static int person_email_atom_parser(const struct ref_format *format, struct used
 	return 0;
 }
 
-static int refname_atom_parser(const struct ref_format *format, struct used_atom *atom,
+static int refname_atom_parser(struct ref_format *format, struct used_atom *atom,
 			       const char *arg, struct strbuf *err)
 {
 	return refname_atom_parser_internal(&atom->u.refname, arg, atom->name, err);
@@ -492,7 +492,7 @@ static align_type parse_align_position(const char *s)
 	return -1;
 }
 
-static int align_atom_parser(const struct ref_format *format, struct used_atom *atom,
+static int align_atom_parser(struct ref_format *format, struct used_atom *atom,
 			     const char *arg, struct strbuf *err)
 {
 	struct align *align = &atom->u.align;
@@ -544,7 +544,7 @@ static int align_atom_parser(const struct ref_format *format, struct used_atom *
 	return 0;
 }
 
-static int if_atom_parser(const struct ref_format *format, struct used_atom *atom,
+static int if_atom_parser(struct ref_format *format, struct used_atom *atom,
 			  const char *arg, struct strbuf *err)
 {
 	if (!arg) {
@@ -559,7 +559,7 @@ static int if_atom_parser(const struct ref_format *format, struct used_atom *ato
 	return 0;
 }
 
-static int head_atom_parser(const struct ref_format *format, struct used_atom *atom,
+static int head_atom_parser(struct ref_format *format, struct used_atom *atom,
 			    const char *arg, struct strbuf *unused_err)
 {
 	atom->u.head = resolve_refdup("HEAD", RESOLVE_REF_READING, NULL, NULL);
@@ -570,7 +570,7 @@ static struct {
 	const char *name;
 	info_source source;
 	cmp_type cmp_type;
-	int (*parser)(const struct ref_format *format, struct used_atom *atom,
+	int (*parser)(struct ref_format *format, struct used_atom *atom,
 		      const char *arg, struct strbuf *err);
 } valid_atom[] = {
 	[ATOM_REFNAME] = { "refname", SOURCE_NONE, FIELD_STR, refname_atom_parser },
@@ -649,7 +649,7 @@ struct atom_value {
 /*
  * Used to parse format string and sort specifiers
  */
-static int parse_ref_filter_atom(const struct ref_format *format,
+static int parse_ref_filter_atom(struct ref_format *format,
 				 const char *atom, const char *ep,
 				 struct strbuf *err)
 {
@@ -2553,9 +2553,9 @@ static void append_literal(const char *cp, const char *ep, struct ref_formatting
 }
 
 int format_ref_array_item(struct ref_array_item *info,
-			   const struct ref_format *format,
-			   struct strbuf *final_buf,
-			   struct strbuf *error_buf)
+			  struct ref_format *format,
+			  struct strbuf *final_buf,
+			  struct strbuf *error_buf)
 {
 	const char *cp, *sp, *ep;
 	struct ref_formatting_state state = REF_FORMATTING_STATE_INIT;
@@ -2600,7 +2600,7 @@ int format_ref_array_item(struct ref_array_item *info,
 }
 
 void pretty_print_ref(const char *name, const struct object_id *oid,
-		      const struct ref_format *format)
+		      struct ref_format *format)
 {
 	struct ref_array_item *ref_item;
 	struct strbuf output = STRBUF_INIT;
diff --git a/ref-filter.h b/ref-filter.h
index baf72a71896..74fb423fc89 100644
--- a/ref-filter.h
+++ b/ref-filter.h
@@ -116,7 +116,7 @@ void ref_array_sort(struct ref_sorting *sort, struct ref_array *array);
 void ref_sorting_set_sort_flags_all(struct ref_sorting *sorting, unsigned int mask, int on);
 /*  Based on the given format and quote_style, fill the strbuf */
 int format_ref_array_item(struct ref_array_item *info,
-			  const struct ref_format *format,
+			  struct ref_format *format,
 			  struct strbuf *final_buf,
 			  struct strbuf *error_buf);
 /*  Parse a single sort specifier and add it to the list */
@@ -137,7 +137,7 @@ void setup_ref_filter_porcelain_msg(void);
  * name must be a fully qualified refname.
  */
 void pretty_print_ref(const char *name, const struct object_id *oid,
-		      const struct ref_format *format);
+		      struct ref_format *format);
 
 /*
  * Push a single ref onto the array; this can be used to construct your own
-- 
gitgitgadget


^ permalink raw reply	[flat|nested] 121+ messages in thread

* [PATCH v5 05/15] [GSOC] ref-filter: add %(rest) atom
  2021-06-25 16:02       ` [PATCH v5 00/15] [GSOC][RFC] cat-file: reuse ref-filter logic ZheNing Hu via GitGitGadget
                           ` (3 preceding siblings ...)
  2021-06-25 16:02         ` [PATCH v5 04/15] [GSOC] ref-filter: use non-const ref_format in *_atom_parser() ZheNing Hu via GitGitGadget
@ 2021-06-25 16:02         ` ZheNing Hu via GitGitGadget
  2021-06-25 16:02         ` [PATCH v5 06/15] [GSOC] ref-filter: pass get_object() return value to their callers ZheNing Hu via GitGitGadget
                           ` (10 subsequent siblings)
  15 siblings, 0 replies; 121+ messages in thread
From: ZheNing Hu via GitGitGadget @ 2021-06-25 16:02 UTC (permalink / raw)
  To: git
  Cc: Junio C Hamano, Christian Couder, Hariom Verma, Bagas Sanjaya,
	Jeff King, Ævar Arnfjörð Bjarmason, ZheNing Hu,
	ZheNing Hu

From: ZheNing Hu <adlternative@gmail.com>

In order to let "cat-file --batch=%(rest)" use the ref-filter
interface, add %(rest) atom for ref-filter. "git for-each-ref",
"git branch", "git tag" and "git verify-tag" will reject %(rest)
by default.

Mentored-by: Christian Couder <christian.couder@gmail.com>
Mentored-by: Hariom Verma <hariom18599@gmail.com>
Signed-off-by: ZheNing Hu <adlternative@gmail.com>
---
 ref-filter.c             | 21 +++++++++++++++++++++
 ref-filter.h             |  5 ++++-
 t/t3203-branch-output.sh |  4 ++++
 t/t6300-for-each-ref.sh  |  4 ++++
 t/t7004-tag.sh           |  4 ++++
 t/t7030-verify-tag.sh    |  4 ++++
 6 files changed, 41 insertions(+), 1 deletion(-)

diff --git a/ref-filter.c b/ref-filter.c
index d01a0266fb8..10c78de9cfa 100644
--- a/ref-filter.c
+++ b/ref-filter.c
@@ -157,6 +157,7 @@ enum atom_type {
 	ATOM_IF,
 	ATOM_THEN,
 	ATOM_ELSE,
+	ATOM_REST,
 };
 
 /*
@@ -559,6 +560,15 @@ static int if_atom_parser(struct ref_format *format, struct used_atom *atom,
 	return 0;
 }
 
+static int rest_atom_parser(struct ref_format *format, struct used_atom *atom,
+			    const char *arg, struct strbuf *err)
+{
+	if (arg)
+		return strbuf_addf_ret(err, -1, _("%%(rest) does not take arguments"));
+	format->use_rest = 1;
+	return 0;
+}
+
 static int head_atom_parser(struct ref_format *format, struct used_atom *atom,
 			    const char *arg, struct strbuf *unused_err)
 {
@@ -615,6 +625,7 @@ static struct {
 	[ATOM_IF] = { "if", SOURCE_NONE, FIELD_STR, if_atom_parser },
 	[ATOM_THEN] = { "then", SOURCE_NONE },
 	[ATOM_ELSE] = { "else", SOURCE_NONE },
+	[ATOM_REST] = { "rest", SOURCE_NONE, FIELD_STR, rest_atom_parser },
 	/*
 	 * Please update $__git_ref_fieldlist in git-completion.bash
 	 * when you add new atoms
@@ -1010,6 +1021,9 @@ int verify_ref_format(struct ref_format *format)
 		if (at < 0)
 			die("%s", err.buf);
 
+		if (used_atom[at].atom_type == ATOM_REST)
+			die("this command reject atom %%(%.*s)", (int)(ep - sp - 2), sp + 2);
+
 		if ((format->quote_style == QUOTE_PYTHON ||
 		     format->quote_style == QUOTE_SHELL ||
 		     format->quote_style == QUOTE_TCL) &&
@@ -1927,6 +1941,12 @@ static int populate_value(struct ref_array_item *ref, struct strbuf *err)
 			v->handler = else_atom_handler;
 			v->s = xstrdup("");
 			continue;
+		} else if (atom_type == ATOM_REST) {
+			if (ref->rest)
+				v->s = xstrdup(ref->rest);
+			else
+				v->s = xstrdup("");
+			continue;
 		} else
 			continue;
 
@@ -2144,6 +2164,7 @@ static struct ref_array_item *new_ref_array_item(const char *refname,
 
 	FLEX_ALLOC_STR(ref, refname, refname);
 	oidcpy(&ref->objectname, oid);
+	ref->rest = NULL;
 
 	return ref;
 }
diff --git a/ref-filter.h b/ref-filter.h
index 74fb423fc89..c15dee8d6b9 100644
--- a/ref-filter.h
+++ b/ref-filter.h
@@ -38,6 +38,7 @@ struct ref_sorting {
 
 struct ref_array_item {
 	struct object_id objectname;
+	const char *rest;
 	int flag;
 	unsigned int kind;
 	const char *symref;
@@ -76,14 +77,16 @@ struct ref_format {
 	 * verify_ref_format() afterwards to finalize.
 	 */
 	const char *format;
+	const char *rest;
 	int quote_style;
+	int use_rest;
 	int use_color;
 
 	/* Internal state to ref-filter */
 	int need_color_reset_at_eol;
 };
 
-#define REF_FORMAT_INIT { NULL, 0, -1 }
+#define REF_FORMAT_INIT { .use_color = -1 }
 
 /*  Macros for checking --merged and --no-merged options */
 #define _OPT_MERGED_NO_MERGED(option, filter, h) \
diff --git a/t/t3203-branch-output.sh b/t/t3203-branch-output.sh
index 5325b9f67a0..6e94c6db7b5 100755
--- a/t/t3203-branch-output.sh
+++ b/t/t3203-branch-output.sh
@@ -340,6 +340,10 @@ test_expect_success 'git branch --format option' '
 	test_cmp expect actual
 '
 
+test_expect_success 'git branch with --format=%(rest) must fail' '
+	test_must_fail git branch --format="%(rest)" >actual
+'
+
 test_expect_success 'worktree colors correct' '
 	cat >expect <<-EOF &&
 	* <GREEN>(HEAD detached from fromtag)<RESET>
diff --git a/t/t6300-for-each-ref.sh b/t/t6300-for-each-ref.sh
index 5556063c347..82c0ad2cb11 100755
--- a/t/t6300-for-each-ref.sh
+++ b/t/t6300-for-each-ref.sh
@@ -1211,6 +1211,10 @@ test_expect_success 'basic atom: head contents:trailers' '
 	test_cmp expect actual.clean
 '
 
+test_expect_success 'basic atom: rest must fail' '
+	test_must_fail git for-each-ref --format="%(rest)" refs/heads/main
+'
+
 test_expect_success 'trailer parsing not fooled by --- line' '
 	git commit --allow-empty -F - <<-\EOF &&
 	this is the subject
diff --git a/t/t7004-tag.sh b/t/t7004-tag.sh
index 2f72c5c6883..082be85dffc 100755
--- a/t/t7004-tag.sh
+++ b/t/t7004-tag.sh
@@ -1998,6 +1998,10 @@ test_expect_success '--format should list tags as per format given' '
 	test_cmp expect actual
 '
 
+test_expect_success 'git tag -l with --format="%(rest)" must fail' '
+	test_must_fail git tag -l --format="%(rest)" "v1*"
+'
+
 test_expect_success "set up color tests" '
 	echo "<RED>v1.0<RESET>" >expect.color &&
 	echo "v1.0" >expect.bare &&
diff --git a/t/t7030-verify-tag.sh b/t/t7030-verify-tag.sh
index 3cefde9602b..10faa645157 100755
--- a/t/t7030-verify-tag.sh
+++ b/t/t7030-verify-tag.sh
@@ -194,6 +194,10 @@ test_expect_success GPG 'verifying tag with --format' '
 	test_cmp expect actual
 '
 
+test_expect_success GPG 'verifying tag with --format="%(rest)" must fail' '
+	test_must_fail git verify-tag --format="%(rest)" "fourth-signed"
+'
+
 test_expect_success GPG 'verifying a forged tag with --format should fail silently' '
 	test_must_fail git verify-tag --format="tagname : %(tag)" $(cat forged1.tag) >actual-forged &&
 	test_must_be_empty actual-forged
-- 
gitgitgadget


^ permalink raw reply	[flat|nested] 121+ messages in thread

* [PATCH v5 06/15] [GSOC] ref-filter: pass get_object() return value to their callers
  2021-06-25 16:02       ` [PATCH v5 00/15] [GSOC][RFC] cat-file: reuse ref-filter logic ZheNing Hu via GitGitGadget
                           ` (4 preceding siblings ...)
  2021-06-25 16:02         ` [PATCH v5 05/15] [GSOC] ref-filter: add %(rest) atom ZheNing Hu via GitGitGadget
@ 2021-06-25 16:02         ` ZheNing Hu via GitGitGadget
  2021-06-25 16:02         ` [PATCH v5 07/15] [GSOC] ref-filter: introduce free_ref_array_item_value() function ZheNing Hu via GitGitGadget
                           ` (9 subsequent siblings)
  15 siblings, 0 replies; 121+ messages in thread
From: ZheNing Hu via GitGitGadget @ 2021-06-25 16:02 UTC (permalink / raw)
  To: git
  Cc: Junio C Hamano, Christian Couder, Hariom Verma, Bagas Sanjaya,
	Jeff King, Ævar Arnfjörð Bjarmason, ZheNing Hu,
	ZheNing Hu

From: ZheNing Hu <adlternative@gmail.com>

Because in the refactor of `git cat-file --batch` later,
oid_object_info_extended() in get_object() will be used to obtain
the info of an object with it's oid. When the object cannot be
obtained in the git repository, `cat-file --batch` expects to output
"<oid> missing" and continue the next oid query instead of letting
Git exit. In other error conditions, Git should exit normally. So we
can achieve this function by passing the return value of get_object().

Mentored-by: Christian Couder <christian.couder@gmail.com>
Mentored-by: Hariom Verma <hariom18599@gmail.com>
Helped-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: ZheNing Hu <adlternative@gmail.com>
---
 ref-filter.c | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/ref-filter.c b/ref-filter.c
index 10c78de9cfa..58def6ccd33 100644
--- a/ref-filter.c
+++ b/ref-filter.c
@@ -1816,6 +1816,7 @@ static int populate_value(struct ref_array_item *ref, struct strbuf *err)
 {
 	struct object *obj;
 	int i;
+	int ret = 0;
 	struct object_info empty = OBJECT_INFO_INIT;
 
 	CALLOC_ARRAY(ref->value, used_atom_cnt);
@@ -1972,8 +1973,9 @@ static int populate_value(struct ref_array_item *ref, struct strbuf *err)
 
 
 	oi.oid = ref->objectname;
-	if (get_object(ref, 0, &obj, &oi, err))
-		return -1;
+	ret = get_object(ref, 0, &obj, &oi, err);
+	if (ret)
+		return ret;
 
 	/*
 	 * If there is no atom that wants to know about tagged
@@ -2005,8 +2007,10 @@ static int get_ref_atom_value(struct ref_array_item *ref, int atom,
 			      struct atom_value **v, struct strbuf *err)
 {
 	if (!ref->value) {
-		if (populate_value(ref, err))
-			return -1;
+		int ret = populate_value(ref, err);
+
+		if (ret)
+			return ret;
 		fill_missing_values(ref->value);
 	}
 	*v = &ref->value[atom];
@@ -2580,6 +2584,7 @@ int format_ref_array_item(struct ref_array_item *info,
 {
 	const char *cp, *sp, *ep;
 	struct ref_formatting_state state = REF_FORMATTING_STATE_INIT;
+	int ret = 0;
 
 	state.quote_style = format->quote_style;
 	push_stack_element(&state.stack);
@@ -2592,10 +2597,10 @@ int format_ref_array_item(struct ref_array_item *info,
 		if (cp < sp)
 			append_literal(cp, sp, &state);
 		pos = parse_ref_filter_atom(format, sp + 2, ep, error_buf);
-		if (pos < 0 || get_ref_atom_value(info, pos, &atomv, error_buf) ||
+		if (pos < 0 || (ret = get_ref_atom_value(info, pos, &atomv, error_buf)) ||
 		    atomv->handler(atomv, &state, error_buf)) {
 			pop_stack_element(&state.stack);
-			return -1;
+			return ret ? ret : -1;
 		}
 	}
 	if (*cp) {
-- 
gitgitgadget


^ permalink raw reply	[flat|nested] 121+ messages in thread

* [PATCH v5 07/15] [GSOC] ref-filter: introduce free_ref_array_item_value() function
  2021-06-25 16:02       ` [PATCH v5 00/15] [GSOC][RFC] cat-file: reuse ref-filter logic ZheNing Hu via GitGitGadget
                           ` (5 preceding siblings ...)
  2021-06-25 16:02         ` [PATCH v5 06/15] [GSOC] ref-filter: pass get_object() return value to their callers ZheNing Hu via GitGitGadget
@ 2021-06-25 16:02         ` ZheNing Hu via GitGitGadget
  2021-06-25 16:02         ` [PATCH v5 08/15] [GSOC] ref-filter: add cat_file_mode in struct ref_format ZheNing Hu via GitGitGadget
                           ` (8 subsequent siblings)
  15 siblings, 0 replies; 121+ messages in thread
From: ZheNing Hu via GitGitGadget @ 2021-06-25 16:02 UTC (permalink / raw)
  To: git
  Cc: Junio C Hamano, Christian Couder, Hariom Verma, Bagas Sanjaya,
	Jeff King, Ævar Arnfjörð Bjarmason, ZheNing Hu,
	ZheNing Hu

From: ZheNing Hu <adlternative@gmail.com>

When we use ref_array_item which is not dynamically allocated and
want to free the space of its member "value" after the end of use,
free_array_item() does not meet our needs, because it tries to free
ref_array_item itself and its member "symref".

Introduce free_ref_array_item_value() for freeing ref_array_item value.
It will be called internally by free_array_item(), and it will help
`cat-file --batch` free ref_array_item's value memory later.

Helped-by: Junio C Hamano <gitster@pobox.com>
Mentored-by: Christian Couder <christian.couder@gmail.com>
Mentored-by: Hariom Verma <hariom18599@gmail.com>
Signed-off-by: ZheNing Hu <adlternative@gmail.com>
---
 ref-filter.c | 11 ++++++++---
 ref-filter.h |  2 ++
 2 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/ref-filter.c b/ref-filter.c
index 58def6ccd33..22315d4809d 100644
--- a/ref-filter.c
+++ b/ref-filter.c
@@ -2291,16 +2291,21 @@ static int ref_filter_handler(const char *refname, const struct object_id *oid,
 	return 0;
 }
 
-/*  Free memory allocated for a ref_array_item */
-static void free_array_item(struct ref_array_item *item)
+void free_ref_array_item_value(struct ref_array_item *item)
 {
-	free((char *)item->symref);
 	if (item->value) {
 		int i;
 		for (i = 0; i < used_atom_cnt; i++)
 			free((char *)item->value[i].s);
 		free(item->value);
 	}
+}
+
+/*  Free memory allocated for a ref_array_item */
+static void free_array_item(struct ref_array_item *item)
+{
+	free((char *)item->symref);
+	free_ref_array_item_value(item);
 	free(item);
 }
 
diff --git a/ref-filter.h b/ref-filter.h
index c15dee8d6b9..44e6dc05ac2 100644
--- a/ref-filter.h
+++ b/ref-filter.h
@@ -111,6 +111,8 @@ struct ref_format {
 int filter_refs(struct ref_array *array, struct ref_filter *filter, unsigned int type);
 /*  Clear all memory allocated to ref_array */
 void ref_array_clear(struct ref_array *array);
+/* Free ref_array_item's value */
+void free_ref_array_item_value(struct ref_array_item *item);
 /*  Used to verify if the given format is correct and to parse out the used atoms */
 int verify_ref_format(struct ref_format *format);
 /*  Sort the given ref_array as per the ref_sorting provided */
-- 
gitgitgadget


^ permalink raw reply	[flat|nested] 121+ messages in thread

* [PATCH v5 08/15] [GSOC] ref-filter: add cat_file_mode in struct ref_format
  2021-06-25 16:02       ` [PATCH v5 00/15] [GSOC][RFC] cat-file: reuse ref-filter logic ZheNing Hu via GitGitGadget
                           ` (6 preceding siblings ...)
  2021-06-25 16:02         ` [PATCH v5 07/15] [GSOC] ref-filter: introduce free_ref_array_item_value() function ZheNing Hu via GitGitGadget
@ 2021-06-25 16:02         ` ZheNing Hu via GitGitGadget
  2021-06-25 16:02         ` [PATCH v5 09/15] [GSOC] ref-filter: modify the error message and value in get_object ZheNing Hu via GitGitGadget
                           ` (7 subsequent siblings)
  15 siblings, 0 replies; 121+ messages in thread
From: ZheNing Hu via GitGitGadget @ 2021-06-25 16:02 UTC (permalink / raw)
  To: git
  Cc: Junio C Hamano, Christian Couder, Hariom Verma, Bagas Sanjaya,
	Jeff King, Ævar Arnfjörð Bjarmason, ZheNing Hu,
	ZheNing Hu

From: ZheNing Hu <adlternative@gmail.com>

Add `cat_file_mode` member in struct `ref_format`, when
`cat-file --batch` use ref-filter logic later, it can help
us reject atoms in verify_ref_format() which cat-file cannot
use, e.g. `%(refname)`, `%(push)`, `%(upstream)`...

Mentored-by: Christian Couder <christian.couder@gmail.com>
Mentored-by: Hariom Verma <hariom18599@gmail.com>
Signed-off-by: ZheNing Hu <adlternative@gmail.com>
---
 ref-filter.c | 11 +++++++++--
 ref-filter.h |  1 +
 2 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/ref-filter.c b/ref-filter.c
index 22315d4809d..f21f41df0d8 100644
--- a/ref-filter.c
+++ b/ref-filter.c
@@ -1021,8 +1021,15 @@ int verify_ref_format(struct ref_format *format)
 		if (at < 0)
 			die("%s", err.buf);
 
-		if (used_atom[at].atom_type == ATOM_REST)
-			die("this command reject atom %%(%.*s)", (int)(ep - sp - 2), sp + 2);
+		if ((!format->cat_file_mode && used_atom[at].atom_type == ATOM_REST) ||
+		    (format->cat_file_mode && (used_atom[at].atom_type == ATOM_FLAG ||
+					       used_atom[at].atom_type == ATOM_HEAD ||
+					       used_atom[at].atom_type == ATOM_PUSH ||
+					       used_atom[at].atom_type == ATOM_REFNAME ||
+					       used_atom[at].atom_type == ATOM_SYMREF ||
+					       used_atom[at].atom_type == ATOM_UPSTREAM ||
+					       used_atom[at].atom_type == ATOM_WORKTREEPATH)))
+			die(_("this command reject atom %%(%.*s)"), (int)(ep - sp - 2), sp + 2);
 
 		if ((format->quote_style == QUOTE_PYTHON ||
 		     format->quote_style == QUOTE_SHELL ||
diff --git a/ref-filter.h b/ref-filter.h
index 44e6dc05ac2..053980a6a42 100644
--- a/ref-filter.h
+++ b/ref-filter.h
@@ -78,6 +78,7 @@ struct ref_format {
 	 */
 	const char *format;
 	const char *rest;
+	int cat_file_mode;
 	int quote_style;
 	int use_rest;
 	int use_color;
-- 
gitgitgadget


^ permalink raw reply	[flat|nested] 121+ messages in thread

* [PATCH v5 09/15] [GSOC] ref-filter: modify the error message and value in get_object
  2021-06-25 16:02       ` [PATCH v5 00/15] [GSOC][RFC] cat-file: reuse ref-filter logic ZheNing Hu via GitGitGadget
                           ` (7 preceding siblings ...)
  2021-06-25 16:02         ` [PATCH v5 08/15] [GSOC] ref-filter: add cat_file_mode in struct ref_format ZheNing Hu via GitGitGadget
@ 2021-06-25 16:02         ` ZheNing Hu via GitGitGadget
  2021-06-25 16:02         ` [PATCH v5 10/15] [GSOC] cat-file: add has_object_file() check ZheNing Hu via GitGitGadget
                           ` (6 subsequent siblings)
  15 siblings, 0 replies; 121+ messages in thread
From: ZheNing Hu via GitGitGadget @ 2021-06-25 16:02 UTC (permalink / raw)
  To: git
  Cc: Junio C Hamano, Christian Couder, Hariom Verma, Bagas Sanjaya,
	Jeff King, Ævar Arnfjörð Bjarmason, ZheNing Hu,
	ZheNing Hu

From: ZheNing Hu <adlternative@gmail.com>

Let get_object() return 1 and print "<oid> missing" instead
of returning -1 and printing "missing object <oid> for <refname>"
if oid_object_info_extended() unable to find the data corresponding
to oid. When `cat-file --batch` use ref-filter logic later it can
help `format_ref_array_item()` just report that the object is missing
without letting Git exit.

Mentored-by: Christian Couder <christian.couder@gmail.com>
Mentored-by: Hariom Verma <hariom18599@gmail.com>
Signed-off-by: ZheNing Hu <adlternative@gmail.com>
---
 ref-filter.c                   | 4 ++--
 t/t6301-for-each-ref-errors.sh | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/ref-filter.c b/ref-filter.c
index f21f41df0d8..181d99c9273 100644
--- a/ref-filter.c
+++ b/ref-filter.c
@@ -1749,8 +1749,8 @@ static int get_object(struct ref_array_item *ref, int deref, struct object **obj
 	}
 	if (oid_object_info_extended(the_repository, &oi->oid, &oi->info,
 				     OBJECT_INFO_LOOKUP_REPLACE))
-		return strbuf_addf_ret(err, -1, _("missing object %s for %s"),
-				       oid_to_hex(&oi->oid), ref->refname);
+		return strbuf_addf_ret(err, 1, _("%s missing"),
+				       oid_to_hex(&oi->oid));
 	if (oi->info.disk_sizep && oi->disk_size < 0)
 		BUG("Object size is less than zero.");
 
diff --git a/t/t6301-for-each-ref-errors.sh b/t/t6301-for-each-ref-errors.sh
index 40edf9dab53..3553f84a00c 100755
--- a/t/t6301-for-each-ref-errors.sh
+++ b/t/t6301-for-each-ref-errors.sh
@@ -41,7 +41,7 @@ test_expect_success 'Missing objects are reported correctly' '
 	r=refs/heads/missing &&
 	echo $MISSING >.git/$r &&
 	test_when_finished "rm -f .git/$r" &&
-	echo "fatal: missing object $MISSING for $r" >missing-err &&
+	echo "fatal: $MISSING missing" >missing-err &&
 	test_must_fail git for-each-ref 2>err &&
 	test_cmp missing-err err &&
 	(
-- 
gitgitgadget


^ permalink raw reply	[flat|nested] 121+ messages in thread

* [PATCH v5 10/15] [GSOC] cat-file: add has_object_file() check
  2021-06-25 16:02       ` [PATCH v5 00/15] [GSOC][RFC] cat-file: reuse ref-filter logic ZheNing Hu via GitGitGadget
                           ` (8 preceding siblings ...)
  2021-06-25 16:02         ` [PATCH v5 09/15] [GSOC] ref-filter: modify the error message and value in get_object ZheNing Hu via GitGitGadget
@ 2021-06-25 16:02         ` ZheNing Hu via GitGitGadget
  2021-06-25 16:02         ` [PATCH v5 11/15] [GSOC] cat-file: change batch_objects parameter name ZheNing Hu via GitGitGadget
                           ` (5 subsequent siblings)
  15 siblings, 0 replies; 121+ messages in thread
From: ZheNing Hu via GitGitGadget @ 2021-06-25 16:02 UTC (permalink / raw)
  To: git
  Cc: Junio C Hamano, Christian Couder, Hariom Verma, Bagas Sanjaya,
	Jeff King, Ævar Arnfjörð Bjarmason, ZheNing Hu,
	ZheNing Hu

From: ZheNing Hu <adlternative@gmail.com>

Use `has_object_file()` in `batch_one_object()` to check
whether the input object exists. This can help us reject
the missing oid when we let `cat-file --batch` use ref-filter
logic later.

Mentored-by: Christian Couder <christian.couder@gmail.com>
Mentored-by: Hariom Verma <hariom18599@gmail.com>
Signed-off-by: ZheNing Hu <adlternative@gmail.com>
---
 builtin/cat-file.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/builtin/cat-file.c b/builtin/cat-file.c
index 5ebf13359e8..9fd3c04ff20 100644
--- a/builtin/cat-file.c
+++ b/builtin/cat-file.c
@@ -428,6 +428,13 @@ static void batch_one_object(const char *obj_name,
 		return;
 	}
 
+	if (!has_object_file(&data->oid)) {
+		printf("%s missing\n",
+		       obj_name ? obj_name : oid_to_hex(&data->oid));
+		fflush(stdout);
+		return;
+	}
+
 	batch_object_write(obj_name, scratch, opt, data);
 }
 
-- 
gitgitgadget


^ permalink raw reply	[flat|nested] 121+ messages in thread

* [PATCH v5 11/15] [GSOC] cat-file: change batch_objects parameter name
  2021-06-25 16:02       ` [PATCH v5 00/15] [GSOC][RFC] cat-file: reuse ref-filter logic ZheNing Hu via GitGitGadget
                           ` (9 preceding siblings ...)
  2021-06-25 16:02         ` [PATCH v5 10/15] [GSOC] cat-file: add has_object_file() check ZheNing Hu via GitGitGadget
@ 2021-06-25 16:02         ` ZheNing Hu via GitGitGadget
  2021-06-25 16:02         ` [PATCH v5 12/15] [GSOC] cat-file: reuse ref-filter logic ZheNing Hu via GitGitGadget
                           ` (4 subsequent siblings)
  15 siblings, 0 replies; 121+ messages in thread
From: ZheNing Hu via GitGitGadget @ 2021-06-25 16:02 UTC (permalink / raw)
  To: git
  Cc: Junio C Hamano, Christian Couder, Hariom Verma, Bagas Sanjaya,
	Jeff King, Ævar Arnfjörð Bjarmason, ZheNing Hu,
	ZheNing Hu

From: ZheNing Hu <adlternative@gmail.com>

Because later cat-file reuses ref-filter logic that will add
parameter "const struct option *options" to batch_objects(),
the two synonymous parameters of "opt" and "options" may
confuse readers, so change batch_options parameter of
batch_objects() from "opt" to "batch".

Mentored-by: Christian Couder <christian.couder@gmail.com>
Mentored-by: Hariom Verma <hariom18599@gmail.com>
Signed-off-by: ZheNing Hu <adlternative@gmail.com>
---
 builtin/cat-file.c | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/builtin/cat-file.c b/builtin/cat-file.c
index 9fd3c04ff20..cd84c39df96 100644
--- a/builtin/cat-file.c
+++ b/builtin/cat-file.c
@@ -495,7 +495,7 @@ static int batch_unordered_packed(const struct object_id *oid,
 	return batch_unordered_object(oid, data);
 }
 
-static int batch_objects(struct batch_options *opt)
+static int batch_objects(struct batch_options *batch)
 {
 	struct strbuf input = STRBUF_INIT;
 	struct strbuf output = STRBUF_INIT;
@@ -503,8 +503,8 @@ static int batch_objects(struct batch_options *opt)
 	int save_warning;
 	int retval = 0;
 
-	if (!opt->format)
-		opt->format = "%(objectname) %(objecttype) %(objectsize)";
+	if (!batch->format)
+		batch->format = "%(objectname) %(objecttype) %(objectsize)";
 
 	/*
 	 * Expand once with our special mark_query flag, which will prime the
@@ -513,13 +513,13 @@ static int batch_objects(struct batch_options *opt)
 	 */
 	memset(&data, 0, sizeof(data));
 	data.mark_query = 1;
-	strbuf_expand(&output, opt->format, expand_format, &data);
+	strbuf_expand(&output, batch->format, expand_format, &data);
 	data.mark_query = 0;
 	strbuf_release(&output);
-	if (opt->cmdmode)
+	if (batch->cmdmode)
 		data.split_on_whitespace = 1;
 
-	if (opt->all_objects) {
+	if (batch->all_objects) {
 		struct object_info empty = OBJECT_INFO_INIT;
 		if (!memcmp(&data.info, &empty, sizeof(empty)))
 			data.skip_object_info = 1;
@@ -529,20 +529,20 @@ static int batch_objects(struct batch_options *opt)
 	 * If we are printing out the object, then always fill in the type,
 	 * since we will want to decide whether or not to stream.
 	 */
-	if (opt->print_contents)
+	if (batch->print_contents)
 		data.info.typep = &data.type;
 
-	if (opt->all_objects) {
+	if (batch->all_objects) {
 		struct object_cb_data cb;
 
 		if (has_promisor_remote())
 			warning("This repository uses promisor remotes. Some objects may not be loaded.");
 
-		cb.opt = opt;
+		cb.opt = batch;
 		cb.expand = &data;
 		cb.scratch = &output;
 
-		if (opt->unordered) {
+		if (batch->unordered) {
 			struct oidset seen = OIDSET_INIT;
 
 			cb.seen = &seen;
@@ -592,7 +592,7 @@ static int batch_objects(struct batch_options *opt)
 			data.rest = p;
 		}
 
-		batch_one_object(input.buf, &output, opt, &data);
+		batch_one_object(input.buf, &output, batch, &data);
 	}
 
 	strbuf_release(&input);
-- 
gitgitgadget


^ permalink raw reply	[flat|nested] 121+ messages in thread

* [PATCH v5 12/15] [GSOC] cat-file: reuse ref-filter logic
  2021-06-25 16:02       ` [PATCH v5 00/15] [GSOC][RFC] cat-file: reuse ref-filter logic ZheNing Hu via GitGitGadget
                           ` (10 preceding siblings ...)
  2021-06-25 16:02         ` [PATCH v5 11/15] [GSOC] cat-file: change batch_objects parameter name ZheNing Hu via GitGitGadget
@ 2021-06-25 16:02         ` ZheNing Hu via GitGitGadget
  2021-06-26 17:26           ` Hariom verma
  2021-06-26 18:08           ` Hariom verma
  2021-06-25 16:02         ` [PATCH v5 13/15] [GSOC] cat-file: reuse err buf in batch_object_write() ZheNing Hu via GitGitGadget
                           ` (3 subsequent siblings)
  15 siblings, 2 replies; 121+ messages in thread
From: ZheNing Hu via GitGitGadget @ 2021-06-25 16:02 UTC (permalink / raw)
  To: git
  Cc: Junio C Hamano, Christian Couder, Hariom Verma, Bagas Sanjaya,
	Jeff King, Ævar Arnfjörð Bjarmason, ZheNing Hu,
	ZheNing Hu

From: ZheNing Hu <adlternative@gmail.com>

In order to let cat-file use ref-filter logic, let's do the
following:

1. Change the type of member `format` in struct `batch_options`
to `ref_format`, we will pass it to ref-filter later.
2. Let `batch_objects()` add atoms to format, and use
`verify_ref_format()` to check atoms.
3. Use `format_ref_array_item()` in `batch_object_write()` to
get the formatted data corresponding to the object. If the
return value of `format_ref_array_item()` is equals to zero,
use `batch_write()` to print object data; else if the return
value is less than zero, use `die()` to print the error message
and exit; else if return value is greater than zero, only print
the error message, but don't exit.
4. Use free_ref_array_item_value() to free ref_array_item's
value.

Most of the atoms in `for-each-ref --format` are now supported,
such as `%(tree)`, `%(parent)`, `%(author)`, `%(tagger)`, `%(if)`,
`%(then)`, `%(else)`, `%(end)`. But these atoms will be rejected:
`%(refname)`, `%(symref)`, `%(upstream)`, `%(push)`, `%(worktreepath)`,
`%(flag)`, `%(HEAD)`, because our objects don't have a refname.

The performance for `git cat-file --batch-all-objects
--batch-check` on the Git repository itself with performance
testing tool `hyperfine` changes from 669.4 ms ±  31.1 ms to
1.134 s ±  0.063 s.

The performance for `git cat-file --batch-all-objects --batch
>/dev/null` on the Git repository itself with performance testing
tool `time` change from "27.37s user 0.29s system 98% cpu 28.089
total" to "33.69s user 1.54s system 87% cpu 40.258 total".

Mentored-by: Christian Couder <christian.couder@gmail.com>
Mentored-by: Hariom Verma <hariom18599@gmail.com>
Signed-off-by: ZheNing Hu <adlternative@gmail.com>
---
 Documentation/git-cat-file.txt |   6 +
 builtin/cat-file.c             | 244 ++++++-------------------------
 t/t1006-cat-file.sh            | 252 +++++++++++++++++++++++++++++++++
 3 files changed, 305 insertions(+), 197 deletions(-)

diff --git a/Documentation/git-cat-file.txt b/Documentation/git-cat-file.txt
index 4eb0421b3fd..ef8ab952b2f 100644
--- a/Documentation/git-cat-file.txt
+++ b/Documentation/git-cat-file.txt
@@ -226,6 +226,12 @@ newline. The available atoms are:
 	after that first run of whitespace (i.e., the "rest" of the
 	line) are output in place of the `%(rest)` atom.
 
+Note that most of the atoms in `for-each-ref --format` are now supported,
+such as `%(tree)`, `%(parent)`, `%(author)`, `%(tagger)`, `%(if)`,
+`%(then)`, `%(else)`, `%(end)`. But these atoms will be rejected:
+`%(refname)`, `%(symref)`, `%(upstream)`, `%(push)`, `%(worktreepath)`,
+`%(flag)`, `%(HEAD)`. See linkgit:git-for-each-ref[1].
+
 If no format is specified, the default format is `%(objectname)
 %(objecttype) %(objectsize)`.
 
diff --git a/builtin/cat-file.c b/builtin/cat-file.c
index cd84c39df96..0e7ad038e5f 100644
--- a/builtin/cat-file.c
+++ b/builtin/cat-file.c
@@ -16,6 +16,7 @@
 #include "packfile.h"
 #include "object-store.h"
 #include "promisor-remote.h"
+#include "ref-filter.h"
 
 struct batch_options {
 	int enabled;
@@ -25,7 +26,7 @@ struct batch_options {
 	int all_objects;
 	int unordered;
 	int cmdmode; /* may be 'w' or 'c' for --filters or --textconv */
-	const char *format;
+	struct ref_format format;
 };
 
 static const char *force_path;
@@ -195,99 +196,10 @@ static int cat_one_file(int opt, const char *exp_type, const char *obj_name,
 
 struct expand_data {
 	struct object_id oid;
-	enum object_type type;
-	unsigned long size;
-	off_t disk_size;
 	const char *rest;
-	struct object_id delta_base_oid;
-
-	/*
-	 * If mark_query is true, we do not expand anything, but rather
-	 * just mark the object_info with items we wish to query.
-	 */
-	int mark_query;
-
-	/*
-	 * Whether to split the input on whitespace before feeding it to
-	 * get_sha1; this is decided during the mark_query phase based on
-	 * whether we have a %(rest) token in our format.
-	 */
 	int split_on_whitespace;
-
-	/*
-	 * After a mark_query run, this object_info is set up to be
-	 * passed to oid_object_info_extended. It will point to the data
-	 * elements above, so you can retrieve the response from there.
-	 */
-	struct object_info info;
-
-	/*
-	 * This flag will be true if the requested batch format and options
-	 * don't require us to call oid_object_info, which can then be
-	 * optimized out.
-	 */
-	unsigned skip_object_info : 1;
 };
 
-static int is_atom(const char *atom, const char *s, int slen)
-{
-	int alen = strlen(atom);
-	return alen == slen && !memcmp(atom, s, alen);
-}
-
-static void expand_atom(struct strbuf *sb, const char *atom, int len,
-			void *vdata)
-{
-	struct expand_data *data = vdata;
-
-	if (is_atom("objectname", atom, len)) {
-		if (!data->mark_query)
-			strbuf_addstr(sb, oid_to_hex(&data->oid));
-	} else if (is_atom("objecttype", atom, len)) {
-		if (data->mark_query)
-			data->info.typep = &data->type;
-		else
-			strbuf_addstr(sb, type_name(data->type));
-	} else if (is_atom("objectsize", atom, len)) {
-		if (data->mark_query)
-			data->info.sizep = &data->size;
-		else
-			strbuf_addf(sb, "%"PRIuMAX , (uintmax_t)data->size);
-	} else if (is_atom("objectsize:disk", atom, len)) {
-		if (data->mark_query)
-			data->info.disk_sizep = &data->disk_size;
-		else
-			strbuf_addf(sb, "%"PRIuMAX, (uintmax_t)data->disk_size);
-	} else if (is_atom("rest", atom, len)) {
-		if (data->mark_query)
-			data->split_on_whitespace = 1;
-		else if (data->rest)
-			strbuf_addstr(sb, data->rest);
-	} else if (is_atom("deltabase", atom, len)) {
-		if (data->mark_query)
-			data->info.delta_base_oid = &data->delta_base_oid;
-		else
-			strbuf_addstr(sb,
-				      oid_to_hex(&data->delta_base_oid));
-	} else
-		die("unknown format element: %.*s", len, atom);
-}
-
-static size_t expand_format(struct strbuf *sb, const char *start, void *data)
-{
-	const char *end;
-
-	if (*start != '(')
-		return 0;
-	end = strchr(start + 1, ')');
-	if (!end)
-		die("format element '%s' does not end in ')'", start);
-
-	expand_atom(sb, start + 1, end - start - 1, data);
-
-	return end - start + 1;
-}
-
 static void batch_write(struct batch_options *opt, const void *data, int len)
 {
 	if (opt->buffer_output) {
@@ -297,87 +209,34 @@ static void batch_write(struct batch_options *opt, const void *data, int len)
 		write_or_die(1, data, len);
 }
 
-static void print_object_or_die(struct batch_options *opt, struct expand_data *data)
-{
-	const struct object_id *oid = &data->oid;
-
-	assert(data->info.typep);
-
-	if (data->type == OBJ_BLOB) {
-		if (opt->buffer_output)
-			fflush(stdout);
-		if (opt->cmdmode) {
-			char *contents;
-			unsigned long size;
-
-			if (!data->rest)
-				die("missing path for '%s'", oid_to_hex(oid));
-
-			if (opt->cmdmode == 'w') {
-				if (filter_object(data->rest, 0100644, oid,
-						  &contents, &size))
-					die("could not convert '%s' %s",
-					    oid_to_hex(oid), data->rest);
-			} else if (opt->cmdmode == 'c') {
-				enum object_type type;
-				if (!textconv_object(the_repository,
-						     data->rest, 0100644, oid,
-						     1, &contents, &size))
-					contents = read_object_file(oid,
-								    &type,
-								    &size);
-				if (!contents)
-					die("could not convert '%s' %s",
-					    oid_to_hex(oid), data->rest);
-			} else
-				BUG("invalid cmdmode: %c", opt->cmdmode);
-			batch_write(opt, contents, size);
-			free(contents);
-		} else {
-			stream_blob(oid);
-		}
-	}
-	else {
-		enum object_type type;
-		unsigned long size;
-		void *contents;
-
-		contents = read_object_file(oid, &type, &size);
-		if (!contents)
-			die("object %s disappeared", oid_to_hex(oid));
-		if (type != data->type)
-			die("object %s changed type!?", oid_to_hex(oid));
-		if (data->info.sizep && size != data->size)
-			die("object %s changed size!?", oid_to_hex(oid));
-
-		batch_write(opt, contents, size);
-		free(contents);
-	}
-}
 
 static void batch_object_write(const char *obj_name,
 			       struct strbuf *scratch,
 			       struct batch_options *opt,
 			       struct expand_data *data)
 {
-	if (!data->skip_object_info &&
-	    oid_object_info_extended(the_repository, &data->oid, &data->info,
-				     OBJECT_INFO_LOOKUP_REPLACE) < 0) {
-		printf("%s missing\n",
-		       obj_name ? obj_name : oid_to_hex(&data->oid));
-		fflush(stdout);
-		return;
-	}
+	int ret = 0;
+	struct strbuf err = STRBUF_INIT;
+	struct ref_array_item item = { data->oid, data->rest };
 
 	strbuf_reset(scratch);
-	strbuf_expand(scratch, opt->format, expand_format, data);
-	strbuf_addch(scratch, '\n');
-	batch_write(opt, scratch->buf, scratch->len);
 
-	if (opt->print_contents) {
-		print_object_or_die(opt, data);
-		batch_write(opt, "\n", 1);
+	ret = format_ref_array_item(&item, &opt->format, scratch, &err);
+	if (ret < 0) {
+		die("%s\n", err.buf);
+	} if (ret) {
+		/* ret > 0 means when the object corresponding to oid
+		 * cannot be found in format_ref_array_item(), we only print
+		 * the error message.
+		 */
+		printf("%s\n", err.buf);
+		fflush(stdout);
+	} else {
+		strbuf_addch(scratch, '\n');
+		batch_write(opt, scratch->buf, scratch->len);
 	}
+	free_ref_array_item_value(&item);
+	strbuf_release(&err);
 }
 
 static void batch_one_object(const char *obj_name,
@@ -495,42 +354,34 @@ static int batch_unordered_packed(const struct object_id *oid,
 	return batch_unordered_object(oid, data);
 }
 
-static int batch_objects(struct batch_options *batch)
+static const char * const cat_file_usage[] = {
+	N_("git cat-file (-t [--allow-unknown-type] | -s [--allow-unknown-type] | -e | -p | <type> | --textconv | --filters) [--path=<path>] <object>"),
+	N_("git cat-file (--batch[=<format>] | --batch-check[=<format>]) [--follow-symlinks] [--textconv | --filters]"),
+	NULL
+};
+
+static int batch_objects(struct batch_options *batch, const struct option *options)
 {
 	struct strbuf input = STRBUF_INIT;
 	struct strbuf output = STRBUF_INIT;
+	struct strbuf format = STRBUF_INIT;
 	struct expand_data data;
 	int save_warning;
 	int retval = 0;
 
-	if (!batch->format)
-		batch->format = "%(objectname) %(objecttype) %(objectsize)";
-
-	/*
-	 * Expand once with our special mark_query flag, which will prime the
-	 * object_info to be handed to oid_object_info_extended for each
-	 * object.
-	 */
 	memset(&data, 0, sizeof(data));
-	data.mark_query = 1;
-	strbuf_expand(&output, batch->format, expand_format, &data);
-	data.mark_query = 0;
-	strbuf_release(&output);
-	if (batch->cmdmode)
-		data.split_on_whitespace = 1;
-
-	if (batch->all_objects) {
-		struct object_info empty = OBJECT_INFO_INIT;
-		if (!memcmp(&data.info, &empty, sizeof(empty)))
-			data.skip_object_info = 1;
-	}
-
-	/*
-	 * If we are printing out the object, then always fill in the type,
-	 * since we will want to decide whether or not to stream.
-	 */
+	if (batch->format.format)
+		strbuf_addstr(&format, batch->format.format);
+	else
+		strbuf_addstr(&format, "%(objectname) %(objecttype) %(objectsize)");
 	if (batch->print_contents)
-		data.info.typep = &data.type;
+		strbuf_addstr(&format, "\n%(raw)");
+	batch->format.format = format.buf;
+	if (verify_ref_format(&batch->format))
+		usage_with_options(cat_file_usage, options);
+
+	if (batch->cmdmode || batch->format.use_rest)
+		data.split_on_whitespace = 1;
 
 	if (batch->all_objects) {
 		struct object_cb_data cb;
@@ -563,6 +414,7 @@ static int batch_objects(struct batch_options *batch)
 			oid_array_clear(&sa);
 		}
 
+		strbuf_release(&format);
 		strbuf_release(&output);
 		return 0;
 	}
@@ -595,18 +447,13 @@ static int batch_objects(struct batch_options *batch)
 		batch_one_object(input.buf, &output, batch, &data);
 	}
 
+	strbuf_release(&format);
 	strbuf_release(&input);
 	strbuf_release(&output);
 	warn_on_object_refname_ambiguity = save_warning;
 	return retval;
 }
 
-static const char * const cat_file_usage[] = {
-	N_("git cat-file (-t [--allow-unknown-type] | -s [--allow-unknown-type] | -e | -p | <type> | --textconv | --filters) [--path=<path>] <object>"),
-	N_("git cat-file (--batch[=<format>] | --batch-check[=<format>]) [--follow-symlinks] [--textconv | --filters]"),
-	NULL
-};
-
 static int git_cat_file_config(const char *var, const char *value, void *cb)
 {
 	if (userdiff_config(var, value) < 0)
@@ -629,7 +476,7 @@ static int batch_option_callback(const struct option *opt,
 
 	bo->enabled = 1;
 	bo->print_contents = !strcmp(opt->long_name, "batch");
-	bo->format = arg;
+	bo->format.format = arg;
 
 	return 0;
 }
@@ -638,7 +485,9 @@ int cmd_cat_file(int argc, const char **argv, const char *prefix)
 {
 	int opt = 0;
 	const char *exp_type = NULL, *obj_name = NULL;
-	struct batch_options batch = {0};
+	struct batch_options batch = {
+		.format = REF_FORMAT_INIT
+	};
 	int unknown_type = 0;
 
 	const struct option options[] = {
@@ -677,6 +526,7 @@ int cmd_cat_file(int argc, const char **argv, const char *prefix)
 	git_config(git_cat_file_config, NULL);
 
 	batch.buffer_output = -1;
+	batch.format.cat_file_mode = 1;
 	argc = parse_options(argc, argv, prefix, options, cat_file_usage, 0);
 
 	if (opt) {
@@ -720,7 +570,7 @@ int cmd_cat_file(int argc, const char **argv, const char *prefix)
 		batch.buffer_output = batch.all_objects;
 
 	if (batch.enabled)
-		return batch_objects(&batch);
+		return batch_objects(&batch, options);
 
 	if (unknown_type && opt != 't' && opt != 's')
 		die("git cat-file --allow-unknown-type: use with -s or -t");
diff --git a/t/t1006-cat-file.sh b/t/t1006-cat-file.sh
index 5d2dc99b74a..69eb627774d 100755
--- a/t/t1006-cat-file.sh
+++ b/t/t1006-cat-file.sh
@@ -586,4 +586,256 @@ test_expect_success 'cat-file --unordered works' '
 	test_cmp expect actual
 '
 
+. "$TEST_DIRECTORY"/lib-gpg.sh
+. "$TEST_DIRECTORY"/lib-terminal.sh
+
+test_expect_success 'cat-file --batch|--batch-check setup' '
+	echo 1>blob1 &&
+	printf "a\0b\0\c" >blob2 &&
+	git add blob1 blob2 &&
+	git commit -m "Commit Message" &&
+	git branch -M main &&
+	git tag -a -m "v0.0.0" testtag &&
+	git update-ref refs/myblobs/blob1 HEAD:blob1 &&
+	git update-ref refs/myblobs/blob2 HEAD:blob2 &&
+	git update-ref refs/mytrees/tree1 HEAD^{tree}
+'
+
+batch_test_atom() {
+	if test "$3" = "fail"
+	then
+		test_expect_${4:-success} $PREREQ "basic atom: $1 $2 must fail" "
+			test_must_fail git cat-file --batch-check='$2' >bad <<-EOF
+			$1
+			EOF
+		"
+	else
+		test_expect_${4:-success} $PREREQ "basic atom: $1 $2" "
+			git for-each-ref --format='$2' $1 >expected &&
+			git cat-file --batch-check='$2' >actual <<-EOF &&
+			$1
+			EOF
+			sanitize_pgp <actual >actual.clean &&
+			cmp expected actual.clean
+		"
+	fi
+}
+
+batch_test_atom refs/heads/main '%(refname)' fail
+batch_test_atom refs/heads/main '%(refname:)' fail
+batch_test_atom refs/heads/main '%(refname:short)' fail
+batch_test_atom refs/heads/main '%(refname:lstrip=1)' fail
+batch_test_atom refs/heads/main '%(refname:lstrip=2)' fail
+batch_test_atom refs/heads/main '%(refname:lstrip=-1)' fail
+batch_test_atom refs/heads/main '%(refname:lstrip=-2)' fail
+batch_test_atom refs/heads/main '%(refname:rstrip=1)' fail
+batch_test_atom refs/heads/main '%(refname:rstrip=2)' fail
+batch_test_atom refs/heads/main '%(refname:rstrip=-1)' fail
+batch_test_atom refs/heads/main '%(refname:rstrip=-2)' fail
+batch_test_atom refs/heads/main '%(refname:strip=1)' fail
+batch_test_atom refs/heads/main '%(refname:strip=2)' fail
+batch_test_atom refs/heads/main '%(refname:strip=-1)' fail
+batch_test_atom refs/heads/main '%(refname:strip=-2)' fail
+batch_test_atom refs/heads/main '%(upstream)' fail
+batch_test_atom refs/heads/main '%(upstream:short)' fail
+batch_test_atom refs/heads/main '%(upstream:lstrip=2)' fail
+batch_test_atom refs/heads/main '%(upstream:lstrip=-2)' fail
+batch_test_atom refs/heads/main '%(upstream:rstrip=2)' fail
+batch_test_atom refs/heads/main '%(upstream:rstrip=-2)' fail
+batch_test_atom refs/heads/main '%(upstream:strip=2)' fail
+batch_test_atom refs/heads/main '%(upstream:strip=-2)' fail
+batch_test_atom refs/heads/main '%(push)' fail
+batch_test_atom refs/heads/main '%(push:short)' fail
+batch_test_atom refs/heads/main '%(push:lstrip=1)' fail
+batch_test_atom refs/heads/main '%(push:lstrip=-1)' fail
+batch_test_atom refs/heads/main '%(push:rstrip=1)' fail
+batch_test_atom refs/heads/main '%(push:rstrip=-1)' fail
+batch_test_atom refs/heads/main '%(push:strip=1)' fail
+batch_test_atom refs/heads/main '%(push:strip=-1)' fail
+batch_test_atom refs/heads/main '%(objecttype)'
+batch_test_atom refs/heads/main '%(objectsize)'
+batch_test_atom refs/heads/main '%(objectsize:disk)'
+batch_test_atom refs/heads/main '%(deltabase)'
+batch_test_atom refs/heads/main '%(objectname)'
+batch_test_atom refs/heads/main '%(objectname:short)'
+batch_test_atom refs/heads/main '%(objectname:short=1)'
+batch_test_atom refs/heads/main '%(objectname:short=10)'
+batch_test_atom refs/heads/main '%(tree)'
+batch_test_atom refs/heads/main '%(tree:short)'
+batch_test_atom refs/heads/main '%(tree:short=1)'
+batch_test_atom refs/heads/main '%(tree:short=10)'
+batch_test_atom refs/heads/main '%(parent)'
+batch_test_atom refs/heads/main '%(parent:short)'
+batch_test_atom refs/heads/main '%(parent:short=1)'
+batch_test_atom refs/heads/main '%(parent:short=10)'
+batch_test_atom refs/heads/main '%(numparent)'
+batch_test_atom refs/heads/main '%(object)'
+batch_test_atom refs/heads/main '%(type)'
+batch_test_atom refs/heads/main '%(raw)'
+batch_test_atom refs/heads/main '%(*objectname)'
+batch_test_atom refs/heads/main '%(*objecttype)'
+batch_test_atom refs/heads/main '%(author)'
+batch_test_atom refs/heads/main '%(authorname)'
+batch_test_atom refs/heads/main '%(authoremail)'
+batch_test_atom refs/heads/main '%(authoremail:trim)'
+batch_test_atom refs/heads/main '%(authoremail:localpart)'
+batch_test_atom refs/heads/main '%(authordate)'
+batch_test_atom refs/heads/main '%(committer)'
+batch_test_atom refs/heads/main '%(committername)'
+batch_test_atom refs/heads/main '%(committeremail)'
+batch_test_atom refs/heads/main '%(committeremail:trim)'
+batch_test_atom refs/heads/main '%(committeremail:localpart)'
+batch_test_atom refs/heads/main '%(committerdate)'
+batch_test_atom refs/heads/main '%(tag)'
+batch_test_atom refs/heads/main '%(tagger)'
+batch_test_atom refs/heads/main '%(taggername)'
+batch_test_atom refs/heads/main '%(taggeremail)'
+batch_test_atom refs/heads/main '%(taggeremail:trim)'
+batch_test_atom refs/heads/main '%(taggeremail:localpart)'
+batch_test_atom refs/heads/main '%(taggerdate)'
+batch_test_atom refs/heads/main '%(creator)'
+batch_test_atom refs/heads/main '%(creatordate)'
+batch_test_atom refs/heads/main '%(subject)'
+batch_test_atom refs/heads/main '%(subject:sanitize)'
+batch_test_atom refs/heads/main '%(contents:subject)'
+batch_test_atom refs/heads/main '%(body)'
+batch_test_atom refs/heads/main '%(contents:body)'
+batch_test_atom refs/heads/main '%(contents:signature)'
+batch_test_atom refs/heads/main '%(contents)'
+batch_test_atom refs/heads/main '%(HEAD)' fail
+batch_test_atom refs/heads/main '%(upstream:track)' fail
+batch_test_atom refs/heads/main '%(upstream:trackshort)' fail
+batch_test_atom refs/heads/main '%(upstream:track,nobracket)' fail
+batch_test_atom refs/heads/main '%(upstream:nobracket,track)' fail
+batch_test_atom refs/heads/main '%(push:track)' fail
+batch_test_atom refs/heads/main '%(push:trackshort)' fail
+batch_test_atom refs/heads/main '%(worktreepath)' fail
+batch_test_atom refs/heads/main '%(symref)' fail
+batch_test_atom refs/heads/main '%(flag)' fail
+
+batch_test_atom refs/tags/testtag '%(refname)' fail
+batch_test_atom refs/tags/testtag '%(refname:short)' fail
+batch_test_atom refs/tags/testtag '%(upstream)' fail
+batch_test_atom refs/tags/testtag '%(push)' fail
+batch_test_atom refs/tags/testtag '%(objecttype)'
+batch_test_atom refs/tags/testtag '%(objectsize)'
+batch_test_atom refs/tags/testtag '%(objectsize:disk)'
+batch_test_atom refs/tags/testtag '%(*objectsize:disk)'
+batch_test_atom refs/tags/testtag '%(deltabase)'
+batch_test_atom refs/tags/testtag '%(*deltabase)'
+batch_test_atom refs/tags/testtag '%(objectname)'
+batch_test_atom refs/tags/testtag '%(objectname:short)'
+batch_test_atom refs/tags/testtag '%(tree)'
+batch_test_atom refs/tags/testtag '%(tree:short)'
+batch_test_atom refs/tags/testtag '%(tree:short=1)'
+batch_test_atom refs/tags/testtag '%(tree:short=10)'
+batch_test_atom refs/tags/testtag '%(parent)'
+batch_test_atom refs/tags/testtag '%(parent:short)'
+batch_test_atom refs/tags/testtag '%(parent:short=1)'
+batch_test_atom refs/tags/testtag '%(parent:short=10)'
+batch_test_atom refs/tags/testtag '%(numparent)'
+batch_test_atom refs/tags/testtag '%(object)'
+batch_test_atom refs/tags/testtag '%(type)'
+batch_test_atom refs/tags/testtag '%(*objectname)'
+batch_test_atom refs/tags/testtag '%(*objecttype)'
+batch_test_atom refs/tags/testtag '%(author)'
+batch_test_atom refs/tags/testtag '%(authorname)'
+batch_test_atom refs/tags/testtag '%(authoremail)'
+batch_test_atom refs/tags/testtag '%(authoremail:trim)'
+batch_test_atom refs/tags/testtag '%(authoremail:localpart)'
+batch_test_atom refs/tags/testtag '%(authordate)'
+batch_test_atom refs/tags/testtag '%(committer)'
+batch_test_atom refs/tags/testtag '%(committername)'
+batch_test_atom refs/tags/testtag '%(committeremail)'
+batch_test_atom refs/tags/testtag '%(committeremail:trim)'
+batch_test_atom refs/tags/testtag '%(committeremail:localpart)'
+batch_test_atom refs/tags/testtag '%(committerdate)'
+batch_test_atom refs/tags/testtag '%(tag)'
+batch_test_atom refs/tags/testtag '%(tagger)'
+batch_test_atom refs/tags/testtag '%(taggername)'
+batch_test_atom refs/tags/testtag '%(taggeremail)'
+batch_test_atom refs/tags/testtag '%(taggeremail:trim)'
+batch_test_atom refs/tags/testtag '%(taggeremail:localpart)'
+batch_test_atom refs/tags/testtag '%(taggerdate)'
+batch_test_atom refs/tags/testtag '%(creator)'
+batch_test_atom refs/tags/testtag '%(creatordate)'
+batch_test_atom refs/tags/testtag '%(subject)'
+batch_test_atom refs/tags/testtag '%(subject:sanitize)'
+batch_test_atom refs/tags/testtag '%(contents:subject)'
+batch_test_atom refs/tags/testtag '%(body)'
+batch_test_atom refs/tags/testtag '%(contents:body)'
+batch_test_atom refs/tags/testtag '%(contents:signature)'
+batch_test_atom refs/tags/testtag '%(contents)'
+batch_test_atom refs/tags/testtag '%(HEAD)' fail
+
+batch_test_atom refs/myblobs/blob1 '%(refname)' fail
+batch_test_atom refs/myblobs/blob1 '%(upstream)' fail
+batch_test_atom refs/myblobs/blob1 '%(push)' fail
+batch_test_atom refs/myblobs/blob1 '%(HEAD)' fail
+
+batch_test_atom refs/myblobs/blob1 '%(objectname)'
+batch_test_atom refs/myblobs/blob1 '%(objecttype)'
+batch_test_atom refs/myblobs/blob1 '%(objectsize)'
+batch_test_atom refs/myblobs/blob1 '%(objectsize:disk)'
+batch_test_atom refs/myblobs/blob1 '%(deltabase)'
+
+batch_test_atom refs/myblobs/blob1 '%(contents)'
+batch_test_atom refs/myblobs/blob2 '%(contents)'
+
+batch_test_atom refs/myblobs/blob1 '%(raw)'
+batch_test_atom refs/myblobs/blob2 '%(raw)'
+batch_test_atom refs/mytrees/tree1 '%(raw)'
+
+batch_test_atom refs/myblobs/blob1 '%(raw:size)'
+batch_test_atom refs/myblobs/blob2 '%(raw:size)'
+batch_test_atom refs/mytrees/tree1 '%(raw:size)'
+
+batch_test_atom refs/myblobs/blob1 '%(if:equals=blob)%(objecttype)%(then)commit%(else)not commit%(end)'
+batch_test_atom refs/myblobs/blob2 '%(if:equals=blob)%(objecttype)%(then)commit%(else)not commit%(end)'
+batch_test_atom refs/mytrees/tree1 '%(if:equals=tree)%(objecttype)%(then)tree%(else)not tree%(end)'
+
+batch_test_atom refs/heads/main '%(align:60) objectname is %(objectname)%(end)|%(objectname)'
+batch_test_atom refs/heads/main '%(align:left,60) objectname is %(objectname)%(end)|%(objectname)'
+batch_test_atom refs/heads/main '%(align:middle,60) objectname is %(objectname)%(end)|%(objectname)'
+batch_test_atom refs/heads/main '%(align:60,right) objectname is %(objectname)%(end)|%(objectname)'
+
+batch_test_atom refs/heads/main 'VALID'
+batch_test_atom refs/heads/main '%(INVALID)' fail
+batch_test_atom refs/heads/main '%(authordate:INVALID)' fail
+
+test_expect_success '%(rest) works with both a branch and a tag' '
+	cat >expected <<-EOF &&
+	123 commit 123
+	456 tag 456
+	EOF
+	git cat-file --batch-check="%(rest) %(objecttype) %(rest)" >actual <<-EOF &&
+	refs/heads/main 123
+	refs/tags/testtag 456
+	EOF
+	test_cmp expected actual
+'
+
+batch_test_atom refs/heads/main '%(objectname) %(objecttype) %(objectsize)
+%(raw)'
+batch_test_atom refs/tags/testtag '%(objectname) %(objecttype) %(objectsize)
+%(raw)'
+batch_test_atom refs/myblobs/blob1 '%(objectname) %(objecttype) %(objectsize)
+%(raw)'
+batch_test_atom refs/myblobs/blob2 '%(objectname) %(objecttype) %(objectsize)
+%(raw)'
+
+
+test_expect_success 'cat-file --batch equals to --batch-check with atoms' '
+	git cat-file --batch-check="%(objectname) %(objecttype) %(objectsize)
+%(raw)" >expected <<-EOF &&
+	refs/heads/main
+	refs/tags/testtag
+	EOF
+	git cat-file --batch >actual <<-EOF &&
+	refs/heads/main
+	refs/tags/testtag
+	EOF
+	cmp expected actual
+'
+
 test_done
-- 
gitgitgadget


^ permalink raw reply	[flat|nested] 121+ messages in thread

* [PATCH v5 13/15] [GSOC] cat-file: reuse err buf in batch_object_write()
  2021-06-25 16:02       ` [PATCH v5 00/15] [GSOC][RFC] cat-file: reuse ref-filter logic ZheNing Hu via GitGitGadget
                           ` (11 preceding siblings ...)
  2021-06-25 16:02         ` [PATCH v5 12/15] [GSOC] cat-file: reuse ref-filter logic ZheNing Hu via GitGitGadget
@ 2021-06-25 16:02         ` ZheNing Hu via GitGitGadget
  2021-06-25 16:02         ` [PATCH v5 14/15] [GSOC] cat-file: re-implement --textconv, --filters options ZheNing Hu via GitGitGadget
                           ` (2 subsequent siblings)
  15 siblings, 0 replies; 121+ messages in thread
From: ZheNing Hu via GitGitGadget @ 2021-06-25 16:02 UTC (permalink / raw)
  To: git
  Cc: Junio C Hamano, Christian Couder, Hariom Verma, Bagas Sanjaya,
	Jeff King, Ævar Arnfjörð Bjarmason, ZheNing Hu,
	ZheNing Hu

From: ZheNing Hu <adlternative@gmail.com>

Reuse the `err` buffer in batch_object_write(), as the
buffer `scratch` does. This will reduce the overhead
of multiple allocations of memory of the err buffer.

Mentored-by: Christian Couder <christian.couder@gmail.com>
Mentored-by: Hariom Verma <hariom18599@gmail.com>
Signed-off-by: ZheNing Hu <adlternative@gmail.com>
---
 builtin/cat-file.c | 22 ++++++++++++++--------
 1 file changed, 14 insertions(+), 8 deletions(-)

diff --git a/builtin/cat-file.c b/builtin/cat-file.c
index 0e7ad038e5f..27403326e7a 100644
--- a/builtin/cat-file.c
+++ b/builtin/cat-file.c
@@ -212,35 +212,36 @@ static void batch_write(struct batch_options *opt, const void *data, int len)
 
 static void batch_object_write(const char *obj_name,
 			       struct strbuf *scratch,
+			       struct strbuf *err,
 			       struct batch_options *opt,
 			       struct expand_data *data)
 {
 	int ret = 0;
-	struct strbuf err = STRBUF_INIT;
 	struct ref_array_item item = { data->oid, data->rest };
 
 	strbuf_reset(scratch);
+	strbuf_reset(err);
 
-	ret = format_ref_array_item(&item, &opt->format, scratch, &err);
+	ret = format_ref_array_item(&item, &opt->format, scratch, err);
 	if (ret < 0) {
-		die("%s\n", err.buf);
+		die("%s\n", err->buf);
 	} if (ret) {
 		/* ret > 0 means when the object corresponding to oid
 		 * cannot be found in format_ref_array_item(), we only print
 		 * the error message.
 		 */
-		printf("%s\n", err.buf);
+		printf("%s\n", err->buf);
 		fflush(stdout);
 	} else {
 		strbuf_addch(scratch, '\n');
 		batch_write(opt, scratch->buf, scratch->len);
 	}
 	free_ref_array_item_value(&item);
-	strbuf_release(&err);
 }
 
 static void batch_one_object(const char *obj_name,
 			     struct strbuf *scratch,
+			     struct strbuf *err,
 			     struct batch_options *opt,
 			     struct expand_data *data)
 {
@@ -294,7 +295,7 @@ static void batch_one_object(const char *obj_name,
 		return;
 	}
 
-	batch_object_write(obj_name, scratch, opt, data);
+	batch_object_write(obj_name, scratch, err, opt, data);
 }
 
 struct object_cb_data {
@@ -302,13 +303,14 @@ struct object_cb_data {
 	struct expand_data *expand;
 	struct oidset *seen;
 	struct strbuf *scratch;
+	struct strbuf *err;
 };
 
 static int batch_object_cb(const struct object_id *oid, void *vdata)
 {
 	struct object_cb_data *data = vdata;
 	oidcpy(&data->expand->oid, oid);
-	batch_object_write(NULL, data->scratch, data->opt, data->expand);
+	batch_object_write(NULL, data->scratch, data->err, data->opt, data->expand);
 	return 0;
 }
 
@@ -364,6 +366,7 @@ static int batch_objects(struct batch_options *batch, const struct option *optio
 {
 	struct strbuf input = STRBUF_INIT;
 	struct strbuf output = STRBUF_INIT;
+	struct strbuf err = STRBUF_INIT;
 	struct strbuf format = STRBUF_INIT;
 	struct expand_data data;
 	int save_warning;
@@ -392,6 +395,7 @@ static int batch_objects(struct batch_options *batch, const struct option *optio
 		cb.opt = batch;
 		cb.expand = &data;
 		cb.scratch = &output;
+		cb.err = &err;
 
 		if (batch->unordered) {
 			struct oidset seen = OIDSET_INIT;
@@ -416,6 +420,7 @@ static int batch_objects(struct batch_options *batch, const struct option *optio
 
 		strbuf_release(&format);
 		strbuf_release(&output);
+		strbuf_release(&err);
 		return 0;
 	}
 
@@ -444,12 +449,13 @@ static int batch_objects(struct batch_options *batch, const struct option *optio
 			data.rest = p;
 		}
 
-		batch_one_object(input.buf, &output, batch, &data);
+		batch_one_object(input.buf, &output, &err, batch, &data);
 	}
 
 	strbuf_release(&format);
 	strbuf_release(&input);
 	strbuf_release(&output);
+	strbuf_release(&err);
 	warn_on_object_refname_ambiguity = save_warning;
 	return retval;
 }
-- 
gitgitgadget


^ permalink raw reply	[flat|nested] 121+ messages in thread

* [PATCH v5 14/15] [GSOC] cat-file: re-implement --textconv, --filters options
  2021-06-25 16:02       ` [PATCH v5 00/15] [GSOC][RFC] cat-file: reuse ref-filter logic ZheNing Hu via GitGitGadget
                           ` (12 preceding siblings ...)
  2021-06-25 16:02         ` [PATCH v5 13/15] [GSOC] cat-file: reuse err buf in batch_object_write() ZheNing Hu via GitGitGadget
@ 2021-06-25 16:02         ` ZheNing Hu via GitGitGadget
  2021-06-25 16:02         ` [PATCH v5 15/15] [GSOC] ref-filter: remove grab_oid() function ZheNing Hu via GitGitGadget
  2021-06-27 12:35         ` [PATCH v6 00/15] [GSOC][RFC] cat-file: reuse ref-filter logic ZheNing Hu via GitGitGadget
  15 siblings, 0 replies; 121+ messages in thread
From: ZheNing Hu via GitGitGadget @ 2021-06-25 16:02 UTC (permalink / raw)
  To: git
  Cc: Junio C Hamano, Christian Couder, Hariom Verma, Bagas Sanjaya,
	Jeff King, Ævar Arnfjörð Bjarmason, ZheNing Hu,
	ZheNing Hu

From: ZheNing Hu <adlternative@gmail.com>

After cat-file reuses the ref-filter logic, we re-implement the
functions of --textconv and --filters options.

Add members `use_textconv` and `use_filters` in struct `ref_format`,
and use global variables `use_filters` and `use_textconv` in
`ref-filter.c`, so that we can filter the content of the object
in get_object(). Use `actual_oi` to record the real expand_data:
it may point to the original `oi` or the `act_oi` processed by
`textconv_object()` or `convert_to_working_tree()`. `grab_values()`
will grab the contents of `actual_oi` and `grab_common_values()`
to grab the contents of origin `oi`, this ensures that `%(objectsize)`
still uses the size of the unfiltered data.

In `get_object()`, we made an optimization: Firstly, get the size and
type of the object instead of directly getting the object data.
If using --textconv, after successfully obtaining the filtered object
data, an extra oid_object_info_extended() will be skipped, which can
reduce the cost of object data copy; If using --filter, the data of
the object first will be getted first, and then convert_to_working_tree()
will be used to get the filtered object data.

Mentored-by: Christian Couder <christian.couder@gmail.com>
Mentored-by: Hariom Verma <hariom18599@gmail.com>
Signed-off-by: ZheNing Hu <adlternative@gmail.com>
---
 builtin/cat-file.c |  6 +++++
 ref-filter.c       | 59 ++++++++++++++++++++++++++++++++++++++++++++--
 ref-filter.h       |  2 ++
 3 files changed, 65 insertions(+), 2 deletions(-)

diff --git a/builtin/cat-file.c b/builtin/cat-file.c
index 27403326e7a..f3140d927f7 100644
--- a/builtin/cat-file.c
+++ b/builtin/cat-file.c
@@ -380,6 +380,12 @@ static int batch_objects(struct batch_options *batch, const struct option *optio
 	if (batch->print_contents)
 		strbuf_addstr(&format, "\n%(raw)");
 	batch->format.format = format.buf;
+
+	if (batch->cmdmode == 'c')
+		batch->format.use_textconv = 1;
+	else if (batch->cmdmode == 'w')
+		batch->format.use_filters = 1;
+
 	if (verify_ref_format(&batch->format))
 		usage_with_options(cat_file_usage, options);
 
diff --git a/ref-filter.c b/ref-filter.c
index 181d99c9273..99b87742b0f 100644
--- a/ref-filter.c
+++ b/ref-filter.c
@@ -1,3 +1,4 @@
+#define USE_THE_INDEX_COMPATIBILITY_MACROS
 #include "builtin.h"
 #include "cache.h"
 #include "parse-options.h"
@@ -84,6 +85,9 @@ static struct expand_data {
 	struct object_info info;
 } oi, oi_deref;
 
+int use_filters;
+int use_textconv;
+
 struct ref_to_worktree_entry {
 	struct hashmap_entry ent;
 	struct worktree *wt; /* key is wt->head_ref */
@@ -1031,6 +1035,9 @@ int verify_ref_format(struct ref_format *format)
 					       used_atom[at].atom_type == ATOM_WORKTREEPATH)))
 			die(_("this command reject atom %%(%.*s)"), (int)(ep - sp - 2), sp + 2);
 
+		use_filters = format->use_filters;
+		use_textconv = format->use_textconv;
+
 		if ((format->quote_style == QUOTE_PYTHON ||
 		     format->quote_style == QUOTE_SHELL ||
 		     format->quote_style == QUOTE_TCL) &&
@@ -1742,10 +1749,38 @@ static int get_object(struct ref_array_item *ref, int deref, struct object **obj
 {
 	/* parse_object_buffer() will set eaten to 0 if free() will be needed */
 	int eaten = 1;
+	struct expand_data *actual_oi = oi;
+	struct expand_data act_oi = {0};
+
 	if (oi->info.contentp) {
 		/* We need to know that to use parse_object_buffer properly */
+		void **temp_contentp = oi->info.contentp;
+		oi->info.contentp = NULL;
 		oi->info.sizep = &oi->size;
 		oi->info.typep = &oi->type;
+
+		/* get the type and size */
+		if (oid_object_info_extended(the_repository, &oi->oid, &oi->info,
+					OBJECT_INFO_LOOKUP_REPLACE))
+			return strbuf_addf_ret(err, 1, _("%s missing"),
+					       oid_to_hex(&oi->oid));
+
+		oi->info.sizep = NULL;
+		oi->info.typep = NULL;
+		oi->info.contentp = temp_contentp;
+
+		if (use_textconv && !ref->rest)
+			return strbuf_addf_ret(err, -1, _("missing path for '%s'"),
+					       oid_to_hex(&act_oi.oid));
+		if (use_textconv && oi->type == OBJ_BLOB) {
+			act_oi = *oi;
+			if (textconv_object(the_repository,
+					    ref->rest, 0100644, &act_oi.oid,
+					    1, (char **)(&act_oi.content), &act_oi.size)) {
+				actual_oi = &act_oi;
+				goto success;
+			}
+		}
 	}
 	if (oid_object_info_extended(the_repository, &oi->oid, &oi->info,
 				     OBJECT_INFO_LOOKUP_REPLACE))
@@ -1755,19 +1790,39 @@ static int get_object(struct ref_array_item *ref, int deref, struct object **obj
 		BUG("Object size is less than zero.");
 
 	if (oi->info.contentp) {
-		*obj = parse_object_buffer(the_repository, &oi->oid, oi->type, oi->size, oi->content, &eaten);
+		if (use_filters && !ref->rest)
+			return strbuf_addf_ret(err, -1, _("missing path for '%s'"),
+					       oid_to_hex(&oi->oid));
+		if (use_filters && oi->type == OBJ_BLOB) {
+			struct strbuf strbuf = STRBUF_INIT;
+			struct checkout_metadata meta;
+			act_oi = *oi;
+
+			init_checkout_metadata(&meta, NULL, NULL, &act_oi.oid);
+			if (!convert_to_working_tree(&the_index, ref->rest, act_oi.content, act_oi.size, &strbuf, &meta))
+				die("could not convert '%s' %s",
+					oid_to_hex(&oi->oid), ref->rest);
+			act_oi.size = strbuf.len;
+			act_oi.content = strbuf_detach(&strbuf, NULL);
+			actual_oi = &act_oi;
+		}
+
+success:
+		*obj = parse_object_buffer(the_repository, &actual_oi->oid, actual_oi->type, actual_oi->size, actual_oi->content, &eaten);
 		if (!*obj) {
 			if (!eaten)
 				free(oi->content);
 			return strbuf_addf_ret(err, -1, _("parse_object_buffer failed on %s for %s"),
 					       oid_to_hex(&oi->oid), ref->refname);
 		}
-		grab_values(ref->value, deref, *obj, oi);
+		grab_values(ref->value, deref, *obj, actual_oi);
 	}
 
 	grab_common_values(ref->value, deref, oi);
 	if (!eaten)
 		free(oi->content);
+	if (actual_oi != oi)
+		free(actual_oi->content);
 	return 0;
 }
 
diff --git a/ref-filter.h b/ref-filter.h
index 053980a6a42..497e3e93632 100644
--- a/ref-filter.h
+++ b/ref-filter.h
@@ -80,6 +80,8 @@ struct ref_format {
 	const char *rest;
 	int cat_file_mode;
 	int quote_style;
+	int use_textconv;
+	int use_filters;
 	int use_rest;
 	int use_color;
 
-- 
gitgitgadget


^ permalink raw reply	[flat|nested] 121+ messages in thread

* [PATCH v5 15/15] [GSOC] ref-filter: remove grab_oid() function
  2021-06-25 16:02       ` [PATCH v5 00/15] [GSOC][RFC] cat-file: reuse ref-filter logic ZheNing Hu via GitGitGadget
                           ` (13 preceding siblings ...)
  2021-06-25 16:02         ` [PATCH v5 14/15] [GSOC] cat-file: re-implement --textconv, --filters options ZheNing Hu via GitGitGadget
@ 2021-06-25 16:02         ` ZheNing Hu via GitGitGadget
  2021-06-27 12:35         ` [PATCH v6 00/15] [GSOC][RFC] cat-file: reuse ref-filter logic ZheNing Hu via GitGitGadget
  15 siblings, 0 replies; 121+ messages in thread
From: ZheNing Hu via GitGitGadget @ 2021-06-25 16:02 UTC (permalink / raw)
  To: git
  Cc: Junio C Hamano, Christian Couder, Hariom Verma, Bagas Sanjaya,
	Jeff King, Ævar Arnfjörð Bjarmason, ZheNing Hu,
	ZheNing Hu

From: ZheNing Hu <adlternative@gmail.com>

Because "atom_type == ATOM_OBJECTNAME" implies the condition
of `starts_with(name, "objectname")`, "atom_type == ATOM_TREE"
implies the condition of `starts_with(name, "tree")`, so the
check for `starts_with(name, field)` in grab_oid() is redundant.

So Remove the grab_oid() from ref-filter, to reduce repeated check.

Mentored-by: Christian Couder <christian.couder@gmail.com>
Mentored-by: Hariom Verma <hariom18599@gmail.com>
Signed-off-by: ZheNing Hu <adlternative@gmail.com>
---
 ref-filter.c | 26 +++++++++-----------------
 1 file changed, 9 insertions(+), 17 deletions(-)

diff --git a/ref-filter.c b/ref-filter.c
index 99b87742b0f..ab53c1fd22f 100644
--- a/ref-filter.c
+++ b/ref-filter.c
@@ -1071,16 +1071,6 @@ static const char *do_grab_oid(const char *field, const struct object_id *oid,
 	}
 }
 
-static int grab_oid(const char *name, const char *field, const struct object_id *oid,
-		    struct atom_value *v, struct used_atom *atom)
-{
-	if (starts_with(name, field)) {
-		v->s = xstrdup(do_grab_oid(field, oid, atom));
-		return 1;
-	}
-	return 0;
-}
-
 /* See grab_values */
 static void grab_common_values(struct atom_value *val, int deref, struct expand_data *oi)
 {
@@ -1106,8 +1096,9 @@ static void grab_common_values(struct atom_value *val, int deref, struct expand_
 			}
 		} else if (atom_type == ATOM_DELTABASE)
 			v->s = xstrdup(oid_to_hex(&oi->delta_base_oid));
-		else if (atom_type == ATOM_OBJECTNAME && deref)
-			grab_oid(name, "objectname", &oi->oid, v, &used_atom[i]);
+		else if (atom_type == ATOM_OBJECTNAME && deref) {
+			v->s = xstrdup(do_grab_oid("objectname", &oi->oid, &used_atom[i]));
+		}
 	}
 }
 
@@ -1148,9 +1139,10 @@ static void grab_commit_values(struct atom_value *val, int deref, struct object
 			continue;
 		if (deref)
 			name++;
-		if (atom_type == ATOM_TREE &&
-		    grab_oid(name, "tree", get_commit_tree_oid(commit), v, &used_atom[i]))
+		if (atom_type == ATOM_TREE) {
+			v->s = xstrdup(do_grab_oid("tree", get_commit_tree_oid(commit), &used_atom[i]));
 			continue;
+		}
 		if (atom_type == ATOM_NUMPARENT) {
 			v->value = commit_list_count(commit->parents);
 			v->s = xstrfmt("%lu", (unsigned long)v->value);
@@ -1971,9 +1963,9 @@ static int populate_value(struct ref_array_item *ref, struct strbuf *err)
 				v->s = xstrdup(buf + 1);
 			}
 			continue;
-		} else if (!deref && atom_type == ATOM_OBJECTNAME &&
-			   grab_oid(name, "objectname", &ref->objectname, v, atom)) {
-				continue;
+		} else if (!deref && atom_type == ATOM_OBJECTNAME) {
+			   v->s = xstrdup(do_grab_oid("objectname", &ref->objectname, atom));
+			   continue;
 		} else if (atom_type == ATOM_HEAD) {
 			if (atom->u.head && !strcmp(ref->refname, atom->u.head))
 				v->s = xstrdup("*");
-- 
gitgitgadget

^ permalink raw reply	[flat|nested] 121+ messages in thread

* Re: [PATCH v5 02/15] [GSOC] ref-filter: add %(raw) atom
  2021-06-25 16:02         ` [PATCH v5 02/15] [GSOC] ref-filter: add %(raw) atom ZheNing Hu via GitGitGadget
@ 2021-06-26  0:42           ` Bagas Sanjaya
  2021-06-27 11:43             ` ZheNing Hu
  0 siblings, 1 reply; 121+ messages in thread
From: Bagas Sanjaya @ 2021-06-26  0:42 UTC (permalink / raw)
  To: ZheNing Hu via GitGitGadget, git
  Cc: Junio C Hamano, Christian Couder, Hariom Verma, Jeff King,
	Ævar Arnfjörð Bjarmason, ZheNing Hu

On 25/06/21 23.02, ZheNing Hu via GitGitGadget wrote:
> Note that `--format=%(raw)` cannot be used with `--python`, `--shell`,
> `--tcl`, and `--perl` because if the binary raw data is passed to a
> variable in such languages, these may not support arbitrary binary data
> in their string variable type.
> 

Commit message looks OK, but...

> +Note that `--format=%(raw)` can not be used with `--python`, `--shell`, `--tcl`,
> +`--perl` because the host language may not support arbitrary binary data in the
> +variables of its string type.
> +

Seems like out of sync between commit message and the docs change above. 
Did you mean the (unsupported) host languages are Python, BASH script, 
TCL/TK, and Perl respectively? If so, the docs should say:

"Note that `--format=%(raw) can not be used with `--python`, `--shell`, 
`-tcl`, and `--perl` because such languages may not support arbitrary 
binary data in their string variable type."

Thanks.

-- 
An old man doll... just what I always wanted! - Clara

^ permalink raw reply	[flat|nested] 121+ messages in thread

* Re: [PATCH v5 12/15] [GSOC] cat-file: reuse ref-filter logic
  2021-06-25 16:02         ` [PATCH v5 12/15] [GSOC] cat-file: reuse ref-filter logic ZheNing Hu via GitGitGadget
@ 2021-06-26 17:26           ` Hariom verma
  2021-06-27 11:31             ` ZheNing Hu
  2021-06-26 18:08           ` Hariom verma
  1 sibling, 1 reply; 121+ messages in thread
From: Hariom verma @ 2021-06-26 17:26 UTC (permalink / raw)
  To: ZheNing Hu via GitGitGadget
  Cc: git, Junio C Hamano, Christian Couder, Bagas Sanjaya, Jeff King,
	Ævar Arnfjörð Bjarmason, ZheNing Hu

Hi,

On Fri, Jun 25, 2021 at 9:32 PM ZheNing Hu via GitGitGadget
<gitgitgadget@gmail.com> wrote:
>
> From: ZheNing Hu <adlternative@gmail.com>
>
> +       if (ret < 0) {
> +               die("%s\n", err.buf);
> +       } if (ret) {
> +               /* ret > 0 means when the object corresponding to oid
> +                * cannot be found in format_ref_array_item(), we only print
> +                * the error message.
> +                */
> +               printf("%s\n", err.buf);
> +               fflush(stdout);
> +       } else {
> +               strbuf_addch(scratch, '\n');
> +               batch_write(opt, scratch->buf, scratch->len);
>         }
> +       free_ref_array_item_value(&item);
> +       strbuf_release(&err);
>  }

I think you can get rid of braces in condition `ret < 0`:

```
        if (ret < 0)
                die("%s\n", err->buf);
        if (ret) {
                /* ret > 0 means when the object corresponding to oid
                 * cannot be found in format_ref_array_item(), we only print
                 * the error message.
                 */
                printf("%s\n", err->buf);
                fflush(stdout);
        } else {
                strbuf_addch(scratch, '\n');
                batch_write(opt, scratch->buf, scratch->len);
        }
```

Thanks,
Hariom.

^ permalink raw reply	[flat|nested] 121+ messages in thread

* Re: [PATCH v5 12/15] [GSOC] cat-file: reuse ref-filter logic
  2021-06-25 16:02         ` [PATCH v5 12/15] [GSOC] cat-file: reuse ref-filter logic ZheNing Hu via GitGitGadget
  2021-06-26 17:26           ` Hariom verma
@ 2021-06-26 18:08           ` Hariom verma
  2021-06-27 11:34             ` ZheNing Hu
  1 sibling, 1 reply; 121+ messages in thread
From: Hariom verma @ 2021-06-26 18:08 UTC (permalink / raw)
  To: ZheNing Hu via GitGitGadget
  Cc: git, Junio C Hamano, Christian Couder, Bagas Sanjaya, Jeff King,
	Ævar Arnfjörð Bjarmason, ZheNing Hu

On Fri, Jun 25, 2021 at 9:32 PM ZheNing Hu via GitGitGadget
<gitgitgadget@gmail.com> wrote:
>
> From: ZheNing Hu <adlternative@gmail.com>
>
>  static void batch_object_write(const char *obj_name,
>                                struct strbuf *scratch,
>                                struct batch_options *opt,
>                                struct expand_data *data)
>  {
> -       if (!data->skip_object_info &&
> -           oid_object_info_extended(the_repository, &data->oid, &data->info,
> -                                    OBJECT_INFO_LOOKUP_REPLACE) < 0) {
> -               printf("%s missing\n",
> -                      obj_name ? obj_name : oid_to_hex(&data->oid));
> -               fflush(stdout);
> -               return;
> -       }
> +       int ret = 0;

No need to initialize `ret` with 0. Later we are going to assign it
with the return value of `format_ref_array_item()` anyway.

> +       struct strbuf err = STRBUF_INIT;
> +       struct ref_array_item item = { data->oid, data->rest };
>
>         strbuf_reset(scratch);
> -       strbuf_expand(scratch, opt->format, expand_format, data);
> -       strbuf_addch(scratch, '\n');
> -       batch_write(opt, scratch->buf, scratch->len);
>
> -       if (opt->print_contents) {
> -               print_object_or_die(opt, data);
> -               batch_write(opt, "\n", 1);
> +       ret = format_ref_array_item(&item, &opt->format, scratch, &err);

Here.

-- 
Hariom

^ permalink raw reply	[flat|nested] 121+ messages in thread

* Re: [PATCH v5 12/15] [GSOC] cat-file: reuse ref-filter logic
  2021-06-26 17:26           ` Hariom verma
@ 2021-06-27 11:31             ` ZheNing Hu
  0 siblings, 0 replies; 121+ messages in thread
From: ZheNing Hu @ 2021-06-27 11:31 UTC (permalink / raw)
  To: Hariom verma
  Cc: ZheNing Hu via GitGitGadget, git, Junio C Hamano,
	Christian Couder, Bagas Sanjaya, Jeff King,
	Ævar Arnfjörð Bjarmason

Hariom verma <hariom18599@gmail.com> 于2021年6月27日周日 上午1:27写道:
>
> Hi,
>
> On Fri, Jun 25, 2021 at 9:32 PM ZheNing Hu via GitGitGadget
> <gitgitgadget@gmail.com> wrote:
> >
> > From: ZheNing Hu <adlternative@gmail.com>
> >
> > +       if (ret < 0) {
> > +               die("%s\n", err.buf);
> > +       } if (ret) {
> > +               /* ret > 0 means when the object corresponding to oid
> > +                * cannot be found in format_ref_array_item(), we only print
> > +                * the error message.
> > +                */
> > +               printf("%s\n", err.buf);
> > +               fflush(stdout);
> > +       } else {
> > +               strbuf_addch(scratch, '\n');
> > +               batch_write(opt, scratch->buf, scratch->len);
> >         }
> > +       free_ref_array_item_value(&item);
> > +       strbuf_release(&err);
> >  }
>
> I think you can get rid of braces in condition `ret < 0`:
>

Make sences. ;-)

> ```
>         if (ret < 0)
>                 die("%s\n", err->buf);
>         if (ret) {
>                 /* ret > 0 means when the object corresponding to oid
>                  * cannot be found in format_ref_array_item(), we only print
>                  * the error message.
>                  */
>                 printf("%s\n", err->buf);
>                 fflush(stdout);
>         } else {
>                 strbuf_addch(scratch, '\n');
>                 batch_write(opt, scratch->buf, scratch->len);
>         }
> ```
>
> Thanks,
> Hariom.

Thanks,
ZheNing Hu

^ permalink raw reply	[flat|nested] 121+ messages in thread

* Re: [PATCH v5 12/15] [GSOC] cat-file: reuse ref-filter logic
  2021-06-26 18:08           ` Hariom verma
@ 2021-06-27 11:34             ` ZheNing Hu
  0 siblings, 0 replies; 121+ messages in thread
From: ZheNing Hu @ 2021-06-27 11:34 UTC (permalink / raw)
  To: Hariom verma
  Cc: ZheNing Hu via GitGitGadget, git, Junio C Hamano,
	Christian Couder, Bagas Sanjaya, Jeff King,
	Ævar Arnfjörð Bjarmason

Hariom verma <hariom18599@gmail.com> 于2021年6月27日周日 上午2:08写道:
>
> On Fri, Jun 25, 2021 at 9:32 PM ZheNing Hu via GitGitGadget
> <gitgitgadget@gmail.com> wrote:
> >
> > From: ZheNing Hu <adlternative@gmail.com>
> >
> >  static void batch_object_write(const char *obj_name,
> >                                struct strbuf *scratch,
> >                                struct batch_options *opt,
> >                                struct expand_data *data)
> >  {
> > -       if (!data->skip_object_info &&
> > -           oid_object_info_extended(the_repository, &data->oid, &data->info,
> > -                                    OBJECT_INFO_LOOKUP_REPLACE) < 0) {
> > -               printf("%s missing\n",
> > -                      obj_name ? obj_name : oid_to_hex(&data->oid));
> > -               fflush(stdout);
> > -               return;
> > -       }
> > +       int ret = 0;
>
> No need to initialize `ret` with 0. Later we are going to assign it
> with the return value of `format_ref_array_item()` anyway.
>

I agree. It is worth noting that there are similar `int ret = 0` in
ref-filter.c,
they should be changed too.

> > +       struct strbuf err = STRBUF_INIT;
> > +       struct ref_array_item item = { data->oid, data->rest };
> >
> >         strbuf_reset(scratch);
> > -       strbuf_expand(scratch, opt->format, expand_format, data);
> > -       strbuf_addch(scratch, '\n');
> > -       batch_write(opt, scratch->buf, scratch->len);
> >
> > -       if (opt->print_contents) {
> > -               print_object_or_die(opt, data);
> > -               batch_write(opt, "\n", 1);
> > +       ret = format_ref_array_item(&item, &opt->format, scratch, &err);
>
> Here.
>
> --
> Hariom

--
ZheNing Hu

^ permalink raw reply	[flat|nested] 121+ messages in thread

* Re: [PATCH v5 02/15] [GSOC] ref-filter: add %(raw) atom
  2021-06-26  0:42           ` Bagas Sanjaya
@ 2021-06-27 11:43             ` ZheNing Hu
  0 siblings, 0 replies; 121+ messages in thread
From: ZheNing Hu @ 2021-06-27 11:43 UTC (permalink / raw)
  To: Bagas Sanjaya
  Cc: ZheNing Hu via GitGitGadget, Git List, Junio C Hamano,
	Christian Couder, Hariom Verma, Jeff King,
	Ævar Arnfjörð Bjarmason

Bagas Sanjaya <bagasdotme@gmail.com> 于2021年6月26日周六 上午8:42写道:
>
> On 25/06/21 23.02, ZheNing Hu via GitGitGadget wrote:
> > Note that `--format=%(raw)` cannot be used with `--python`, `--shell`,
> > `--tcl`, and `--perl` because if the binary raw data is passed to a
> > variable in such languages, these may not support arbitrary binary data
> > in their string variable type.
> >
>
> Commit message looks OK, but...
>
> > +Note that `--format=%(raw)` can not be used with `--python`, `--shell`, `--tcl`,
> > +`--perl` because the host language may not support arbitrary binary data in the
> > +variables of its string type.
> > +
>
> Seems like out of sync between commit message and the docs change above.
> Did you mean the (unsupported) host languages are Python, BASH script,
> TCL/TK, and Perl respectively? If so, the docs should say:
>

Indeed so. I will change them too.

> "Note that `--format=%(raw) can not be used with `--python`, `--shell`,
> `-tcl`, and `--perl` because such languages may not support arbitrary
> binary data in their string variable type."
>
> Thanks.
>
> --
> An old man doll... just what I always wanted! - Clara

Thanks.
--
ZheNing Hu

^ permalink raw reply	[flat|nested] 121+ messages in thread

* [PATCH v6 00/15] [GSOC][RFC] cat-file: reuse ref-filter logic
  2021-06-25 16:02       ` [PATCH v5 00/15] [GSOC][RFC] cat-file: reuse ref-filter logic ZheNing Hu via GitGitGadget
                           ` (14 preceding siblings ...)
  2021-06-25 16:02         ` [PATCH v5 15/15] [GSOC] ref-filter: remove grab_oid() function ZheNing Hu via GitGitGadget
@ 2021-06-27 12:35         ` ZheNing Hu via GitGitGadget
  2021-06-27 12:35           ` [PATCH v6 01/15] [GSOC] ref-filter: add obj-type check in grab contents ZheNing Hu via GitGitGadget
                             ` (15 more replies)
  15 siblings, 16 replies; 121+ messages in thread
From: ZheNing Hu via GitGitGadget @ 2021-06-27 12:35 UTC (permalink / raw)
  To: git
  Cc: Junio C Hamano, Christian Couder, Hariom Verma, Bagas Sanjaya,
	Jeff King, Ævar Arnfjörð Bjarmason, ZheNing Hu

This patch series make cat-file reuse ref-filter logic.

Change from last version:

 1. Amend part of the description of git for-each-ref.txt.
 2. Modify the code style.
 3. Do not assign the 0 to the variable ret during it's initialization.

ZheNing Hu (15):
  [GSOC] ref-filter: add obj-type check in grab contents
  [GSOC] ref-filter: add %(raw) atom
  [GSOC] ref-filter: --format=%(raw) re-support --perl
  [GSOC] ref-filter: use non-const ref_format in *_atom_parser()
  [GSOC] ref-filter: add %(rest) atom
  [GSOC] ref-filter: pass get_object() return value to their callers
  [GSOC] ref-filter: introduce free_ref_array_item_value() function
  [GSOC] ref-filter: add cat_file_mode in struct ref_format
  [GSOC] ref-filter: modify the error message and value in get_object
  [GSOC] cat-file: add has_object_file() check
  [GSOC] cat-file: change batch_objects parameter name
  [GSOC] cat-file: reuse ref-filter logic
  [GSOC] cat-file: reuse err buf in batch_object_write()
  [GSOC] cat-file: re-implement --textconv, --filters options
  [GSOC] ref-filter: remove grab_oid() function

 Documentation/git-cat-file.txt     |   6 +
 Documentation/git-for-each-ref.txt |   9 +
 builtin/cat-file.c                 | 277 ++++++----------------
 builtin/tag.c                      |   2 +-
 quote.c                            |  17 ++
 quote.h                            |   1 +
 ref-filter.c                       | 357 ++++++++++++++++++++++-------
 ref-filter.h                       |  14 +-
 t/t1006-cat-file.sh                | 252 ++++++++++++++++++++
 t/t3203-branch-output.sh           |   4 +
 t/t6300-for-each-ref.sh            | 235 +++++++++++++++++++
 t/t6301-for-each-ref-errors.sh     |   2 +-
 t/t7004-tag.sh                     |   4 +
 t/t7030-verify-tag.sh              |   4 +
 14 files changed, 888 insertions(+), 296 deletions(-)


base-commit: 1197f1a46360d3ae96bd9c15908a3a6f8e562207
Published-As: https://github.com/gitgitgadget/git/releases/tag/pr-980%2Fadlternative%2Fcat-file-batch-refactor-v6
Fetch-It-Via: git fetch https://github.com/gitgitgadget/git pr-980/adlternative/cat-file-batch-refactor-v6
Pull-Request: https://github.com/gitgitgadget/git/pull/980

Range-diff vs v5:

  1:  f72ad9cc5e8 =  1:  f72ad9cc5e8 [GSOC] ref-filter: add obj-type check in grab contents
  2:  4e473838b9d !  2:  d9bc50c4ae6 [GSOC] ref-filter: add %(raw) atom
     @@ Commit message
      
          Mentored-by: Christian Couder <christian.couder@gmail.com>
          Mentored-by: Hariom Verma <hariom18599@gmail.com>
     +    Helped-by: Bagas Sanjaya <bagasdotme@gmail.com>
          Helped-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
          Helped-by: Felipe Contreras <felipe.contreras@gmail.com>
          Helped-by: Phillip Wood <phillip.wood@dunelm.org.uk>
     @@ Documentation/git-for-each-ref.txt: and `date` to extract the named component.
      +	The raw data size of the object.
      +
      +Note that `--format=%(raw)` can not be used with `--python`, `--shell`, `--tcl`,
     -+`--perl` because the host language may not support arbitrary binary data in the
     -+variables of its string type.
     ++`--perl` because the such language may not support arbitrary binary data in their
     ++string variable type.
      +
       The message in a commit or a tag object is `contents`, from which
       `contents:<part>` can be used to extract various parts out of:
     @@ t/t6300-for-each-ref.sh: test_atom refs/myblobs/first contents:body ""
      +	printf "  " >blob7 &&
      +	>blob8 &&
      +	obj=$(git hash-object -w blob1) &&
     -+        git update-ref refs/myblobs/blob1 "$obj" &&
     ++	git update-ref refs/myblobs/blob1 "$obj" &&
      +	obj=$(git hash-object -w blob2) &&
     -+        git update-ref refs/myblobs/blob2 "$obj" &&
     ++	git update-ref refs/myblobs/blob2 "$obj" &&
      +	obj=$(git hash-object -w blob3) &&
     -+        git update-ref refs/myblobs/blob3 "$obj" &&
     ++	git update-ref refs/myblobs/blob3 "$obj" &&
      +	obj=$(git hash-object -w blob4) &&
     -+        git update-ref refs/myblobs/blob4 "$obj" &&
     ++	git update-ref refs/myblobs/blob4 "$obj" &&
      +	obj=$(git hash-object -w blob5) &&
     -+        git update-ref refs/myblobs/blob5 "$obj" &&
     ++	git update-ref refs/myblobs/blob5 "$obj" &&
      +	obj=$(git hash-object -w blob6) &&
     -+        git update-ref refs/myblobs/blob6 "$obj" &&
     ++	git update-ref refs/myblobs/blob6 "$obj" &&
      +	obj=$(git hash-object -w blob7) &&
     -+        git update-ref refs/myblobs/blob7 "$obj" &&
     ++	git update-ref refs/myblobs/blob7 "$obj" &&
      +	obj=$(git hash-object -w blob8) &&
     -+        git update-ref refs/myblobs/blob8 "$obj"
     ++	git update-ref refs/myblobs/blob8 "$obj"
      +'
      +
      +test_expect_success 'Verify sorts with raw' '
  3:  765cf08a108 !  3:  47f868f63d9 [GSOC] ref-filter: --format=%(raw) re-support --perl
     @@ Documentation/git-for-each-ref.txt: raw:size::
       	The raw data size of the object.
       
       Note that `--format=%(raw)` can not be used with `--python`, `--shell`, `--tcl`,
     --`--perl` because the host language may not support arbitrary binary data in the
     -+because the host language may not support arbitrary binary data in the
     - variables of its string type.
     +-`--perl` because the such language may not support arbitrary binary data in their
     ++because the such language may not support arbitrary binary data in their
     + string variable type.
       
       The message in a commit or a tag object is `contents`, from which
      
  4:  d2aeafd0ef3 =  4:  debca156470 [GSOC] ref-filter: use non-const ref_format in *_atom_parser()
  5:  1ca3a42f041 =  5:  cb0df2b8207 [GSOC] ref-filter: add %(rest) atom
  6:  67f1a3cca9a !  6:  9873354930a [GSOC] ref-filter: pass get_object() return value to their callers
     @@ ref-filter.c: static int populate_value(struct ref_array_item *ref, struct strbu
       {
       	struct object *obj;
       	int i;
     -+	int ret = 0;
     ++	int ret;
       	struct object_info empty = OBJECT_INFO_INIT;
       
       	CALLOC_ARRAY(ref->value, used_atom_cnt);
     @@ ref-filter.c: int format_ref_array_item(struct ref_array_item *info,
       {
       	const char *cp, *sp, *ep;
       	struct ref_formatting_state state = REF_FORMATTING_STATE_INIT;
     -+	int ret = 0;
     ++	int ret;
       
       	state.quote_style = format->quote_style;
       	push_stack_element(&state.stack);
  7:  2a48a48e81c =  7:  e592c21ea1d [GSOC] ref-filter: introduce free_ref_array_item_value() function
  8:  be55005be75 =  8:  b6e7757de4c [GSOC] ref-filter: add cat_file_mode in struct ref_format
  9:  937f88b7837 =  9:  85686187d49 [GSOC] ref-filter: modify the error message and value in get_object
 10:  45657499c55 = 10:  6037295ee58 [GSOC] cat-file: add has_object_file() check
 11:  bf5c0a017ad = 11:  32e1ca56389 [GSOC] cat-file: change batch_objects parameter name
 12:  370101ba65f ! 12:  9a1f0732940 [GSOC] cat-file: reuse ref-filter logic
     @@ builtin/cat-file.c: static void batch_write(struct batch_options *opt, const voi
      -		fflush(stdout);
      -		return;
      -	}
     -+	int ret = 0;
     ++	int ret;
      +	struct strbuf err = STRBUF_INIT;
      +	struct ref_array_item item = { data->oid, data->rest };
       
     @@ builtin/cat-file.c: static void batch_write(struct batch_options *opt, const voi
      -		print_object_or_die(opt, data);
      -		batch_write(opt, "\n", 1);
      +	ret = format_ref_array_item(&item, &opt->format, scratch, &err);
     -+	if (ret < 0) {
     ++	if (ret < 0)
      +		die("%s\n", err.buf);
     -+	} if (ret) {
     ++	if (ret) {
      +		/* ret > 0 means when the object corresponding to oid
      +		 * cannot be found in format_ref_array_item(), we only print
      +		 * the error message.
 13:  69eef47065d ! 13:  3fb47584924 [GSOC] cat-file: reuse err buf in batch_object_write()
     @@ builtin/cat-file.c: static void batch_write(struct batch_options *opt, const voi
       			       struct batch_options *opt,
       			       struct expand_data *data)
       {
     - 	int ret = 0;
     + 	int ret;
      -	struct strbuf err = STRBUF_INIT;
       	struct ref_array_item item = { data->oid, data->rest };
       
     @@ builtin/cat-file.c: static void batch_write(struct batch_options *opt, const voi
       
      -	ret = format_ref_array_item(&item, &opt->format, scratch, &err);
      +	ret = format_ref_array_item(&item, &opt->format, scratch, err);
     - 	if (ret < 0) {
     + 	if (ret < 0)
      -		die("%s\n", err.buf);
      +		die("%s\n", err->buf);
     - 	} if (ret) {
     + 	if (ret) {
       		/* ret > 0 means when the object corresponding to oid
       		 * cannot be found in format_ref_array_item(), we only print
       		 * the error message.
 14:  a7ac037a946 = 14:  e0b1a05e711 [GSOC] cat-file: re-implement --textconv, --filters options
 15:  843de8864a9 = 15:  891d62fd93f [GSOC] ref-filter: remove grab_oid() function

-- 
gitgitgadget

^ permalink raw reply	[flat|nested] 121+ messages in thread

* [PATCH v6 01/15] [GSOC] ref-filter: add obj-type check in grab contents
  2021-06-27 12:35         ` [PATCH v6 00/15] [GSOC][RFC] cat-file: reuse ref-filter logic ZheNing Hu via GitGitGadget
@ 2021-06-27 12:35           ` ZheNing Hu via GitGitGadget
  2021-06-27 12:35           ` [PATCH v6 02/15] [GSOC] ref-filter: add %(raw) atom ZheNing Hu via GitGitGadget
                             ` (14 subsequent siblings)
  15 siblings, 0 replies; 121+ messages in thread
From: ZheNing Hu via GitGitGadget @ 2021-06-27 12:35 UTC (permalink / raw)
  To: git
  Cc: Junio C Hamano, Christian Couder, Hariom Verma, Bagas Sanjaya,
	Jeff King, Ævar Arnfjörð Bjarmason, ZheNing Hu,
	ZheNing Hu

From: ZheNing Hu <adlternative@gmail.com>

Only tag and commit objects use `grab_sub_body_contents()` to grab
object contents in the current codebase.  We want to teach the
function to also handle blobs and trees to get their raw data,
without parsing a blob (whose contents looks like a commit or a tag)
incorrectly as a commit or a tag.

Skip the block of code that is specific to handling commits and tags
early when the given object is of a wrong type to help later
addition to handle other types of objects in this function.

Mentored-by: Christian Couder <christian.couder@gmail.com>
Mentored-by: Hariom Verma <hariom18599@gmail.com>
Helped-by: Junio C Hamano <gitster@pobox.com>
Signed-off-by: ZheNing Hu <adlternative@gmail.com>
---
 ref-filter.c | 24 +++++++++++++++---------
 1 file changed, 15 insertions(+), 9 deletions(-)

diff --git a/ref-filter.c b/ref-filter.c
index 4db0e40ff4c..5cee6512fba 100644
--- a/ref-filter.c
+++ b/ref-filter.c
@@ -1356,11 +1356,12 @@ static void append_lines(struct strbuf *out, const char *buf, unsigned long size
 }
 
 /* See grab_values */
-static void grab_sub_body_contents(struct atom_value *val, int deref, void *buf)
+static void grab_sub_body_contents(struct atom_value *val, int deref, struct expand_data *data)
 {
 	int i;
 	const char *subpos = NULL, *bodypos = NULL, *sigpos = NULL;
 	size_t sublen = 0, bodylen = 0, nonsiglen = 0, siglen = 0;
+	void *buf = data->content;
 
 	for (i = 0; i < used_atom_cnt; i++) {
 		struct used_atom *atom = &used_atom[i];
@@ -1371,10 +1372,13 @@ static void grab_sub_body_contents(struct atom_value *val, int deref, void *buf)
 			continue;
 		if (deref)
 			name++;
-		if (strcmp(name, "body") &&
-		    !starts_with(name, "subject") &&
-		    !starts_with(name, "trailers") &&
-		    !starts_with(name, "contents"))
+
+		if ((data->type != OBJ_TAG &&
+		     data->type != OBJ_COMMIT) ||
+		    (strcmp(name, "body") &&
+		     !starts_with(name, "subject") &&
+		     !starts_with(name, "trailers") &&
+		     !starts_with(name, "contents")))
 			continue;
 		if (!subpos)
 			find_subpos(buf,
@@ -1438,17 +1442,19 @@ static void fill_missing_values(struct atom_value *val)
  * pointed at by the ref itself; otherwise it is the object the
  * ref (which is a tag) refers to.
  */
-static void grab_values(struct atom_value *val, int deref, struct object *obj, void *buf)
+static void grab_values(struct atom_value *val, int deref, struct object *obj, struct expand_data *data)
 {
+	void *buf = data->content;
+
 	switch (obj->type) {
 	case OBJ_TAG:
 		grab_tag_values(val, deref, obj);
-		grab_sub_body_contents(val, deref, buf);
+		grab_sub_body_contents(val, deref, data);
 		grab_person("tagger", val, deref, buf);
 		break;
 	case OBJ_COMMIT:
 		grab_commit_values(val, deref, obj);
-		grab_sub_body_contents(val, deref, buf);
+		grab_sub_body_contents(val, deref, data);
 		grab_person("author", val, deref, buf);
 		grab_person("committer", val, deref, buf);
 		break;
@@ -1678,7 +1684,7 @@ static int get_object(struct ref_array_item *ref, int deref, struct object **obj
 			return strbuf_addf_ret(err, -1, _("parse_object_buffer failed on %s for %s"),
 					       oid_to_hex(&oi->oid), ref->refname);
 		}
-		grab_values(ref->value, deref, *obj, oi->content);
+		grab_values(ref->value, deref, *obj, oi);
 	}
 
 	grab_common_values(ref->value, deref, oi);
-- 
gitgitgadget


^ permalink raw reply	[flat|nested] 121+ messages in thread

* [PATCH v6 02/15] [GSOC] ref-filter: add %(raw) atom
  2021-06-27 12:35         ` [PATCH v6 00/15] [GSOC][RFC] cat-file: reuse ref-filter logic ZheNing Hu via GitGitGadget
  2021-06-27 12:35           ` [PATCH v6 01/15] [GSOC] ref-filter: add obj-type check in grab contents ZheNing Hu via GitGitGadget
@ 2021-06-27 12:35           ` ZheNing Hu via GitGitGadget
  2021-06-28  6:49             ` Bagas Sanjaya
  2021-06-27 12:35           ` [PATCH v6 03/15] [GSOC] ref-filter: --format=%(raw) re-support --perl ZheNing Hu via GitGitGadget
                             ` (13 subsequent siblings)
  15 siblings, 1 reply; 121+ messages in thread
From: ZheNing Hu via GitGitGadget @ 2021-06-27 12:35 UTC (permalink / raw)
  To: git
  Cc: Junio C Hamano, Christian Couder, Hariom Verma, Bagas Sanjaya,
	Jeff King, Ævar Arnfjörð Bjarmason, ZheNing Hu,
	ZheNing Hu

From: ZheNing Hu <adlternative@gmail.com>

Add new formatting option `%(raw)`, which will print the raw
object data without any changes. It will help further to migrate
all cat-file formatting logic from cat-file to ref-filter.

The raw data of blob, tree objects may contain '\0', but most of
the logic in `ref-filter` depends on the output of the atom being
text (specifically, no embedded NULs in it).

E.g. `quote_formatting()` use `strbuf_addstr()` or `*._quote_buf()`
add the data to the buffer. The raw data of a tree object is
`100644 one\0...`, only the `100644 one` will be added to the buffer,
which is incorrect.

Therefore, we need to find a way to record the length of the
atom_value's member `s`. Although strbuf can already record the
string and its length, if we want to replace the type of atom_value's
member `s` with strbuf, many places in ref-filter that are filled
with dynamically allocated mermory in `v->s` are not easy to replace.
At the same time, we need to check if `v->s == NULL` in
populate_value(), and strbuf cannot easily distinguish NULL and empty
strings, but c-style "const char *" can do it. So add a new member in
`struct atom_value`: `s_size`, which can record raw object size, it
can help us add raw object data to the buffer or compare two buffers
which contain raw object data.

Note that `--format=%(raw)` cannot be used with `--python`, `--shell`,
`--tcl`, and `--perl` because if the binary raw data is passed to a
variable in such languages, these may not support arbitrary binary data
in their string variable type.

Mentored-by: Christian Couder <christian.couder@gmail.com>
Mentored-by: Hariom Verma <hariom18599@gmail.com>
Helped-by: Bagas Sanjaya <bagasdotme@gmail.com>
Helped-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Helped-by: Felipe Contreras <felipe.contreras@gmail.com>
Helped-by: Phillip Wood <phillip.wood@dunelm.org.uk>
Helped-by: Junio C Hamano <gitster@pobox.com>
Based-on-patch-by: Olga Telezhnaya <olyatelezhnaya@gmail.com>
Signed-off-by: ZheNing Hu <adlternative@gmail.com>
---
 Documentation/git-for-each-ref.txt |   9 ++
 ref-filter.c                       | 139 +++++++++++++++----
 t/t6300-for-each-ref.sh            | 216 +++++++++++++++++++++++++++++
 3 files changed, 337 insertions(+), 27 deletions(-)

diff --git a/Documentation/git-for-each-ref.txt b/Documentation/git-for-each-ref.txt
index 2ae2478de70..3727a5ffee7 100644
--- a/Documentation/git-for-each-ref.txt
+++ b/Documentation/git-for-each-ref.txt
@@ -235,6 +235,15 @@ and `date` to extract the named component.  For email fields (`authoremail`,
 without angle brackets, and `:localpart` to get the part before the `@` symbol
 out of the trimmed email.
 
+The raw data in an object is `raw`.
+
+raw:size::
+	The raw data size of the object.
+
+Note that `--format=%(raw)` can not be used with `--python`, `--shell`, `--tcl`,
+`--perl` because the such language may not support arbitrary binary data in their
+string variable type.
+
 The message in a commit or a tag object is `contents`, from which
 `contents:<part>` can be used to extract various parts out of:
 
diff --git a/ref-filter.c b/ref-filter.c
index 5cee6512fba..7822be90307 100644
--- a/ref-filter.c
+++ b/ref-filter.c
@@ -144,6 +144,7 @@ enum atom_type {
 	ATOM_BODY,
 	ATOM_TRAILERS,
 	ATOM_CONTENTS,
+	ATOM_RAW,
 	ATOM_UPSTREAM,
 	ATOM_PUSH,
 	ATOM_SYMREF,
@@ -189,6 +190,9 @@ static struct used_atom {
 			struct process_trailer_options trailer_opts;
 			unsigned int nlines;
 		} contents;
+		struct {
+			enum { RAW_BARE, RAW_LENGTH } option;
+		} raw_data;
 		struct {
 			cmp_status cmp_status;
 			const char *str;
@@ -426,6 +430,18 @@ static int contents_atom_parser(const struct ref_format *format, struct used_ato
 	return 0;
 }
 
+static int raw_atom_parser(const struct ref_format *format, struct used_atom *atom,
+				const char *arg, struct strbuf *err)
+{
+	if (!arg)
+		atom->u.raw_data.option = RAW_BARE;
+	else if (!strcmp(arg, "size"))
+		atom->u.raw_data.option = RAW_LENGTH;
+	else
+		return strbuf_addf_ret(err, -1, _("unrecognized %%(raw) argument: %s"), arg);
+	return 0;
+}
+
 static int oid_atom_parser(const struct ref_format *format, struct used_atom *atom,
 			   const char *arg, struct strbuf *err)
 {
@@ -586,6 +602,7 @@ static struct {
 	[ATOM_BODY] = { "body", SOURCE_OBJ, FIELD_STR, body_atom_parser },
 	[ATOM_TRAILERS] = { "trailers", SOURCE_OBJ, FIELD_STR, trailers_atom_parser },
 	[ATOM_CONTENTS] = { "contents", SOURCE_OBJ, FIELD_STR, contents_atom_parser },
+	[ATOM_RAW] = { "raw", SOURCE_OBJ, FIELD_STR, raw_atom_parser },
 	[ATOM_UPSTREAM] = { "upstream", SOURCE_NONE, FIELD_STR, remote_ref_atom_parser },
 	[ATOM_PUSH] = { "push", SOURCE_NONE, FIELD_STR, remote_ref_atom_parser },
 	[ATOM_SYMREF] = { "symref", SOURCE_NONE, FIELD_STR, refname_atom_parser },
@@ -620,12 +637,15 @@ struct ref_formatting_state {
 
 struct atom_value {
 	const char *s;
+	size_t s_size;
 	int (*handler)(struct atom_value *atomv, struct ref_formatting_state *state,
 		       struct strbuf *err);
 	uintmax_t value; /* used for sorting when not FIELD_STR */
 	struct used_atom *atom;
 };
 
+#define ATOM_VALUE_S_SIZE_INIT (-1)
+
 /*
  * Used to parse format string and sort specifiers
  */
@@ -644,13 +664,6 @@ static int parse_ref_filter_atom(const struct ref_format *format,
 		return strbuf_addf_ret(err, -1, _("malformed field name: %.*s"),
 				       (int)(ep-atom), atom);
 
-	/* Do we have the atom already used elsewhere? */
-	for (i = 0; i < used_atom_cnt; i++) {
-		int len = strlen(used_atom[i].name);
-		if (len == ep - atom && !memcmp(used_atom[i].name, atom, len))
-			return i;
-	}
-
 	/*
 	 * If the atom name has a colon, strip it and everything after
 	 * it off - it specifies the format for this entry, and
@@ -660,6 +673,13 @@ static int parse_ref_filter_atom(const struct ref_format *format,
 	arg = memchr(sp, ':', ep - sp);
 	atom_len = (arg ? arg : ep) - sp;
 
+	/* Do we have the atom already used elsewhere? */
+	for (i = 0; i < used_atom_cnt; i++) {
+		int len = strlen(used_atom[i].name);
+		if (len == ep - atom && !memcmp(used_atom[i].name, atom, len))
+			return i;
+	}
+
 	/* Is the atom a valid one? */
 	for (i = 0; i < ARRAY_SIZE(valid_atom); i++) {
 		int len = strlen(valid_atom[i].name);
@@ -709,11 +729,14 @@ static int parse_ref_filter_atom(const struct ref_format *format,
 	return at;
 }
 
-static void quote_formatting(struct strbuf *s, const char *str, int quote_style)
+static void quote_formatting(struct strbuf *s, const char *str, size_t len, int quote_style)
 {
 	switch (quote_style) {
 	case QUOTE_NONE:
-		strbuf_addstr(s, str);
+		if (len != ATOM_VALUE_S_SIZE_INIT)
+			strbuf_add(s, str, len);
+		else
+			strbuf_addstr(s, str);
 		break;
 	case QUOTE_SHELL:
 		sq_quote_buf(s, str);
@@ -740,9 +763,12 @@ static int append_atom(struct atom_value *v, struct ref_formatting_state *state,
 	 * encountered.
 	 */
 	if (!state->stack->prev)
-		quote_formatting(&state->stack->output, v->s, state->quote_style);
+		quote_formatting(&state->stack->output, v->s, v->s_size, state->quote_style);
 	else
-		strbuf_addstr(&state->stack->output, v->s);
+		if (v->s_size != ATOM_VALUE_S_SIZE_INIT)
+			strbuf_add(&state->stack->output, v->s, v->s_size);
+		else
+			strbuf_addstr(&state->stack->output, v->s);
 	return 0;
 }
 
@@ -842,21 +868,23 @@ static int if_atom_handler(struct atom_value *atomv, struct ref_formatting_state
 	return 0;
 }
 
-static int is_empty(const char *s)
+static int is_empty(struct strbuf *buf)
 {
-	while (*s != '\0') {
-		if (!isspace(*s))
-			return 0;
-		s++;
-	}
-	return 1;
-}
+	const char *cur = buf->buf;
+	const char *end = buf->buf + buf->len;
+
+	while (cur != end && (isspace(*cur)))
+		cur++;
+
+	return cur == end;
+ }
 
 static int then_atom_handler(struct atom_value *atomv, struct ref_formatting_state *state,
 			     struct strbuf *err)
 {
 	struct ref_formatting_stack *cur = state->stack;
 	struct if_then_else *if_then_else = NULL;
+	size_t str_len = 0;
 
 	if (cur->at_end == if_then_else_handler)
 		if_then_else = (struct if_then_else *)cur->at_end_data;
@@ -867,18 +895,22 @@ static int then_atom_handler(struct atom_value *atomv, struct ref_formatting_sta
 	if (if_then_else->else_atom_seen)
 		return strbuf_addf_ret(err, -1, _("format: %%(then) atom used after %%(else)"));
 	if_then_else->then_atom_seen = 1;
+	if (if_then_else->str)
+		str_len = strlen(if_then_else->str);
 	/*
 	 * If the 'equals' or 'notequals' attribute is used then
 	 * perform the required comparison. If not, only non-empty
 	 * strings satisfy the 'if' condition.
 	 */
 	if (if_then_else->cmp_status == COMPARE_EQUAL) {
-		if (!strcmp(if_then_else->str, cur->output.buf))
+		if (str_len == cur->output.len &&
+		    !memcmp(if_then_else->str, cur->output.buf, cur->output.len))
 			if_then_else->condition_satisfied = 1;
 	} else if (if_then_else->cmp_status == COMPARE_UNEQUAL) {
-		if (strcmp(if_then_else->str, cur->output.buf))
+		if (str_len != cur->output.len ||
+		    memcmp(if_then_else->str, cur->output.buf, cur->output.len))
 			if_then_else->condition_satisfied = 1;
-	} else if (cur->output.len && !is_empty(cur->output.buf))
+	} else if (cur->output.len && !is_empty(&cur->output))
 		if_then_else->condition_satisfied = 1;
 	strbuf_reset(&cur->output);
 	return 0;
@@ -924,7 +956,7 @@ static int end_atom_handler(struct atom_value *atomv, struct ref_formatting_stat
 	 * only on the topmost supporting atom.
 	 */
 	if (!current->prev->prev) {
-		quote_formatting(&s, current->output.buf, state->quote_style);
+		quote_formatting(&s, current->output.buf, current->output.len, state->quote_style);
 		strbuf_swap(&current->output, &s);
 	}
 	strbuf_release(&s);
@@ -974,6 +1006,10 @@ int verify_ref_format(struct ref_format *format)
 		at = parse_ref_filter_atom(format, sp + 2, ep, &err);
 		if (at < 0)
 			die("%s", err.buf);
+		if (format->quote_style && used_atom[at].atom_type == ATOM_RAW &&
+		    used_atom[at].u.raw_data.option == RAW_BARE)
+			die(_("--format=%.*s cannot be used with"
+			      "--python, --shell, --tcl, --perl"), (int)(ep - sp - 2), sp + 2);
 		cp = ep + 1;
 
 		if (skip_prefix(used_atom[at].name, "color:", &color))
@@ -1362,17 +1398,29 @@ static void grab_sub_body_contents(struct atom_value *val, int deref, struct exp
 	const char *subpos = NULL, *bodypos = NULL, *sigpos = NULL;
 	size_t sublen = 0, bodylen = 0, nonsiglen = 0, siglen = 0;
 	void *buf = data->content;
+	unsigned long buf_size = data->size;
 
 	for (i = 0; i < used_atom_cnt; i++) {
 		struct used_atom *atom = &used_atom[i];
 		const char *name = atom->name;
 		struct atom_value *v = &val[i];
+		enum atom_type atom_type = atom->atom_type;
 
 		if (!!deref != (*name == '*'))
 			continue;
 		if (deref)
 			name++;
 
+		if (atom_type == ATOM_RAW) {
+			if (atom->u.raw_data.option == RAW_BARE) {
+				v->s = xmemdupz(buf, buf_size);
+				v->s_size = buf_size;
+			} else if (atom->u.raw_data.option == RAW_LENGTH) {
+				v->s = xstrfmt("%"PRIuMAX, (uintmax_t)buf_size);
+			}
+			continue;
+		}
+
 		if ((data->type != OBJ_TAG &&
 		     data->type != OBJ_COMMIT) ||
 		    (strcmp(name, "body") &&
@@ -1460,9 +1508,11 @@ static void grab_values(struct atom_value *val, int deref, struct object *obj, s
 		break;
 	case OBJ_TREE:
 		/* grab_tree_values(val, deref, obj, buf, sz); */
+		grab_sub_body_contents(val, deref, data);
 		break;
 	case OBJ_BLOB:
 		/* grab_blob_values(val, deref, obj, buf, sz); */
+		grab_sub_body_contents(val, deref, data);
 		break;
 	default:
 		die("Eh?  Object of type %d?", obj->type);
@@ -1766,6 +1816,7 @@ static int populate_value(struct ref_array_item *ref, struct strbuf *err)
 		const char *refname;
 		struct branch *branch = NULL;
 
+		v->s_size = ATOM_VALUE_S_SIZE_INIT;
 		v->handler = append_atom;
 		v->atom = atom;
 
@@ -2369,6 +2420,19 @@ static int compare_detached_head(struct ref_array_item *a, struct ref_array_item
 	return 0;
 }
 
+static int memcasecmp(const void *vs1, const void *vs2, size_t n)
+{
+	const char *s1 = vs1, *s2 = vs2;
+	const char *end = s1 + n;
+
+	for (; s1 < end; s1++, s2++) {
+		int diff = tolower(*s1) - tolower(*s2);
+		if (diff)
+			return diff;
+	}
+	return 0;
+}
+
 static int cmp_ref_sorting(struct ref_sorting *s, struct ref_array_item *a, struct ref_array_item *b)
 {
 	struct atom_value *va, *vb;
@@ -2389,10 +2453,30 @@ static int cmp_ref_sorting(struct ref_sorting *s, struct ref_array_item *a, stru
 	} else if (s->sort_flags & REF_SORTING_VERSION) {
 		cmp = versioncmp(va->s, vb->s);
 	} else if (cmp_type == FIELD_STR) {
-		int (*cmp_fn)(const char *, const char *);
-		cmp_fn = s->sort_flags & REF_SORTING_ICASE
-			? strcasecmp : strcmp;
-		cmp = cmp_fn(va->s, vb->s);
+		if (va->s_size == ATOM_VALUE_S_SIZE_INIT &&
+		    vb->s_size == ATOM_VALUE_S_SIZE_INIT) {
+			int (*cmp_fn)(const char *, const char *);
+			cmp_fn = s->sort_flags & REF_SORTING_ICASE
+				? strcasecmp : strcmp;
+			cmp = cmp_fn(va->s, vb->s);
+		} else {
+			size_t a_size = va->s_size == ATOM_VALUE_S_SIZE_INIT ?
+					strlen(va->s) : va->s_size;
+			size_t b_size = vb->s_size == ATOM_VALUE_S_SIZE_INIT ?
+					strlen(vb->s) : vb->s_size;
+			int (*cmp_fn)(const void *, const void *, size_t);
+			cmp_fn = s->sort_flags & REF_SORTING_ICASE
+				? memcasecmp : memcmp;
+
+			cmp = cmp_fn(va->s, vb->s, b_size > a_size ?
+				     a_size : b_size);
+			if (!cmp) {
+				if (a_size > b_size)
+					cmp = 1;
+				else if (a_size < b_size)
+					cmp = -1;
+			}
+		}
 	} else {
 		if (va->value < vb->value)
 			cmp = -1;
@@ -2492,6 +2576,7 @@ int format_ref_array_item(struct ref_array_item *info,
 	}
 	if (format->need_color_reset_at_eol) {
 		struct atom_value resetv;
+		resetv.s_size = ATOM_VALUE_S_SIZE_INIT;
 		resetv.s = GIT_COLOR_RESET;
 		if (append_atom(&resetv, &state, error_buf)) {
 			pop_stack_element(&state.stack);
diff --git a/t/t6300-for-each-ref.sh b/t/t6300-for-each-ref.sh
index 9e0214076b4..18554f62d94 100755
--- a/t/t6300-for-each-ref.sh
+++ b/t/t6300-for-each-ref.sh
@@ -130,6 +130,8 @@ test_atom head parent:short=10 ''
 test_atom head numparent 0
 test_atom head object ''
 test_atom head type ''
+test_atom head raw "$(git cat-file commit refs/heads/main)
+"
 test_atom head '*objectname' ''
 test_atom head '*objecttype' ''
 test_atom head author 'A U Thor <author@example.com> 1151968724 +0200'
@@ -221,6 +223,15 @@ test_atom tag contents 'Tagging at 1151968727
 '
 test_atom tag HEAD ' '
 
+test_expect_success 'basic atom: refs/tags/testtag *raw' '
+	git cat-file commit refs/tags/testtag^{} >expected &&
+	git for-each-ref --format="%(*raw)" refs/tags/testtag >actual &&
+	sanitize_pgp <expected >expected.clean &&
+	echo >>expected.clean &&
+	sanitize_pgp <actual >actual.clean &&
+	test_cmp expected.clean actual.clean
+'
+
 test_expect_success 'Check invalid atoms names are errors' '
 	test_must_fail git for-each-ref --format="%(INVALID)" refs/heads
 '
@@ -686,6 +697,15 @@ test_atom refs/tags/signed-empty contents:body ''
 test_atom refs/tags/signed-empty contents:signature "$sig"
 test_atom refs/tags/signed-empty contents "$sig"
 
+test_expect_success GPG 'basic atom: refs/tags/signed-empty raw' '
+	git cat-file tag refs/tags/signed-empty >expected &&
+	git for-each-ref --format="%(raw)" refs/tags/signed-empty >actual &&
+	sanitize_pgp <expected >expected.clean &&
+	echo >>expected.clean &&
+	sanitize_pgp <actual >actual.clean &&
+	test_cmp expected.clean actual.clean
+'
+
 test_atom refs/tags/signed-short subject 'subject line'
 test_atom refs/tags/signed-short subject:sanitize 'subject-line'
 test_atom refs/tags/signed-short contents:subject 'subject line'
@@ -695,6 +715,15 @@ test_atom refs/tags/signed-short contents:signature "$sig"
 test_atom refs/tags/signed-short contents "subject line
 $sig"
 
+test_expect_success GPG 'basic atom: refs/tags/signed-short raw' '
+	git cat-file tag refs/tags/signed-short >expected &&
+	git for-each-ref --format="%(raw)" refs/tags/signed-short >actual &&
+	sanitize_pgp <expected >expected.clean &&
+	echo >>expected.clean &&
+	sanitize_pgp <actual >actual.clean &&
+	test_cmp expected.clean actual.clean
+'
+
 test_atom refs/tags/signed-long subject 'subject line'
 test_atom refs/tags/signed-long subject:sanitize 'subject-line'
 test_atom refs/tags/signed-long contents:subject 'subject line'
@@ -708,6 +737,15 @@ test_atom refs/tags/signed-long contents "subject line
 body contents
 $sig"
 
+test_expect_success GPG 'basic atom: refs/tags/signed-long raw' '
+	git cat-file tag refs/tags/signed-long >expected &&
+	git for-each-ref --format="%(raw)" refs/tags/signed-long >actual &&
+	sanitize_pgp <expected >expected.clean &&
+	echo >>expected.clean &&
+	sanitize_pgp <actual >actual.clean &&
+	test_cmp expected.clean actual.clean
+'
+
 test_expect_success 'set up refs pointing to tree and blob' '
 	git update-ref refs/mytrees/first refs/heads/main^{tree} &&
 	git update-ref refs/myblobs/first refs/heads/main:one
@@ -720,6 +758,16 @@ test_atom refs/mytrees/first contents:body ""
 test_atom refs/mytrees/first contents:signature ""
 test_atom refs/mytrees/first contents ""
 
+test_expect_success 'basic atom: refs/mytrees/first raw' '
+	git cat-file tree refs/mytrees/first >expected &&
+	echo >>expected &&
+	git for-each-ref --format="%(raw)" refs/mytrees/first >actual &&
+	test_cmp expected actual &&
+	git cat-file -s refs/mytrees/first >expected &&
+	git for-each-ref --format="%(raw:size)" refs/mytrees/first >actual &&
+	test_cmp expected actual
+'
+
 test_atom refs/myblobs/first subject ""
 test_atom refs/myblobs/first contents:subject ""
 test_atom refs/myblobs/first body ""
@@ -727,6 +775,174 @@ test_atom refs/myblobs/first contents:body ""
 test_atom refs/myblobs/first contents:signature ""
 test_atom refs/myblobs/first contents ""
 
+test_expect_success 'basic atom: refs/myblobs/first raw' '
+	git cat-file blob refs/myblobs/first >expected &&
+	echo >>expected &&
+	git for-each-ref --format="%(raw)" refs/myblobs/first >actual &&
+	test_cmp expected actual &&
+	git cat-file -s refs/myblobs/first >expected &&
+	git for-each-ref --format="%(raw:size)" refs/myblobs/first >actual &&
+	test_cmp expected actual
+'
+
+test_expect_success 'set up refs pointing to binary blob' '
+	printf "a\0b\0c" >blob1 &&
+	printf "a\0c\0b" >blob2 &&
+	printf "\0a\0b\0c" >blob3 &&
+	printf "abc" >blob4 &&
+	printf "\0 \0 \0 " >blob5 &&
+	printf "\0 \0a\0 " >blob6 &&
+	printf "  " >blob7 &&
+	>blob8 &&
+	obj=$(git hash-object -w blob1) &&
+	git update-ref refs/myblobs/blob1 "$obj" &&
+	obj=$(git hash-object -w blob2) &&
+	git update-ref refs/myblobs/blob2 "$obj" &&
+	obj=$(git hash-object -w blob3) &&
+	git update-ref refs/myblobs/blob3 "$obj" &&
+	obj=$(git hash-object -w blob4) &&
+	git update-ref refs/myblobs/blob4 "$obj" &&
+	obj=$(git hash-object -w blob5) &&
+	git update-ref refs/myblobs/blob5 "$obj" &&
+	obj=$(git hash-object -w blob6) &&
+	git update-ref refs/myblobs/blob6 "$obj" &&
+	obj=$(git hash-object -w blob7) &&
+	git update-ref refs/myblobs/blob7 "$obj" &&
+	obj=$(git hash-object -w blob8) &&
+	git update-ref refs/myblobs/blob8 "$obj"
+'
+
+test_expect_success 'Verify sorts with raw' '
+	cat >expected <<-EOF &&
+	refs/myblobs/blob8
+	refs/myblobs/blob5
+	refs/myblobs/blob6
+	refs/myblobs/blob3
+	refs/myblobs/blob7
+	refs/mytrees/first
+	refs/myblobs/first
+	refs/myblobs/blob1
+	refs/myblobs/blob2
+	refs/myblobs/blob4
+	refs/heads/main
+	EOF
+	git for-each-ref --format="%(refname)" --sort=raw \
+		refs/heads/main refs/myblobs/ refs/mytrees/first >actual &&
+	test_cmp expected actual
+'
+
+test_expect_success 'Verify sorts with raw:size' '
+	cat >expected <<-EOF &&
+	refs/myblobs/blob8
+	refs/myblobs/first
+	refs/myblobs/blob7
+	refs/heads/main
+	refs/myblobs/blob4
+	refs/myblobs/blob1
+	refs/myblobs/blob2
+	refs/myblobs/blob3
+	refs/myblobs/blob5
+	refs/myblobs/blob6
+	refs/mytrees/first
+	EOF
+	git for-each-ref --format="%(refname)" --sort=raw:size \
+		refs/heads/main refs/myblobs/ refs/mytrees/first >actual &&
+	test_cmp expected actual
+'
+
+test_expect_success 'validate raw atom with %(if:equals)' '
+	cat >expected <<-EOF &&
+	not equals
+	not equals
+	not equals
+	not equals
+	not equals
+	not equals
+	refs/myblobs/blob4
+	not equals
+	not equals
+	not equals
+	not equals
+	not equals
+	EOF
+	git for-each-ref --format="%(if:equals=abc)%(raw)%(then)%(refname)%(else)not equals%(end)" \
+		refs/myblobs/ refs/heads/ >actual &&
+	test_cmp expected actual
+'
+
+test_expect_success 'validate raw atom with %(if:notequals)' '
+	cat >expected <<-EOF &&
+	refs/heads/ambiguous
+	refs/heads/main
+	refs/heads/newtag
+	refs/myblobs/blob1
+	refs/myblobs/blob2
+	refs/myblobs/blob3
+	equals
+	refs/myblobs/blob5
+	refs/myblobs/blob6
+	refs/myblobs/blob7
+	refs/myblobs/blob8
+	refs/myblobs/first
+	EOF
+	git for-each-ref --format="%(if:notequals=abc)%(raw)%(then)%(refname)%(else)equals%(end)" \
+		refs/myblobs/ refs/heads/ >actual &&
+	test_cmp expected actual
+'
+
+test_expect_success 'empty raw refs with %(if)' '
+	cat >expected <<-EOF &&
+	refs/myblobs/blob1 not empty
+	refs/myblobs/blob2 not empty
+	refs/myblobs/blob3 not empty
+	refs/myblobs/blob4 not empty
+	refs/myblobs/blob5 not empty
+	refs/myblobs/blob6 not empty
+	refs/myblobs/blob7 empty
+	refs/myblobs/blob8 empty
+	refs/myblobs/first not empty
+	EOF
+	git for-each-ref --format="%(refname) %(if)%(raw)%(then)not empty%(else)empty%(end)" \
+		refs/myblobs/ >actual &&
+	test_cmp expected actual
+'
+
+test_expect_success '%(raw) with --python must fail' '
+	test_must_fail git for-each-ref --format="%(raw)" --python
+'
+
+test_expect_success '%(raw) with --tcl must fail' '
+	test_must_fail git for-each-ref --format="%(raw)" --tcl
+'
+
+test_expect_success '%(raw) with --perl must fail' '
+	test_must_fail git for-each-ref --format="%(raw)" --perl
+'
+
+test_expect_success '%(raw) with --shell must fail' '
+	test_must_fail git for-each-ref --format="%(raw)" --shell
+'
+
+test_expect_success '%(raw) with --shell and --sort=raw must fail' '
+	test_must_fail git for-each-ref --format="%(raw)" --sort=raw --shell
+'
+
+test_expect_success '%(raw:size) with --shell' '
+	git for-each-ref --format="%(raw:size)" | while read line
+	do
+		echo "'\''$line'\''" >>expect
+	done &&
+	git for-each-ref --format="%(raw:size)" --shell >actual &&
+	test_cmp expect actual
+'
+
+test_expect_success 'for-each-ref --format compare with cat-file --batch' '
+	git rev-parse refs/mytrees/first | git cat-file --batch >expected &&
+	git for-each-ref --format="%(objectname) %(objecttype) %(objectsize)
+%(raw)" refs/mytrees/first >actual &&
+	test_cmp expected actual
+'
+
 test_expect_success 'set up multiple-sort tags' '
 	for when in 100000 200000
 	do
-- 
gitgitgadget


^ permalink raw reply	[flat|nested] 121+ messages in thread

* [PATCH v6 03/15] [GSOC] ref-filter: --format=%(raw) re-support --perl
  2021-06-27 12:35         ` [PATCH v6 00/15] [GSOC][RFC] cat-file: reuse ref-filter logic ZheNing Hu via GitGitGadget
  2021-06-27 12:35           ` [PATCH v6 01/15] [GSOC] ref-filter: add obj-type check in grab contents ZheNing Hu via GitGitGadget
  2021-06-27 12:35           ` [PATCH v6 02/15] [GSOC] ref-filter: add %(raw) atom ZheNing Hu via GitGitGadget
@ 2021-06-27 12:35           ` ZheNing Hu via GitGitGadget
  2021-06-27 12:35           ` [PATCH v6 04/15] [GSOC] ref-filter: use non-const ref_format in *_atom_parser() ZheNing Hu via GitGitGadget
                             ` (12 subsequent siblings)
  15 siblings, 0 replies; 121+ messages in thread
From: ZheNing Hu via GitGitGadget @ 2021-06-27 12:35 UTC (permalink / raw)
  To: git
  Cc: Junio C Hamano, Christian Couder, Hariom Verma, Bagas Sanjaya,
	Jeff King, Ævar Arnfjörð Bjarmason, ZheNing Hu,
	ZheNing Hu

From: ZheNing Hu <adlternative@gmail.com>

Because the perl language can handle binary data correctly,
add the function perl_quote_buf_with_len(), which can specify
the length of the data and prevent the data from being truncated
at '\0' to help `--format="%(raw)"` re-support `--perl`.

Helped-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: ZheNing Hu <adlternative@gmail.com>
---
 Documentation/git-for-each-ref.txt |  2 +-
 quote.c                            | 17 +++++++++++++++++
 quote.h                            |  1 +
 ref-filter.c                       | 15 +++++++++++----
 t/t6300-for-each-ref.sh            | 19 +++++++++++++++++--
 5 files changed, 47 insertions(+), 7 deletions(-)

diff --git a/Documentation/git-for-each-ref.txt b/Documentation/git-for-each-ref.txt
index 3727a5ffee7..6f970088f46 100644
--- a/Documentation/git-for-each-ref.txt
+++ b/Documentation/git-for-each-ref.txt
@@ -241,7 +241,7 @@ raw:size::
 	The raw data size of the object.
 
 Note that `--format=%(raw)` can not be used with `--python`, `--shell`, `--tcl`,
-`--perl` because the such language may not support arbitrary binary data in their
+because the such language may not support arbitrary binary data in their
 string variable type.
 
 The message in a commit or a tag object is `contents`, from which
diff --git a/quote.c b/quote.c
index 8a3a5e39eb1..26719d21d1e 100644
--- a/quote.c
+++ b/quote.c
@@ -471,6 +471,23 @@ void perl_quote_buf(struct strbuf *sb, const char *src)
 	strbuf_addch(sb, sq);
 }
 
+void perl_quote_buf_with_len(struct strbuf *sb, const char *src, size_t len)
+{
+	const char sq = '\'';
+	const char bq = '\\';
+	const char *c = src;
+	const char *end = src + len;
+
+	strbuf_addch(sb, sq);
+	while (c != end) {
+		if (*c == sq || *c == bq)
+			strbuf_addch(sb, bq);
+		strbuf_addch(sb, *c);
+		c++;
+	}
+	strbuf_addch(sb, sq);
+}
+
 void python_quote_buf(struct strbuf *sb, const char *src)
 {
 	const char sq = '\'';
diff --git a/quote.h b/quote.h
index 768cc6338e2..0fe69e264b0 100644
--- a/quote.h
+++ b/quote.h
@@ -94,6 +94,7 @@ char *quote_path(const char *in, const char *prefix, struct strbuf *out, unsigne
 
 /* quoting as a string literal for other languages */
 void perl_quote_buf(struct strbuf *sb, const char *src);
+void perl_quote_buf_with_len(struct strbuf *sb, const char *src, size_t len);
 void python_quote_buf(struct strbuf *sb, const char *src);
 void tcl_quote_buf(struct strbuf *sb, const char *src);
 void basic_regex_quote_buf(struct strbuf *sb, const char *src);
diff --git a/ref-filter.c b/ref-filter.c
index 7822be90307..797b20ffa61 100644
--- a/ref-filter.c
+++ b/ref-filter.c
@@ -742,7 +742,10 @@ static void quote_formatting(struct strbuf *s, const char *str, size_t len, int
 		sq_quote_buf(s, str);
 		break;
 	case QUOTE_PERL:
-		perl_quote_buf(s, str);
+		if (len != ATOM_VALUE_S_SIZE_INIT)
+			perl_quote_buf_with_len(s, str, len);
+		else
+			perl_quote_buf(s, str);
 		break;
 	case QUOTE_PYTHON:
 		python_quote_buf(s, str);
@@ -1006,10 +1009,14 @@ int verify_ref_format(struct ref_format *format)
 		at = parse_ref_filter_atom(format, sp + 2, ep, &err);
 		if (at < 0)
 			die("%s", err.buf);
-		if (format->quote_style && used_atom[at].atom_type == ATOM_RAW &&
-		    used_atom[at].u.raw_data.option == RAW_BARE)
+
+		if ((format->quote_style == QUOTE_PYTHON ||
+		     format->quote_style == QUOTE_SHELL ||
+		     format->quote_style == QUOTE_TCL) &&
+		     used_atom[at].atom_type == ATOM_RAW &&
+		     used_atom[at].u.raw_data.option == RAW_BARE)
 			die(_("--format=%.*s cannot be used with"
-			      "--python, --shell, --tcl, --perl"), (int)(ep - sp - 2), sp + 2);
+			      "--python, --shell, --tcl"), (int)(ep - sp - 2), sp + 2);
 		cp = ep + 1;
 
 		if (skip_prefix(used_atom[at].name, "color:", &color))
diff --git a/t/t6300-for-each-ref.sh b/t/t6300-for-each-ref.sh
index 18554f62d94..0b66e743c58 100755
--- a/t/t6300-for-each-ref.sh
+++ b/t/t6300-for-each-ref.sh
@@ -915,8 +915,23 @@ test_expect_success '%(raw) with --tcl must fail' '
 	test_must_fail git for-each-ref --format="%(raw)" --tcl
 '
 
-test_expect_success '%(raw) with --perl must fail' '
-	test_must_fail git for-each-ref --format="%(raw)" --perl
+test_expect_success '%(raw) with --perl' '
+	git for-each-ref --format="\$name= %(raw);
+print \"\$name\"" refs/myblobs/blob1 --perl | perl > actual &&
+	cmp blob1 actual &&
+	git for-each-ref --format="\$name= %(raw);
+print \"\$name\"" refs/myblobs/blob3 --perl | perl > actual &&
+	cmp blob3 actual &&
+	git for-each-ref --format="\$name= %(raw);
+print \"\$name\"" refs/myblobs/blob8 --perl | perl > actual &&
+	cmp blob8 actual &&
+	git for-each-ref --format="\$name= %(raw);
+print \"\$name\"" refs/myblobs/first --perl | perl > actual &&
+	cmp one actual &&
+	git cat-file tree refs/mytrees/first > expected &&
+	git for-each-ref --format="\$name= %(raw);
+print \"\$name\"" refs/mytrees/first --perl | perl > actual &&
+	cmp expected actual
 '
 
 test_expect_success '%(raw) with --shell must fail' '
-- 
gitgitgadget


^ permalink raw reply	[flat|nested] 121+ messages in thread

* [PATCH v6 04/15] [GSOC] ref-filter: use non-const ref_format in *_atom_parser()
  2021-06-27 12:35         ` [PATCH v6 00/15] [GSOC][RFC] cat-file: reuse ref-filter logic ZheNing Hu via GitGitGadget
                             ` (2 preceding siblings ...)
  2021-06-27 12:35           ` [PATCH v6 03/15] [GSOC] ref-filter: --format=%(raw) re-support --perl ZheNing Hu via GitGitGadget
@ 2021-06-27 12:35           ` ZheNing Hu via GitGitGadget
  2021-06-27 12:35           ` [PATCH v6 05/15] [GSOC] ref-filter: add %(rest) atom ZheNing Hu via GitGitGadget
                             ` (11 subsequent siblings)
  15 siblings, 0 replies; 121+ messages in thread
From: ZheNing Hu via GitGitGadget @ 2021-06-27 12:35 UTC (permalink / raw)
  To: git
  Cc: Junio C Hamano, Christian Couder, Hariom Verma, Bagas Sanjaya,
	Jeff King, Ævar Arnfjörð Bjarmason, ZheNing Hu,
	ZheNing Hu

From: ZheNing Hu <adlternative@gmail.com>

Use non-const ref_format in *_atom_parser(), which can help us
modify the members of ref_format in *_atom_parser().

Mentored-by: Christian Couder <christian.couder@gmail.com>
Mentored-by: Hariom Verma <hariom18599@gmail.com>
Signed-off-by: ZheNing Hu <adlternative@gmail.com>
---
 builtin/tag.c |  2 +-
 ref-filter.c  | 44 ++++++++++++++++++++++----------------------
 ref-filter.h  |  4 ++--
 3 files changed, 25 insertions(+), 25 deletions(-)

diff --git a/builtin/tag.c b/builtin/tag.c
index 82fcfc09824..452558ec957 100644
--- a/builtin/tag.c
+++ b/builtin/tag.c
@@ -146,7 +146,7 @@ static int verify_tag(const char *name, const char *ref,
 		      const struct object_id *oid, void *cb_data)
 {
 	int flags;
-	const struct ref_format *format = cb_data;
+	struct ref_format *format = cb_data;
 	flags = GPG_VERIFY_VERBOSE;
 
 	if (format->format)
diff --git a/ref-filter.c b/ref-filter.c
index 797b20ffa61..d01a0266fb8 100644
--- a/ref-filter.c
+++ b/ref-filter.c
@@ -226,7 +226,7 @@ static int strbuf_addf_ret(struct strbuf *sb, int ret, const char *fmt, ...)
 	return ret;
 }
 
-static int color_atom_parser(const struct ref_format *format, struct used_atom *atom,
+static int color_atom_parser(struct ref_format *format, struct used_atom *atom,
 			     const char *color_value, struct strbuf *err)
 {
 	if (!color_value)
@@ -264,7 +264,7 @@ static int refname_atom_parser_internal(struct refname_atom *atom, const char *a
 	return 0;
 }
 
-static int remote_ref_atom_parser(const struct ref_format *format, struct used_atom *atom,
+static int remote_ref_atom_parser(struct ref_format *format, struct used_atom *atom,
 				  const char *arg, struct strbuf *err)
 {
 	struct string_list params = STRING_LIST_INIT_DUP;
@@ -311,7 +311,7 @@ static int remote_ref_atom_parser(const struct ref_format *format, struct used_a
 	return 0;
 }
 
-static int objecttype_atom_parser(const struct ref_format *format, struct used_atom *atom,
+static int objecttype_atom_parser(struct ref_format *format, struct used_atom *atom,
 				  const char *arg, struct strbuf *err)
 {
 	if (arg)
@@ -323,7 +323,7 @@ static int objecttype_atom_parser(const struct ref_format *format, struct used_a
 	return 0;
 }
 
-static int objectsize_atom_parser(const struct ref_format *format, struct used_atom *atom,
+static int objectsize_atom_parser(struct ref_format *format, struct used_atom *atom,
 				  const char *arg, struct strbuf *err)
 {
 	if (!arg) {
@@ -343,7 +343,7 @@ static int objectsize_atom_parser(const struct ref_format *format, struct used_a
 	return 0;
 }
 
-static int deltabase_atom_parser(const struct ref_format *format, struct used_atom *atom,
+static int deltabase_atom_parser(struct ref_format *format, struct used_atom *atom,
 				 const char *arg, struct strbuf *err)
 {
 	if (arg)
@@ -355,7 +355,7 @@ static int deltabase_atom_parser(const struct ref_format *format, struct used_at
 	return 0;
 }
 
-static int body_atom_parser(const struct ref_format *format, struct used_atom *atom,
+static int body_atom_parser(struct ref_format *format, struct used_atom *atom,
 			    const char *arg, struct strbuf *err)
 {
 	if (arg)
@@ -364,7 +364,7 @@ static int body_atom_parser(const struct ref_format *format, struct used_atom *a
 	return 0;
 }
 
-static int subject_atom_parser(const struct ref_format *format, struct used_atom *atom,
+static int subject_atom_parser(struct ref_format *format, struct used_atom *atom,
 			       const char *arg, struct strbuf *err)
 {
 	if (!arg)
@@ -376,7 +376,7 @@ static int subject_atom_parser(const struct ref_format *format, struct used_atom
 	return 0;
 }
 
-static int trailers_atom_parser(const struct ref_format *format, struct used_atom *atom,
+static int trailers_atom_parser(struct ref_format *format, struct used_atom *atom,
 				const char *arg, struct strbuf *err)
 {
 	atom->u.contents.trailer_opts.no_divider = 1;
@@ -402,7 +402,7 @@ static int trailers_atom_parser(const struct ref_format *format, struct used_ato
 	return 0;
 }
 
-static int contents_atom_parser(const struct ref_format *format, struct used_atom *atom,
+static int contents_atom_parser(struct ref_format *format, struct used_atom *atom,
 				const char *arg, struct strbuf *err)
 {
 	if (!arg)
@@ -430,7 +430,7 @@ static int contents_atom_parser(const struct ref_format *format, struct used_ato
 	return 0;
 }
 
-static int raw_atom_parser(const struct ref_format *format, struct used_atom *atom,
+static int raw_atom_parser(struct ref_format *format, struct used_atom *atom,
 				const char *arg, struct strbuf *err)
 {
 	if (!arg)
@@ -442,7 +442,7 @@ static int raw_atom_parser(const struct ref_format *format, struct used_atom *at
 	return 0;
 }
 
-static int oid_atom_parser(const struct ref_format *format, struct used_atom *atom,
+static int oid_atom_parser(struct ref_format *format, struct used_atom *atom,
 			   const char *arg, struct strbuf *err)
 {
 	if (!arg)
@@ -461,7 +461,7 @@ static int oid_atom_parser(const struct ref_format *format, struct used_atom *at
 	return 0;
 }
 
-static int person_email_atom_parser(const struct ref_format *format, struct used_atom *atom,
+static int person_email_atom_parser(struct ref_format *format, struct used_atom *atom,
 				    const char *arg, struct strbuf *err)
 {
 	if (!arg)
@@ -475,7 +475,7 @@ static int person_email_atom_parser(const struct ref_format *format, struct used
 	return 0;
 }
 
-static int refname_atom_parser(const struct ref_format *format, struct used_atom *atom,
+static int refname_atom_parser(struct ref_format *format, struct used_atom *atom,
 			       const char *arg, struct strbuf *err)
 {
 	return refname_atom_parser_internal(&atom->u.refname, arg, atom->name, err);
@@ -492,7 +492,7 @@ static align_type parse_align_position(const char *s)
 	return -1;
 }
 
-static int align_atom_parser(const struct ref_format *format, struct used_atom *atom,
+static int align_atom_parser(struct ref_format *format, struct used_atom *atom,
 			     const char *arg, struct strbuf *err)
 {
 	struct align *align = &atom->u.align;
@@ -544,7 +544,7 @@ static int align_atom_parser(const struct ref_format *format, struct used_atom *
 	return 0;
 }
 
-static int if_atom_parser(const struct ref_format *format, struct used_atom *atom,
+static int if_atom_parser(struct ref_format *format, struct used_atom *atom,
 			  const char *arg, struct strbuf *err)
 {
 	if (!arg) {
@@ -559,7 +559,7 @@ static int if_atom_parser(const struct ref_format *format, struct used_atom *ato
 	return 0;
 }
 
-static int head_atom_parser(const struct ref_format *format, struct used_atom *atom,
+static int head_atom_parser(struct ref_format *format, struct used_atom *atom,
 			    const char *arg, struct strbuf *unused_err)
 {
 	atom->u.head = resolve_refdup("HEAD", RESOLVE_REF_READING, NULL, NULL);
@@ -570,7 +570,7 @@ static struct {
 	const char *name;
 	info_source source;
 	cmp_type cmp_type;
-	int (*parser)(const struct ref_format *format, struct used_atom *atom,
+	int (*parser)(struct ref_format *format, struct used_atom *atom,
 		      const char *arg, struct strbuf *err);
 } valid_atom[] = {
 	[ATOM_REFNAME] = { "refname", SOURCE_NONE, FIELD_STR, refname_atom_parser },
@@ -649,7 +649,7 @@ struct atom_value {
 /*
  * Used to parse format string and sort specifiers
  */
-static int parse_ref_filter_atom(const struct ref_format *format,
+static int parse_ref_filter_atom(struct ref_format *format,
 				 const char *atom, const char *ep,
 				 struct strbuf *err)
 {
@@ -2553,9 +2553,9 @@ static void append_literal(const char *cp, const char *ep, struct ref_formatting
 }
 
 int format_ref_array_item(struct ref_array_item *info,
-			   const struct ref_format *format,
-			   struct strbuf *final_buf,
-			   struct strbuf *error_buf)
+			  struct ref_format *format,
+			  struct strbuf *final_buf,
+			  struct strbuf *error_buf)
 {
 	const char *cp, *sp, *ep;
 	struct ref_formatting_state state = REF_FORMATTING_STATE_INIT;
@@ -2600,7 +2600,7 @@ int format_ref_array_item(struct ref_array_item *info,
 }
 
 void pretty_print_ref(const char *name, const struct object_id *oid,
-		      const struct ref_format *format)
+		      struct ref_format *format)
 {
 	struct ref_array_item *ref_item;
 	struct strbuf output = STRBUF_INIT;
diff --git a/ref-filter.h b/ref-filter.h
index baf72a71896..74fb423fc89 100644
--- a/ref-filter.h
+++ b/ref-filter.h
@@ -116,7 +116,7 @@ void ref_array_sort(struct ref_sorting *sort, struct ref_array *array);
 void ref_sorting_set_sort_flags_all(struct ref_sorting *sorting, unsigned int mask, int on);
 /*  Based on the given format and quote_style, fill the strbuf */
 int format_ref_array_item(struct ref_array_item *info,
-			  const struct ref_format *format,
+			  struct ref_format *format,
 			  struct strbuf *final_buf,
 			  struct strbuf *error_buf);
 /*  Parse a single sort specifier and add it to the list */
@@ -137,7 +137,7 @@ void setup_ref_filter_porcelain_msg(void);
  * name must be a fully qualified refname.
  */
 void pretty_print_ref(const char *name, const struct object_id *oid,
-		      const struct ref_format *format);
+		      struct ref_format *format);
 
 /*
  * Push a single ref onto the array; this can be used to construct your own
-- 
gitgitgadget


^ permalink raw reply	[flat|nested] 121+ messages in thread

* [PATCH v6 05/15] [GSOC] ref-filter: add %(rest) atom
  2021-06-27 12:35         ` [PATCH v6 00/15] [GSOC][RFC] cat-file: reuse ref-filter logic ZheNing Hu via GitGitGadget
                             ` (3 preceding siblings ...)
  2021-06-27 12:35           ` [PATCH v6 04/15] [GSOC] ref-filter: use non-const ref_format in *_atom_parser() ZheNing Hu via GitGitGadget
@ 2021-06-27 12:35           ` ZheNing Hu via GitGitGadget
  2021-06-27 12:35           ` [PATCH v6 06/15] [GSOC] ref-filter: pass get_object() return value to their callers ZheNing Hu via GitGitGadget
                             ` (10 subsequent siblings)
  15 siblings, 0 replies; 121+ messages in thread
From: ZheNing Hu via GitGitGadget @ 2021-06-27 12:35 UTC (permalink / raw)
  To: git
  Cc: Junio C Hamano, Christian Couder, Hariom Verma, Bagas Sanjaya,
	Jeff King, Ævar Arnfjörð Bjarmason, ZheNing Hu,
	ZheNing Hu

From: ZheNing Hu <adlternative@gmail.com>

In order to let "cat-file --batch=%(rest)" use the ref-filter
interface, add %(rest) atom for ref-filter. "git for-each-ref",
"git branch", "git tag" and "git verify-tag" will reject %(rest)
by default.

Mentored-by: Christian Couder <christian.couder@gmail.com>
Mentored-by: Hariom Verma <hariom18599@gmail.com>
Signed-off-by: ZheNing Hu <adlternative@gmail.com>
---
 ref-filter.c             | 21 +++++++++++++++++++++
 ref-filter.h             |  5 ++++-
 t/t3203-branch-output.sh |  4 ++++
 t/t6300-for-each-ref.sh  |  4 ++++
 t/t7004-tag.sh           |  4 ++++
 t/t7030-verify-tag.sh    |  4 ++++
 6 files changed, 41 insertions(+), 1 deletion(-)

diff --git a/ref-filter.c b/ref-filter.c
index d01a0266fb8..10c78de9cfa 100644
--- a/ref-filter.c
+++ b/ref-filter.c
@@ -157,6 +157,7 @@ enum atom_type {
 	ATOM_IF,
 	ATOM_THEN,
 	ATOM_ELSE,
+	ATOM_REST,
 };
 
 /*
@@ -559,6 +560,15 @@ static int if_atom_parser(struct ref_format *format, struct used_atom *atom,
 	return 0;
 }
 
+static int rest_atom_parser(struct ref_format *format, struct used_atom *atom,
+			    const char *arg, struct strbuf *err)
+{
+	if (arg)
+		return strbuf_addf_ret(err, -1, _("%%(rest) does not take arguments"));
+	format->use_rest = 1;
+	return 0;
+}
+
 static int head_atom_parser(struct ref_format *format, struct used_atom *atom,
 			    const char *arg, struct strbuf *unused_err)
 {
@@ -615,6 +625,7 @@ static struct {
 	[ATOM_IF] = { "if", SOURCE_NONE, FIELD_STR, if_atom_parser },
 	[ATOM_THEN] = { "then", SOURCE_NONE },
 	[ATOM_ELSE] = { "else", SOURCE_NONE },
+	[ATOM_REST] = { "rest", SOURCE_NONE, FIELD_STR, rest_atom_parser },
 	/*
 	 * Please update $__git_ref_fieldlist in git-completion.bash
 	 * when you add new atoms
@@ -1010,6 +1021,9 @@ int verify_ref_format(struct ref_format *format)
 		if (at < 0)
 			die("%s", err.buf);
 
+		if (used_atom[at].atom_type == ATOM_REST)
+			die("this command reject atom %%(%.*s)", (int)(ep - sp - 2), sp + 2);
+
 		if ((format->quote_style == QUOTE_PYTHON ||
 		     format->quote_style == QUOTE_SHELL ||
 		     format->quote_style == QUOTE_TCL) &&
@@ -1927,6 +1941,12 @@ static int populate_value(struct ref_array_item *ref, struct strbuf *err)
 			v->handler = else_atom_handler;
 			v->s = xstrdup("");
 			continue;
+		} else if (atom_type == ATOM_REST) {
+			if (ref->rest)
+				v->s = xstrdup(ref->rest);
+			else
+				v->s = xstrdup("");
+			continue;
 		} else
 			continue;
 
@@ -2144,6 +2164,7 @@ static struct ref_array_item *new_ref_array_item(const char *refname,
 
 	FLEX_ALLOC_STR(ref, refname, refname);
 	oidcpy(&ref->objectname, oid);
+	ref->rest = NULL;
 
 	return ref;
 }
diff --git a/ref-filter.h b/ref-filter.h
index 74fb423fc89..c15dee8d6b9 100644
--- a/ref-filter.h
+++ b/ref-filter.h
@@ -38,6 +38,7 @@ struct ref_sorting {
 
 struct ref_array_item {
 	struct object_id objectname;
+	const char *rest;
 	int flag;
 	unsigned int kind;
 	const char *symref;
@@ -76,14 +77,16 @@ struct ref_format {
 	 * verify_ref_format() afterwards to finalize.
 	 */
 	const char *format;
+	const char *rest;
 	int quote_style;
+	int use_rest;
 	int use_color;
 
 	/* Internal state to ref-filter */
 	int need_color_reset_at_eol;
 };
 
-#define REF_FORMAT_INIT { NULL, 0, -1 }
+#define REF_FORMAT_INIT { .use_color = -1 }
 
 /*  Macros for checking --merged and --no-merged options */
 #define _OPT_MERGED_NO_MERGED(option, filter, h) \
diff --git a/t/t3203-branch-output.sh b/t/t3203-branch-output.sh
index 5325b9f67a0..6e94c6db7b5 100755
--- a/t/t3203-branch-output.sh
+++ b/t/t3203-branch-output.sh
@@ -340,6 +340,10 @@ test_expect_success 'git branch --format option' '
 	test_cmp expect actual
 '
 
+test_expect_success 'git branch with --format=%(rest) must fail' '
+	test_must_fail git branch --format="%(rest)" >actual
+'
+
 test_expect_success 'worktree colors correct' '
 	cat >expect <<-EOF &&
 	* <GREEN>(HEAD detached from fromtag)<RESET>
diff --git a/t/t6300-for-each-ref.sh b/t/t6300-for-each-ref.sh
index 0b66e743c58..6ca5c2cc19c 100755
--- a/t/t6300-for-each-ref.sh
+++ b/t/t6300-for-each-ref.sh
@@ -1211,6 +1211,10 @@ test_expect_success 'basic atom: head contents:trailers' '
 	test_cmp expect actual.clean
 '
 
+test_expect_success 'basic atom: rest must fail' '
+	test_must_fail git for-each-ref --format="%(rest)" refs/heads/main
+'
+
 test_expect_success 'trailer parsing not fooled by --- line' '
 	git commit --allow-empty -F - <<-\EOF &&
 	this is the subject
diff --git a/t/t7004-tag.sh b/t/t7004-tag.sh
index 2f72c5c6883..082be85dffc 100755
--- a/t/t7004-tag.sh
+++ b/t/t7004-tag.sh
@@ -1998,6 +1998,10 @@ test_expect_success '--format should list tags as per format given' '
 	test_cmp expect actual
 '
 
+test_expect_success 'git tag -l with --format="%(rest)" must fail' '
+	test_must_fail git tag -l --format="%(rest)" "v1*"
+'
+
 test_expect_succe