All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] Make ident dynamic, not just a hardcoded value of "$Id".
@ 2021-08-23 16:41 Maksym Sobolyev via GitGitGadget
  2021-08-23 18:10 ` Junio C Hamano
                   ` (2 more replies)
  0 siblings, 3 replies; 11+ messages in thread
From: Maksym Sobolyev via GitGitGadget @ 2021-08-23 16:41 UTC (permalink / raw)
  To: git; +Cc: Maksym Sobolyev, Maksym Sobolyev

From: Maksym Sobolyev <sobomax@sippysoft.com>

This allows ident to be something like $FreeBSD$ so it provides matching
functionality for repos migrated from CVS / SVN.

This works by allowing ident to have a parameter, i.e.:

* ident=MyCustomId

In .gitattributes.

Signed-off-by: Maksym Sobolyev <sobomax@sippysoft.com>
---
    Make ident dynamic, not just a hardcoded value of "$Id".
    
    This allows ident to be something like $FreeBSD$ so it provides matching
    functionality for repos migrated from CVS / SVN.
    
    This works by allowing ident to have a parameter, i.e.:
    
    * ident=MyCustomId
    
    In .gitattributes.

Published-As: https://github.com/gitgitgadget/git/releases/tag/pr-git-1074%2Fsobomax%2Fpr-custom_ident-v1
Fetch-It-Via: git fetch https://github.com/gitgitgadget/git pr-git-1074/sobomax/pr-custom_ident-v1
Pull-Request: https://github.com/git/git/pull/1074

 builtin/checkout--worker.c              |  17 +++-
 convert.c                               | 114 +++++++++++++++---------
 convert.h                               |   7 +-
 parallel-checkout.c                     |  11 ++-
 parallel-checkout.h                     |   8 +-
 t/t2082-parallel-checkout-attributes.sh |   7 +-
 6 files changed, 108 insertions(+), 56 deletions(-)

diff --git a/builtin/checkout--worker.c b/builtin/checkout--worker.c
index fb9fd13b73c..61ba39402ad 100644
--- a/builtin/checkout--worker.c
+++ b/builtin/checkout--worker.c
@@ -9,7 +9,7 @@ static void packet_to_pc_item(const char *buffer, int len,
 			      struct parallel_checkout_item *pc_item)
 {
 	const struct pc_item_fixed_portion *fixed_portion;
-	const char *variant;
+	const char *variant, *ident_action;
 	char *encoding;
 
 	if (len < sizeof(struct pc_item_fixed_portion))
@@ -19,7 +19,8 @@ static void packet_to_pc_item(const char *buffer, int len,
 	fixed_portion = (struct pc_item_fixed_portion *)buffer;
 
 	if (len - sizeof(struct pc_item_fixed_portion) !=
-		fixed_portion->name_len + fixed_portion->working_tree_encoding_len)
+		fixed_portion->name_len + fixed_portion->working_tree_encoding_len +
+		fixed_portion->ident_action_len)
 		BUG("checkout worker received corrupted item");
 
 	variant = buffer + sizeof(struct pc_item_fixed_portion);
@@ -43,11 +44,21 @@ static void packet_to_pc_item(const char *buffer, int len,
 	pc_item->ce->ce_namelen = fixed_portion->name_len;
 	pc_item->ce->ce_mode = fixed_portion->ce_mode;
 	memcpy(pc_item->ce->name, variant, pc_item->ce->ce_namelen);
+	variant += pc_item->ce->ce_namelen;
 	oidcpy(&pc_item->ce->oid, &fixed_portion->oid);
 
+	if (fixed_portion->ident_action_len) {
+		ident_action = xmemdupz(variant,
+					fixed_portion->ident_action_len);
+		variant += fixed_portion->ident_action_len;
+	} else {
+		ident_action = NULL;
+	}
+
 	pc_item->id = fixed_portion->id;
 	pc_item->ca.crlf_action = fixed_portion->crlf_action;
-	pc_item->ca.ident = fixed_portion->ident;
+	pc_item->ca.ident_action.id = ident_action;
+	pc_item->ca.ident_action.id_len = fixed_portion->ident_action_len;
 	pc_item->ca.working_tree_encoding = encoding;
 }
 
diff --git a/convert.c b/convert.c
index 0d6fb3410ae..1e8940bf9d7 100644
--- a/convert.c
+++ b/convert.c
@@ -1056,7 +1056,12 @@ static int read_convert_config(const char *var, const char *value, void *cb)
 	return 0;
 }
 
-static int count_ident(const char *cp, unsigned long size)
+#define ID_STR "Id"
+
+#define GIT_MAX_IDENT_LEN 255
+
+static int count_ident(const char *cp, unsigned long size,
+		       const struct ident_action *idact)
 {
 	/*
 	 * "$Id: 0000000000000000000000000000000000000000 $" <=> "$Id$"
@@ -1069,13 +1074,13 @@ static int count_ident(const char *cp, unsigned long size)
 		size--;
 		if (ch != '$')
 			continue;
-		if (size < 3)
+		if (size < idact->id_len + 1)
 			break;
-		if (memcmp("Id", cp, 2))
+		if (memcmp(idact->id, cp, idact->id_len))
 			continue;
-		ch = cp[2];
-		cp += 3;
-		size -= 3;
+		ch = cp[idact->id_len];
+		cp += idact->id_len + 1;
+		size -= idact->id_len + 1;
 		if (ch == '$')
 			cnt++; /* $Id$ */
 		if (ch != ':')
@@ -1099,11 +1104,11 @@ static int count_ident(const char *cp, unsigned long size)
 }
 
 static int ident_to_git(const char *src, size_t len,
-			struct strbuf *buf, int ident)
+			struct strbuf *buf, const struct ident_action *idact)
 {
 	char *dst, *dollar;
 
-	if (!ident || (src && !count_ident(src, len)))
+	if (!idact->id || (src && !count_ident(src, len, idact)))
 		return 0;
 
 	if (!buf)
@@ -1122,17 +1127,18 @@ static int ident_to_git(const char *src, size_t len,
 		len -= dollar + 1 - src;
 		src  = dollar + 1;
 
-		if (len > 3 && !memcmp(src, "Id:", 3)) {
-			dollar = memchr(src + 3, '$', len - 3);
+		if (len > idact->id_len + 1 && !memcmp(src, idact->id, idact->id_len) && src[idact->id_len + 1] == ':') {
+			dollar = memchr(src + idact->id_len + 1, '$', len - (idact->id_len + 1));
 			if (!dollar)
 				break;
-			if (memchr(src + 3, '\n', dollar - src - 3)) {
+			if (memchr(src + idact->id_len + 1, '\n', dollar - src - (idact->id_len + 1))) {
 				/* Line break before the next dollar. */
 				continue;
 			}
 
-			memcpy(dst, "Id$", 3);
-			dst += 3;
+			memcpy(dst, idact->id, idact->id_len);
+			dst[idact->id_len] = '$';
+			dst += idact->id_len + 1;
 			len -= dollar + 1 - src;
 			src  = dollar + 1;
 		}
@@ -1143,16 +1149,16 @@ static int ident_to_git(const char *src, size_t len,
 }
 
 static int ident_to_worktree(const char *src, size_t len,
-			     struct strbuf *buf, int ident)
+			     struct strbuf *buf, const struct ident_action *idact)
 {
 	struct object_id oid;
 	char *to_free = NULL, *dollar, *spc;
 	int cnt;
 
-	if (!ident)
+	if (!idact->id)
 		return 0;
 
-	cnt = count_ident(src, len);
+	cnt = count_ident(src, len, idact);
 	if (!cnt)
 		return 0;
 
@@ -1161,7 +1167,7 @@ static int ident_to_worktree(const char *src, size_t len,
 		to_free = strbuf_detach(buf, NULL);
 	hash_object_file(the_hash_algo, src, len, "blob", &oid);
 
-	strbuf_grow(buf, len + cnt * (the_hash_algo->hexsz + 3));
+	strbuf_grow(buf, len + cnt * (the_hash_algo->hexsz + idact->id_len + 1));
 	for (;;) {
 		/* step 1: run to the next '$' */
 		dollar = memchr(src, '$', len);
@@ -1172,14 +1178,14 @@ static int ident_to_worktree(const char *src, size_t len,
 		src  = dollar + 1;
 
 		/* step 2: does it looks like a bit like Id:xxx$ or Id$ ? */
-		if (len < 3 || memcmp("Id", src, 2))
+		if (len < idact->id_len + 1 || memcmp(idact->id, src, idact->id_len))
 			continue;
 
 		/* step 3: skip over Id$ or Id:xxxxx$ */
-		if (src[2] == '$') {
-			src += 3;
-			len -= 3;
-		} else if (src[2] == ':') {
+		if (src[idact->id_len] == '$') {
+			src += idact->id_len + 1;
+			len -= idact->id_len + 1;
+		} else if (src[idact->id_len] == ':') {
 			/*
 			 * It's possible that an expanded Id has crept its way into the
 			 * repository, we cope with that by stripping the expansion out.
@@ -1187,18 +1193,18 @@ static int ident_to_worktree(const char *src, size_t len,
 			 * on checkout, which won't go away by stash, but let's keep it
 			 * for git-style ids.
 			 */
-			dollar = memchr(src + 3, '$', len - 3);
+			dollar = memchr(src + idact->id_len + 1, '$', len - (idact->id_len + 1));
 			if (!dollar) {
 				/* incomplete keyword, no more '$', so just quit the loop */
 				break;
 			}
 
-			if (memchr(src + 3, '\n', dollar - src - 3)) {
+			if (memchr(src + idact->id_len + 1, '\n', dollar - src - (idact->id_len + 1))) {
 				/* Line break before the next dollar. */
 				continue;
 			}
 
-			spc = memchr(src + 4, ' ', dollar - src - 4);
+			spc = memchr(src + idact->id_len + 2, ' ', dollar - src - (idact->id_len + 2));
 			if (spc && spc < dollar-1) {
 				/* There are spaces in unexpected places.
 				 * This is probably an id from some other
@@ -1215,7 +1221,8 @@ static int ident_to_worktree(const char *src, size_t len,
 		}
 
 		/* step 4: substitute */
-		strbuf_addstr(buf, "Id: ");
+		strbuf_addstr(buf, idact->id);
+		strbuf_addstr(buf, ": ");
 		strbuf_addstr(buf, oid_to_hex(&oid));
 		strbuf_addstr(buf, " $");
 	}
@@ -1286,11 +1293,21 @@ static struct convert_driver *git_path_check_convert(struct attr_check_item *che
 	return NULL;
 }
 
-static int git_path_check_ident(struct attr_check_item *check)
+static struct ident_action git_path_check_ident(struct attr_check_item *check)
 {
+	struct ident_action idact = {.id = NULL, .id_len = 0};
 	const char *value = check->value;
 
-	return !!ATTR_TRUE(value);
+	if (!ATTR_UNSET(value) && !ATTR_FALSE(value)) {
+		if (ATTR_TRUE(value))
+			idact.id = ID_STR;
+		else
+			idact.id = value;
+		idact.id_len = strlen(idact.id);
+		if (idact.id_len > GIT_MAX_IDENT_LEN)
+			die(_("ident value length exceeds GIT_MAX_IDENT_LEN"));
+	}
+	return idact;
 }
 
 static struct attr_check *check;
@@ -1313,7 +1330,7 @@ void convert_attrs(struct index_state *istate,
 	ca->crlf_action = git_path_check_crlf(ccheck + 4);
 	if (ca->crlf_action == CRLF_UNDEFINED)
 		ca->crlf_action = git_path_check_crlf(ccheck + 0);
-	ca->ident = git_path_check_ident(ccheck + 1);
+	ca->ident_action = git_path_check_ident(ccheck + 1);
 	ca->drv = git_path_check_convert(ccheck + 2);
 	if (ca->crlf_action != CRLF_BINARY) {
 		enum eol eol_attr = git_path_check_eol(ccheck + 3);
@@ -1433,7 +1450,7 @@ int convert_to_git(struct index_state *istate,
 			len = dst->len;
 		}
 	}
-	return ret | ident_to_git(src, len, dst, ca.ident);
+	return ret | ident_to_git(src, len, dst, &ca.ident_action);
 }
 
 void convert_to_git_filter_fd(struct index_state *istate,
@@ -1450,7 +1467,7 @@ void convert_to_git_filter_fd(struct index_state *istate,
 
 	encode_to_git(path, dst->buf, dst->len, dst, ca.working_tree_encoding, conv_flags);
 	crlf_to_git(istate, path, dst->buf, dst->len, dst, ca.crlf_action, conv_flags);
-	ident_to_git(dst->buf, dst->len, dst, ca.ident);
+	ident_to_git(dst->buf, dst->len, dst, &ca.ident_action);
 }
 
 static int convert_to_working_tree_ca_internal(const struct conv_attrs *ca,
@@ -1462,7 +1479,7 @@ static int convert_to_working_tree_ca_internal(const struct conv_attrs *ca,
 {
 	int ret = 0, ret_filter = 0;
 
-	ret |= ident_to_worktree(src, len, dst, ca->ident);
+	ret |= ident_to_worktree(src, len, dst, &(ca->ident_action));
 	if (ret) {
 		src = dst->buf;
 		len = dst->len;
@@ -1810,7 +1827,8 @@ struct ident_filter {
 	struct stream_filter filter;
 	struct strbuf left;
 	int state;
-	char ident[GIT_MAX_HEXSZ + 5]; /* ": x40 $" */
+	const struct ident_action *idact;
+	char ident[GIT_MAX_HEXSZ + GIT_MAX_IDENT_LEN + 3]; /* ": x40 $" */
 };
 
 static int is_foreign_ident(const char *str)
@@ -1847,13 +1865,16 @@ static int ident_filter_fn(struct stream_filter *filter,
 			   char *output, size_t *osize_p)
 {
 	struct ident_filter *ident = (struct ident_filter *)filter;
-	static const char head[] = "$Id";
+	const struct ident_action *idact = ident->idact;
 
 	if (!input) {
 		/* drain upon eof */
 		switch (ident->state) {
 		default:
-			strbuf_add(&ident->left, head, ident->state);
+			if (ident->state > 0)
+				strbuf_addch(&ident->left, '$');
+			if (ident->state > 1)
+				strbuf_add(&ident->left, idact->id, ident->state - 1);
 			/* fallthrough */
 		case IDENT_SKIPPING:
 			/* fallthrough */
@@ -1885,22 +1906,26 @@ static int ident_filter_fn(struct stream_filter *filter,
 			if (ch != '\n' && ch != '$')
 				continue;
 			if (ch == '$' && !is_foreign_ident(ident->left.buf)) {
-				strbuf_setlen(&ident->left, sizeof(head) - 1);
+				strbuf_setlen(&ident->left, idact->id_len + 1);
 				strbuf_addstr(&ident->left, ident->ident);
 			}
 			ident->state = IDENT_DRAINING;
 			continue;
 		}
 
-		if (ident->state < sizeof(head) &&
-		    head[ident->state] == ch) {
+		if ((ident->state == 0 && ch == '$') ||
+		    (ident->state > 0 && ident->state < idact->id_len + 1 &&
+		     idact->id[ident->state - 1] == ch)) {
 			ident->state++;
 			continue;
 		}
 
-		if (ident->state)
-			strbuf_add(&ident->left, head, ident->state);
-		if (ident->state == sizeof(head) - 1) {
+		if (ident->state) {
+			strbuf_addch(&ident->left, '$');
+			if (ident->state > 1)
+				strbuf_add(&ident->left, idact->id, ident->state - 1);
+		}
+		if (ident->state == idact->id_len + 1) {
 			if (ch != ':' && ch != '$') {
 				strbuf_addch(&ident->left, ch);
 				ident->state = 0;
@@ -1935,7 +1960,7 @@ static struct stream_filter_vtbl ident_vtbl = {
 	ident_free_fn,
 };
 
-static struct stream_filter *ident_filter(const struct object_id *oid)
+static struct stream_filter *ident_filter(const struct object_id *oid, const struct ident_action *idact)
 {
 	struct ident_filter *ident = xmalloc(sizeof(*ident));
 
@@ -1944,6 +1969,7 @@ static struct stream_filter *ident_filter(const struct object_id *oid)
 	strbuf_init(&ident->left, 0);
 	ident->filter.vtbl = &ident_vtbl;
 	ident->state = 0;
+	ident->idact = idact;
 	return (struct stream_filter *)ident;
 }
 
@@ -1963,8 +1989,8 @@ struct stream_filter *get_stream_filter_ca(const struct conv_attrs *ca,
 	if (classify_conv_attrs(ca) != CA_CLASS_STREAMABLE)
 		return NULL;
 
-	if (ca->ident)
-		filter = ident_filter(oid);
+	if (ca->ident_action.id)
+		filter = ident_filter(oid, &(ca->ident_action));
 
 	if (output_eol(ca->crlf_action) == EOL_CRLF)
 		filter = cascade_filter(filter, lf_to_crlf_filter());
diff --git a/convert.h b/convert.h
index 5ee1c322058..a02632e8104 100644
--- a/convert.h
+++ b/convert.h
@@ -76,11 +76,16 @@ enum convert_crlf_action {
 
 struct convert_driver;
 
+struct ident_action {
+	const char *id;
+	int id_len;
+};
+
 struct conv_attrs {
 	struct convert_driver *drv;
 	enum convert_crlf_action attr_action; /* What attr says */
 	enum convert_crlf_action crlf_action; /* When no attr is set, use core.autocrlf */
-	int ident;
+	struct ident_action ident_action; /* What ident says */
 	const char *working_tree_encoding; /* Supported encoding or default encoding if NULL */
 };
 
diff --git a/parallel-checkout.c b/parallel-checkout.c
index ddc0ff3c064..f9a3f2ff25b 100644
--- a/parallel-checkout.c
+++ b/parallel-checkout.c
@@ -403,13 +403,15 @@ static void send_one_item(int fd, struct parallel_checkout_item *pc_item)
 	size_t name_len = pc_item->ce->ce_namelen;
 	size_t working_tree_encoding_len = working_tree_encoding ?
 					   strlen(working_tree_encoding) : 0;
+	const char *ident_action_id = pc_item->ca.ident_action.id;
+	size_t ident_action_len = pc_item->ca.ident_action.id_len;
 
 	/*
 	 * Any changes in the calculation of the message size must also be made
 	 * in is_eligible_for_parallel_checkout().
 	 */
 	len_data = sizeof(struct pc_item_fixed_portion) + name_len +
-		   working_tree_encoding_len;
+		   working_tree_encoding_len + ident_action_len;
 
 	data = xmalloc(len_data);
 
@@ -417,7 +419,7 @@ static void send_one_item(int fd, struct parallel_checkout_item *pc_item)
 	fixed_portion->id = pc_item->id;
 	fixed_portion->ce_mode = pc_item->ce->ce_mode;
 	fixed_portion->crlf_action = pc_item->ca.crlf_action;
-	fixed_portion->ident = pc_item->ca.ident;
+	fixed_portion->ident_action_len = ident_action_len;
 	fixed_portion->name_len = name_len;
 	fixed_portion->working_tree_encoding_len = working_tree_encoding_len;
 	/*
@@ -434,6 +436,11 @@ static void send_one_item(int fd, struct parallel_checkout_item *pc_item)
 		variant += working_tree_encoding_len;
 	}
 	memcpy(variant, pc_item->ce->name, name_len);
+	variant += name_len;
+	if (ident_action_len) {
+		memcpy(variant, ident_action_id, ident_action_len);
+		variant += ident_action_len;
+	}
 
 	packet_write(fd, data, len_data);
 
diff --git a/parallel-checkout.h b/parallel-checkout.h
index 80f539bcb77..c3c282f516b 100644
--- a/parallel-checkout.h
+++ b/parallel-checkout.h
@@ -76,9 +76,9 @@ struct parallel_checkout_item {
 
 /*
  * The fixed-size portion of `struct parallel_checkout_item` that is sent to the
- * workers. Following this will be 2 strings: ca.working_tree_encoding and
- * ce.name; These are NOT null terminated, since we have the size in the fixed
- * portion.
+ * workers. Following this will be 3 strings: ca.working_tree_encoding, ca.name
+ * and ca.ident_action.id; These are NOT null terminated, since we have the size
+ * in the fixed portion.
  *
  * Note that not all fields of conv_attrs and cache_entry are passed, only the
  * ones that will be required by the workers to smudge and write the entry.
@@ -88,7 +88,7 @@ struct pc_item_fixed_portion {
 	struct object_id oid;
 	unsigned int ce_mode;
 	enum convert_crlf_action crlf_action;
-	int ident;
+	size_t ident_action_len;
 	size_t working_tree_encoding_len;
 	size_t name_len;
 };
diff --git a/t/t2082-parallel-checkout-attributes.sh b/t/t2082-parallel-checkout-attributes.sh
index 25254579618..822957a8dc8 100755
--- a/t/t2082-parallel-checkout-attributes.sh
+++ b/t/t2082-parallel-checkout-attributes.sh
@@ -20,16 +20,19 @@ test_expect_success 'parallel-checkout with ident' '
 	(
 		cd ident &&
 		echo "A ident" >.gitattributes &&
+		echo "C ident=MyCusomVeryLongAndWordyId" >>.gitattributes &&
 		echo "\$Id\$" >A &&
 		echo "\$Id\$" >B &&
+		echo "\$MyCusomVeryLongAndWordyId\$" >C &&
 		git add -A &&
 		git commit -m id &&
 
-		rm A B &&
+		rm A B C &&
 		test_checkout_workers 2 git reset --hard &&
 		hexsz=$(test_oid hexsz) &&
 		grep -E "\\\$Id: [0-9a-f]{$hexsz} \\\$" A &&
-		grep "\\\$Id\\\$" B
+		grep "\\\$Id\\\$" B &&
+		grep -E "\\\$MyCusomVeryLongAndWordyId: [0-9a-f]{$hexsz} \\\$" C
 	)
 '
 

base-commit: 225bc32a989d7a22fa6addafd4ce7dcd04675dbf
-- 
gitgitgadget

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* Re: [PATCH] Make ident dynamic, not just a hardcoded value of "$Id".
  2021-08-23 16:41 [PATCH] Make ident dynamic, not just a hardcoded value of "$Id" Maksym Sobolyev via GitGitGadget
@ 2021-08-23 18:10 ` Junio C Hamano
  2021-08-23 18:41 ` Philip Oakley
  2021-08-26  4:28 ` [PATCH v2] " Maksym Sobolyev via GitGitGadget
  2 siblings, 0 replies; 11+ messages in thread
From: Junio C Hamano @ 2021-08-23 18:10 UTC (permalink / raw)
  To: Maksym Sobolyev via GitGitGadget; +Cc: git, Maksym Sobolyev, Maksym Sobolyev

"Maksym Sobolyev via GitGitGadget" <gitgitgadget@gmail.com> writes:

> From: Maksym Sobolyev <sobomax@sippysoft.com>
>
> This allows ident to be something like $FreeBSD$ so it provides matching
> functionality for repos migrated from CVS / SVN.
>
> This works by allowing ident to have a parameter, i.e.:
>
> * ident=MyCustomId
>
> In .gitattributes.

I would have expected that, just like cvs uses its configuration
mechanism for LocalKeyword, this would belong to the .git/config
file.

The attribute mechanism is inherently for stuff that you would want
to specify per-path, so the "do we do keyword expansion for this
file (yes/no)?" is a good match to be an attribute, and that
question is what the original 'ident' attribute asks.

I do not think "what keyword do we want to expand as if it were
$Id$?" is a question we want to be giving different answers per
path, and it does not smell like a good match to be an attribute.

By allowing a potentially separate values for

    *.txt ident=FreeBSD
    *.cc  ident=OpenBSD

this gives long rope to create a mess to the users.

I also think that the ident expansion is overrated, and have a
feeling that we should discourage its use, not encourage it, but
that is a separate story.

Thanks.

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] Make ident dynamic, not just a hardcoded value of "$Id".
  2021-08-23 16:41 [PATCH] Make ident dynamic, not just a hardcoded value of "$Id" Maksym Sobolyev via GitGitGadget
  2021-08-23 18:10 ` Junio C Hamano
@ 2021-08-23 18:41 ` Philip Oakley
  2021-08-26  4:28 ` [PATCH v2] " Maksym Sobolyev via GitGitGadget
  2 siblings, 0 replies; 11+ messages in thread
From: Philip Oakley @ 2021-08-23 18:41 UTC (permalink / raw)
  To: Maksym Sobolyev via GitGitGadget, git; +Cc: Maksym Sobolyev, Maksym Sobolyev

On 23/08/2021 17:41, Maksym Sobolyev via GitGitGadget wrote:
> From: Maksym Sobolyev <sobomax@sippysoft.com>
>
> This allows ident to be something like $FreeBSD$ so it provides matching
> functionality for repos migrated from CVS / SVN.
>
> This works by allowing ident to have a parameter, i.e.:
>
> * ident=MyCustomId
>
> In .gitattributes.
>
> Signed-off-by: Maksym Sobolyev <sobomax@sippysoft.com>
> ---
>     Make ident dynamic, not just a hardcoded value of "$Id".
>     
>     This allows ident to be something like $FreeBSD$ so it provides matching
>     functionality for repos migrated from CVS / SVN.
>     
>     This works by allowing ident to have a parameter, i.e.:
>     
>     * ident=MyCustomId
>     
>     In .gitattributes.
>
> Published-As: https://github.com/gitgitgadget/git/releases/tag/pr-git-1074%2Fsobomax%2Fpr-custom_ident-v1
> Fetch-It-Via: git fetch https://github.com/gitgitgadget/git pr-git-1074/sobomax/pr-custom_ident-v1
> Pull-Request: https://github.com/git/git/pull/1074
>
>  builtin/checkout--worker.c              |  17 +++-
>  convert.c                               | 114 +++++++++++++++---------
>  convert.h                               |   7 +-
>  parallel-checkout.c                     |  11 ++-
>  parallel-checkout.h                     |   8 +-
>  t/t2082-parallel-checkout-attributes.sh |   7 +-
>  6 files changed, 108 insertions(+), 56 deletions(-)
>
> diff --git a/builtin/checkout--worker.c b/builtin/checkout--worker.c

Should this also include some documentation updates?
-- 
Philip

^ permalink raw reply	[flat|nested] 11+ messages in thread

* [PATCH v2] Make ident dynamic, not just a hardcoded value of "$Id".
  2021-08-23 16:41 [PATCH] Make ident dynamic, not just a hardcoded value of "$Id" Maksym Sobolyev via GitGitGadget
  2021-08-23 18:10 ` Junio C Hamano
  2021-08-23 18:41 ` Philip Oakley
@ 2021-08-26  4:28 ` Maksym Sobolyev via GitGitGadget
  2021-08-26 20:37   ` Matheus Tavares
                     ` (2 more replies)
  2 siblings, 3 replies; 11+ messages in thread
From: Maksym Sobolyev via GitGitGadget @ 2021-08-26  4:28 UTC (permalink / raw)
  To: git; +Cc: Philip Oakley, Maksym Sobolyev, Maksym Sobolyev

From: Maksym Sobolyev <sobomax@sippysoft.com>

This allows ident to be something like $FreeBSD$ so it provides matching
functionality for repos migrated from CVS / SVN.

This works by allowing ident to have a parameter, i.e.:

* ident=MyCustomId

In .gitattributes.

Signed-off-by: Maksym Sobolyev <sobomax@sippysoft.com>
---
    Make ident dynamic, not just a hardcoded value of "$Id".
    
    This allows ident to be something like $FreeBSD$ so it provides matching
    functionality for repos migrated from CVS / SVN.
    
    This works by allowing ident to have a parameter, i.e.:
    
    * ident=MyCustomId
    
    In .gitattributes.

Published-As: https://github.com/gitgitgadget/git/releases/tag/pr-git-1074%2Fsobomax%2Fpr-custom_ident-v2
Fetch-It-Via: git fetch https://github.com/gitgitgadget/git pr-git-1074/sobomax/pr-custom_ident-v2
Pull-Request: https://github.com/git/git/pull/1074

Range-diff vs v1:

 1:  52e0adde935 ! 1:  16713788a26 Make ident dynamic, not just a hardcoded value of "$Id".
     @@ Commit message
      
          Signed-off-by: Maksym Sobolyev <sobomax@sippysoft.com>
      
     + ## Documentation/gitattributes.txt ##
     +@@ Documentation/gitattributes.txt: sign `$` upon checkout.  Any byte sequence that begins with
     + `$Id:` and ends with `$` in the worktree file is replaced
     + with `$Id$` upon check-in.
     + 
     ++The `ident` attribute can also provide an optional value,
     ++which if supplied is going to be used for expansion instead of
     ++the string `Id`.
     ++
     ++------------------------
     ++*.[ch]		ident=FreeBSD
     ++------------------------
     ++
     + 
     + `filter`
     + ^^^^^^^^
     +
       ## builtin/checkout--worker.c ##
      @@ builtin/checkout--worker.c: static void packet_to_pc_item(const char *buffer, int len,
       			      struct parallel_checkout_item *pc_item)


 Documentation/gitattributes.txt         |   8 ++
 builtin/checkout--worker.c              |  17 +++-
 convert.c                               | 114 +++++++++++++++---------
 convert.h                               |   7 +-
 parallel-checkout.c                     |  11 ++-
 parallel-checkout.h                     |   8 +-
 t/t2082-parallel-checkout-attributes.sh |   7 +-
 7 files changed, 116 insertions(+), 56 deletions(-)

diff --git a/Documentation/gitattributes.txt b/Documentation/gitattributes.txt
index 83fd4e19a41..9e486f3e8d3 100644
--- a/Documentation/gitattributes.txt
+++ b/Documentation/gitattributes.txt
@@ -382,6 +382,14 @@ sign `$` upon checkout.  Any byte sequence that begins with
 `$Id:` and ends with `$` in the worktree file is replaced
 with `$Id$` upon check-in.
 
+The `ident` attribute can also provide an optional value,
+which if supplied is going to be used for expansion instead of
+the string `Id`.
+
+------------------------
+*.[ch]		ident=FreeBSD
+------------------------
+
 
 `filter`
 ^^^^^^^^
diff --git a/builtin/checkout--worker.c b/builtin/checkout--worker.c
index fb9fd13b73c..61ba39402ad 100644
--- a/builtin/checkout--worker.c
+++ b/builtin/checkout--worker.c
@@ -9,7 +9,7 @@ static void packet_to_pc_item(const char *buffer, int len,
 			      struct parallel_checkout_item *pc_item)
 {
 	const struct pc_item_fixed_portion *fixed_portion;
-	const char *variant;
+	const char *variant, *ident_action;
 	char *encoding;
 
 	if (len < sizeof(struct pc_item_fixed_portion))
@@ -19,7 +19,8 @@ static void packet_to_pc_item(const char *buffer, int len,
 	fixed_portion = (struct pc_item_fixed_portion *)buffer;
 
 	if (len - sizeof(struct pc_item_fixed_portion) !=
-		fixed_portion->name_len + fixed_portion->working_tree_encoding_len)
+		fixed_portion->name_len + fixed_portion->working_tree_encoding_len +
+		fixed_portion->ident_action_len)
 		BUG("checkout worker received corrupted item");
 
 	variant = buffer + sizeof(struct pc_item_fixed_portion);
@@ -43,11 +44,21 @@ static void packet_to_pc_item(const char *buffer, int len,
 	pc_item->ce->ce_namelen = fixed_portion->name_len;
 	pc_item->ce->ce_mode = fixed_portion->ce_mode;
 	memcpy(pc_item->ce->name, variant, pc_item->ce->ce_namelen);
+	variant += pc_item->ce->ce_namelen;
 	oidcpy(&pc_item->ce->oid, &fixed_portion->oid);
 
+	if (fixed_portion->ident_action_len) {
+		ident_action = xmemdupz(variant,
+					fixed_portion->ident_action_len);
+		variant += fixed_portion->ident_action_len;
+	} else {
+		ident_action = NULL;
+	}
+
 	pc_item->id = fixed_portion->id;
 	pc_item->ca.crlf_action = fixed_portion->crlf_action;
-	pc_item->ca.ident = fixed_portion->ident;
+	pc_item->ca.ident_action.id = ident_action;
+	pc_item->ca.ident_action.id_len = fixed_portion->ident_action_len;
 	pc_item->ca.working_tree_encoding = encoding;
 }
 
diff --git a/convert.c b/convert.c
index 0d6fb3410ae..1e8940bf9d7 100644
--- a/convert.c
+++ b/convert.c
@@ -1056,7 +1056,12 @@ static int read_convert_config(const char *var, const char *value, void *cb)
 	return 0;
 }
 
-static int count_ident(const char *cp, unsigned long size)
+#define ID_STR "Id"
+
+#define GIT_MAX_IDENT_LEN 255
+
+static int count_ident(const char *cp, unsigned long size,
+		       const struct ident_action *idact)
 {
 	/*
 	 * "$Id: 0000000000000000000000000000000000000000 $" <=> "$Id$"
@@ -1069,13 +1074,13 @@ static int count_ident(const char *cp, unsigned long size)
 		size--;
 		if (ch != '$')
 			continue;
-		if (size < 3)
+		if (size < idact->id_len + 1)
 			break;
-		if (memcmp("Id", cp, 2))
+		if (memcmp(idact->id, cp, idact->id_len))
 			continue;
-		ch = cp[2];
-		cp += 3;
-		size -= 3;
+		ch = cp[idact->id_len];
+		cp += idact->id_len + 1;
+		size -= idact->id_len + 1;
 		if (ch == '$')
 			cnt++; /* $Id$ */
 		if (ch != ':')
@@ -1099,11 +1104,11 @@ static int count_ident(const char *cp, unsigned long size)
 }
 
 static int ident_to_git(const char *src, size_t len,
-			struct strbuf *buf, int ident)
+			struct strbuf *buf, const struct ident_action *idact)
 {
 	char *dst, *dollar;
 
-	if (!ident || (src && !count_ident(src, len)))
+	if (!idact->id || (src && !count_ident(src, len, idact)))
 		return 0;
 
 	if (!buf)
@@ -1122,17 +1127,18 @@ static int ident_to_git(const char *src, size_t len,
 		len -= dollar + 1 - src;
 		src  = dollar + 1;
 
-		if (len > 3 && !memcmp(src, "Id:", 3)) {
-			dollar = memchr(src + 3, '$', len - 3);
+		if (len > idact->id_len + 1 && !memcmp(src, idact->id, idact->id_len) && src[idact->id_len + 1] == ':') {
+			dollar = memchr(src + idact->id_len + 1, '$', len - (idact->id_len + 1));
 			if (!dollar)
 				break;
-			if (memchr(src + 3, '\n', dollar - src - 3)) {
+			if (memchr(src + idact->id_len + 1, '\n', dollar - src - (idact->id_len + 1))) {
 				/* Line break before the next dollar. */
 				continue;
 			}
 
-			memcpy(dst, "Id$", 3);
-			dst += 3;
+			memcpy(dst, idact->id, idact->id_len);
+			dst[idact->id_len] = '$';
+			dst += idact->id_len + 1;
 			len -= dollar + 1 - src;
 			src  = dollar + 1;
 		}
@@ -1143,16 +1149,16 @@ static int ident_to_git(const char *src, size_t len,
 }
 
 static int ident_to_worktree(const char *src, size_t len,
-			     struct strbuf *buf, int ident)
+			     struct strbuf *buf, const struct ident_action *idact)
 {
 	struct object_id oid;
 	char *to_free = NULL, *dollar, *spc;
 	int cnt;
 
-	if (!ident)
+	if (!idact->id)
 		return 0;
 
-	cnt = count_ident(src, len);
+	cnt = count_ident(src, len, idact);
 	if (!cnt)
 		return 0;
 
@@ -1161,7 +1167,7 @@ static int ident_to_worktree(const char *src, size_t len,
 		to_free = strbuf_detach(buf, NULL);
 	hash_object_file(the_hash_algo, src, len, "blob", &oid);
 
-	strbuf_grow(buf, len + cnt * (the_hash_algo->hexsz + 3));
+	strbuf_grow(buf, len + cnt * (the_hash_algo->hexsz + idact->id_len + 1));
 	for (;;) {
 		/* step 1: run to the next '$' */
 		dollar = memchr(src, '$', len);
@@ -1172,14 +1178,14 @@ static int ident_to_worktree(const char *src, size_t len,
 		src  = dollar + 1;
 
 		/* step 2: does it looks like a bit like Id:xxx$ or Id$ ? */
-		if (len < 3 || memcmp("Id", src, 2))
+		if (len < idact->id_len + 1 || memcmp(idact->id, src, idact->id_len))
 			continue;
 
 		/* step 3: skip over Id$ or Id:xxxxx$ */
-		if (src[2] == '$') {
-			src += 3;
-			len -= 3;
-		} else if (src[2] == ':') {
+		if (src[idact->id_len] == '$') {
+			src += idact->id_len + 1;
+			len -= idact->id_len + 1;
+		} else if (src[idact->id_len] == ':') {
 			/*
 			 * It's possible that an expanded Id has crept its way into the
 			 * repository, we cope with that by stripping the expansion out.
@@ -1187,18 +1193,18 @@ static int ident_to_worktree(const char *src, size_t len,
 			 * on checkout, which won't go away by stash, but let's keep it
 			 * for git-style ids.
 			 */
-			dollar = memchr(src + 3, '$', len - 3);
+			dollar = memchr(src + idact->id_len + 1, '$', len - (idact->id_len + 1));
 			if (!dollar) {
 				/* incomplete keyword, no more '$', so just quit the loop */
 				break;
 			}
 
-			if (memchr(src + 3, '\n', dollar - src - 3)) {
+			if (memchr(src + idact->id_len + 1, '\n', dollar - src - (idact->id_len + 1))) {
 				/* Line break before the next dollar. */
 				continue;
 			}
 
-			spc = memchr(src + 4, ' ', dollar - src - 4);
+			spc = memchr(src + idact->id_len + 2, ' ', dollar - src - (idact->id_len + 2));
 			if (spc && spc < dollar-1) {
 				/* There are spaces in unexpected places.
 				 * This is probably an id from some other
@@ -1215,7 +1221,8 @@ static int ident_to_worktree(const char *src, size_t len,
 		}
 
 		/* step 4: substitute */
-		strbuf_addstr(buf, "Id: ");
+		strbuf_addstr(buf, idact->id);
+		strbuf_addstr(buf, ": ");
 		strbuf_addstr(buf, oid_to_hex(&oid));
 		strbuf_addstr(buf, " $");
 	}
@@ -1286,11 +1293,21 @@ static struct convert_driver *git_path_check_convert(struct attr_check_item *che
 	return NULL;
 }
 
-static int git_path_check_ident(struct attr_check_item *check)
+static struct ident_action git_path_check_ident(struct attr_check_item *check)
 {
+	struct ident_action idact = {.id = NULL, .id_len = 0};
 	const char *value = check->value;
 
-	return !!ATTR_TRUE(value);
+	if (!ATTR_UNSET(value) && !ATTR_FALSE(value)) {
+		if (ATTR_TRUE(value))
+			idact.id = ID_STR;
+		else
+			idact.id = value;
+		idact.id_len = strlen(idact.id);
+		if (idact.id_len > GIT_MAX_IDENT_LEN)
+			die(_("ident value length exceeds GIT_MAX_IDENT_LEN"));
+	}
+	return idact;
 }
 
 static struct attr_check *check;
@@ -1313,7 +1330,7 @@ void convert_attrs(struct index_state *istate,
 	ca->crlf_action = git_path_check_crlf(ccheck + 4);
 	if (ca->crlf_action == CRLF_UNDEFINED)
 		ca->crlf_action = git_path_check_crlf(ccheck + 0);
-	ca->ident = git_path_check_ident(ccheck + 1);
+	ca->ident_action = git_path_check_ident(ccheck + 1);
 	ca->drv = git_path_check_convert(ccheck + 2);
 	if (ca->crlf_action != CRLF_BINARY) {
 		enum eol eol_attr = git_path_check_eol(ccheck + 3);
@@ -1433,7 +1450,7 @@ int convert_to_git(struct index_state *istate,
 			len = dst->len;
 		}
 	}
-	return ret | ident_to_git(src, len, dst, ca.ident);
+	return ret | ident_to_git(src, len, dst, &ca.ident_action);
 }
 
 void convert_to_git_filter_fd(struct index_state *istate,
@@ -1450,7 +1467,7 @@ void convert_to_git_filter_fd(struct index_state *istate,
 
 	encode_to_git(path, dst->buf, dst->len, dst, ca.working_tree_encoding, conv_flags);
 	crlf_to_git(istate, path, dst->buf, dst->len, dst, ca.crlf_action, conv_flags);
-	ident_to_git(dst->buf, dst->len, dst, ca.ident);
+	ident_to_git(dst->buf, dst->len, dst, &ca.ident_action);
 }
 
 static int convert_to_working_tree_ca_internal(const struct conv_attrs *ca,
@@ -1462,7 +1479,7 @@ static int convert_to_working_tree_ca_internal(const struct conv_attrs *ca,
 {
 	int ret = 0, ret_filter = 0;
 
-	ret |= ident_to_worktree(src, len, dst, ca->ident);
+	ret |= ident_to_worktree(src, len, dst, &(ca->ident_action));
 	if (ret) {
 		src = dst->buf;
 		len = dst->len;
@@ -1810,7 +1827,8 @@ struct ident_filter {
 	struct stream_filter filter;
 	struct strbuf left;
 	int state;
-	char ident[GIT_MAX_HEXSZ + 5]; /* ": x40 $" */
+	const struct ident_action *idact;
+	char ident[GIT_MAX_HEXSZ + GIT_MAX_IDENT_LEN + 3]; /* ": x40 $" */
 };
 
 static int is_foreign_ident(const char *str)
@@ -1847,13 +1865,16 @@ static int ident_filter_fn(struct stream_filter *filter,
 			   char *output, size_t *osize_p)
 {
 	struct ident_filter *ident = (struct ident_filter *)filter;
-	static const char head[] = "$Id";
+	const struct ident_action *idact = ident->idact;
 
 	if (!input) {
 		/* drain upon eof */
 		switch (ident->state) {
 		default:
-			strbuf_add(&ident->left, head, ident->state);
+			if (ident->state > 0)
+				strbuf_addch(&ident->left, '$');
+			if (ident->state > 1)
+				strbuf_add(&ident->left, idact->id, ident->state - 1);
 			/* fallthrough */
 		case IDENT_SKIPPING:
 			/* fallthrough */
@@ -1885,22 +1906,26 @@ static int ident_filter_fn(struct stream_filter *filter,
 			if (ch != '\n' && ch != '$')
 				continue;
 			if (ch == '$' && !is_foreign_ident(ident->left.buf)) {
-				strbuf_setlen(&ident->left, sizeof(head) - 1);
+				strbuf_setlen(&ident->left, idact->id_len + 1);
 				strbuf_addstr(&ident->left, ident->ident);
 			}
 			ident->state = IDENT_DRAINING;
 			continue;
 		}
 
-		if (ident->state < sizeof(head) &&
-		    head[ident->state] == ch) {
+		if ((ident->state == 0 && ch == '$') ||
+		    (ident->state > 0 && ident->state < idact->id_len + 1 &&
+		     idact->id[ident->state - 1] == ch)) {
 			ident->state++;
 			continue;
 		}
 
-		if (ident->state)
-			strbuf_add(&ident->left, head, ident->state);
-		if (ident->state == sizeof(head) - 1) {
+		if (ident->state) {
+			strbuf_addch(&ident->left, '$');
+			if (ident->state > 1)
+				strbuf_add(&ident->left, idact->id, ident->state - 1);
+		}
+		if (ident->state == idact->id_len + 1) {
 			if (ch != ':' && ch != '$') {
 				strbuf_addch(&ident->left, ch);
 				ident->state = 0;
@@ -1935,7 +1960,7 @@ static struct stream_filter_vtbl ident_vtbl = {
 	ident_free_fn,
 };
 
-static struct stream_filter *ident_filter(const struct object_id *oid)
+static struct stream_filter *ident_filter(const struct object_id *oid, const struct ident_action *idact)
 {
 	struct ident_filter *ident = xmalloc(sizeof(*ident));
 
@@ -1944,6 +1969,7 @@ static struct stream_filter *ident_filter(const struct object_id *oid)
 	strbuf_init(&ident->left, 0);
 	ident->filter.vtbl = &ident_vtbl;
 	ident->state = 0;
+	ident->idact = idact;
 	return (struct stream_filter *)ident;
 }
 
@@ -1963,8 +1989,8 @@ struct stream_filter *get_stream_filter_ca(const struct conv_attrs *ca,
 	if (classify_conv_attrs(ca) != CA_CLASS_STREAMABLE)
 		return NULL;
 
-	if (ca->ident)
-		filter = ident_filter(oid);
+	if (ca->ident_action.id)
+		filter = ident_filter(oid, &(ca->ident_action));
 
 	if (output_eol(ca->crlf_action) == EOL_CRLF)
 		filter = cascade_filter(filter, lf_to_crlf_filter());
diff --git a/convert.h b/convert.h
index 5ee1c322058..a02632e8104 100644
--- a/convert.h
+++ b/convert.h
@@ -76,11 +76,16 @@ enum convert_crlf_action {
 
 struct convert_driver;
 
+struct ident_action {
+	const char *id;
+	int id_len;
+};
+
 struct conv_attrs {
 	struct convert_driver *drv;
 	enum convert_crlf_action attr_action; /* What attr says */
 	enum convert_crlf_action crlf_action; /* When no attr is set, use core.autocrlf */
-	int ident;
+	struct ident_action ident_action; /* What ident says */
 	const char *working_tree_encoding; /* Supported encoding or default encoding if NULL */
 };
 
diff --git a/parallel-checkout.c b/parallel-checkout.c
index ddc0ff3c064..f9a3f2ff25b 100644
--- a/parallel-checkout.c
+++ b/parallel-checkout.c
@@ -403,13 +403,15 @@ static void send_one_item(int fd, struct parallel_checkout_item *pc_item)
 	size_t name_len = pc_item->ce->ce_namelen;
 	size_t working_tree_encoding_len = working_tree_encoding ?
 					   strlen(working_tree_encoding) : 0;
+	const char *ident_action_id = pc_item->ca.ident_action.id;
+	size_t ident_action_len = pc_item->ca.ident_action.id_len;
 
 	/*
 	 * Any changes in the calculation of the message size must also be made
 	 * in is_eligible_for_parallel_checkout().
 	 */
 	len_data = sizeof(struct pc_item_fixed_portion) + name_len +
-		   working_tree_encoding_len;
+		   working_tree_encoding_len + ident_action_len;
 
 	data = xmalloc(len_data);
 
@@ -417,7 +419,7 @@ static void send_one_item(int fd, struct parallel_checkout_item *pc_item)
 	fixed_portion->id = pc_item->id;
 	fixed_portion->ce_mode = pc_item->ce->ce_mode;
 	fixed_portion->crlf_action = pc_item->ca.crlf_action;
-	fixed_portion->ident = pc_item->ca.ident;
+	fixed_portion->ident_action_len = ident_action_len;
 	fixed_portion->name_len = name_len;
 	fixed_portion->working_tree_encoding_len = working_tree_encoding_len;
 	/*
@@ -434,6 +436,11 @@ static void send_one_item(int fd, struct parallel_checkout_item *pc_item)
 		variant += working_tree_encoding_len;
 	}
 	memcpy(variant, pc_item->ce->name, name_len);
+	variant += name_len;
+	if (ident_action_len) {
+		memcpy(variant, ident_action_id, ident_action_len);
+		variant += ident_action_len;
+	}
 
 	packet_write(fd, data, len_data);
 
diff --git a/parallel-checkout.h b/parallel-checkout.h
index 80f539bcb77..c3c282f516b 100644
--- a/parallel-checkout.h
+++ b/parallel-checkout.h
@@ -76,9 +76,9 @@ struct parallel_checkout_item {
 
 /*
  * The fixed-size portion of `struct parallel_checkout_item` that is sent to the
- * workers. Following this will be 2 strings: ca.working_tree_encoding and
- * ce.name; These are NOT null terminated, since we have the size in the fixed
- * portion.
+ * workers. Following this will be 3 strings: ca.working_tree_encoding, ca.name
+ * and ca.ident_action.id; These are NOT null terminated, since we have the size
+ * in the fixed portion.
  *
  * Note that not all fields of conv_attrs and cache_entry are passed, only the
  * ones that will be required by the workers to smudge and write the entry.
@@ -88,7 +88,7 @@ struct pc_item_fixed_portion {
 	struct object_id oid;
 	unsigned int ce_mode;
 	enum convert_crlf_action crlf_action;
-	int ident;
+	size_t ident_action_len;
 	size_t working_tree_encoding_len;
 	size_t name_len;
 };
diff --git a/t/t2082-parallel-checkout-attributes.sh b/t/t2082-parallel-checkout-attributes.sh
index 25254579618..822957a8dc8 100755
--- a/t/t2082-parallel-checkout-attributes.sh
+++ b/t/t2082-parallel-checkout-attributes.sh
@@ -20,16 +20,19 @@ test_expect_success 'parallel-checkout with ident' '
 	(
 		cd ident &&
 		echo "A ident" >.gitattributes &&
+		echo "C ident=MyCusomVeryLongAndWordyId" >>.gitattributes &&
 		echo "\$Id\$" >A &&
 		echo "\$Id\$" >B &&
+		echo "\$MyCusomVeryLongAndWordyId\$" >C &&
 		git add -A &&
 		git commit -m id &&
 
-		rm A B &&
+		rm A B C &&
 		test_checkout_workers 2 git reset --hard &&
 		hexsz=$(test_oid hexsz) &&
 		grep -E "\\\$Id: [0-9a-f]{$hexsz} \\\$" A &&
-		grep "\\\$Id\\\$" B
+		grep "\\\$Id\\\$" B &&
+		grep -E "\\\$MyCusomVeryLongAndWordyId: [0-9a-f]{$hexsz} \\\$" C
 	)
 '
 

base-commit: 225bc32a989d7a22fa6addafd4ce7dcd04675dbf
-- 
gitgitgadget

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* Re: [PATCH v2] Make ident dynamic, not just a hardcoded value of "$Id".
  2021-08-26  4:28 ` [PATCH v2] " Maksym Sobolyev via GitGitGadget
@ 2021-08-26 20:37   ` Matheus Tavares
  2021-09-02  0:58     ` Junio C Hamano
  2021-08-27  2:59   ` Junio C Hamano
  2021-09-01  2:13   ` [PATCH v3] " Maksym Sobolyev via GitGitGadget
  2 siblings, 1 reply; 11+ messages in thread
From: Matheus Tavares @ 2021-08-26 20:37 UTC (permalink / raw)
  To: gitgitgadget; +Cc: git, philipoakley, sobomax, sobomax

Hi, Maksym

I haven't read the entire patch (and I don't normally use the ident feature),
but I left a few comments below: 

On Thu, Aug 26, 2021 at 1:28 AM Maksym Sobolyev via GitGitGadget <gitgitgadget@gmail.com> wrote:
>
> diff --git a/Documentation/gitattributes.txt b/Documentation/gitattributes.txt
> index 83fd4e19a4..9e486f3e8d 100644
> --- a/Documentation/gitattributes.txt
> +++ b/Documentation/gitattributes.txt
> @@ -382,6 +382,14 @@ sign `$` upon checkout.  Any byte sequence that begins with
>  `$Id:` and ends with `$` in the worktree file is replaced
>  with `$Id$` upon check-in.
>
> +The `ident` attribute can also provide an optional value,
> +which if supplied is going to be used for expansion instead of
> +the string `Id`.
> +
> +------------------------
> +*.[ch]         ident=FreeBSD
> +------------------------

What happens if there is a ':' or '$' in the custom id name?

$ echo 'f	ident=$weird:$' >.gitattributes
$ echo '$$weird:$$' >f
$ git add .
$ git commit -m files
$ rm f
$ git checkout f
$ cat f

$$weird:$: 907f73b343505bf289a4a25e41ea91d90250fedb $

Seem right. Now let's see the cleaning direction:

$ echo foo >> f
$ git add f
$ git cat-file -p :f

$$weird:$: 907f73b343505bf289a4a25e41ea91d90250fedb $
foo

Hmm, this one is not right. I think I know the cause:

> diff --git a/convert.c b/convert.c
> index 0d6fb3410ae..1e8940bf9d7 100644
> --- a/convert.c
> +++ b/convert.c
> @@ -1122,17 +1127,18 @@ static int ident_to_git(const char *src, size_t len,
>                 len -= dollar + 1 - src;
>                 src  = dollar + 1;
>
> -               if (len > 3 && !memcmp(src, "Id:", 3)) {
> -                       dollar = memchr(src + 3, '$', len - 3);
> +               if (len > idact->id_len + 1 && !memcmp(src, idact->id, idact->id_len) && src[idact->id_len + 1] == ':') {

Shouldn't this be `... && src[idact->id_len] == ':')`? I.e. without the "+ 1".

> diff --git a/convert.h b/convert.h
> index 5ee1c322058..a02632e8104 100644
> --- a/convert.h
> +++ b/convert.h
> @@ -76,11 +76,16 @@ enum convert_crlf_action {
>
>  struct convert_driver;
>
> +struct ident_action {
> +       const char *id;
> +       int id_len;

I known the string size is limited to GIT_MAX_IDENT_LEN, but perhaps it
would be better to future-proof this and use `size_t` (or at least
`unsigned int`).

> diff --git a/parallel-checkout.c b/parallel-checkout.c
> index ddc0ff3c064..f9a3f2ff25b 100644
> --- a/parallel-checkout.c
> +++ b/parallel-checkout.c
> @@ -403,13 +403,15 @@ static void send_one_item(int fd, struct parallel_checkout_item *pc_item)
>         size_t name_len = pc_item->ce->ce_namelen;
>         size_t working_tree_encoding_len = working_tree_encoding ?
>                                            strlen(working_tree_encoding) : 0;
> +       const char *ident_action_id = pc_item->ca.ident_action.id;
> +       size_t ident_action_len = pc_item->ca.ident_action.id_len;
>
>         /*
>          * Any changes in the calculation of the message size must also be made
>          * in is_eligible_for_parallel_checkout().
>          */
>         len_data = sizeof(struct pc_item_fixed_portion) + name_len +
> -                  working_tree_encoding_len;
> +                  working_tree_encoding_len + ident_action_len;

This update in the packet size calculation should also be reflected in
is_eligible_for_parallel_checkout():

	packed_item_size = sizeof(struct pc_item_fixed_portion) + ce->ce_namelen +
		(ca->working_tree_encoding ? strlen(ca->working_tree_encoding) : 0);

> diff --git a/t/t2082-parallel-checkout-attributes.sh b/t/t2082-parallel-checkout-attributes.sh
> index 25254579618..822957a8dc8 100755
> --- a/t/t2082-parallel-checkout-attributes.sh
> +++ b/t/t2082-parallel-checkout-attributes.sh
> @@ -20,16 +20,19 @@ test_expect_success 'parallel-checkout with ident' '
>         (
>                 cd ident &&
>                 echo "A ident" >.gitattributes &&
> +               echo "C ident=MyCusomVeryLongAndWordyId" >>.gitattributes &&
>                 echo "\$Id\$" >A &&
>                 echo "\$Id\$" >B &&
> +               echo "\$MyCusomVeryLongAndWordyId\$" >C &&
>                 git add -A &&
>                 git commit -m id &&
>
> -               rm A B &&
> +               rm A B C &&
>                 test_checkout_workers 2 git reset --hard &&
>                 hexsz=$(test_oid hexsz) &&
>                 grep -E "\\\$Id: [0-9a-f]{$hexsz} \\\$" A &&
> -               grep "\\\$Id\\\$" B
> +               grep "\\\$Id\\\$" B &&
> +               grep -E "\\\$MyCusomVeryLongAndWordyId: [0-9a-f]{$hexsz} \\\$" C
>         )
>  '

The change to this test looks good. But there are other things related
to the ident filter that are not covered here (as the test is mostly
interested in how ident interacts with parallel checkout). For example,
the test doesn't check whether the "cleaning" direction is performed
correctly, it just checks the "smudging" part. (That's why it didn't catch the
error I showed earlier.)

You might want to take a look at the other ident tests in
t0021-conversion.sh and perhaps addapt/copy some of them to ensure that
the expected behavior persists when using a custom id name. For example,
quickly changing the "$Id:" references to "$customId:" and replacing
"ident" with "ident=customId", I seem to get at least one test failure:

--- expected-output     2021-08-26 19:40:07.181596662 +0000
+++ expanded-keywords   2021-08-26 19:40:07.188263463 +0000
@@ -6,5 +6,5 @@
 $customId: bebd07c752ffffd6779e1056db5de66c3bb733ed $
 $customId: bebd07c752ffffd6779e1056db5de66c3bb733ed $
 $customId: NoTerminatingSymbol
-$customId: Foreign Commit With Spaces $
+$customId: bebd07c752ffffd6779e1056db5de66c3bb733ed $
 $customId: NoTerminatingSymbolAtEOF
\ No newline at end of file
error: last command exited with $?=1
not ok 3 - expanded_in_repo

(But I haven't dig further, and this was just a quick test, so it could be my
fault.)

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v2] Make ident dynamic, not just a hardcoded value of "$Id".
  2021-08-26  4:28 ` [PATCH v2] " Maksym Sobolyev via GitGitGadget
  2021-08-26 20:37   ` Matheus Tavares
@ 2021-08-27  2:59   ` Junio C Hamano
       [not found]     ` <CABFYoQC_FzbU_E4hU0kCz-WFJNOLspwL2Gjc01sMXDZosxJWjw@mail.gmail.com>
  2021-09-01  2:13   ` [PATCH v3] " Maksym Sobolyev via GitGitGadget
  2 siblings, 1 reply; 11+ messages in thread
From: Junio C Hamano @ 2021-08-27  2:59 UTC (permalink / raw)
  To: Maksym Sobolyev via GitGitGadget
  Cc: git, Philip Oakley, Maksym Sobolyev, Maksym Sobolyev

"Maksym Sobolyev via GitGitGadget" <gitgitgadget@gmail.com> writes:

> From: Maksym Sobolyev <sobomax@sippysoft.com>
>
> This allows ident to be something like $FreeBSD$ so it provides matching
> functionality for repos migrated from CVS / SVN.
>
> This works by allowing ident to have a parameter, i.e.:
>
> * ident=MyCustomId
>
> In .gitattributes.
>
> Signed-off-by: Maksym Sobolyev <sobomax@sippysoft.com>

Just like cvs uses its configuration mechanism for LocalKeyword,
this belongs to the .git/config file.

The attribute mechanism is inherently for stuff that you would want
to specify per-path, so the "do we do keyword expansion for this
file (yes/no)?" is a good match to be an attribute, and that
question is what the original 'ident' attribute asks.

I do not think "what keyword do we want to expand as if it were
$Id$?" is a question we want to be giving different answers per
path, and it does not smell like a good match to be an attribute.

By allowing a potentially separate values for

    *.txt ident=FreeBSD
    *.cc  ident=OpenBSD

this gives long rope to create a mess to the users.  In addition, a
project can easily attack and break its users by having .gitattributes
with overly long ident defined, as it would cause the attribute code
to trigger the die() call added by this patch.

In short, I do not think the approach taken by this patch to use
per-path attribute is a good idea at all.

I suspect that this patch mimics crlf_action when naming the new
"ident_action" thing, but that is a poor choice.  crlf_action does
specify what different actions to be taken, but the setting does not
change the way 'ident' filter behaves in any fundamental way.  It
just changes the constant string used to locate and replace from
$Id$ to something else.  ident_keyword (as this is "keyword
expansion" behaviour) may be a better name.

There is no inherent reason why the custom ident string must be
shorter than 255, even though it might not be a bad idea to have a
reasonable floor for the length.


^ permalink raw reply	[flat|nested] 11+ messages in thread

* [PATCH v3] Make ident dynamic, not just a hardcoded value of "$Id".
  2021-08-26  4:28 ` [PATCH v2] " Maksym Sobolyev via GitGitGadget
  2021-08-26 20:37   ` Matheus Tavares
  2021-08-27  2:59   ` Junio C Hamano
@ 2021-09-01  2:13   ` Maksym Sobolyev via GitGitGadget
  2021-09-02  3:40     ` Đoàn Trần Công Danh
  2 siblings, 1 reply; 11+ messages in thread
From: Maksym Sobolyev via GitGitGadget @ 2021-09-01  2:13 UTC (permalink / raw)
  To: git; +Cc: Matheus Tavares, Maksym Sobolyev, Maksym Sobolyev

From: Maksym Sobolyev <sobomax@sippysoft.com>

This allows ident to be something like $FreeBSD$ so it provides matching
functionality for repos migrated from CVS / SVN.

This works by allowing ident to have a parameter, i.e.:

* ident=MyCustomId

In .gitattributes.

Extend the ident expansion tests to also verify custom ident.

Signed-off-by: Maksym Sobolyev <sobomax@sippysoft.com>
---
    Make ident dynamic, not just a hardcoded value of "$Id".
    
    This allows ident to be something like $FreeBSD$ so it provides matching
    functionality for repos migrated from CVS / SVN.
    
    This works by allowing ident to have a parameter, i.e.:
    
    * ident=MyCustomId
    
    In .gitattributes.
    
    cc: Philip Oakley philipoakley@iee.email

Published-As: https://github.com/gitgitgadget/git/releases/tag/pr-git-1074%2Fsobomax%2Fpr-custom_ident-v3
Fetch-It-Via: git fetch https://github.com/gitgitgadget/git pr-git-1074/sobomax/pr-custom_ident-v3
Pull-Request: https://github.com/git/git/pull/1074

Range-diff vs v2:

 1:  16713788a26 ! 1:  74a705e5bbe Make ident dynamic, not just a hardcoded value of "$Id".
     @@ Commit message
      
          In .gitattributes.
      
     +    Extend the ident expansion tests to also verify custom ident.
     +
          Signed-off-by: Maksym Sobolyev <sobomax@sippysoft.com>
      
       ## Documentation/gitattributes.txt ##
     @@ convert.c: static int read_convert_config(const char *var, const char *value, vo
       }
       
      -static int count_ident(const char *cp, unsigned long size)
     -+#define ID_STR "Id"
     -+
     -+#define GIT_MAX_IDENT_LEN 255
     ++#define ID_STR_DEFAULT "Id"
      +
      +static int count_ident(const char *cp, unsigned long size,
      +		       const struct ident_action *idact)
     @@ convert.c: static int ident_to_git(const char *src, size_t len,
       
      -		if (len > 3 && !memcmp(src, "Id:", 3)) {
      -			dollar = memchr(src + 3, '$', len - 3);
     -+		if (len > idact->id_len + 1 && !memcmp(src, idact->id, idact->id_len) && src[idact->id_len + 1] == ':') {
     ++		if (len > idact->id_len + 1 && !memcmp(src, idact->id, idact->id_len) && src[idact->id_len] == ':') {
      +			dollar = memchr(src + idact->id_len + 1, '$', len - (idact->id_len + 1));
       			if (!dollar)
       				break;
     @@ convert.c: static struct convert_driver *git_path_check_convert(struct attr_chec
      -	return !!ATTR_TRUE(value);
      +	if (!ATTR_UNSET(value) && !ATTR_FALSE(value)) {
      +		if (ATTR_TRUE(value))
     -+			idact.id = ID_STR;
     ++			idact.id = ID_STR_DEFAULT;
      +		else
      +			idact.id = value;
      +		idact.id_len = strlen(idact.id);
     -+		if (idact.id_len > GIT_MAX_IDENT_LEN)
     -+			die(_("ident value length exceeds GIT_MAX_IDENT_LEN"));
      +	}
      +	return idact;
       }
     @@ convert.c: struct ident_filter {
       	struct stream_filter filter;
       	struct strbuf left;
       	int state;
     --	char ident[GIT_MAX_HEXSZ + 5]; /* ": x40 $" */
      +	const struct ident_action *idact;
     -+	char ident[GIT_MAX_HEXSZ + GIT_MAX_IDENT_LEN + 3]; /* ": x40 $" */
     + 	char ident[GIT_MAX_HEXSZ + 5]; /* ": x40 $" */
       };
       
     - static int is_foreign_ident(const char *str)
     +-static int is_foreign_ident(const char *str)
     ++static int is_foreign_ident(const struct ident_action *idact, const char *str)
     + {
     + 	int i;
     + 
     +-	if (!skip_prefix(str, "$Id: ", &str))
     ++	if (str[0] != '$' || strlen(str) < idact->id_len + 3 ||
     ++	    memcmp(str + 1, idact->id, idact->id_len) != 0 ||
     ++	    !skip_prefix(str + 1 + idact->id_len, ": ", &str))
     + 		return 0;
     + 	for (i = 0; str[i]; i++) {
     + 		if (isspace(str[i]) && str[i+1] != '$')
      @@ convert.c: static int ident_filter_fn(struct stream_filter *filter,
       			   char *output, size_t *osize_p)
       {
     @@ convert.c: static int ident_filter_fn(struct stream_filter *filter,
       		case IDENT_SKIPPING:
       			/* fallthrough */
      @@ convert.c: static int ident_filter_fn(struct stream_filter *filter,
     + 			strbuf_addch(&ident->left, ch);
       			if (ch != '\n' && ch != '$')
       				continue;
     - 			if (ch == '$' && !is_foreign_ident(ident->left.buf)) {
     +-			if (ch == '$' && !is_foreign_ident(ident->left.buf)) {
      -				strbuf_setlen(&ident->left, sizeof(head) - 1);
     ++			if (ch == '$' && !is_foreign_ident(idact, ident->left.buf)) {
      +				strbuf_setlen(&ident->left, idact->id_len + 1);
       				strbuf_addstr(&ident->left, ident->ident);
       			}
     @@ convert.h: enum convert_crlf_action {
       
      +struct ident_action {
      +	const char *id;
     -+	int id_len;
     ++	size_t id_len;
      +};
      +
       struct conv_attrs {
     @@ convert.h: enum convert_crlf_action {
       
      
       ## parallel-checkout.c ##
     +@@ parallel-checkout.c: static int is_eligible_for_parallel_checkout(const struct cache_entry *ce,
     + 		return 0;
     + 
     + 	packed_item_size = sizeof(struct pc_item_fixed_portion) + ce->ce_namelen +
     +-		(ca->working_tree_encoding ? strlen(ca->working_tree_encoding) : 0);
     ++		(ca->working_tree_encoding ? strlen(ca->working_tree_encoding) : 0) +
     ++		ca->ident_action.id_len;
     + 
     + 	/*
     + 	 * The amount of data we send to the workers per checkout item is
      @@ parallel-checkout.c: static void send_one_item(int fd, struct parallel_checkout_item *pc_item)
       	size_t name_len = pc_item->ce->ce_namelen;
       	size_t working_tree_encoding_len = working_tree_encoding ?
     @@ parallel-checkout.h: struct pc_item_fixed_portion {
       	size_t name_len;
       };
      
     + ## t/t0021-conversion.sh ##
     +@@ t/t0021-conversion.sh: test_expect_success setup '
     + 	{
     + 	    echo "*.t filter=rot13"
     + 	    echo "*.i ident"
     ++	    echo "*.ci ident=customId"
     + 	} >.gitattributes &&
     + 
     + 	{
     + 	    echo a b c d e f g h i j k l m
     + 	    echo n o p q r s t u v w x y z
     + 	    echo '\''$Id$'\''
     ++	    echo '\''$customId$'\''
     + 	} >test &&
     + 	cat test >test.t &&
     + 	cat test >test.o &&
     + 	cat test >test.i &&
     +-	git add test test.t test.i &&
     +-	rm -f test test.t test.i &&
     +-	git checkout -- test test.t test.i &&
     ++	cat test >test.ci &&
     ++	git add test test.t test.i test.ci &&
     ++	rm -f test test.t test.i test.ci &&
     ++	git checkout -- test test.t test.i test.ci &&
     + 
     + 	echo "content-test2" >test2.o &&
     + 	echo "content-test3 - filename with special characters" >"test3 '\''sq'\'',\$x=.o"
     + '
     + 
     +-script='s/^\$Id: \([0-9a-f]*\) \$/\1/p'
     ++script_i='s/^\$Id: \([0-9a-f]*\) \$/\1/p'
     ++script_ci='s/^\$customId: \([0-9a-f]*\) \$/\1/p'
     + 
     + test_expect_success check '
     + 
     +@@ t/t0021-conversion.sh: test_expect_success check '
     + 	# ident should be stripped in the repository
     + 	git diff --raw --exit-code :test :test.i &&
     + 	id=$(git rev-parse --verify :test) &&
     +-	embedded=$(sed -ne "$script" test.i) &&
     ++	embedded=$(sed -ne "$script_i" test.i) &&
     ++	nembedded=$(sed -ne "$script_ci" test.i) &&
     + 	test "z$id" = "z$embedded" &&
     ++	test "z" = "z$nembedded" &&
     ++	embedded=$(sed -ne "$script_ci" test.ci) &&
     ++	nembedded=$(sed -ne "$script_i" test.ci) &&
     ++	test "z$id" = "z$embedded" &&
     ++	test "z" = "z$nembedded" &&
     + 
     + 	git cat-file blob :test.t >test.r &&
     + 
     +@@ t/t0021-conversion.sh: test_expect_success check '
     + 	test_cmp test.r test.t
     + '
     + 
     ++gen_expanded_keywords() {
     ++	local id="${1}"
     ++	echo "File with expanded keywords"
     ++	echo "\$$id\$"
     ++	echo "\$$id:\$"
     ++	echo "\$$id: 0000000000000000000000000000000000000000 \$"
     ++	echo "\$$id: NoSpaceAtEnd\$"
     ++	echo "\$$id:NoSpaceAtFront \$"
     ++	echo "\$$id:NoSpaceAtEitherEnd\$"
     ++	echo "\$$id: NoTerminatingSymbol"
     ++	echo "\$$id: Foreign Commit With Spaces \$"
     ++	printf "\$$id: NoTerminatingSymbolAtEOF"
     ++}
     ++
     ++gen_expected_output_0() {
     ++	local id="${1}"
     ++	local hid="${2}"
     ++	echo "File with expanded keywords"
     ++	echo "\$$id: $hid \$"
     ++	echo "\$$id: $hid \$"
     ++	echo "\$$id: $hid \$"
     ++	echo "\$$id: $hid \$"
     ++	echo "\$$id: $hid \$"
     ++	echo "\$$id: $hid \$"
     ++	echo "\$$id: NoTerminatingSymbol"
     ++	echo "\$$id: Foreign Commit With Spaces \$"
     ++}
     ++
     ++gen_expected_output() {
     ++	local id="${1}"
     ++	gen_expected_output_0 "${@}"
     ++	printf "\$$id: NoTerminatingSymbolAtEOF"
     ++}
     ++
     ++gen_expected_output_crlf() {
     ++	local id="${1}"
     ++	gen_expected_output_0 "${@}" | append_cr
     ++	printf "\$$id: NoTerminatingSymbolAtEOF"
     ++}
     ++
     + # If an expanded ident ever gets into the repository, we want to make sure that
     + # it is collapsed before being expanded again on checkout
     + test_expect_success expanded_in_repo '
     +-	{
     +-		echo "File with expanded keywords"
     +-		echo "\$Id\$"
     +-		echo "\$Id:\$"
     +-		echo "\$Id: 0000000000000000000000000000000000000000 \$"
     +-		echo "\$Id: NoSpaceAtEnd\$"
     +-		echo "\$Id:NoSpaceAtFront \$"
     +-		echo "\$Id:NoSpaceAtEitherEnd\$"
     +-		echo "\$Id: NoTerminatingSymbol"
     +-		echo "\$Id: Foreign Commit With Spaces \$"
     +-	} >expanded-keywords.0 &&
     +-
     +-	{
     +-		cat expanded-keywords.0 &&
     +-		printf "\$Id: NoTerminatingSymbolAtEOF"
     +-	} >expanded-keywords &&
     ++	gen_expanded_keywords Id >expanded-keywords &&
     ++	gen_expanded_keywords customId >expanded-keywords_ci &&
     + 	cat expanded-keywords >expanded-keywords-crlf &&
     ++	cat expanded-keywords_ci >expanded-keywords-crlf_ci &&
     + 	git add expanded-keywords expanded-keywords-crlf &&
     ++	git add expanded-keywords_ci expanded-keywords-crlf_ci &&
     + 	git commit -m "File with keywords expanded" &&
     + 	id=$(git rev-parse --verify :expanded-keywords) &&
     ++	id_ci=$(git rev-parse --verify :expanded-keywords_ci) &&
     + 
     +-	{
     +-		echo "File with expanded keywords"
     +-		echo "\$Id: $id \$"
     +-		echo "\$Id: $id \$"
     +-		echo "\$Id: $id \$"
     +-		echo "\$Id: $id \$"
     +-		echo "\$Id: $id \$"
     +-		echo "\$Id: $id \$"
     +-		echo "\$Id: NoTerminatingSymbol"
     +-		echo "\$Id: Foreign Commit With Spaces \$"
     +-	} >expected-output.0 &&
     +-	{
     +-		cat expected-output.0 &&
     +-		printf "\$Id: NoTerminatingSymbolAtEOF"
     +-	} >expected-output &&
     +-	{
     +-		append_cr <expected-output.0 &&
     +-		printf "\$Id: NoTerminatingSymbolAtEOF"
     +-	} >expected-output-crlf &&
     ++	gen_expected_output Id $id >expected-output &&
     ++	gen_expected_output customId $id_ci >expected-output_ci &&
     ++	gen_expected_output_crlf Id $id >expected-output-crlf &&
     ++	gen_expected_output_crlf customId $id_ci >expected-output-crlf_ci &&
     + 	{
     + 		echo "expanded-keywords ident"
     ++		echo "expanded-keywords_ci ident=customId"
     + 		echo "expanded-keywords-crlf ident text eol=crlf"
     ++		echo "expanded-keywords-crlf_ci ident=customId text eol=crlf"
     + 	} >>.gitattributes &&
     + 
     + 	rm -f expanded-keywords expanded-keywords-crlf &&
     ++	rm -f expanded-keywords_ci expanded-keywords-crlf_ci &&
     + 
     + 	git checkout -- expanded-keywords &&
     + 	test_cmp expected-output expanded-keywords &&
     + 
     + 	git checkout -- expanded-keywords-crlf &&
     +-	test_cmp expected-output-crlf expanded-keywords-crlf
     ++	test_cmp expected-output-crlf expanded-keywords-crlf &&
     ++
     ++	git checkout -- expanded-keywords_ci &&
     ++	test_cmp expected-output_ci expanded-keywords_ci &&
     ++
     ++	git checkout -- expanded-keywords-crlf_ci &&
     ++	test_cmp expected-output-crlf_ci expanded-keywords-crlf_ci
     + '
     + 
     + # The use of %f in a filter definition is expanded to the path to
     +
       ## t/t2082-parallel-checkout-attributes.sh ##
      @@ t/t2082-parallel-checkout-attributes.sh: test_expect_success 'parallel-checkout with ident' '
       	(


 Documentation/gitattributes.txt         |   8 ++
 builtin/checkout--worker.c              |  17 +++-
 convert.c                               | 116 ++++++++++++++----------
 convert.h                               |   7 +-
 parallel-checkout.c                     |  14 ++-
 parallel-checkout.h                     |   8 +-
 t/t0021-conversion.sh                   | 115 ++++++++++++++---------
 t/t2082-parallel-checkout-attributes.sh |   7 +-
 8 files changed, 192 insertions(+), 100 deletions(-)

diff --git a/Documentation/gitattributes.txt b/Documentation/gitattributes.txt
index 83fd4e19a41..9e486f3e8d3 100644
--- a/Documentation/gitattributes.txt
+++ b/Documentation/gitattributes.txt
@@ -382,6 +382,14 @@ sign `$` upon checkout.  Any byte sequence that begins with
 `$Id:` and ends with `$` in the worktree file is replaced
 with `$Id$` upon check-in.
 
+The `ident` attribute can also provide an optional value,
+which if supplied is going to be used for expansion instead of
+the string `Id`.
+
+------------------------
+*.[ch]		ident=FreeBSD
+------------------------
+
 
 `filter`
 ^^^^^^^^
diff --git a/builtin/checkout--worker.c b/builtin/checkout--worker.c
index fb9fd13b73c..61ba39402ad 100644
--- a/builtin/checkout--worker.c
+++ b/builtin/checkout--worker.c
@@ -9,7 +9,7 @@ static void packet_to_pc_item(const char *buffer, int len,
 			      struct parallel_checkout_item *pc_item)
 {
 	const struct pc_item_fixed_portion *fixed_portion;
-	const char *variant;
+	const char *variant, *ident_action;
 	char *encoding;
 
 	if (len < sizeof(struct pc_item_fixed_portion))
@@ -19,7 +19,8 @@ static void packet_to_pc_item(const char *buffer, int len,
 	fixed_portion = (struct pc_item_fixed_portion *)buffer;
 
 	if (len - sizeof(struct pc_item_fixed_portion) !=
-		fixed_portion->name_len + fixed_portion->working_tree_encoding_len)
+		fixed_portion->name_len + fixed_portion->working_tree_encoding_len +
+		fixed_portion->ident_action_len)
 		BUG("checkout worker received corrupted item");
 
 	variant = buffer + sizeof(struct pc_item_fixed_portion);
@@ -43,11 +44,21 @@ static void packet_to_pc_item(const char *buffer, int len,
 	pc_item->ce->ce_namelen = fixed_portion->name_len;
 	pc_item->ce->ce_mode = fixed_portion->ce_mode;
 	memcpy(pc_item->ce->name, variant, pc_item->ce->ce_namelen);
+	variant += pc_item->ce->ce_namelen;
 	oidcpy(&pc_item->ce->oid, &fixed_portion->oid);
 
+	if (fixed_portion->ident_action_len) {
+		ident_action = xmemdupz(variant,
+					fixed_portion->ident_action_len);
+		variant += fixed_portion->ident_action_len;
+	} else {
+		ident_action = NULL;
+	}
+
 	pc_item->id = fixed_portion->id;
 	pc_item->ca.crlf_action = fixed_portion->crlf_action;
-	pc_item->ca.ident = fixed_portion->ident;
+	pc_item->ca.ident_action.id = ident_action;
+	pc_item->ca.ident_action.id_len = fixed_portion->ident_action_len;
 	pc_item->ca.working_tree_encoding = encoding;
 }
 
diff --git a/convert.c b/convert.c
index 0d6fb3410ae..111bfeeaf36 100644
--- a/convert.c
+++ b/convert.c
@@ -1056,7 +1056,10 @@ static int read_convert_config(const char *var, const char *value, void *cb)
 	return 0;
 }
 
-static int count_ident(const char *cp, unsigned long size)
+#define ID_STR_DEFAULT "Id"
+
+static int count_ident(const char *cp, unsigned long size,
+		       const struct ident_action *idact)
 {
 	/*
 	 * "$Id: 0000000000000000000000000000000000000000 $" <=> "$Id$"
@@ -1069,13 +1072,13 @@ static int count_ident(const char *cp, unsigned long size)
 		size--;
 		if (ch != '$')
 			continue;
-		if (size < 3)
+		if (size < idact->id_len + 1)
 			break;
-		if (memcmp("Id", cp, 2))
+		if (memcmp(idact->id, cp, idact->id_len))
 			continue;
-		ch = cp[2];
-		cp += 3;
-		size -= 3;
+		ch = cp[idact->id_len];
+		cp += idact->id_len + 1;
+		size -= idact->id_len + 1;
 		if (ch == '$')
 			cnt++; /* $Id$ */
 		if (ch != ':')
@@ -1099,11 +1102,11 @@ static int count_ident(const char *cp, unsigned long size)
 }
 
 static int ident_to_git(const char *src, size_t len,
-			struct strbuf *buf, int ident)
+			struct strbuf *buf, const struct ident_action *idact)
 {
 	char *dst, *dollar;
 
-	if (!ident || (src && !count_ident(src, len)))
+	if (!idact->id || (src && !count_ident(src, len, idact)))
 		return 0;
 
 	if (!buf)
@@ -1122,17 +1125,18 @@ static int ident_to_git(const char *src, size_t len,
 		len -= dollar + 1 - src;
 		src  = dollar + 1;
 
-		if (len > 3 && !memcmp(src, "Id:", 3)) {
-			dollar = memchr(src + 3, '$', len - 3);
+		if (len > idact->id_len + 1 && !memcmp(src, idact->id, idact->id_len) && src[idact->id_len] == ':') {
+			dollar = memchr(src + idact->id_len + 1, '$', len - (idact->id_len + 1));
 			if (!dollar)
 				break;
-			if (memchr(src + 3, '\n', dollar - src - 3)) {
+			if (memchr(src + idact->id_len + 1, '\n', dollar - src - (idact->id_len + 1))) {
 				/* Line break before the next dollar. */
 				continue;
 			}
 
-			memcpy(dst, "Id$", 3);
-			dst += 3;
+			memcpy(dst, idact->id, idact->id_len);
+			dst[idact->id_len] = '$';
+			dst += idact->id_len + 1;
 			len -= dollar + 1 - src;
 			src  = dollar + 1;
 		}
@@ -1143,16 +1147,16 @@ static int ident_to_git(const char *src, size_t len,
 }
 
 static int ident_to_worktree(const char *src, size_t len,
-			     struct strbuf *buf, int ident)
+			     struct strbuf *buf, const struct ident_action *idact)
 {
 	struct object_id oid;
 	char *to_free = NULL, *dollar, *spc;
 	int cnt;
 
-	if (!ident)
+	if (!idact->id)
 		return 0;
 
-	cnt = count_ident(src, len);
+	cnt = count_ident(src, len, idact);
 	if (!cnt)
 		return 0;
 
@@ -1161,7 +1165,7 @@ static int ident_to_worktree(const char *src, size_t len,
 		to_free = strbuf_detach(buf, NULL);
 	hash_object_file(the_hash_algo, src, len, "blob", &oid);
 
-	strbuf_grow(buf, len + cnt * (the_hash_algo->hexsz + 3));
+	strbuf_grow(buf, len + cnt * (the_hash_algo->hexsz + idact->id_len + 1));
 	for (;;) {
 		/* step 1: run to the next '$' */
 		dollar = memchr(src, '$', len);
@@ -1172,14 +1176,14 @@ static int ident_to_worktree(const char *src, size_t len,
 		src  = dollar + 1;
 
 		/* step 2: does it looks like a bit like Id:xxx$ or Id$ ? */
-		if (len < 3 || memcmp("Id", src, 2))
+		if (len < idact->id_len + 1 || memcmp(idact->id, src, idact->id_len))
 			continue;
 
 		/* step 3: skip over Id$ or Id:xxxxx$ */
-		if (src[2] == '$') {
-			src += 3;
-			len -= 3;
-		} else if (src[2] == ':') {
+		if (src[idact->id_len] == '$') {
+			src += idact->id_len + 1;
+			len -= idact->id_len + 1;
+		} else if (src[idact->id_len] == ':') {
 			/*
 			 * It's possible that an expanded Id has crept its way into the
 			 * repository, we cope with that by stripping the expansion out.
@@ -1187,18 +1191,18 @@ static int ident_to_worktree(const char *src, size_t len,
 			 * on checkout, which won't go away by stash, but let's keep it
 			 * for git-style ids.
 			 */
-			dollar = memchr(src + 3, '$', len - 3);
+			dollar = memchr(src + idact->id_len + 1, '$', len - (idact->id_len + 1));
 			if (!dollar) {
 				/* incomplete keyword, no more '$', so just quit the loop */
 				break;
 			}
 
-			if (memchr(src + 3, '\n', dollar - src - 3)) {
+			if (memchr(src + idact->id_len + 1, '\n', dollar - src - (idact->id_len + 1))) {
 				/* Line break before the next dollar. */
 				continue;
 			}
 
-			spc = memchr(src + 4, ' ', dollar - src - 4);
+			spc = memchr(src + idact->id_len + 2, ' ', dollar - src - (idact->id_len + 2));
 			if (spc && spc < dollar-1) {
 				/* There are spaces in unexpected places.
 				 * This is probably an id from some other
@@ -1215,7 +1219,8 @@ static int ident_to_worktree(const char *src, size_t len,
 		}
 
 		/* step 4: substitute */
-		strbuf_addstr(buf, "Id: ");
+		strbuf_addstr(buf, idact->id);
+		strbuf_addstr(buf, ": ");
 		strbuf_addstr(buf, oid_to_hex(&oid));
 		strbuf_addstr(buf, " $");
 	}
@@ -1286,11 +1291,19 @@ static struct convert_driver *git_path_check_convert(struct attr_check_item *che
 	return NULL;
 }
 
-static int git_path_check_ident(struct attr_check_item *check)
+static struct ident_action git_path_check_ident(struct attr_check_item *check)
 {
+	struct ident_action idact = {.id = NULL, .id_len = 0};
 	const char *value = check->value;
 
-	return !!ATTR_TRUE(value);
+	if (!ATTR_UNSET(value) && !ATTR_FALSE(value)) {
+		if (ATTR_TRUE(value))
+			idact.id = ID_STR_DEFAULT;
+		else
+			idact.id = value;
+		idact.id_len = strlen(idact.id);
+	}
+	return idact;
 }
 
 static struct attr_check *check;
@@ -1313,7 +1326,7 @@ void convert_attrs(struct index_state *istate,
 	ca->crlf_action = git_path_check_crlf(ccheck + 4);
 	if (ca->crlf_action == CRLF_UNDEFINED)
 		ca->crlf_action = git_path_check_crlf(ccheck + 0);
-	ca->ident = git_path_check_ident(ccheck + 1);
+	ca->ident_action = git_path_check_ident(ccheck + 1);
 	ca->drv = git_path_check_convert(ccheck + 2);
 	if (ca->crlf_action != CRLF_BINARY) {
 		enum eol eol_attr = git_path_check_eol(ccheck + 3);
@@ -1433,7 +1446,7 @@ int convert_to_git(struct index_state *istate,
 			len = dst->len;
 		}
 	}
-	return ret | ident_to_git(src, len, dst, ca.ident);
+	return ret | ident_to_git(src, len, dst, &ca.ident_action);
 }
 
 void convert_to_git_filter_fd(struct index_state *istate,
@@ -1450,7 +1463,7 @@ void convert_to_git_filter_fd(struct index_state *istate,
 
 	encode_to_git(path, dst->buf, dst->len, dst, ca.working_tree_encoding, conv_flags);
 	crlf_to_git(istate, path, dst->buf, dst->len, dst, ca.crlf_action, conv_flags);
-	ident_to_git(dst->buf, dst->len, dst, ca.ident);
+	ident_to_git(dst->buf, dst->len, dst, &ca.ident_action);
 }
 
 static int convert_to_working_tree_ca_internal(const struct conv_attrs *ca,
@@ -1462,7 +1475,7 @@ static int convert_to_working_tree_ca_internal(const struct conv_attrs *ca,
 {
 	int ret = 0, ret_filter = 0;
 
-	ret |= ident_to_worktree(src, len, dst, ca->ident);
+	ret |= ident_to_worktree(src, len, dst, &(ca->ident_action));
 	if (ret) {
 		src = dst->buf;
 		len = dst->len;
@@ -1810,14 +1823,17 @@ struct ident_filter {
 	struct stream_filter filter;
 	struct strbuf left;
 	int state;
+	const struct ident_action *idact;
 	char ident[GIT_MAX_HEXSZ + 5]; /* ": x40 $" */
 };
 
-static int is_foreign_ident(const char *str)
+static int is_foreign_ident(const struct ident_action *idact, const char *str)
 {
 	int i;
 
-	if (!skip_prefix(str, "$Id: ", &str))
+	if (str[0] != '$' || strlen(str) < idact->id_len + 3 ||
+	    memcmp(str + 1, idact->id, idact->id_len) != 0 ||
+	    !skip_prefix(str + 1 + idact->id_len, ": ", &str))
 		return 0;
 	for (i = 0; str[i]; i++) {
 		if (isspace(str[i]) && str[i+1] != '$')
@@ -1847,13 +1863,16 @@ static int ident_filter_fn(struct stream_filter *filter,
 			   char *output, size_t *osize_p)
 {
 	struct ident_filter *ident = (struct ident_filter *)filter;
-	static const char head[] = "$Id";
+	const struct ident_action *idact = ident->idact;
 
 	if (!input) {
 		/* drain upon eof */
 		switch (ident->state) {
 		default:
-			strbuf_add(&ident->left, head, ident->state);
+			if (ident->state > 0)
+				strbuf_addch(&ident->left, '$');
+			if (ident->state > 1)
+				strbuf_add(&ident->left, idact->id, ident->state - 1);
 			/* fallthrough */
 		case IDENT_SKIPPING:
 			/* fallthrough */
@@ -1884,23 +1903,27 @@ static int ident_filter_fn(struct stream_filter *filter,
 			strbuf_addch(&ident->left, ch);
 			if (ch != '\n' && ch != '$')
 				continue;
-			if (ch == '$' && !is_foreign_ident(ident->left.buf)) {
-				strbuf_setlen(&ident->left, sizeof(head) - 1);
+			if (ch == '$' && !is_foreign_ident(idact, ident->left.buf)) {
+				strbuf_setlen(&ident->left, idact->id_len + 1);
 				strbuf_addstr(&ident->left, ident->ident);
 			}
 			ident->state = IDENT_DRAINING;
 			continue;
 		}
 
-		if (ident->state < sizeof(head) &&
-		    head[ident->state] == ch) {
+		if ((ident->state == 0 && ch == '$') ||
+		    (ident->state > 0 && ident->state < idact->id_len + 1 &&
+		     idact->id[ident->state - 1] == ch)) {
 			ident->state++;
 			continue;
 		}
 
-		if (ident->state)
-			strbuf_add(&ident->left, head, ident->state);
-		if (ident->state == sizeof(head) - 1) {
+		if (ident->state) {
+			strbuf_addch(&ident->left, '$');
+			if (ident->state > 1)
+				strbuf_add(&ident->left, idact->id, ident->state - 1);
+		}
+		if (ident->state == idact->id_len + 1) {
 			if (ch != ':' && ch != '$') {
 				strbuf_addch(&ident->left, ch);
 				ident->state = 0;
@@ -1935,7 +1958,7 @@ static struct stream_filter_vtbl ident_vtbl = {
 	ident_free_fn,
 };
 
-static struct stream_filter *ident_filter(const struct object_id *oid)
+static struct stream_filter *ident_filter(const struct object_id *oid, const struct ident_action *idact)
 {
 	struct ident_filter *ident = xmalloc(sizeof(*ident));
 
@@ -1944,6 +1967,7 @@ static struct stream_filter *ident_filter(const struct object_id *oid)
 	strbuf_init(&ident->left, 0);
 	ident->filter.vtbl = &ident_vtbl;
 	ident->state = 0;
+	ident->idact = idact;
 	return (struct stream_filter *)ident;
 }
 
@@ -1963,8 +1987,8 @@ struct stream_filter *get_stream_filter_ca(const struct conv_attrs *ca,
 	if (classify_conv_attrs(ca) != CA_CLASS_STREAMABLE)
 		return NULL;
 
-	if (ca->ident)
-		filter = ident_filter(oid);
+	if (ca->ident_action.id)
+		filter = ident_filter(oid, &(ca->ident_action));
 
 	if (output_eol(ca->crlf_action) == EOL_CRLF)
 		filter = cascade_filter(filter, lf_to_crlf_filter());
diff --git a/convert.h b/convert.h
index 5ee1c322058..2422e289784 100644
--- a/convert.h
+++ b/convert.h
@@ -76,11 +76,16 @@ enum convert_crlf_action {
 
 struct convert_driver;
 
+struct ident_action {
+	const char *id;
+	size_t id_len;
+};
+
 struct conv_attrs {
 	struct convert_driver *drv;
 	enum convert_crlf_action attr_action; /* What attr says */
 	enum convert_crlf_action crlf_action; /* When no attr is set, use core.autocrlf */
-	int ident;
+	struct ident_action ident_action; /* What ident says */
 	const char *working_tree_encoding; /* Supported encoding or default encoding if NULL */
 };
 
diff --git a/parallel-checkout.c b/parallel-checkout.c
index ddc0ff3c064..b5908c299af 100644
--- a/parallel-checkout.c
+++ b/parallel-checkout.c
@@ -91,7 +91,8 @@ static int is_eligible_for_parallel_checkout(const struct cache_entry *ce,
 		return 0;
 
 	packed_item_size = sizeof(struct pc_item_fixed_portion) + ce->ce_namelen +
-		(ca->working_tree_encoding ? strlen(ca->working_tree_encoding) : 0);
+		(ca->working_tree_encoding ? strlen(ca->working_tree_encoding) : 0) +
+		ca->ident_action.id_len;
 
 	/*
 	 * The amount of data we send to the workers per checkout item is
@@ -403,13 +404,15 @@ static void send_one_item(int fd, struct parallel_checkout_item *pc_item)
 	size_t name_len = pc_item->ce->ce_namelen;
 	size_t working_tree_encoding_len = working_tree_encoding ?
 					   strlen(working_tree_encoding) : 0;
+	const char *ident_action_id = pc_item->ca.ident_action.id;
+	size_t ident_action_len = pc_item->ca.ident_action.id_len;
 
 	/*
 	 * Any changes in the calculation of the message size must also be made
 	 * in is_eligible_for_parallel_checkout().
 	 */
 	len_data = sizeof(struct pc_item_fixed_portion) + name_len +
-		   working_tree_encoding_len;
+		   working_tree_encoding_len + ident_action_len;
 
 	data = xmalloc(len_data);
 
@@ -417,7 +420,7 @@ static void send_one_item(int fd, struct parallel_checkout_item *pc_item)
 	fixed_portion->id = pc_item->id;
 	fixed_portion->ce_mode = pc_item->ce->ce_mode;
 	fixed_portion->crlf_action = pc_item->ca.crlf_action;
-	fixed_portion->ident = pc_item->ca.ident;
+	fixed_portion->ident_action_len = ident_action_len;
 	fixed_portion->name_len = name_len;
 	fixed_portion->working_tree_encoding_len = working_tree_encoding_len;
 	/*
@@ -434,6 +437,11 @@ static void send_one_item(int fd, struct parallel_checkout_item *pc_item)
 		variant += working_tree_encoding_len;
 	}
 	memcpy(variant, pc_item->ce->name, name_len);
+	variant += name_len;
+	if (ident_action_len) {
+		memcpy(variant, ident_action_id, ident_action_len);
+		variant += ident_action_len;
+	}
 
 	packet_write(fd, data, len_data);
 
diff --git a/parallel-checkout.h b/parallel-checkout.h
index 80f539bcb77..c3c282f516b 100644
--- a/parallel-checkout.h
+++ b/parallel-checkout.h
@@ -76,9 +76,9 @@ struct parallel_checkout_item {
 
 /*
  * The fixed-size portion of `struct parallel_checkout_item` that is sent to the
- * workers. Following this will be 2 strings: ca.working_tree_encoding and
- * ce.name; These are NOT null terminated, since we have the size in the fixed
- * portion.
+ * workers. Following this will be 3 strings: ca.working_tree_encoding, ca.name
+ * and ca.ident_action.id; These are NOT null terminated, since we have the size
+ * in the fixed portion.
  *
  * Note that not all fields of conv_attrs and cache_entry are passed, only the
  * ones that will be required by the workers to smudge and write the entry.
@@ -88,7 +88,7 @@ struct pc_item_fixed_portion {
 	struct object_id oid;
 	unsigned int ce_mode;
 	enum convert_crlf_action crlf_action;
-	int ident;
+	size_t ident_action_len;
 	size_t working_tree_encoding_len;
 	size_t name_len;
 };
diff --git a/t/t0021-conversion.sh b/t/t0021-conversion.sh
index b5749f327dd..07df50a7e5d 100755
--- a/t/t0021-conversion.sh
+++ b/t/t0021-conversion.sh
@@ -77,25 +77,29 @@ test_expect_success setup '
 	{
 	    echo "*.t filter=rot13"
 	    echo "*.i ident"
+	    echo "*.ci ident=customId"
 	} >.gitattributes &&
 
 	{
 	    echo a b c d e f g h i j k l m
 	    echo n o p q r s t u v w x y z
 	    echo '\''$Id$'\''
+	    echo '\''$customId$'\''
 	} >test &&
 	cat test >test.t &&
 	cat test >test.o &&
 	cat test >test.i &&
-	git add test test.t test.i &&
-	rm -f test test.t test.i &&
-	git checkout -- test test.t test.i &&
+	cat test >test.ci &&
+	git add test test.t test.i test.ci &&
+	rm -f test test.t test.i test.ci &&
+	git checkout -- test test.t test.i test.ci &&
 
 	echo "content-test2" >test2.o &&
 	echo "content-test3 - filename with special characters" >"test3 '\''sq'\'',\$x=.o"
 '
 
-script='s/^\$Id: \([0-9a-f]*\) \$/\1/p'
+script_i='s/^\$Id: \([0-9a-f]*\) \$/\1/p'
+script_ci='s/^\$customId: \([0-9a-f]*\) \$/\1/p'
 
 test_expect_success check '
 
@@ -105,8 +109,14 @@ test_expect_success check '
 	# ident should be stripped in the repository
 	git diff --raw --exit-code :test :test.i &&
 	id=$(git rev-parse --verify :test) &&
-	embedded=$(sed -ne "$script" test.i) &&
+	embedded=$(sed -ne "$script_i" test.i) &&
+	nembedded=$(sed -ne "$script_ci" test.i) &&
 	test "z$id" = "z$embedded" &&
+	test "z" = "z$nembedded" &&
+	embedded=$(sed -ne "$script_ci" test.ci) &&
+	nembedded=$(sed -ne "$script_i" test.ci) &&
+	test "z$id" = "z$embedded" &&
+	test "z" = "z$nembedded" &&
 
 	git cat-file blob :test.t >test.r &&
 
@@ -114,61 +124,84 @@ test_expect_success check '
 	test_cmp test.r test.t
 '
 
+gen_expanded_keywords() {
+	local id="${1}"
+	echo "File with expanded keywords"
+	echo "\$$id\$"
+	echo "\$$id:\$"
+	echo "\$$id: 0000000000000000000000000000000000000000 \$"
+	echo "\$$id: NoSpaceAtEnd\$"
+	echo "\$$id:NoSpaceAtFront \$"
+	echo "\$$id:NoSpaceAtEitherEnd\$"
+	echo "\$$id: NoTerminatingSymbol"
+	echo "\$$id: Foreign Commit With Spaces \$"
+	printf "\$$id: NoTerminatingSymbolAtEOF"
+}
+
+gen_expected_output_0() {
+	local id="${1}"
+	local hid="${2}"
+	echo "File with expanded keywords"
+	echo "\$$id: $hid \$"
+	echo "\$$id: $hid \$"
+	echo "\$$id: $hid \$"
+	echo "\$$id: $hid \$"
+	echo "\$$id: $hid \$"
+	echo "\$$id: $hid \$"
+	echo "\$$id: NoTerminatingSymbol"
+	echo "\$$id: Foreign Commit With Spaces \$"
+}
+
+gen_expected_output() {
+	local id="${1}"
+	gen_expected_output_0 "${@}"
+	printf "\$$id: NoTerminatingSymbolAtEOF"
+}
+
+gen_expected_output_crlf() {
+	local id="${1}"
+	gen_expected_output_0 "${@}" | append_cr
+	printf "\$$id: NoTerminatingSymbolAtEOF"
+}
+
 # If an expanded ident ever gets into the repository, we want to make sure that
 # it is collapsed before being expanded again on checkout
 test_expect_success expanded_in_repo '
-	{
-		echo "File with expanded keywords"
-		echo "\$Id\$"
-		echo "\$Id:\$"
-		echo "\$Id: 0000000000000000000000000000000000000000 \$"
-		echo "\$Id: NoSpaceAtEnd\$"
-		echo "\$Id:NoSpaceAtFront \$"
-		echo "\$Id:NoSpaceAtEitherEnd\$"
-		echo "\$Id: NoTerminatingSymbol"
-		echo "\$Id: Foreign Commit With Spaces \$"
-	} >expanded-keywords.0 &&
-
-	{
-		cat expanded-keywords.0 &&
-		printf "\$Id: NoTerminatingSymbolAtEOF"
-	} >expanded-keywords &&
+	gen_expanded_keywords Id >expanded-keywords &&
+	gen_expanded_keywords customId >expanded-keywords_ci &&
 	cat expanded-keywords >expanded-keywords-crlf &&
+	cat expanded-keywords_ci >expanded-keywords-crlf_ci &&
 	git add expanded-keywords expanded-keywords-crlf &&
+	git add expanded-keywords_ci expanded-keywords-crlf_ci &&
 	git commit -m "File with keywords expanded" &&
 	id=$(git rev-parse --verify :expanded-keywords) &&
+	id_ci=$(git rev-parse --verify :expanded-keywords_ci) &&
 
-	{
-		echo "File with expanded keywords"
-		echo "\$Id: $id \$"
-		echo "\$Id: $id \$"
-		echo "\$Id: $id \$"
-		echo "\$Id: $id \$"
-		echo "\$Id: $id \$"
-		echo "\$Id: $id \$"
-		echo "\$Id: NoTerminatingSymbol"
-		echo "\$Id: Foreign Commit With Spaces \$"
-	} >expected-output.0 &&
-	{
-		cat expected-output.0 &&
-		printf "\$Id: NoTerminatingSymbolAtEOF"
-	} >expected-output &&
-	{
-		append_cr <expected-output.0 &&
-		printf "\$Id: NoTerminatingSymbolAtEOF"
-	} >expected-output-crlf &&
+	gen_expected_output Id $id >expected-output &&
+	gen_expected_output customId $id_ci >expected-output_ci &&
+	gen_expected_output_crlf Id $id >expected-output-crlf &&
+	gen_expected_output_crlf customId $id_ci >expected-output-crlf_ci &&
 	{
 		echo "expanded-keywords ident"
+		echo "expanded-keywords_ci ident=customId"
 		echo "expanded-keywords-crlf ident text eol=crlf"
+		echo "expanded-keywords-crlf_ci ident=customId text eol=crlf"
 	} >>.gitattributes &&
 
 	rm -f expanded-keywords expanded-keywords-crlf &&
+	rm -f expanded-keywords_ci expanded-keywords-crlf_ci &&
 
 	git checkout -- expanded-keywords &&
 	test_cmp expected-output expanded-keywords &&
 
 	git checkout -- expanded-keywords-crlf &&
-	test_cmp expected-output-crlf expanded-keywords-crlf
+	test_cmp expected-output-crlf expanded-keywords-crlf &&
+
+	git checkout -- expanded-keywords_ci &&
+	test_cmp expected-output_ci expanded-keywords_ci &&
+
+	git checkout -- expanded-keywords-crlf_ci &&
+	test_cmp expected-output-crlf_ci expanded-keywords-crlf_ci
 '
 
 # The use of %f in a filter definition is expanded to the path to
diff --git a/t/t2082-parallel-checkout-attributes.sh b/t/t2082-parallel-checkout-attributes.sh
index 25254579618..822957a8dc8 100755
--- a/t/t2082-parallel-checkout-attributes.sh
+++ b/t/t2082-parallel-checkout-attributes.sh
@@ -20,16 +20,19 @@ test_expect_success 'parallel-checkout with ident' '
 	(
 		cd ident &&
 		echo "A ident" >.gitattributes &&
+		echo "C ident=MyCusomVeryLongAndWordyId" >>.gitattributes &&
 		echo "\$Id\$" >A &&
 		echo "\$Id\$" >B &&
+		echo "\$MyCusomVeryLongAndWordyId\$" >C &&
 		git add -A &&
 		git commit -m id &&
 
-		rm A B &&
+		rm A B C &&
 		test_checkout_workers 2 git reset --hard &&
 		hexsz=$(test_oid hexsz) &&
 		grep -E "\\\$Id: [0-9a-f]{$hexsz} \\\$" A &&
-		grep "\\\$Id\\\$" B
+		grep "\\\$Id\\\$" B &&
+		grep -E "\\\$MyCusomVeryLongAndWordyId: [0-9a-f]{$hexsz} \\\$" C
 	)
 '
 

base-commit: 225bc32a989d7a22fa6addafd4ce7dcd04675dbf
-- 
gitgitgadget

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* Re: [PATCH v2] Make ident dynamic, not just a hardcoded value of "$Id".
       [not found]     ` <CABFYoQC_FzbU_E4hU0kCz-WFJNOLspwL2Gjc01sMXDZosxJWjw@mail.gmail.com>
@ 2021-09-01  5:35       ` Junio C Hamano
  0 siblings, 0 replies; 11+ messages in thread
From: Junio C Hamano @ 2021-09-01  5:35 UTC (permalink / raw)
  To: Maksym Sobolyev
  Cc: Maksym Sobolyev via GitGitGadget, git, Philip Oakley, Maksym Sobolyev

Maksym Sobolyev <sobomax@gmail.com> writes:

> Junio, I've got rid of the  GIT_MAX_IDENT_LEN in rev3 posted here. You were
> right it was not really necessary.

Sounds good.

> Let me see if there are any more issues with the functional part of the
> change, if not I'll see about changing the structure name from ident_action
> to something more relevant.

I do not think the "structure" should be relevant, not just its name
but its presence.  As I said already, the data (i.e. the name of the
custom token that replaces $Id$) should not come from the attribute
subsystem---it belongs to the config subsystem, I would think.

Thanks.


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v2] Make ident dynamic, not just a hardcoded value of "$Id".
  2021-08-26 20:37   ` Matheus Tavares
@ 2021-09-02  0:58     ` Junio C Hamano
  2021-09-02 19:04       ` Junio C Hamano
  0 siblings, 1 reply; 11+ messages in thread
From: Junio C Hamano @ 2021-09-02  0:58 UTC (permalink / raw)
  To: Matheus Tavares; +Cc: gitgitgadget, git, philipoakley, sobomax, sobomax

Matheus Tavares <matheus.bernardino@usp.br> writes:

> Hi, Maksym
>
> I haven't read the entire patch (and I don't normally use the ident feature),
> but I left a few comments below: 
>
> On Thu, Aug 26, 2021 at 1:28 AM Maksym Sobolyev via GitGitGadget <gitgitgadget@gmail.com> wrote:
>>
>> diff --git a/Documentation/gitattributes.txt b/Documentation/gitattributes.txt
>> index 83fd4e19a4..9e486f3e8d 100644
>> --- a/Documentation/gitattributes.txt
>> +++ b/Documentation/gitattributes.txt
>> @@ -382,6 +382,14 @@ sign `$` upon checkout.  Any byte sequence that begins with
>>  `$Id:` and ends with `$` in the worktree file is replaced
>>  with `$Id$` upon check-in.
>>
>> +The `ident` attribute can also provide an optional value,
>> +which if supplied is going to be used for expansion instead of
>> +the string `Id`.
>> +
>> +------------------------
>> +*.[ch]         ident=FreeBSD
>> +------------------------
>
> What happens if there is a ':' or '$' in the custom id name?

It is a valid question.  I actually think these two bytes should be
excluded from the custom ID string --- FWIW, I also do not think
there is any practical problem if we limited the set of characters
to [A-Za-z0-9] and nothing else.

> You might want to take a look at the other ident tests in
> t0021-conversion.sh and perhaps addapt/copy some of them to ensure that
> the expected behavior persists when using a custom id name. For example,
> quickly changing the "$Id:" references to "$customId:" and replacing
> "ident" with "ident=customId", I seem to get at least one test failure:
>
> --- expected-output     2021-08-26 19:40:07.181596662 +0000
> +++ expanded-keywords   2021-08-26 19:40:07.188263463 +0000
> @@ -6,5 +6,5 @@
>  $customId: bebd07c752ffffd6779e1056db5de66c3bb733ed $
>  $customId: bebd07c752ffffd6779e1056db5de66c3bb733ed $
>  $customId: NoTerminatingSymbol
> -$customId: Foreign Commit With Spaces $
> +$customId: bebd07c752ffffd6779e1056db5de66c3bb733ed $
>  $customId: NoTerminatingSymbolAtEOF
> \ No newline at end of file
> error: last command exited with $?=1
> not ok 3 - expanded_in_repo
>
> (But I haven't dig further, and this was just a quick test, so it could be my
> fault.)

If we are not restricting the characters that can be used in the
custom ID placeholder, we probably should have tests that use
allowed but unusual characters.

Thanks.

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v3] Make ident dynamic, not just a hardcoded value of "$Id".
  2021-09-01  2:13   ` [PATCH v3] " Maksym Sobolyev via GitGitGadget
@ 2021-09-02  3:40     ` Đoàn Trần Công Danh
  0 siblings, 0 replies; 11+ messages in thread
From: Đoàn Trần Công Danh @ 2021-09-02  3:40 UTC (permalink / raw)
  To: Maksym Sobolyev via GitGitGadget
  Cc: git, Matheus Tavares, Maksym Sobolyev, Maksym Sobolyev

On 2021-09-01 02:13:05+0000, Maksym Sobolyev via GitGitGadget <gitgitgadget@gmail.com> wrote:
> diff --git a/convert.h b/convert.h
> index 5ee1c322058..2422e289784 100644
> --- a/convert.h
> +++ b/convert.h
> @@ -76,11 +76,16 @@ enum convert_crlf_action {
>  
>  struct convert_driver;
>  
> +struct ident_action {
> +	const char *id;
> +	size_t id_len;
> +};

Just out of curiousity, can we use strbuf instead of inventing new
structure for this one? This struct is purely a string representation?


-- 
Danh

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v2] Make ident dynamic, not just a hardcoded value of "$Id".
  2021-09-02  0:58     ` Junio C Hamano
@ 2021-09-02 19:04       ` Junio C Hamano
  0 siblings, 0 replies; 11+ messages in thread
From: Junio C Hamano @ 2021-09-02 19:04 UTC (permalink / raw)
  To: sobomax, sobomax; +Cc: gitgitgadget, git, philipoakley, Matheus Tavares

Junio C Hamano <gitster@pobox.com> writes:

> If we are not restricting the characters that can be used in the
> custom ID placeholder, we probably should have tests that use
> allowed but unusual characters.

There is a test in t/t0021-conversion.sh that ensures that we
recognise expanded and unexpanded keyword strings using various
lines that has placeholder-lookalike on it, which starts like this:

    # If an expanded ident ever gets into the repository, we want to make sure that
    # it is collapsed before being expanded again on checkout
    test_expect_success expanded_in_repo '
            {
                    echo "File with expanded keywords"
                    echo "\$Id\$"
                    echo "\$Id:\$"
                    echo "\$Id: 0000000000000000000000000000000000000000 \$"
                    echo "\$Id: NoSpaceAtEnd\$"
                    echo "\$Id:NoSpaceAtFront \$"
                    echo "\$Id:NoSpaceAtEitherEnd\$"
                    echo "\$Id: NoTerminatingSymbol"
                    echo "\$Id: Foreign Commit With Spaces \$"
            } >expanded-keywords.0 &&
	    ...

It would be good to have a copy of this test with the placeholder
customized to something unusual like " Id:", "Id: $ $", ":", "$ $"
etc. to make sure we can parse out the keyword correctly without
imposing the letters that can be used in the placeholder.

    Side note.  I of course do not literally mean to "copy" and make
    the number of tests explode.  I think it can be done with a loop
    to test various custom strings, which may begin like ...

	for kwd in "Id" " Id:" "Id $" "Id: $ $" ":" "$ $"
	do
		cat >expanded-keywords.0 <<-EOF &&
		\$$kwd\$
		\$$kwd:\$
		\$$kwd: 0000000000000000000000000000000000000000 \$
		\$$kwd: NoSpaceAtEnd\$
		\$$kwd:NoSpaceAtFront \$
		\$$kwd:NoSpaceAtEitherEnd\$
		\$$kwd: NoTerminatingSymbol
		\$$kwd: Foreign Commit With Spaces \$
		EOF
		...

If we can do without restriction, that would be even better, but I
do not think it is the end of the world if we found some corner case
that a certain customized placeholder can make the syntax ambiguous
and we cannot recognise expanded keywords in the working tree files,
as it is reasonable to limit the letters we allow in placeholders.

One thing we should not do is to tell users that they can use
anything as their customized Id string, and then later find such a
corner case and tell them now we forbid certain letters.  To avoid
such a mistake, it is better to start reasonably tight, allowing
what we know will never cause problems (like alphanumerics) and
forbidding what we know will not be missed in real-world usecases
(like whitespaces and punctuations).  After we discover that certain
things that we initially forbid have real uses in the future, we can
loosen the restriction.  "Start loose and then tighten" will lead to
a regression when viewed from the end-user's workflow.  "Start tight
and then loosen" will not have that problem.

^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2021-09-02 19:04 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-08-23 16:41 [PATCH] Make ident dynamic, not just a hardcoded value of "$Id" Maksym Sobolyev via GitGitGadget
2021-08-23 18:10 ` Junio C Hamano
2021-08-23 18:41 ` Philip Oakley
2021-08-26  4:28 ` [PATCH v2] " Maksym Sobolyev via GitGitGadget
2021-08-26 20:37   ` Matheus Tavares
2021-09-02  0:58     ` Junio C Hamano
2021-09-02 19:04       ` Junio C Hamano
2021-08-27  2:59   ` Junio C Hamano
     [not found]     ` <CABFYoQC_FzbU_E4hU0kCz-WFJNOLspwL2Gjc01sMXDZosxJWjw@mail.gmail.com>
2021-09-01  5:35       ` Junio C Hamano
2021-09-01  2:13   ` [PATCH v3] " Maksym Sobolyev via GitGitGadget
2021-09-02  3:40     ` Đoàn Trần Công Danh

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.