All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Maksym Sobolyev via GitGitGadget" <gitgitgadget@gmail.com>
To: git@vger.kernel.org
Cc: Matheus Tavares <matheus.bernardino@usp.br>,
	Maksym Sobolyev <sobomax@gmail.com>,
	Maksym Sobolyev <sobomax@sippysoft.com>
Subject: [PATCH v3] Make ident dynamic, not just a hardcoded value of "$Id".
Date: Wed, 01 Sep 2021 02:13:05 +0000	[thread overview]
Message-ID: <pull.1074.v3.git.git.1630462385587.gitgitgadget@gmail.com> (raw)
In-Reply-To: <pull.1074.v2.git.git.1629952119446.gitgitgadget@gmail.com>

From: Maksym Sobolyev <sobomax@sippysoft.com>

This allows ident to be something like $FreeBSD$ so it provides matching
functionality for repos migrated from CVS / SVN.

This works by allowing the ident attribute to take a value, e.g.:

* ident=MyCustomId

in .gitattributes.

Extend the ident expansion tests to also verify custom ident.

Signed-off-by: Maksym Sobolyev <sobomax@sippysoft.com>
---
    Make ident dynamic, not just a hardcoded value of "$Id".
    
    This allows ident to be something like $FreeBSD$ so it provides matching
    functionality for repos migrated from CVS / SVN.
    
    This works by allowing the ident attribute to take a value, e.g.:
    
    * ident=MyCustomId
    
    in .gitattributes.
    
    cc: Philip Oakley philipoakley@iee.email

Published-As: https://github.com/gitgitgadget/git/releases/tag/pr-git-1074%2Fsobomax%2Fpr-custom_ident-v3
Fetch-It-Via: git fetch https://github.com/gitgitgadget/git pr-git-1074/sobomax/pr-custom_ident-v3
Pull-Request: https://github.com/git/git/pull/1074

Range-diff vs v2:

 1:  16713788a26 ! 1:  74a705e5bbe Make ident dynamic, not just a hardcoded value of "$Id".
     @@ Commit message
      
          In .gitattributes.
      
     +    Extend the ident expansion tests to also verify custom ident.
     +
          Signed-off-by: Maksym Sobolyev <sobomax@sippysoft.com>
      
       ## Documentation/gitattributes.txt ##
     @@ convert.c: static int read_convert_config(const char *var, const char *value, vo
       }
       
      -static int count_ident(const char *cp, unsigned long size)
     -+#define ID_STR "Id"
     -+
     -+#define GIT_MAX_IDENT_LEN 255
     ++#define ID_STR_DEFAULT "Id"
      +
      +static int count_ident(const char *cp, unsigned long size,
      +		       const struct ident_action *idact)
     @@ convert.c: static int ident_to_git(const char *src, size_t len,
       
      -		if (len > 3 && !memcmp(src, "Id:", 3)) {
      -			dollar = memchr(src + 3, '$', len - 3);
     -+		if (len > idact->id_len + 1 && !memcmp(src, idact->id, idact->id_len) && src[idact->id_len + 1] == ':') {
     ++		if (len > idact->id_len + 1 && !memcmp(src, idact->id, idact->id_len) && src[idact->id_len] == ':') {
      +			dollar = memchr(src + idact->id_len + 1, '$', len - (idact->id_len + 1));
       			if (!dollar)
       				break;
     @@ convert.c: static struct convert_driver *git_path_check_convert(struct attr_chec
      -	return !!ATTR_TRUE(value);
      +	if (!ATTR_UNSET(value) && !ATTR_FALSE(value)) {
      +		if (ATTR_TRUE(value))
     -+			idact.id = ID_STR;
     ++			idact.id = ID_STR_DEFAULT;
      +		else
      +			idact.id = value;
      +		idact.id_len = strlen(idact.id);
     -+		if (idact.id_len > GIT_MAX_IDENT_LEN)
     -+			die(_("ident value length exceeds GIT_MAX_IDENT_LEN"));
      +	}
      +	return idact;
       }
     @@ convert.c: struct ident_filter {
       	struct stream_filter filter;
       	struct strbuf left;
       	int state;
     --	char ident[GIT_MAX_HEXSZ + 5]; /* ": x40 $" */
      +	const struct ident_action *idact;
     -+	char ident[GIT_MAX_HEXSZ + GIT_MAX_IDENT_LEN + 3]; /* ": x40 $" */
     + 	char ident[GIT_MAX_HEXSZ + 5]; /* ": x40 $" */
       };
       
     - static int is_foreign_ident(const char *str)
     +-static int is_foreign_ident(const char *str)
     ++static int is_foreign_ident(const struct ident_action *idact, const char *str)
     + {
     + 	int i;
     + 
     +-	if (!skip_prefix(str, "$Id: ", &str))
     ++	if (str[0] != '$' || strlen(str) < idact->id_len + 3 ||
     ++	    memcmp(str + 1, idact->id, idact->id_len) != 0 ||
     ++	    !skip_prefix(str + 1 + idact->id_len, ": ", &str))
     + 		return 0;
     + 	for (i = 0; str[i]; i++) {
     + 		if (isspace(str[i]) && str[i+1] != '$')
      @@ convert.c: static int ident_filter_fn(struct stream_filter *filter,
       			   char *output, size_t *osize_p)
       {
     @@ convert.c: static int ident_filter_fn(struct stream_filter *filter,
       		case IDENT_SKIPPING:
       			/* fallthrough */
      @@ convert.c: static int ident_filter_fn(struct stream_filter *filter,
     + 			strbuf_addch(&ident->left, ch);
       			if (ch != '\n' && ch != '$')
       				continue;
     - 			if (ch == '$' && !is_foreign_ident(ident->left.buf)) {
     +-			if (ch == '$' && !is_foreign_ident(ident->left.buf)) {
      -				strbuf_setlen(&ident->left, sizeof(head) - 1);
     ++			if (ch == '$' && !is_foreign_ident(idact, ident->left.buf)) {
      +				strbuf_setlen(&ident->left, idact->id_len + 1);
       				strbuf_addstr(&ident->left, ident->ident);
       			}
     @@ convert.h: enum convert_crlf_action {
       
      +struct ident_action {
      +	const char *id;
     -+	int id_len;
     ++	size_t id_len;
      +};
      +
       struct conv_attrs {
     @@ convert.h: enum convert_crlf_action {
       
      
       ## parallel-checkout.c ##
     +@@ parallel-checkout.c: static int is_eligible_for_parallel_checkout(const struct cache_entry *ce,
     + 		return 0;
     + 
     + 	packed_item_size = sizeof(struct pc_item_fixed_portion) + ce->ce_namelen +
     +-		(ca->working_tree_encoding ? strlen(ca->working_tree_encoding) : 0);
     ++		(ca->working_tree_encoding ? strlen(ca->working_tree_encoding) : 0) +
     ++		ca->ident_action.id_len;
     + 
     + 	/*
     + 	 * The amount of data we send to the workers per checkout item is
      @@ parallel-checkout.c: static void send_one_item(int fd, struct parallel_checkout_item *pc_item)
       	size_t name_len = pc_item->ce->ce_namelen;
       	size_t working_tree_encoding_len = working_tree_encoding ?
     @@ parallel-checkout.h: struct pc_item_fixed_portion {
       	size_t name_len;
       };
      
     + ## t/t0021-conversion.sh ##
     +@@ t/t0021-conversion.sh: test_expect_success setup '
     + 	{
     + 	    echo "*.t filter=rot13"
     + 	    echo "*.i ident"
     ++	    echo "*.ci ident=customId"
     + 	} >.gitattributes &&
     + 
     + 	{
     + 	    echo a b c d e f g h i j k l m
     + 	    echo n o p q r s t u v w x y z
     + 	    echo '\''$Id$'\''
     ++	    echo '\''$customId$'\''
     + 	} >test &&
     + 	cat test >test.t &&
     + 	cat test >test.o &&
     + 	cat test >test.i &&
     +-	git add test test.t test.i &&
     +-	rm -f test test.t test.i &&
     +-	git checkout -- test test.t test.i &&
     ++	cat test >test.ci &&
     ++	git add test test.t test.i test.ci &&
     ++	rm -f test test.t test.i test.ci &&
     ++	git checkout -- test test.t test.i test.ci &&
     + 
     + 	echo "content-test2" >test2.o &&
     + 	echo "content-test3 - filename with special characters" >"test3 '\''sq'\'',\$x=.o"
     + '
     + 
     +-script='s/^\$Id: \([0-9a-f]*\) \$/\1/p'
     ++script_i='s/^\$Id: \([0-9a-f]*\) \$/\1/p'
     ++script_ci='s/^\$customId: \([0-9a-f]*\) \$/\1/p'
     + 
     + test_expect_success check '
     + 
     +@@ t/t0021-conversion.sh: test_expect_success check '
     + 	# ident should be stripped in the repository
     + 	git diff --raw --exit-code :test :test.i &&
     + 	id=$(git rev-parse --verify :test) &&
     +-	embedded=$(sed -ne "$script" test.i) &&
     ++	embedded=$(sed -ne "$script_i" test.i) &&
     ++	nembedded=$(sed -ne "$script_ci" test.i) &&
     + 	test "z$id" = "z$embedded" &&
     ++	test "z" = "z$nembedded" &&
     ++	embedded=$(sed -ne "$script_ci" test.ci) &&
     ++	nembedded=$(sed -ne "$script_i" test.ci) &&
     ++	test "z$id" = "z$embedded" &&
     ++	test "z" = "z$nembedded" &&
     + 
     + 	git cat-file blob :test.t >test.r &&
     + 
     +@@ t/t0021-conversion.sh: test_expect_success check '
     + 	test_cmp test.r test.t
     + '
     + 
     ++gen_expanded_keywords() {
     ++	local id="${1}"
     ++	echo "File with expanded keywords"
     ++	echo "\$$id\$"
     ++	echo "\$$id:\$"
     ++	echo "\$$id: 0000000000000000000000000000000000000000 \$"
     ++	echo "\$$id: NoSpaceAtEnd\$"
     ++	echo "\$$id:NoSpaceAtFront \$"
     ++	echo "\$$id:NoSpaceAtEitherEnd\$"
     ++	echo "\$$id: NoTerminatingSymbol"
     ++	echo "\$$id: Foreign Commit With Spaces \$"
     ++	printf "\$$id: NoTerminatingSymbolAtEOF"
     ++}
     ++
     ++gen_expected_output_0() {
     ++	local id="${1}"
     ++	local hid="${2}"
     ++	echo "File with expanded keywords"
     ++	echo "\$$id: $hid \$"
     ++	echo "\$$id: $hid \$"
     ++	echo "\$$id: $hid \$"
     ++	echo "\$$id: $hid \$"
     ++	echo "\$$id: $hid \$"
     ++	echo "\$$id: $hid \$"
     ++	echo "\$$id: NoTerminatingSymbol"
     ++	echo "\$$id: Foreign Commit With Spaces \$"
     ++}
     ++
     ++gen_expected_output() {
     ++	local id="${1}"
     ++	gen_expected_output_0 "${@}"
     ++	printf "\$$id: NoTerminatingSymbolAtEOF"
     ++}
     ++
     ++gen_expected_output_crlf() {
     ++	local id="${1}"
     ++	gen_expected_output_0 "${@}" | append_cr
     ++	printf "\$$id: NoTerminatingSymbolAtEOF"
     ++}
     ++
     + # If an expanded ident ever gets into the repository, we want to make sure that
     + # it is collapsed before being expanded again on checkout
     + test_expect_success expanded_in_repo '
     +-	{
     +-		echo "File with expanded keywords"
     +-		echo "\$Id\$"
     +-		echo "\$Id:\$"
     +-		echo "\$Id: 0000000000000000000000000000000000000000 \$"
     +-		echo "\$Id: NoSpaceAtEnd\$"
     +-		echo "\$Id:NoSpaceAtFront \$"
     +-		echo "\$Id:NoSpaceAtEitherEnd\$"
     +-		echo "\$Id: NoTerminatingSymbol"
     +-		echo "\$Id: Foreign Commit With Spaces \$"
     +-	} >expanded-keywords.0 &&
     +-
     +-	{
     +-		cat expanded-keywords.0 &&
     +-		printf "\$Id: NoTerminatingSymbolAtEOF"
     +-	} >expanded-keywords &&
     ++	gen_expanded_keywords Id >expanded-keywords &&
     ++	gen_expanded_keywords customId >expanded-keywords_ci &&
     + 	cat expanded-keywords >expanded-keywords-crlf &&
     ++	cat expanded-keywords_ci >expanded-keywords-crlf_ci &&
     + 	git add expanded-keywords expanded-keywords-crlf &&
     ++	git add expanded-keywords_ci expanded-keywords-crlf_ci &&
     + 	git commit -m "File with keywords expanded" &&
     + 	id=$(git rev-parse --verify :expanded-keywords) &&
     ++	id_ci=$(git rev-parse --verify :expanded-keywords_ci) &&
     + 
     +-	{
     +-		echo "File with expanded keywords"
     +-		echo "\$Id: $id \$"
     +-		echo "\$Id: $id \$"
     +-		echo "\$Id: $id \$"
     +-		echo "\$Id: $id \$"
     +-		echo "\$Id: $id \$"
     +-		echo "\$Id: $id \$"
     +-		echo "\$Id: NoTerminatingSymbol"
     +-		echo "\$Id: Foreign Commit With Spaces \$"
     +-	} >expected-output.0 &&
     +-	{
     +-		cat expected-output.0 &&
     +-		printf "\$Id: NoTerminatingSymbolAtEOF"
     +-	} >expected-output &&
     +-	{
     +-		append_cr <expected-output.0 &&
     +-		printf "\$Id: NoTerminatingSymbolAtEOF"
     +-	} >expected-output-crlf &&
     ++	gen_expected_output Id $id >expected-output &&
     ++	gen_expected_output customId $id_ci >expected-output_ci &&
     ++	gen_expected_output_crlf Id $id >expected-output-crlf &&
     ++	gen_expected_output_crlf customId $id_ci >expected-output-crlf_ci &&
     + 	{
     + 		echo "expanded-keywords ident"
     ++		echo "expanded-keywords_ci ident=customId"
     + 		echo "expanded-keywords-crlf ident text eol=crlf"
     ++		echo "expanded-keywords-crlf_ci ident=customId text eol=crlf"
     + 	} >>.gitattributes &&
     + 
     + 	rm -f expanded-keywords expanded-keywords-crlf &&
     ++	rm -f expanded-keywords_ci expanded-keywords-crlf_ci &&
     + 
     + 	git checkout -- expanded-keywords &&
     + 	test_cmp expected-output expanded-keywords &&
     + 
     + 	git checkout -- expanded-keywords-crlf &&
     +-	test_cmp expected-output-crlf expanded-keywords-crlf
     ++	test_cmp expected-output-crlf expanded-keywords-crlf &&
     ++
     ++	git checkout -- expanded-keywords_ci &&
     ++	test_cmp expected-output_ci expanded-keywords_ci &&
     ++
     ++	git checkout -- expanded-keywords-crlf_ci &&
     ++	test_cmp expected-output-crlf_ci expanded-keywords-crlf_ci
     + '
     + 
     + # The use of %f in a filter definition is expanded to the path to
     +
       ## t/t2082-parallel-checkout-attributes.sh ##
      @@ t/t2082-parallel-checkout-attributes.sh: test_expect_success 'parallel-checkout with ident' '
       	(


 Documentation/gitattributes.txt         |   8 ++
 builtin/checkout--worker.c              |  17 +++-
 convert.c                               | 116 ++++++++++++++----------
 convert.h                               |   7 +-
 parallel-checkout.c                     |  14 ++-
 parallel-checkout.h                     |   8 +-
 t/t0021-conversion.sh                   | 115 ++++++++++++++---------
 t/t2082-parallel-checkout-attributes.sh |   7 +-
 8 files changed, 192 insertions(+), 100 deletions(-)

diff --git a/Documentation/gitattributes.txt b/Documentation/gitattributes.txt
index 83fd4e19a41..9e486f3e8d3 100644
--- a/Documentation/gitattributes.txt
+++ b/Documentation/gitattributes.txt
@@ -382,6 +382,14 @@ sign `$` upon checkout.  Any byte sequence that begins with
 `$Id:` and ends with `$` in the worktree file is replaced
 with `$Id$` upon check-in.
 
+The `ident` attribute can also be given a value. When one is
+supplied, it is used as the keyword for expansion in place of
+the string `Id`.
+
+------------------------
+*.[ch]		ident=FreeBSD
+------------------------
+
 
 `filter`
 ^^^^^^^^
diff --git a/builtin/checkout--worker.c b/builtin/checkout--worker.c
index fb9fd13b73c..61ba39402ad 100644
--- a/builtin/checkout--worker.c
+++ b/builtin/checkout--worker.c
@@ -9,7 +9,7 @@ static void packet_to_pc_item(const char *buffer, int len,
 			      struct parallel_checkout_item *pc_item)
 {
 	const struct pc_item_fixed_portion *fixed_portion;
-	const char *variant;
+	const char *variant, *ident_action;
 	char *encoding;
 
 	if (len < sizeof(struct pc_item_fixed_portion))
@@ -19,7 +19,8 @@ static void packet_to_pc_item(const char *buffer, int len,
 	fixed_portion = (struct pc_item_fixed_portion *)buffer;
 
 	if (len - sizeof(struct pc_item_fixed_portion) !=
-		fixed_portion->name_len + fixed_portion->working_tree_encoding_len)
+		fixed_portion->name_len + fixed_portion->working_tree_encoding_len +
+		fixed_portion->ident_action_len)
 		BUG("checkout worker received corrupted item");
 
 	variant = buffer + sizeof(struct pc_item_fixed_portion);
@@ -43,11 +44,21 @@ static void packet_to_pc_item(const char *buffer, int len,
 	pc_item->ce->ce_namelen = fixed_portion->name_len;
 	pc_item->ce->ce_mode = fixed_portion->ce_mode;
 	memcpy(pc_item->ce->name, variant, pc_item->ce->ce_namelen);
+	variant += pc_item->ce->ce_namelen;
 	oidcpy(&pc_item->ce->oid, &fixed_portion->oid);
 
+	if (fixed_portion->ident_action_len) {
+		ident_action = xmemdupz(variant,
+					fixed_portion->ident_action_len);
+		variant += fixed_portion->ident_action_len;
+	} else {
+		ident_action = NULL;
+	}
+
 	pc_item->id = fixed_portion->id;
 	pc_item->ca.crlf_action = fixed_portion->crlf_action;
-	pc_item->ca.ident = fixed_portion->ident;
+	pc_item->ca.ident_action.id = ident_action;
+	pc_item->ca.ident_action.id_len = fixed_portion->ident_action_len;
 	pc_item->ca.working_tree_encoding = encoding;
 }
 
diff --git a/convert.c b/convert.c
index 0d6fb3410ae..111bfeeaf36 100644
--- a/convert.c
+++ b/convert.c
@@ -1056,7 +1056,10 @@ static int read_convert_config(const char *var, const char *value, void *cb)
 	return 0;
 }
 
-static int count_ident(const char *cp, unsigned long size)
+#define ID_STR_DEFAULT "Id"
+
+static int count_ident(const char *cp, unsigned long size,
+		       const struct ident_action *idact)
 {
 	/*
 	 * "$Id: 0000000000000000000000000000000000000000 $" <=> "$Id$"
@@ -1069,13 +1072,13 @@ static int count_ident(const char *cp, unsigned long size)
 		size--;
 		if (ch != '$')
 			continue;
-		if (size < 3)
+		if (size < idact->id_len + 1)
 			break;
-		if (memcmp("Id", cp, 2))
+		if (memcmp(idact->id, cp, idact->id_len))
 			continue;
-		ch = cp[2];
-		cp += 3;
-		size -= 3;
+		ch = cp[idact->id_len];
+		cp += idact->id_len + 1;
+		size -= idact->id_len + 1;
 		if (ch == '$')
 			cnt++; /* $Id$ */
 		if (ch != ':')
@@ -1099,11 +1102,11 @@ static int count_ident(const char *cp, unsigned long size)
 }
 
 static int ident_to_git(const char *src, size_t len,
-			struct strbuf *buf, int ident)
+			struct strbuf *buf, const struct ident_action *idact)
 {
 	char *dst, *dollar;
 
-	if (!ident || (src && !count_ident(src, len)))
+	if (!idact->id || (src && !count_ident(src, len, idact)))
 		return 0;
 
 	if (!buf)
@@ -1122,17 +1125,18 @@ static int ident_to_git(const char *src, size_t len,
 		len -= dollar + 1 - src;
 		src  = dollar + 1;
 
-		if (len > 3 && !memcmp(src, "Id:", 3)) {
-			dollar = memchr(src + 3, '$', len - 3);
+		if (len > idact->id_len + 1 && !memcmp(src, idact->id, idact->id_len) && src[idact->id_len] == ':') {
+			dollar = memchr(src + idact->id_len + 1, '$', len - (idact->id_len + 1));
 			if (!dollar)
 				break;
-			if (memchr(src + 3, '\n', dollar - src - 3)) {
+			if (memchr(src + idact->id_len + 1, '\n', dollar - src - (idact->id_len + 1))) {
 				/* Line break before the next dollar. */
 				continue;
 			}
 
-			memcpy(dst, "Id$", 3);
-			dst += 3;
+			memcpy(dst, idact->id, idact->id_len);
+			dst[idact->id_len] = '$';
+			dst += idact->id_len + 1;
 			len -= dollar + 1 - src;
 			src  = dollar + 1;
 		}
@@ -1143,16 +1147,16 @@ static int ident_to_git(const char *src, size_t len,
 }
 
 static int ident_to_worktree(const char *src, size_t len,
-			     struct strbuf *buf, int ident)
+			     struct strbuf *buf, const struct ident_action *idact)
 {
 	struct object_id oid;
 	char *to_free = NULL, *dollar, *spc;
 	int cnt;
 
-	if (!ident)
+	if (!idact->id)
 		return 0;
 
-	cnt = count_ident(src, len);
+	cnt = count_ident(src, len, idact);
 	if (!cnt)
 		return 0;
 
@@ -1161,7 +1165,7 @@ static int ident_to_worktree(const char *src, size_t len,
 		to_free = strbuf_detach(buf, NULL);
 	hash_object_file(the_hash_algo, src, len, "blob", &oid);
 
-	strbuf_grow(buf, len + cnt * (the_hash_algo->hexsz + 3));
+	strbuf_grow(buf, len + cnt * (the_hash_algo->hexsz + idact->id_len + 1));
 	for (;;) {
 		/* step 1: run to the next '$' */
 		dollar = memchr(src, '$', len);
@@ -1172,14 +1176,14 @@ static int ident_to_worktree(const char *src, size_t len,
 		src  = dollar + 1;
 
 		/* step 2: does it looks like a bit like Id:xxx$ or Id$ ? */
-		if (len < 3 || memcmp("Id", src, 2))
+		if (len < idact->id_len + 1 || memcmp(idact->id, src, idact->id_len))
 			continue;
 
 		/* step 3: skip over Id$ or Id:xxxxx$ */
-		if (src[2] == '$') {
-			src += 3;
-			len -= 3;
-		} else if (src[2] == ':') {
+		if (src[idact->id_len] == '$') {
+			src += idact->id_len + 1;
+			len -= idact->id_len + 1;
+		} else if (src[idact->id_len] == ':') {
 			/*
 			 * It's possible that an expanded Id has crept its way into the
 			 * repository, we cope with that by stripping the expansion out.
@@ -1187,18 +1191,18 @@ static int ident_to_worktree(const char *src, size_t len,
 			 * on checkout, which won't go away by stash, but let's keep it
 			 * for git-style ids.
 			 */
-			dollar = memchr(src + 3, '$', len - 3);
+			dollar = memchr(src + idact->id_len + 1, '$', len - (idact->id_len + 1));
 			if (!dollar) {
 				/* incomplete keyword, no more '$', so just quit the loop */
 				break;
 			}
 
-			if (memchr(src + 3, '\n', dollar - src - 3)) {
+			if (memchr(src + idact->id_len + 1, '\n', dollar - src - (idact->id_len + 1))) {
 				/* Line break before the next dollar. */
 				continue;
 			}
 
-			spc = memchr(src + 4, ' ', dollar - src - 4);
+			spc = memchr(src + idact->id_len + 2, ' ', dollar - src - (idact->id_len + 2));
 			if (spc && spc < dollar-1) {
 				/* There are spaces in unexpected places.
 				 * This is probably an id from some other
@@ -1215,7 +1219,8 @@ static int ident_to_worktree(const char *src, size_t len,
 		}
 
 		/* step 4: substitute */
-		strbuf_addstr(buf, "Id: ");
+		strbuf_addstr(buf, idact->id);
+		strbuf_addstr(buf, ": ");
 		strbuf_addstr(buf, oid_to_hex(&oid));
 		strbuf_addstr(buf, " $");
 	}
@@ -1286,11 +1291,19 @@ static struct convert_driver *git_path_check_convert(struct attr_check_item *che
 	return NULL;
 }
 
-static int git_path_check_ident(struct attr_check_item *check)
+static struct ident_action git_path_check_ident(struct attr_check_item *check)
 {
+	struct ident_action idact = {.id = NULL, .id_len = 0};
 	const char *value = check->value;
 
-	return !!ATTR_TRUE(value);
+	if (!ATTR_UNSET(value) && !ATTR_FALSE(value)) {
+		if (ATTR_TRUE(value))
+			idact.id = ID_STR_DEFAULT;
+		else
+			idact.id = value;
+		idact.id_len = strlen(idact.id);
+	}
+	return idact;
 }
 
 static struct attr_check *check;
@@ -1313,7 +1326,7 @@ void convert_attrs(struct index_state *istate,
 	ca->crlf_action = git_path_check_crlf(ccheck + 4);
 	if (ca->crlf_action == CRLF_UNDEFINED)
 		ca->crlf_action = git_path_check_crlf(ccheck + 0);
-	ca->ident = git_path_check_ident(ccheck + 1);
+	ca->ident_action = git_path_check_ident(ccheck + 1);
 	ca->drv = git_path_check_convert(ccheck + 2);
 	if (ca->crlf_action != CRLF_BINARY) {
 		enum eol eol_attr = git_path_check_eol(ccheck + 3);
@@ -1433,7 +1446,7 @@ int convert_to_git(struct index_state *istate,
 			len = dst->len;
 		}
 	}
-	return ret | ident_to_git(src, len, dst, ca.ident);
+	return ret | ident_to_git(src, len, dst, &ca.ident_action);
 }
 
 void convert_to_git_filter_fd(struct index_state *istate,
@@ -1450,7 +1463,7 @@ void convert_to_git_filter_fd(struct index_state *istate,
 
 	encode_to_git(path, dst->buf, dst->len, dst, ca.working_tree_encoding, conv_flags);
 	crlf_to_git(istate, path, dst->buf, dst->len, dst, ca.crlf_action, conv_flags);
-	ident_to_git(dst->buf, dst->len, dst, ca.ident);
+	ident_to_git(dst->buf, dst->len, dst, &ca.ident_action);
 }
 
 static int convert_to_working_tree_ca_internal(const struct conv_attrs *ca,
@@ -1462,7 +1475,7 @@ static int convert_to_working_tree_ca_internal(const struct conv_attrs *ca,
 {
 	int ret = 0, ret_filter = 0;
 
-	ret |= ident_to_worktree(src, len, dst, ca->ident);
+	ret |= ident_to_worktree(src, len, dst, &(ca->ident_action));
 	if (ret) {
 		src = dst->buf;
 		len = dst->len;
@@ -1810,14 +1823,17 @@ struct ident_filter {
 	struct stream_filter filter;
 	struct strbuf left;
 	int state;
+	const struct ident_action *idact;
 	char ident[GIT_MAX_HEXSZ + 5]; /* ": x40 $" */
 };
 
-static int is_foreign_ident(const char *str)
+static int is_foreign_ident(const struct ident_action *idact, const char *str)
 {
 	int i;
 
-	if (!skip_prefix(str, "$Id: ", &str))
+	if (str[0] != '$' || strlen(str) < idact->id_len + 3 ||
+	    memcmp(str + 1, idact->id, idact->id_len) != 0 ||
+	    !skip_prefix(str + 1 + idact->id_len, ": ", &str))
 		return 0;
 	for (i = 0; str[i]; i++) {
 		if (isspace(str[i]) && str[i+1] != '$')
@@ -1847,13 +1863,16 @@ static int ident_filter_fn(struct stream_filter *filter,
 			   char *output, size_t *osize_p)
 {
 	struct ident_filter *ident = (struct ident_filter *)filter;
-	static const char head[] = "$Id";
+	const struct ident_action *idact = ident->idact;
 
 	if (!input) {
 		/* drain upon eof */
 		switch (ident->state) {
 		default:
-			strbuf_add(&ident->left, head, ident->state);
+			if (ident->state > 0)
+				strbuf_addch(&ident->left, '$');
+			if (ident->state > 1)
+				strbuf_add(&ident->left, idact->id, ident->state - 1);
 			/* fallthrough */
 		case IDENT_SKIPPING:
 			/* fallthrough */
@@ -1884,23 +1903,27 @@ static int ident_filter_fn(struct stream_filter *filter,
 			strbuf_addch(&ident->left, ch);
 			if (ch != '\n' && ch != '$')
 				continue;
-			if (ch == '$' && !is_foreign_ident(ident->left.buf)) {
-				strbuf_setlen(&ident->left, sizeof(head) - 1);
+			if (ch == '$' && !is_foreign_ident(idact, ident->left.buf)) {
+				strbuf_setlen(&ident->left, idact->id_len + 1);
 				strbuf_addstr(&ident->left, ident->ident);
 			}
 			ident->state = IDENT_DRAINING;
 			continue;
 		}
 
-		if (ident->state < sizeof(head) &&
-		    head[ident->state] == ch) {
+		if ((ident->state == 0 && ch == '$') ||
+		    (ident->state > 0 && ident->state < idact->id_len + 1 &&
+		     idact->id[ident->state - 1] == ch)) {
 			ident->state++;
 			continue;
 		}
 
-		if (ident->state)
-			strbuf_add(&ident->left, head, ident->state);
-		if (ident->state == sizeof(head) - 1) {
+		if (ident->state) {
+			strbuf_addch(&ident->left, '$');
+			if (ident->state > 1)
+				strbuf_add(&ident->left, idact->id, ident->state - 1);
+		}
+		if (ident->state == idact->id_len + 1) {
 			if (ch != ':' && ch != '$') {
 				strbuf_addch(&ident->left, ch);
 				ident->state = 0;
@@ -1935,7 +1958,7 @@ static struct stream_filter_vtbl ident_vtbl = {
 	ident_free_fn,
 };
 
-static struct stream_filter *ident_filter(const struct object_id *oid)
+static struct stream_filter *ident_filter(const struct object_id *oid, const struct ident_action *idact)
 {
 	struct ident_filter *ident = xmalloc(sizeof(*ident));
 
@@ -1944,6 +1967,7 @@ static struct stream_filter *ident_filter(const struct object_id *oid)
 	strbuf_init(&ident->left, 0);
 	ident->filter.vtbl = &ident_vtbl;
 	ident->state = 0;
+	ident->idact = idact;
 	return (struct stream_filter *)ident;
 }
 
@@ -1963,8 +1987,8 @@ struct stream_filter *get_stream_filter_ca(const struct conv_attrs *ca,
 	if (classify_conv_attrs(ca) != CA_CLASS_STREAMABLE)
 		return NULL;
 
-	if (ca->ident)
-		filter = ident_filter(oid);
+	if (ca->ident_action.id)
+		filter = ident_filter(oid, &(ca->ident_action));
 
 	if (output_eol(ca->crlf_action) == EOL_CRLF)
 		filter = cascade_filter(filter, lf_to_crlf_filter());
diff --git a/convert.h b/convert.h
index 5ee1c322058..2422e289784 100644
--- a/convert.h
+++ b/convert.h
@@ -76,11 +76,16 @@ enum convert_crlf_action {
 
 struct convert_driver;
 
+struct ident_action {
+	const char *id;
+	size_t id_len;
+};
+
 struct conv_attrs {
 	struct convert_driver *drv;
 	enum convert_crlf_action attr_action; /* What attr says */
 	enum convert_crlf_action crlf_action; /* When no attr is set, use core.autocrlf */
-	int ident;
+	struct ident_action ident_action; /* What ident says */
 	const char *working_tree_encoding; /* Supported encoding or default encoding if NULL */
 };
 
diff --git a/parallel-checkout.c b/parallel-checkout.c
index ddc0ff3c064..b5908c299af 100644
--- a/parallel-checkout.c
+++ b/parallel-checkout.c
@@ -91,7 +91,8 @@ static int is_eligible_for_parallel_checkout(const struct cache_entry *ce,
 		return 0;
 
 	packed_item_size = sizeof(struct pc_item_fixed_portion) + ce->ce_namelen +
-		(ca->working_tree_encoding ? strlen(ca->working_tree_encoding) : 0);
+		(ca->working_tree_encoding ? strlen(ca->working_tree_encoding) : 0) +
+		ca->ident_action.id_len;
 
 	/*
 	 * The amount of data we send to the workers per checkout item is
@@ -403,13 +404,15 @@ static void send_one_item(int fd, struct parallel_checkout_item *pc_item)
 	size_t name_len = pc_item->ce->ce_namelen;
 	size_t working_tree_encoding_len = working_tree_encoding ?
 					   strlen(working_tree_encoding) : 0;
+	const char *ident_action_id = pc_item->ca.ident_action.id;
+	size_t ident_action_len = pc_item->ca.ident_action.id_len;
 
 	/*
 	 * Any changes in the calculation of the message size must also be made
 	 * in is_eligible_for_parallel_checkout().
 	 */
 	len_data = sizeof(struct pc_item_fixed_portion) + name_len +
-		   working_tree_encoding_len;
+		   working_tree_encoding_len + ident_action_len;
 
 	data = xmalloc(len_data);
 
@@ -417,7 +420,7 @@ static void send_one_item(int fd, struct parallel_checkout_item *pc_item)
 	fixed_portion->id = pc_item->id;
 	fixed_portion->ce_mode = pc_item->ce->ce_mode;
 	fixed_portion->crlf_action = pc_item->ca.crlf_action;
-	fixed_portion->ident = pc_item->ca.ident;
+	fixed_portion->ident_action_len = ident_action_len;
 	fixed_portion->name_len = name_len;
 	fixed_portion->working_tree_encoding_len = working_tree_encoding_len;
 	/*
@@ -434,6 +437,11 @@ static void send_one_item(int fd, struct parallel_checkout_item *pc_item)
 		variant += working_tree_encoding_len;
 	}
 	memcpy(variant, pc_item->ce->name, name_len);
+	variant += name_len;
+	if (ident_action_len) {
+		memcpy(variant, ident_action_id, ident_action_len);
+		variant += ident_action_len;
+	}
 
 	packet_write(fd, data, len_data);
 
diff --git a/parallel-checkout.h b/parallel-checkout.h
index 80f539bcb77..c3c282f516b 100644
--- a/parallel-checkout.h
+++ b/parallel-checkout.h
@@ -76,9 +76,9 @@ struct parallel_checkout_item {
 
 /*
  * The fixed-size portion of `struct parallel_checkout_item` that is sent to the
- * workers. Following this will be 2 strings: ca.working_tree_encoding and
- * ce.name; These are NOT null terminated, since we have the size in the fixed
- * portion.
+ * workers. Following this will be 3 strings: ca.working_tree_encoding, ce.name
+ * and ca.ident_action.id; these are NOT null terminated, since we have the size
+ * in the fixed portion.
  *
  * Note that not all fields of conv_attrs and cache_entry are passed, only the
  * ones that will be required by the workers to smudge and write the entry.
@@ -88,7 +88,7 @@ struct pc_item_fixed_portion {
 	struct object_id oid;
 	unsigned int ce_mode;
 	enum convert_crlf_action crlf_action;
-	int ident;
+	size_t ident_action_len;
 	size_t working_tree_encoding_len;
 	size_t name_len;
 };
diff --git a/t/t0021-conversion.sh b/t/t0021-conversion.sh
index b5749f327dd..07df50a7e5d 100755
--- a/t/t0021-conversion.sh
+++ b/t/t0021-conversion.sh
@@ -77,25 +77,29 @@ test_expect_success setup '
 	{
 	    echo "*.t filter=rot13"
 	    echo "*.i ident"
+	    echo "*.ci ident=customId"
 	} >.gitattributes &&
 
 	{
 	    echo a b c d e f g h i j k l m
 	    echo n o p q r s t u v w x y z
 	    echo '\''$Id$'\''
+	    echo '\''$customId$'\''
 	} >test &&
 	cat test >test.t &&
 	cat test >test.o &&
 	cat test >test.i &&
-	git add test test.t test.i &&
-	rm -f test test.t test.i &&
-	git checkout -- test test.t test.i &&
+	cat test >test.ci &&
+	git add test test.t test.i test.ci &&
+	rm -f test test.t test.i test.ci &&
+	git checkout -- test test.t test.i test.ci &&
 
 	echo "content-test2" >test2.o &&
 	echo "content-test3 - filename with special characters" >"test3 '\''sq'\'',\$x=.o"
 '
 
-script='s/^\$Id: \([0-9a-f]*\) \$/\1/p'
+script_i='s/^\$Id: \([0-9a-f]*\) \$/\1/p'
+script_ci='s/^\$customId: \([0-9a-f]*\) \$/\1/p'
 
 test_expect_success check '
 
@@ -105,8 +109,14 @@ test_expect_success check '
 	# ident should be stripped in the repository
 	git diff --raw --exit-code :test :test.i &&
 	id=$(git rev-parse --verify :test) &&
-	embedded=$(sed -ne "$script" test.i) &&
+	embedded=$(sed -ne "$script_i" test.i) &&
+	nembedded=$(sed -ne "$script_ci" test.i) &&
 	test "z$id" = "z$embedded" &&
+	test "z" = "z$nembedded" &&
+	embedded=$(sed -ne "$script_ci" test.ci) &&
+	nembedded=$(sed -ne "$script_i" test.ci) &&
+	test "z$id" = "z$embedded" &&
+	test "z" = "z$nembedded" &&
 
 	git cat-file blob :test.t >test.r &&
 
@@ -114,61 +124,84 @@ test_expect_success check '
 	test_cmp test.r test.t
 '
 
+gen_expanded_keywords() {
+	local id="${1}"
+	echo "File with expanded keywords"
+	echo "\$$id\$"
+	echo "\$$id:\$"
+	echo "\$$id: 0000000000000000000000000000000000000000 \$"
+	echo "\$$id: NoSpaceAtEnd\$"
+	echo "\$$id:NoSpaceAtFront \$"
+	echo "\$$id:NoSpaceAtEitherEnd\$"
+	echo "\$$id: NoTerminatingSymbol"
+	echo "\$$id: Foreign Commit With Spaces \$"
+	printf "\$$id: NoTerminatingSymbolAtEOF"
+}
+
+gen_expected_output_0() {
+	local id="${1}"
+	local hid="${2}"
+	echo "File with expanded keywords"
+	echo "\$$id: $hid \$"
+	echo "\$$id: $hid \$"
+	echo "\$$id: $hid \$"
+	echo "\$$id: $hid \$"
+	echo "\$$id: $hid \$"
+	echo "\$$id: $hid \$"
+	echo "\$$id: NoTerminatingSymbol"
+	echo "\$$id: Foreign Commit With Spaces \$"
+}
+
+gen_expected_output() {
+	local id="${1}"
+	gen_expected_output_0 "${@}"
+	printf "\$$id: NoTerminatingSymbolAtEOF"
+}
+
+gen_expected_output_crlf() {
+	local id="${1}"
+	gen_expected_output_0 "${@}" | append_cr
+	printf "\$$id: NoTerminatingSymbolAtEOF"
+}
+
 # If an expanded ident ever gets into the repository, we want to make sure that
 # it is collapsed before being expanded again on checkout
 test_expect_success expanded_in_repo '
-	{
-		echo "File with expanded keywords"
-		echo "\$Id\$"
-		echo "\$Id:\$"
-		echo "\$Id: 0000000000000000000000000000000000000000 \$"
-		echo "\$Id: NoSpaceAtEnd\$"
-		echo "\$Id:NoSpaceAtFront \$"
-		echo "\$Id:NoSpaceAtEitherEnd\$"
-		echo "\$Id: NoTerminatingSymbol"
-		echo "\$Id: Foreign Commit With Spaces \$"
-	} >expanded-keywords.0 &&
-
-	{
-		cat expanded-keywords.0 &&
-		printf "\$Id: NoTerminatingSymbolAtEOF"
-	} >expanded-keywords &&
+	gen_expanded_keywords Id >expanded-keywords &&
+	gen_expanded_keywords customId >expanded-keywords_ci &&
 	cat expanded-keywords >expanded-keywords-crlf &&
+	cat expanded-keywords_ci >expanded-keywords-crlf_ci &&
 	git add expanded-keywords expanded-keywords-crlf &&
+	git add expanded-keywords_ci expanded-keywords-crlf_ci &&
 	git commit -m "File with keywords expanded" &&
 	id=$(git rev-parse --verify :expanded-keywords) &&
+	id_ci=$(git rev-parse --verify :expanded-keywords_ci) &&
 
-	{
-		echo "File with expanded keywords"
-		echo "\$Id: $id \$"
-		echo "\$Id: $id \$"
-		echo "\$Id: $id \$"
-		echo "\$Id: $id \$"
-		echo "\$Id: $id \$"
-		echo "\$Id: $id \$"
-		echo "\$Id: NoTerminatingSymbol"
-		echo "\$Id: Foreign Commit With Spaces \$"
-	} >expected-output.0 &&
-	{
-		cat expected-output.0 &&
-		printf "\$Id: NoTerminatingSymbolAtEOF"
-	} >expected-output &&
-	{
-		append_cr <expected-output.0 &&
-		printf "\$Id: NoTerminatingSymbolAtEOF"
-	} >expected-output-crlf &&
+	gen_expected_output Id $id >expected-output &&
+	gen_expected_output customId $id_ci >expected-output_ci &&
+	gen_expected_output_crlf Id $id >expected-output-crlf &&
+	gen_expected_output_crlf customId $id_ci >expected-output-crlf_ci &&
 	{
 		echo "expanded-keywords ident"
+		echo "expanded-keywords_ci ident=customId"
 		echo "expanded-keywords-crlf ident text eol=crlf"
+		echo "expanded-keywords-crlf_ci ident=customId text eol=crlf"
 	} >>.gitattributes &&
 
 	rm -f expanded-keywords expanded-keywords-crlf &&
+	rm -f expanded-keywords_ci expanded-keywords-crlf_ci &&
 
 	git checkout -- expanded-keywords &&
 	test_cmp expected-output expanded-keywords &&
 
 	git checkout -- expanded-keywords-crlf &&
-	test_cmp expected-output-crlf expanded-keywords-crlf
+	test_cmp expected-output-crlf expanded-keywords-crlf &&
+
+	git checkout -- expanded-keywords_ci &&
+	test_cmp expected-output_ci expanded-keywords_ci &&
+
+	git checkout -- expanded-keywords-crlf_ci &&
+	test_cmp expected-output-crlf_ci expanded-keywords-crlf_ci
 '
 
 # The use of %f in a filter definition is expanded to the path to
diff --git a/t/t2082-parallel-checkout-attributes.sh b/t/t2082-parallel-checkout-attributes.sh
index 25254579618..822957a8dc8 100755
--- a/t/t2082-parallel-checkout-attributes.sh
+++ b/t/t2082-parallel-checkout-attributes.sh
@@ -20,16 +20,19 @@ test_expect_success 'parallel-checkout with ident' '
 	(
 		cd ident &&
 		echo "A ident" >.gitattributes &&
+		echo "C ident=MyCusomVeryLongAndWordyId" >>.gitattributes &&
 		echo "\$Id\$" >A &&
 		echo "\$Id\$" >B &&
+		echo "\$MyCusomVeryLongAndWordyId\$" >C &&
 		git add -A &&
 		git commit -m id &&
 
-		rm A B &&
+		rm A B C &&
 		test_checkout_workers 2 git reset --hard &&
 		hexsz=$(test_oid hexsz) &&
 		grep -E "\\\$Id: [0-9a-f]{$hexsz} \\\$" A &&
-		grep "\\\$Id\\\$" B
+		grep "\\\$Id\\\$" B &&
+		grep -E "\\\$MyCusomVeryLongAndWordyId: [0-9a-f]{$hexsz} \\\$" C
 	)
 '
 

base-commit: 225bc32a989d7a22fa6addafd4ce7dcd04675dbf
-- 
gitgitgadget

  parent reply	other threads:[~2021-09-01  2:13 UTC|newest]

Thread overview: 11+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-08-23 16:41 [PATCH] Make ident dynamic, not just a hardcoded value of "$Id" Maksym Sobolyev via GitGitGadget
2021-08-23 18:10 ` Junio C Hamano
2021-08-23 18:41 ` Philip Oakley
2021-08-26  4:28 ` [PATCH v2] " Maksym Sobolyev via GitGitGadget
2021-08-26 20:37   ` Matheus Tavares
2021-09-02  0:58     ` Junio C Hamano
2021-09-02 19:04       ` Junio C Hamano
2021-08-27  2:59   ` Junio C Hamano
     [not found]     ` <CABFYoQC_FzbU_E4hU0kCz-WFJNOLspwL2Gjc01sMXDZosxJWjw@mail.gmail.com>
2021-09-01  5:35       ` Junio C Hamano
2021-09-01  2:13   ` Maksym Sobolyev via GitGitGadget [this message]
2021-09-02  3:40     ` [PATCH v3] " Đoàn Trần Công Danh

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=pull.1074.v3.git.git.1630462385587.gitgitgadget@gmail.com \
    --to=gitgitgadget@gmail.com \
    --cc=git@vger.kernel.org \
    --cc=matheus.bernardino@usp.br \
    --cc=sobomax@gmail.com \
    --cc=sobomax@sippysoft.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes;
see the mirroring instructions for how to clone and mirror
all data and code used by this external index.