All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v2 1/2] convert: fix normalization of foreign idents
  2010-09-07 19:37         ` Marcus Comstedt
@ 2010-08-23  7:05           ` Marcus Comstedt
  2010-09-10  0:26             ` Junio C Hamano
  2010-09-07 19:16           ` [PATCH v2 2/2] t0021: test checkout and commit of foreign idents Marcus Comstedt
                             ` (2 subsequent siblings)
  3 siblings, 1 reply; 19+ messages in thread
From: Marcus Comstedt @ 2010-08-23  7:05 UTC (permalink / raw)
  To: git; +Cc: gitster

Since ident_to_worktree() does not touch $Id$ tags which contain
foreign expansions in the repository, make sure that ident_to_git()
does not either.  This fixes the problem that such files show
spurious modification upon checkout.

There is however one case where we want ident_to_git() to normalize
the tag to $Id$ despite the asymmetry:  When committing a modification
to a file which has a foreign ident, the foreign ident should be
replaced with a regular git ident.  Thus, add a new parameter to
convert_to_git() that indicates if we want the foreign idents
normalized after all.

Signed-off-by: Marcus Comstedt <marcus@mc.pp.se>
---
 builtin/apply.c |    2 +-
 builtin/blame.c |    2 +-
 cache.h         |    3 ++-
 combine-diff.c  |    2 +-
 convert.c       |   23 ++++++++++++++++++-----
 diff.c          |    2 +-
 sha1_file.c     |    3 ++-
 7 files changed, 26 insertions(+), 11 deletions(-)

diff --git a/builtin/apply.c b/builtin/apply.c
index 23c18c5..7abff80 100644
--- a/builtin/apply.c
+++ b/builtin/apply.c
@@ -1932,7 +1932,7 @@ static int read_old_data(struct stat *st, const char *path, struct strbuf *buf)
 	case S_IFREG:
 		if (strbuf_read_file(buf, path, st->st_size) != st->st_size)
 			return error("unable to open or read %s", path);
-		convert_to_git(path, buf->buf, buf->len, buf, 0);
+		convert_to_git(path, buf->buf, buf->len, buf, 0, 0);
 		return 0;
 	default:
 		return -1;
diff --git a/builtin/blame.c b/builtin/blame.c
index 1015354..4f3b004 100644
--- a/builtin/blame.c
+++ b/builtin/blame.c
@@ -2095,7 +2095,7 @@ static struct commit *fake_working_tree_commit(struct diff_options *opt,
 		if (strbuf_read(&buf, 0, 0) < 0)
 			die_errno("failed to read from stdin");
 	}
-	convert_to_git(path, buf.buf, buf.len, &buf, 0);
+	convert_to_git(path, buf.buf, buf.len, &buf, 0, 0);
 	origin->file.ptr = buf.buf;
 	origin->file.size = buf.len;
 	pretend_sha1_file(buf.buf, buf.len, OBJ_BLOB, origin->blob_sha1);
diff --git a/cache.h b/cache.h
index be02a42..23ae1f1 100644
--- a/cache.h
+++ b/cache.h
@@ -1055,7 +1055,8 @@ extern void trace_argv_printf(const char **argv, const char *format, ...);
 /* convert.c */
 /* returns 1 if *dst was used */
 extern int convert_to_git(const char *path, const char *src, size_t len,
-                          struct strbuf *dst, enum safe_crlf checksafe);
+			  struct strbuf *dst, enum safe_crlf checksafe,
+			  int normalize_foreign_ident);
 extern int convert_to_working_tree(const char *path, const char *src, size_t len, struct strbuf *dst);
 extern int renormalize_buffer(const char *path, const char *src, size_t len, struct strbuf *dst);
 
diff --git a/combine-diff.c b/combine-diff.c
index 655fa89..e81aa7d 100644
--- a/combine-diff.c
+++ b/combine-diff.c
@@ -758,7 +758,7 @@ static void show_patch_diff(struct combine_diff_path *elem, int num_parent,
 			if (is_file) {
 				struct strbuf buf = STRBUF_INIT;
 
-				if (convert_to_git(elem->path, result, len, &buf, safe_crlf)) {
+				if (convert_to_git(elem->path, result, len, &buf, safe_crlf, 0)) {
 					free(result);
 					result = strbuf_detach(&buf, &len);
 					result_size = len;
diff --git a/convert.c b/convert.c
index 01de9a8..6abab3f 100644
--- a/convert.c
+++ b/convert.c
@@ -520,9 +520,10 @@ static int count_ident(const char *cp, unsigned long size)
 }
 
 static int ident_to_git(const char *path, const char *src, size_t len,
-                        struct strbuf *buf, int ident)
+			struct strbuf *buf, int ident,
+			int normalize_foreign_ident)
 {
-	char *dst, *dollar;
+	char *dst, *dollar, *spc;
 
 	if (!ident || !count_ident(src, len))
 		return 0;
@@ -549,6 +550,16 @@ static int ident_to_git(const char *path, const char *src, size_t len,
 				continue;
 			}
 
+			spc = memchr(src + 4, ' ', dollar - src - 4);
+			if (spc && spc < dollar-1 &&
+			    !normalize_foreign_ident) {
+				/* There are spaces in unexpected places.
+				 * This is probably an id from some other
+				 * versioning system. Keep it for now.
+				 */
+				continue;
+			}
+
 			memcpy(dst, "Id$", 3);
 			dst += 3;
 			len -= dollar + 1 - src;
@@ -706,7 +717,8 @@ static enum action determine_action(enum action text_attr, enum eol eol_attr)
 }
 
 int convert_to_git(const char *path, const char *src, size_t len,
-                   struct strbuf *dst, enum safe_crlf checksafe)
+		   struct strbuf *dst, enum safe_crlf checksafe,
+		   int normalize_foreign_ident)
 {
 	struct git_attr_check check[5];
 	enum action action = CRLF_GUESS;
@@ -738,7 +750,8 @@ int convert_to_git(const char *path, const char *src, size_t len,
 		src = dst->buf;
 		len = dst->len;
 	}
-	return ret | ident_to_git(path, src, len, dst, ident);
+	return ret | ident_to_git(path, src, len, dst, ident,
+				  normalize_foreign_ident);
 }
 
 static int convert_to_working_tree_internal(const char *path, const char *src,
@@ -796,5 +809,5 @@ int renormalize_buffer(const char *path, const char *src, size_t len, struct str
 		src = dst->buf;
 		len = dst->len;
 	}
-	return ret | convert_to_git(path, src, len, dst, 0);
+	return ret | convert_to_git(path, src, len, dst, 0, 0);
 }
diff --git a/diff.c b/diff.c
index 144f2aa..d481cb6 100644
--- a/diff.c
+++ b/diff.c
@@ -2372,7 +2372,7 @@ int diff_populate_filespec(struct diff_filespec *s, int size_only)
 		/*
 		 * Convert from working tree format to canonical git format
 		 */
-		if (convert_to_git(s->path, s->data, s->size, &buf, safe_crlf)) {
+		if (convert_to_git(s->path, s->data, s->size, &buf, safe_crlf, 0)) {
 			size_t size = 0;
 			munmap(s->data, s->size);
 			s->should_munmap = 0;
diff --git a/sha1_file.c b/sha1_file.c
index 0cd9435..37e8657 100644
--- a/sha1_file.c
+++ b/sha1_file.c
@@ -2434,7 +2434,8 @@ static int index_mem(unsigned char *sha1, void *buf, size_t size,
 	if ((type == OBJ_BLOB) && path) {
 		struct strbuf nbuf = STRBUF_INIT;
 		if (convert_to_git(path, buf, size, &nbuf,
-		                   write_object ? safe_crlf : 0)) {
+				   write_object ? safe_crlf : 0,
+				   write_object)) {
 			buf = strbuf_detach(&nbuf, &size);
 			re_allocated = 1;
 		}
-- 
1.7.2

^ permalink raw reply related	[flat|nested] 19+ messages in thread

* Fix for normalization of foreign idents
@ 2010-08-23 21:30 Marcus Comstedt
  2010-08-23 21:30 ` [PATCH] convert: fix " Marcus Comstedt
  2010-08-23 21:35 ` Fix for " Jonathan Nieder
  0 siblings, 2 replies; 19+ messages in thread
From: Marcus Comstedt @ 2010-08-23 21:30 UTC (permalink / raw)
  To: git; +Cc: gitster

Hi.

The new behaviour that $Id$ tags containing expanded idents from
other version control systems, nice though it is, has a rather
serious problem.  This is because convert_to_git is no longer
the inverse operation of convert_to_working_tree.  For native
git idents, the transformations are

  $Id$ --(c_t_w_t)--> $Id: 123...$ --(c_t_g)--> $Id$

but for foreign idents, it becomes

  $Id: blah$ --(c_t_w_t)--> $Id: blah$ --(c_t_g)--> $Id$

The result of this is that git may consider even newly checked out
files as modified, even though neither the file contents nor its
attributes have been modified after the checkout.  I say _may_, because
it can also happen that it decides based on the time stamps that it
doesn't need to compare the actual contents, in which case the file
does not show as modified.

The following patch fixes this by preserving the foreign ident
also in convert_to_git, meaning that convert_to_git is again the
inverse operation of convert_to_working_tree, with the following
transformation series:

  $Id: blah$ --(c_t_w_t)--> $Id: blah$ --(c_t_g)--> $Id: blah$

This restores correct and deterministic operation of status and
diff, meaning that if the file hasn't actually been modified, no
modifications are shown.

As you might suggest, always keeping the foreign ident would mean it
is never updated when you commit new versions of the file, which isn't
really what we want.  Keeping the foreign ident as long as the last
modification to the file was made in the previous version control
system makes perfect sense, but once we make a commit to the file
within git, it should be replaced with a git ident.  The patch is
therefore slightly more complex, adding an extra parameter to control
whether foreign idents are collapsed or not.  This parameter is set
to true only in the case when index_mem is called with write_object
set to true, which is to say when we create a new blob from the
working tree (i.e. we are committing the file).

I hope we can agree that this is a sound and unintrusive way of
handling the problem.  :-)

Incidentally, should one want to create a commit to replace a foreign
ident with a native git one without making any other changes to the
file, this is still simple to do.  All that is needed is to change any
character inside the expanded ident in the working tree, and the file
will show as modified, and will have the foreign ident removed on
commit.


  // Marcus

^ permalink raw reply	[flat|nested] 19+ messages in thread

* [PATCH] convert: fix normalization of foreign idents
  2010-08-23 21:30 Fix for normalization of foreign idents Marcus Comstedt
@ 2010-08-23 21:30 ` Marcus Comstedt
  2010-08-23 21:35 ` Fix for " Jonathan Nieder
  1 sibling, 0 replies; 19+ messages in thread
From: Marcus Comstedt @ 2010-08-23 21:30 UTC (permalink / raw)
  To: git; +Cc: gitster, Marcus Comstedt

Since ident_to_worktree() does not touch $Id$ tags which contain
foreign expansions in the repository, make sure that ident_to_git()
does not either.  This fixes the problem that such files show
spurious modification upon checkout.

There is however one case where we want ident_to_git() to normalize
the tag to $Id$ despite the asymmetry:  When committing a modification
to a file which has a foreign ident, the foreign ident should be
replaced with a regular git ident.  Thus, add a new parameter to
convert_to_git() that indicates if we want the foreign idents
normalized after all.

Signed-off-by: Marcus Comstedt <marcus@mc.pp.se>
---
 builtin/apply.c |    2 +-
 builtin/blame.c |    2 +-
 cache.h         |    3 ++-
 combine-diff.c  |    2 +-
 convert.c       |   21 +++++++++++++++++----
 diff.c          |    2 +-
 sha1_file.c     |    3 ++-
 7 files changed, 25 insertions(+), 10 deletions(-)

diff --git a/builtin/apply.c b/builtin/apply.c
index f38c1f7..7e503f4 100644
--- a/builtin/apply.c
+++ b/builtin/apply.c
@@ -1759,7 +1759,7 @@ static int read_old_data(struct stat *st, const char *path, struct strbuf *buf)
 	case S_IFREG:
 		if (strbuf_read_file(buf, path, st->st_size) != st->st_size)
 			return error("unable to open or read %s", path);
-		convert_to_git(path, buf->buf, buf->len, buf, 0);
+		convert_to_git(path, buf->buf, buf->len, buf, 0, 0);
 		return 0;
 	default:
 		return -1;
diff --git a/builtin/blame.c b/builtin/blame.c
index 437b1a4..83c6561 100644
--- a/builtin/blame.c
+++ b/builtin/blame.c
@@ -2095,7 +2095,7 @@ static struct commit *fake_working_tree_commit(struct diff_options *opt,
 		if (strbuf_read(&buf, 0, 0) < 0)
 			die_errno("failed to read from stdin");
 	}
-	convert_to_git(path, buf.buf, buf.len, &buf, 0);
+	convert_to_git(path, buf.buf, buf.len, &buf, 0, 0);
 	origin->file.ptr = buf.buf;
 	origin->file.size = buf.len;
 	pretend_sha1_file(buf.buf, buf.len, OBJ_BLOB, origin->blob_sha1);
diff --git a/cache.h b/cache.h
index eb77e1d..b98042f 100644
--- a/cache.h
+++ b/cache.h
@@ -1055,7 +1055,8 @@ extern void trace_argv_printf(const char **argv, const char *format, ...);
 /* convert.c */
 /* returns 1 if *dst was used */
 extern int convert_to_git(const char *path, const char *src, size_t len,
-                          struct strbuf *dst, enum safe_crlf checksafe);
+			  struct strbuf *dst, enum safe_crlf checksafe,
+			  int normalize_foreign_ident);
 extern int convert_to_working_tree(const char *path, const char *src, size_t len, struct strbuf *dst);
 
 /* add */
diff --git a/combine-diff.c b/combine-diff.c
index 655fa89..e81aa7d 100644
--- a/combine-diff.c
+++ b/combine-diff.c
@@ -758,7 +758,7 @@ static void show_patch_diff(struct combine_diff_path *elem, int num_parent,
 			if (is_file) {
 				struct strbuf buf = STRBUF_INIT;
 
-				if (convert_to_git(elem->path, result, len, &buf, safe_crlf)) {
+				if (convert_to_git(elem->path, result, len, &buf, safe_crlf, 0)) {
 					free(result);
 					result = strbuf_detach(&buf, &len);
 					result_size = len;
diff --git a/convert.c b/convert.c
index e41a31e..00d0612 100644
--- a/convert.c
+++ b/convert.c
@@ -519,9 +519,10 @@ static int count_ident(const char *cp, unsigned long size)
 }
 
 static int ident_to_git(const char *path, const char *src, size_t len,
-                        struct strbuf *buf, int ident)
+			struct strbuf *buf, int ident,
+			int normalize_foreign_ident)
 {
-	char *dst, *dollar;
+	char *dst, *dollar, *spc;
 
 	if (!ident || !count_ident(src, len))
 		return 0;
@@ -548,6 +549,16 @@ static int ident_to_git(const char *path, const char *src, size_t len,
 				continue;
 			}
 
+			spc = memchr(src + 4, ' ', dollar - src - 4);
+			if (spc && spc < dollar-1 &&
+			    !normalize_foreign_ident) {
+				/* There are spaces in unexpected places.
+				 * This is probably an id from some other
+				 * versioning system. Keep it for now.
+				 */
+				continue;
+			}
+
 			memcpy(dst, "Id$", 3);
 			dst += 3;
 			len -= dollar + 1 - src;
@@ -704,7 +715,8 @@ enum action determine_action(enum action text_attr, enum eol eol_attr) {
 }
 
 int convert_to_git(const char *path, const char *src, size_t len,
-                   struct strbuf *dst, enum safe_crlf checksafe)
+		   struct strbuf *dst, enum safe_crlf checksafe,
+		   int normalize_foreign_ident)
 {
 	struct git_attr_check check[5];
 	enum action action = CRLF_GUESS;
@@ -736,7 +748,8 @@ int convert_to_git(const char *path, const char *src, size_t len,
 		src = dst->buf;
 		len = dst->len;
 	}
-	return ret | ident_to_git(path, src, len, dst, ident);
+	return ret | ident_to_git(path, src, len, dst, ident,
+				  normalize_foreign_ident);
 }
 
 int convert_to_working_tree(const char *path, const char *src, size_t len, struct strbuf *dst)
diff --git a/diff.c b/diff.c
index 6fb97d4..c5be513 100644
--- a/diff.c
+++ b/diff.c
@@ -2372,7 +2372,7 @@ int diff_populate_filespec(struct diff_filespec *s, int size_only)
 		/*
 		 * Convert from working tree format to canonical git format
 		 */
-		if (convert_to_git(s->path, s->data, s->size, &buf, safe_crlf)) {
+		if (convert_to_git(s->path, s->data, s->size, &buf, safe_crlf, 0)) {
 			size_t size = 0;
 			munmap(s->data, s->size);
 			s->should_munmap = 0;
diff --git a/sha1_file.c b/sha1_file.c
index 0cd9435..37e8657 100644
--- a/sha1_file.c
+++ b/sha1_file.c
@@ -2434,7 +2434,8 @@ static int index_mem(unsigned char *sha1, void *buf, size_t size,
 	if ((type == OBJ_BLOB) && path) {
 		struct strbuf nbuf = STRBUF_INIT;
 		if (convert_to_git(path, buf, size, &nbuf,
-		                   write_object ? safe_crlf : 0)) {
+				   write_object ? safe_crlf : 0,
+				   write_object)) {
 			buf = strbuf_detach(&nbuf, &size);
 			re_allocated = 1;
 		}
-- 
1.7.2

^ permalink raw reply related	[flat|nested] 19+ messages in thread

* Re: Fix for normalization of foreign idents
  2010-08-23 21:30 Fix for normalization of foreign idents Marcus Comstedt
  2010-08-23 21:30 ` [PATCH] convert: fix " Marcus Comstedt
@ 2010-08-23 21:35 ` Jonathan Nieder
  2010-08-23 21:44   ` Marcus Comstedt
  1 sibling, 1 reply; 19+ messages in thread
From: Jonathan Nieder @ 2010-08-23 21:35 UTC (permalink / raw)
  To: Marcus Comstedt; +Cc: git, gitster

Hi,

Marcus Comstedt wrote:

>   $Id: blah$ --(c_t_w_t)--> $Id: blah$ --(c_t_g)--> $Id: blah$
> 
> This restores correct and deterministic operation of status and
> diff, meaning that if the file hasn't actually been modified, no
> modifications are shown.
> 
> As you might suggest, always keeping the foreign ident would mean it
> is never updated when you commit new versions of the file, which isn't
> really what we want.  Keeping the foreign ident as long as the last
> modification to the file was made in the previous version control
> system makes perfect sense, but once we make a commit to the file
> within git, it should be replaced with a git ident.

I was with you up to here.  Is commit time really the right moment
to clobber a foreign ident?  I suspect it would be confusing.

It might be simpler to just never clobber a foreign ident and instead
rely on local policy (scripts, pre-commit hooks, and so on) to remove
them at the appropriate time.

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: Fix for normalization of foreign idents
  2010-08-23 21:35 ` Fix for " Jonathan Nieder
@ 2010-08-23 21:44   ` Marcus Comstedt
  2010-08-23 22:33     ` Jonathan Nieder
  2010-09-06  9:42     ` Marcus Comstedt
  0 siblings, 2 replies; 19+ messages in thread
From: Marcus Comstedt @ 2010-08-23 21:44 UTC (permalink / raw)
  To: Jonathan Nieder; +Cc: git, gitster


Hi Jonathan.

Thanks for the quick feedback.


Jonathan Nieder <jrnieder@gmail.com> writes:

> I was with you up to here.  Is commit time really the right moment
> to clobber a foreign ident?  I suspect it would be confusing.

Well, it's how ident is normally expected to behave; when you
commit something new, the file should get a new ident.


> It might be simpler to just never clobber a foreign ident and instead
> rely on local policy (scripts, pre-commit hooks, and so on) to remove
> them at the appropriate time.

Simpler as far as patch size is concerned (although this patch isn't
really that complex as it is), but it sounds like much more of a
hassle to actually use.  Do you have a use case in mind where you have
the ident attribute on a file but do _not_ want a new ident each time
you commit a change to the file?


  // Marcus

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: Fix for normalization of foreign idents
  2010-08-23 21:44   ` Marcus Comstedt
@ 2010-08-23 22:33     ` Jonathan Nieder
  2010-08-23 23:46       ` Junio C Hamano
  2010-09-06  9:42     ` Marcus Comstedt
  1 sibling, 1 reply; 19+ messages in thread
From: Jonathan Nieder @ 2010-08-23 22:33 UTC (permalink / raw)
  To: Marcus Comstedt; +Cc: git, gitster

Marcus Comstedt wrote:

> it sounds like much more of a
> hassle to actually use.  Do you have a use case in mind where you have
> the ident attribute on a file but do _not_ want a new ident each time
> you commit a change to the file?

No, I don't use the $Id$ feature at all and if I inherited a codebase
with a bunch of foreign $Id$ tags, I don't know what I'd do. :)  So I
can trust your judgement on this.

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: Fix for normalization of foreign idents
  2010-08-23 22:33     ` Jonathan Nieder
@ 2010-08-23 23:46       ` Junio C Hamano
  2010-08-24  7:23         ` Marcus Comstedt
  0 siblings, 1 reply; 19+ messages in thread
From: Junio C Hamano @ 2010-08-23 23:46 UTC (permalink / raw)
  To: Jonathan Nieder; +Cc: Marcus Comstedt, git

Jonathan Nieder <jrnieder@gmail.com> writes:

> Marcus Comstedt wrote:
>
>> it sounds like much more of a
>> hassle to actually use.  Do you have a use case in mind where you have
>> the ident attribute on a file but do _not_ want a new ident each time
>> you commit a change to the file?
>
> No, I don't use the $Id$ feature at all and if I inherited a codebase
> with a bunch of foreign $Id$ tags, I don't know what I'd do.

Heh, I know what I would do---the first commit will be to remove them.

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: Fix for normalization of foreign idents
  2010-08-23 23:46       ` Junio C Hamano
@ 2010-08-24  7:23         ` Marcus Comstedt
  0 siblings, 0 replies; 19+ messages in thread
From: Marcus Comstedt @ 2010-08-24  7:23 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Jonathan Nieder, git


Junio C Hamano <gitster@pobox.com> writes:

> Jonathan Nieder <jrnieder@gmail.com> writes:
>>
>> No, I don't use the $Id$ feature at all and if I inherited a codebase
>> with a bunch of foreign $Id$ tags, I don't know what I'd do.
>
> Heh, I know what I would do---the first commit will be to remove them.


Remove just the foreign ident, or remove $Id$ completely?

Either way, this doesn't sound like a vote for preserving the old $Id$
expansion on commit.  :-)


  // Marcus

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: Fix for normalization of foreign idents
  2010-08-23 21:44   ` Marcus Comstedt
  2010-08-23 22:33     ` Jonathan Nieder
@ 2010-09-06  9:42     ` Marcus Comstedt
  2010-09-06 21:07       ` Jonathan Nieder
  1 sibling, 1 reply; 19+ messages in thread
From: Marcus Comstedt @ 2010-09-06  9:42 UTC (permalink / raw)
  To: git; +Cc: Jonathan Nieder, gitster


Hi.

Was this patch simply forgotten, or are there some remaining
concerns about it?

Should I submit a new patch which simply fixes the inconsistency
which breaks checkout, and leaves the removal of foreign idents
on commit to user interaction or hook scripts, as suggested by
Jonathan?  That would at least restore deterministic behavior...


  // Marcus

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: Fix for normalization of foreign idents
  2010-09-06  9:42     ` Marcus Comstedt
@ 2010-09-06 21:07       ` Jonathan Nieder
  2010-09-07 19:37         ` Marcus Comstedt
  0 siblings, 1 reply; 19+ messages in thread
From: Jonathan Nieder @ 2010-09-06 21:07 UTC (permalink / raw)
  To: Marcus Comstedt; +Cc: git, gitster

Hi Marcus,

Marcus Comstedt wrote:

> Was this patch simply forgotten, or are there some remaining
> concerns about it?

I assume it is just that no one using the $ident$ feature took a
look at it, which leaves us without a sanity-check that it
consistently works and improves things.

If you have the time, a test and documentation might help (the former
plays the role of an artificial user, who can describe the feature and
will make noise if we break it with future changes).

> Should I submit a new patch which simply fixes the inconsistency
> which breaks checkout, and leaves the removal of foreign idents
> on commit to user interaction or hook scripts, as suggested by
> Jonathan?  That would at least restore deterministic behavior...

That doesn't sound necessary to me.

^ permalink raw reply	[flat|nested] 19+ messages in thread

* [PATCH v2 2/2] t0021: test checkout and commit of foreign idents
  2010-09-07 19:37         ` Marcus Comstedt
  2010-08-23  7:05           ` [PATCH v2 1/2] convert: fix " Marcus Comstedt
@ 2010-09-07 19:16           ` Marcus Comstedt
  2010-09-07 20:00           ` [PATCH v2 0/2] fix normalization of foreign idents (now with test cases) Marcus Comstedt
  2010-09-08  4:32           ` Fix for normalization of foreign idents Jonathan Nieder
  3 siblings, 0 replies; 19+ messages in thread
From: Marcus Comstedt @ 2010-09-07 19:16 UTC (permalink / raw)
  To: git; +Cc: gitster

Add test cases for the following behaviors:

  * Checking out a file with a foreign ident should not flag
    the file as modified.  This is to prevent a mess when checking
    out old versions, and to allow a migration model where files
    are allowed to keep their foreign ident as long as their
    content is also "foreign", i.e. not modified since the migration
    to git.

  * Committing to a file with a foreign ident should replace the
    foreign ident with a native ident.  This is simply to get
    the normal behavior of ident:  When the contents of the file is
    updated, so is the ident.

Signed-off-by: Marcus Comstedt <marcus@mc.pp.se>
---
 t/t0021-conversion.sh |   58 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 files changed, 58 insertions(+), 0 deletions(-)

diff --git a/t/t0021-conversion.sh b/t/t0021-conversion.sh
index 828e35b..c0ad9e8 100755
--- a/t/t0021-conversion.sh
+++ b/t/t0021-conversion.sh
@@ -93,4 +93,62 @@ test_expect_success expanded_in_repo '
 	cmp expanded-keywords expected-output
 '
 
+# Check that a file containing idents (native or foreign) is not
+# spuriously flagged as modified on checkout
+test_expect_success 'ident pristine after checkout' '
+	{
+		echo "File with expanded keywords"
+		echo "\$Id\$"
+		echo "\$Id: Foreign Commit With Spaces \$"
+	} > native-and-foreign-idents &&
+
+	{
+		echo "File with expanded keywords"
+		echo "\$Id: 9bdc217750894eed31bb870e9ffa00599f0573a2 \$"
+		echo "\$Id: Foreign Commit With Spaces \$"
+	} > expected-output &&
+
+	git add native-and-foreign-idents &&
+	git commit -m "File with native and foreign idents" &&
+
+	echo "native-and-foreign-idents ident" >> .gitattributes &&
+
+	rm -f native-and-foreign-idents &&
+	git checkout -- native-and-foreign-idents &&
+	cat native-and-foreign-idents &&
+	cmp native-and-foreign-idents expected-output &&
+	touch native-and-foreign-idents &&
+	git status --porcelain native-and-foreign-idents > output &&
+	test ! -s output &&
+	git diff -- native-and-foreign-idents > output &&
+	test ! -s output
+'
+
+# Check that actually modifying the file and committing it produces a
+# new ident on checkout
+test_expect_success 'foreign ident replaced on commit' '
+	{
+		echo "File with expanded keywords"
+		echo "\$Id: 2499e26293e36cc92399835e497ef6396d710055 \$"
+		echo "\$Id: 2499e26293e36cc92399835e497ef6396d710055 \$"
+		echo "Some new content"
+	} > expected-output &&
+
+	echo "1	0	native-and-foreign-idents" > expected-stat1 &&
+	echo "2	1	native-and-foreign-idents" > expected-stat2 &&
+
+	echo "Some new content" >> native-and-foreign-idents &&
+	git diff --numstat -- native-and-foreign-idents > output &&
+	cmp output expected-stat1 &&
+	git add native-and-foreign-idents &&
+	git commit -m "Modified file" &&
+	git diff --numstat HEAD^ HEAD -- native-and-foreign-idents > output &&
+	cmp output expected-stat2 &&
+	rm -f native-and-foreign-idents &&
+	git checkout -- native-and-foreign-idents &&
+	cat native-and-foreign-idents &&
+	cmp native-and-foreign-idents expected-output
+'
+
+
 test_done
-- 
1.7.2

^ permalink raw reply related	[flat|nested] 19+ messages in thread

* Re: Fix for normalization of foreign idents
  2010-09-06 21:07       ` Jonathan Nieder
@ 2010-09-07 19:37         ` Marcus Comstedt
  2010-08-23  7:05           ` [PATCH v2 1/2] convert: fix " Marcus Comstedt
                             ` (3 more replies)
  0 siblings, 4 replies; 19+ messages in thread
From: Marcus Comstedt @ 2010-09-07 19:37 UTC (permalink / raw)
  To: Jonathan Nieder; +Cc: git, gitster


Jonathan Nieder <jrnieder@gmail.com> writes:

> If you have the time, a test and documentation might help (the former
> plays the role of an artificial user, who can describe the feature and
> will make noise if we break it with future changes).

Ok.  I'll shortly post an updated patch with some test cases.

As for documentation, I suppose that would mean documenting the
"foreign ident" concept as a whole, as I don't think there's currently
anything about that in the documentation?  Would the `ident` section
of gitattributes.txt be a suitable place for this?


Thanks


  // Marcus

^ permalink raw reply	[flat|nested] 19+ messages in thread

* [PATCH v2 0/2] fix normalization of foreign idents (now with test cases)
  2010-09-07 19:37         ` Marcus Comstedt
  2010-08-23  7:05           ` [PATCH v2 1/2] convert: fix " Marcus Comstedt
  2010-09-07 19:16           ` [PATCH v2 2/2] t0021: test checkout and commit of foreign idents Marcus Comstedt
@ 2010-09-07 20:00           ` Marcus Comstedt
  2010-09-08  4:32           ` Fix for normalization of foreign idents Jonathan Nieder
  3 siblings, 0 replies; 19+ messages in thread
From: Marcus Comstedt @ 2010-09-07 20:00 UTC (permalink / raw)
  To: git; +Cc: gitster

* Rebased against current master (7505ae2)
* Test cases added

Marcus Comstedt (2):
  convert: fix normalization of foreign idents
  t0021: test checkout and commit of foreign idents

 builtin/apply.c       |    2 +-
 builtin/blame.c       |    2 +-
 cache.h               |    3 +-
 combine-diff.c        |    2 +-
 convert.c             |   23 +++++++++++++++----
 diff.c                |    2 +-
 sha1_file.c           |    3 +-
 t/t0021-conversion.sh |   58 +++++++++++++++++++++++++++++++++++++++++++++++++
 8 files changed, 84 insertions(+), 11 deletions(-)

-- 
1.7.2

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: Fix for normalization of foreign idents
  2010-09-07 19:37         ` Marcus Comstedt
                             ` (2 preceding siblings ...)
  2010-09-07 20:00           ` [PATCH v2 0/2] fix normalization of foreign idents (now with test cases) Marcus Comstedt
@ 2010-09-08  4:32           ` Jonathan Nieder
  3 siblings, 0 replies; 19+ messages in thread
From: Jonathan Nieder @ 2010-09-08  4:32 UTC (permalink / raw)
  To: Marcus Comstedt; +Cc: git, gitster

Marcus Comstedt wrote:

> Ok.  I'll shortly post an updated patch with some test cases.

Thanks.

> As for documentation, I suppose that would mean documenting the
> "foreign ident" concept as a whole, as I don't think there's currently
> anything about that in the documentation?  Would the `ident` section
> of gitattributes.txt be a suitable place for this?

Yep, that sounds good (though by no means necessary either).

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH v2 1/2] convert: fix normalization of foreign idents
  2010-08-23  7:05           ` [PATCH v2 1/2] convert: fix " Marcus Comstedt
@ 2010-09-10  0:26             ` Junio C Hamano
  2010-09-12 21:01               ` Marcus Comstedt
  0 siblings, 1 reply; 19+ messages in thread
From: Junio C Hamano @ 2010-09-10  0:26 UTC (permalink / raw)
  To: Marcus Comstedt; +Cc: git

Marcus Comstedt <marcus@mc.pp.se> writes:

> Since ident_to_worktree() does not touch $Id$ tags which contain
> foreign expansions in the repository, make sure that ident_to_git()
> does not either.  This fixes the problem that such files show
> spurious modification upon checkout.
>
> There is however one case where we want ident_to_git() to normalize
> the tag to $Id$ despite the asymmetry:  When committing a modification
> to a file which has a foreign ident, the foreign ident should be
> replaced with a regular git ident.  Thus, add a new parameter to
> convert_to_git() that indicates if we want the foreign idents
> normalized after all.

Would it be possible that the real culprit is that ident_to_worktree()
does not always touch $Id$ in the first place?  Why isn't "$Id: garbage$"
first cleaned and then smudged upon checkout?

It also smells wrong that this "sometimes we convert, sometimes we don't"
is a special case for "$Id$" and for no other conversion.  Why don't
smudge/clean filter or CRLF conversion have the same issue that can be
solved with the same approach as this patch takes?

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH v2 1/2] convert: fix normalization of foreign idents
  2010-09-10  0:26             ` Junio C Hamano
@ 2010-09-12 21:01               ` Marcus Comstedt
  2010-09-12 21:44                 ` Junio C Hamano
  0 siblings, 1 reply; 19+ messages in thread
From: Marcus Comstedt @ 2010-09-12 21:01 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git


Hi Junio.


Junio C Hamano <gitster@pobox.com> writes:

> Would it be possible that the real culprit is that ident_to_worktree()
> does not always touch $Id$ in the first place?  Why isn't "$Id: garbage$"
> first cleaned and then smudged upon checkout?

Please see commit 07814d90095b65b4594efd47c69f9f171ef162d4, and
the discussion preceeding it.


> It also smells wrong that this "sometimes we convert, sometimes we don't"
> is a special case for "$Id$" and for no other conversion.  Why don't
> smudge/clean filter or CRLF conversion have the same issue that can be
> solved with the same approach as this patch takes?

I gather that this is because nobody has come up with a use case
for smudge/clean or CRLF where a (pervasive) non-normalized
representation in the repository makes sense.

Specifically, a foreign ident in the repo is not "garbage", but
something useful when you migrate a repo from a different VCS for (at
least) the following reasons:

* It allows you to check out a historical tree from the git repo which
  looks exactly like what it would look like if you checked it out
  from the previous system

* It provides an indication that a version of a file comes directly
  from the previous VCS, without any modification since the migration
  to git, and exactly where in the history of the previous VCS it has
  originated

* Quite frankly, idents generated by other VCSs contain more useful
  information than those generated by git, so it's a waste to discard
  them prematurely

The same effect can be achieved without direct support for foreign
idents by instead using fine-grained control in .gitattributes to
force -ident on any file which still has foreign idents, but there are
two downsides to this approach:

* A commit hook (and probably a pre-receive hook at the "blessed"
  repository) is needed to make sure that no commits are allowed to a
  file with foreign idents without also flipping the attribute from
  -ident to +ident

* Either a full enumeration of all files with foreign idents, or
  of all files with native idents, is needed in .gitattributes, so
  that a file can be either added to or removed from this list when
  making the first "native" commit to it

So it's possible, albeit slightly less practical, to do without this
feature.  If the decision to include it is reversed, 07814d90095b65
should probably be reverted.


  // Marcus

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH v2 1/2] convert: fix normalization of foreign idents
  2010-09-12 21:01               ` Marcus Comstedt
@ 2010-09-12 21:44                 ` Junio C Hamano
  2010-09-12 22:06                   ` Marcus Comstedt
  0 siblings, 1 reply; 19+ messages in thread
From: Junio C Hamano @ 2010-09-12 21:44 UTC (permalink / raw)
  To: Marcus Comstedt; +Cc: git

Marcus Comstedt <marcus@mc.pp.se> writes:

>> It also smells wrong that this "sometimes we convert, sometimes we don't"
>> is a special case for "$Id$" and for no other conversion.  Why don't
>> smudge/clean filter or CRLF conversion have the same issue that can be
>> solved with the same approach as this patch takes?
>
> I gather that this is because nobody has come up with a use case
> for smudge/clean or CRLF where a (pervasive) non-normalized
> representation in the repository makes sense.

Think a bit more about what you just wrote means.

Imagine there isn't any "$Id$" (or "$ident$" as it was originally known)
expansion in git.  You can implement it easily using a smudge/clean pair,
and the smudge and clean should be conditionally applied in the codepath
you touched using exactly the same logic as your patch uses, no?

That is what I meant.  It smells wrong to make this "sometime we do,
sometimes we don't" as a special case for "$Id$".  Specifically, the
parameter name "normalize_foreign_ident" feels wrong; the concept that the
parameter tries to convey covers much wider than just "foreign ident", no?

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH v2 1/2] convert: fix normalization of foreign idents
  2010-09-12 21:44                 ` Junio C Hamano
@ 2010-09-12 22:06                   ` Marcus Comstedt
  2010-09-13 22:00                     ` [PATCH] convert: generalize checksafe parameter Marcus Comstedt
  0 siblings, 1 reply; 19+ messages in thread
From: Marcus Comstedt @ 2010-09-12 22:06 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git


Junio C Hamano <gitster@pobox.com> writes:

> Imagine there isn't any "$Id$" (or "$ident$" as it was originally known)
> expansion in git.  You can implement it easily using a smudge/clean pair,

Sorry, but here I have to go off a little at a tangent.  Yes, you
could implement the ident-expansion currently provided by git as a
smudge/clean pair.  However, you could not implement an ident which
actually puts something more useful (such as the id of the commit
where the file was last modified) into the id string by using
smudge/clean.  I know, because I tried to do just that.  ;-)
The reason:  smudge/clean do not get the pathname, so they are not
able to query any information about the file from the repository.
I might submit a patch adressing this issue later.


> and the smudge and clean should be conditionally applied in the codepath
> you touched using exactly the same logic as your patch uses, no?
>
> That is what I meant.  It smells wrong to make this "sometime we do,
> sometimes we don't" as a special case for "$Id$".  Specifically, the
> parameter name "normalize_foreign_ident" feels wrong; the concept that the
> parameter tries to convey covers much wider than just "foreign ident", no?

Ok, I think I follow where you are going.  _If_ we say that clean
(and smudge?) should be able to run in different "modes", with
cleaning for a commit being such an mode, then this ought to be
triggered by the same parameter, yes.  The parameter name describes
what the parameter does now, but not necessarily what it would do in a
possible future where such new concepts as modal clean scripts have
been introduced.

Generally, I'm kind of wondering if the parameters of convert_to_git
wouldn't be better off just specifying a mode (like the, perhaps also
slightly mis-named, write_object paremeter to index_mem) rather than
trying to micro-manage specific features like they have before.  Was
that what you had in mind?


  // Marcus

^ permalink raw reply	[flat|nested] 19+ messages in thread

* [PATCH] convert: generalize checksafe parameter
  2010-09-12 22:06                   ` Marcus Comstedt
@ 2010-09-13 22:00                     ` Marcus Comstedt
  0 siblings, 0 replies; 19+ messages in thread
From: Marcus Comstedt @ 2010-09-13 22:00 UTC (permalink / raw)
  To: git; +Cc: gitster

The convert_to_git() function used to have a checksafe parameter,
which could be used to prevent safe_crlf checks by passing 0
instead of the value of the global variable safe_crlf.

Since preventing checks is a wider concept than just disabling
safe_crlf, generalize the parameter so that it can be used for other
types of checks as well.

Signed-off-by: Marcus Comstedt <marcus@mc.pp.se>
---

Ok, here's a stab at generalizing the parameters which affect
the behavior of convert_to_git(), starting with the already
existing one.  I'd originally intended to combine it with the
new one, but I couldn't seem to find a correspondence between
the intended use of the normalized data and whether checks
are allowed or not (for example, diff allows checks but blame
does not), so I'm keeping that as a separate facet.

I could add this to the current patch series, or rebase that
against this.

 builtin/apply.c |    2 +-
 builtin/blame.c |    2 +-
 cache.h         |    7 ++++++-
 combine-diff.c  |    2 +-
 convert.c       |    7 ++++---
 diff.c          |    2 +-
 sha1_file.c     |    2 +-
 7 files changed, 15 insertions(+), 9 deletions(-)

diff --git a/builtin/apply.c b/builtin/apply.c
index 23c18c5..638e7be 100644
--- a/builtin/apply.c
+++ b/builtin/apply.c
@@ -1932,7 +1932,7 @@ static int read_old_data(struct stat *st, const char *path, struct strbuf *buf)
 	case S_IFREG:
 		if (strbuf_read_file(buf, path, st->st_size) != st->st_size)
 			return error("unable to open or read %s", path);
-		convert_to_git(path, buf->buf, buf->len, buf, 0);
+		convert_to_git(path, buf->buf, buf->len, buf, CHECKS_DISALLOWED);
 		return 0;
 	default:
 		return -1;
diff --git a/builtin/blame.c b/builtin/blame.c
index 1015354..850e165 100644
--- a/builtin/blame.c
+++ b/builtin/blame.c
@@ -2095,7 +2095,7 @@ static struct commit *fake_working_tree_commit(struct diff_options *opt,
 		if (strbuf_read(&buf, 0, 0) < 0)
 			die_errno("failed to read from stdin");
 	}
-	convert_to_git(path, buf.buf, buf.len, &buf, 0);
+	convert_to_git(path, buf.buf, buf.len, &buf, CHECKS_DISALLOWED);
 	origin->file.ptr = buf.buf;
 	origin->file.size = buf.len;
 	pretend_sha1_file(buf.buf, buf.len, OBJ_BLOB, origin->blob_sha1);
diff --git a/cache.h b/cache.h
index 2ef2fa3..250abc1 100644
--- a/cache.h
+++ b/cache.h
@@ -581,6 +581,11 @@ enum eol {
 
 extern enum eol eol;
 
+enum allow_checks {
+	CHECKS_DISALLOWED = 0,
+	CHECKS_ALLOWED = 1,
+};
+
 enum branch_track {
 	BRANCH_TRACK_UNSPECIFIED = -1,
 	BRANCH_TRACK_NEVER = 0,
@@ -1059,7 +1064,7 @@ extern void trace_argv_printf(const char **argv, const char *format, ...);
 /* convert.c */
 /* returns 1 if *dst was used */
 extern int convert_to_git(const char *path, const char *src, size_t len,
-                          struct strbuf *dst, enum safe_crlf checksafe);
+                          struct strbuf *dst, enum allow_checks checksallowed);
 extern int convert_to_working_tree(const char *path, const char *src, size_t len, struct strbuf *dst);
 extern int renormalize_buffer(const char *path, const char *src, size_t len, struct strbuf *dst);
 
diff --git a/combine-diff.c b/combine-diff.c
index 655fa89..c7f132d 100644
--- a/combine-diff.c
+++ b/combine-diff.c
@@ -758,7 +758,7 @@ static void show_patch_diff(struct combine_diff_path *elem, int num_parent,
 			if (is_file) {
 				struct strbuf buf = STRBUF_INIT;
 
-				if (convert_to_git(elem->path, result, len, &buf, safe_crlf)) {
+				if (convert_to_git(elem->path, result, len, &buf, CHECKS_ALLOWED)) {
 					free(result);
 					result = strbuf_detach(&buf, &len);
 					result_size = len;
diff --git a/convert.c b/convert.c
index 01de9a8..8050c24 100644
--- a/convert.c
+++ b/convert.c
@@ -706,7 +706,7 @@ static enum action determine_action(enum action text_attr, enum eol eol_attr)
 }
 
 int convert_to_git(const char *path, const char *src, size_t len,
-                   struct strbuf *dst, enum safe_crlf checksafe)
+                   struct strbuf *dst, enum allow_checks checksallowed)
 {
 	struct git_attr_check check[5];
 	enum action action = CRLF_GUESS;
@@ -733,7 +733,8 @@ int convert_to_git(const char *path, const char *src, size_t len,
 		len = dst->len;
 	}
 	action = determine_action(action, eol_attr);
-	ret |= crlf_to_git(path, src, len, dst, action, checksafe);
+	ret |= crlf_to_git(path, src, len, dst, action,
+			   (checksallowed? safe_crlf : 0));
 	if (ret) {
 		src = dst->buf;
 		len = dst->len;
@@ -796,5 +797,5 @@ int renormalize_buffer(const char *path, const char *src, size_t len, struct str
 		src = dst->buf;
 		len = dst->len;
 	}
-	return ret | convert_to_git(path, src, len, dst, 0);
+	return ret | convert_to_git(path, src, len, dst, CHECKS_DISALLOWED);
 }
diff --git a/diff.c b/diff.c
index 9a5c77c..ed74f6b 100644
--- a/diff.c
+++ b/diff.c
@@ -2375,7 +2375,7 @@ int diff_populate_filespec(struct diff_filespec *s, int size_only)
 		/*
 		 * Convert from working tree format to canonical git format
 		 */
-		if (convert_to_git(s->path, s->data, s->size, &buf, safe_crlf)) {
+		if (convert_to_git(s->path, s->data, s->size, &buf, CHECKS_ALLOWED)) {
 			size_t size = 0;
 			munmap(s->data, s->size);
 			s->should_munmap = 0;
diff --git a/sha1_file.c b/sha1_file.c
index 0cd9435..13624a6 100644
--- a/sha1_file.c
+++ b/sha1_file.c
@@ -2434,7 +2434,7 @@ static int index_mem(unsigned char *sha1, void *buf, size_t size,
 	if ((type == OBJ_BLOB) && path) {
 		struct strbuf nbuf = STRBUF_INIT;
 		if (convert_to_git(path, buf, size, &nbuf,
-		                   write_object ? safe_crlf : 0)) {
+		                   write_object ? CHECKS_ALLOWED : CHECKS_DISALLOWED)) {
 			buf = strbuf_detach(&nbuf, &size);
 			re_allocated = 1;
 		}
-- 
1.7.2

^ permalink raw reply related	[flat|nested] 19+ messages in thread

end of thread, other threads:[~2010-09-13 22:34 UTC | newest]

Thread overview: 19+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2010-08-23 21:30 Fix for normalization of foreign idents Marcus Comstedt
2010-08-23 21:30 ` [PATCH] convert: fix " Marcus Comstedt
2010-08-23 21:35 ` Fix for " Jonathan Nieder
2010-08-23 21:44   ` Marcus Comstedt
2010-08-23 22:33     ` Jonathan Nieder
2010-08-23 23:46       ` Junio C Hamano
2010-08-24  7:23         ` Marcus Comstedt
2010-09-06  9:42     ` Marcus Comstedt
2010-09-06 21:07       ` Jonathan Nieder
2010-09-07 19:37         ` Marcus Comstedt
2010-08-23  7:05           ` [PATCH v2 1/2] convert: fix " Marcus Comstedt
2010-09-10  0:26             ` Junio C Hamano
2010-09-12 21:01               ` Marcus Comstedt
2010-09-12 21:44                 ` Junio C Hamano
2010-09-12 22:06                   ` Marcus Comstedt
2010-09-13 22:00                     ` [PATCH] convert: generalize checksafe parameter Marcus Comstedt
2010-09-07 19:16           ` [PATCH v2 2/2] t0021: test checkout and commit of foreign idents Marcus Comstedt
2010-09-07 20:00           ` [PATCH v2 0/2] fix normalization of foreign idents (now with test cases) Marcus Comstedt
2010-09-08  4:32           ` Fix for normalization of foreign idents Jonathan Nieder

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.