All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v5 0/3] cat-file --follow-symlinks
@ 2015-05-11 22:50 dturner
  2015-05-11 22:50 ` [PATCH v5 1/3] tree-walk: learn get_tree_entry_follow_symlinks dturner
                   ` (2 more replies)
  0 siblings, 3 replies; 20+ messages in thread
From: dturner @ 2015-05-11 22:50 UTC (permalink / raw)
  To: git

This version improves the documentation for git cat-file
--follow-symlinks.  Only Patch 3/3 is changed.

Thanks to Junio Hamano for suggestions.

^ permalink raw reply	[flat|nested] 20+ messages in thread

* [PATCH v5 1/3] tree-walk: learn get_tree_entry_follow_symlinks
  2015-05-11 22:50 [PATCH v5 0/3] cat-file --follow-symlinks dturner
@ 2015-05-11 22:50 ` dturner
  2015-05-12 17:29   ` Johannes Sixt
  2015-05-11 22:50 ` [PATCH v5 2/3] sha1_name: get_sha1_with_context learns to follow symlinks dturner
  2015-05-11 22:50 ` [PATCH v5 3/3] cat-file: add --follow-symlinks to --batch dturner
  2 siblings, 1 reply; 20+ messages in thread
From: dturner @ 2015-05-11 22:50 UTC (permalink / raw)
  To: git; +Cc: David Turner

From: David Turner <dturner@twitter.com>

Add a new function, get_tree_entry_follow_symlinks, to tree-walk.[ch].
The function is not yet used.  It will be used to implement git
cat-file --batch --follow-symlinks.

The function locates an object by path, following symlinks in the
repository.  If the symlinks lead outside the repository, the function
reports this to the caller.

Signed-off-by: David Turner <dturner@twitter.com>
---
 tree-walk.c | 193 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 tree-walk.h |   2 +
 2 files changed, 195 insertions(+)

diff --git a/tree-walk.c b/tree-walk.c
index 5dd9a71..3d088b6 100644
--- a/tree-walk.c
+++ b/tree-walk.c
@@ -415,6 +415,12 @@ int traverse_trees(int n, struct tree_desc *t, struct traverse_info *info)
 	return error;
 }
 
+struct dir_state {
+	void *tree;
+	unsigned long size;
+	unsigned char sha1[20];
+};
+
 static int find_tree_entry(struct tree_desc *t, const char *name, unsigned char *result, unsigned *mode)
 {
 	int namelen = strlen(name);
@@ -478,6 +484,193 @@ int get_tree_entry(const unsigned char *tree_sha1, const char *name, unsigned ch
 	return retval;
 }
 
+/*
+ * This is Linux's built-in max for the number of symlinks to follow.
+ * That limit, of course, does not affect git, but it's a reasonable
+ * choice.
+ */
+#define GET_TREE_ENTRY_FOLLOW_SYMLINKS_MAX_LINKS 40
+
+/**
+ * Find a tree entry by following symlinks in tree_sha (which is
+ * assumed to be the root of the repository).  In the event that a
+ * symlink points outside the repository (e.g. a link to /foo or a
+ * root-level link to ../foo), the portion of the link which is
+ * outside the repository will be returned in result_path, and *mode
+ * will be set to 0.  It is assumed that result_path is uninitialized.
+ * If there are no symlinks, or the end result of the symlink chain
+ * points to an object inside the repository, result will be filled in
+ * with the sha1 of the found object, and *mode will hold the mode of
+ * the object.  If there is a symlink loop, -1 will be returned.
+ */
+int get_tree_entry_follow_symlinks(unsigned char *tree_sha1, const char *name, unsigned char *result, struct strbuf *result_path, unsigned *mode)
+{
+	int retval = -1;
+	struct dir_state *parents = NULL;
+	size_t parents_alloc = 0;
+	ssize_t parents_nr = 0;
+	unsigned char current_tree_sha1[20];
+	struct strbuf namebuf = STRBUF_INIT;
+	struct tree_desc t = {0};
+	int follows_remaining = GET_TREE_ENTRY_FOLLOW_SYMLINKS_MAX_LINKS;
+	int i;
+
+	result_path->buf = 0;
+	result_path->alloc = 0;
+	result_path->len = 0;
+	strbuf_addstr(&namebuf, name);
+	hashcpy(current_tree_sha1, tree_sha1);
+
+	while (1) {
+		int find_result;
+		char *first_slash;
+		char *remainder = NULL;
+
+		if (!t.buffer) {
+			void *tree;
+			unsigned char root[20];
+			unsigned long size;
+			tree = read_object_with_reference(current_tree_sha1,
+							  tree_type, &size,
+							  root);
+			if (!tree)
+				goto done;
+
+			ALLOC_GROW(parents, parents_nr + 1, parents_alloc);
+			parents[parents_nr].tree = tree;
+			parents[parents_nr].size = size;
+			hashcpy(parents[parents_nr].sha1, root);
+			parents_nr++;
+
+			if (namebuf.buf[0] == '\0') {
+				hashcpy(result, root);
+				retval = 0;
+				goto done;
+			}
+
+			if (!size)
+				goto done;
+
+			/* descend */
+			init_tree_desc(&t, tree, size);
+		}
+
+		/* Handle symlinks to e.g. a//b by removing leading slashes */
+		while (namebuf.buf[0] == '/') {
+			strbuf_remove(&namebuf, 0, 1);
+		}
+
+		/* Split namebuf into a first component and a remainder */
+		if ((first_slash = strchr(namebuf.buf, '/'))) {
+			*first_slash = 0;
+			remainder = first_slash + 1;
+		}
+
+		if (!strcmp(namebuf.buf, "..")) {
+			struct dir_state *parent;
+			/*
+			 * We could end up with .. in the namebuf if it
+			 * appears in a symlink.
+			 */
+
+			if (parents_nr == 1) {
+				if (remainder)
+					*first_slash = '/';
+				strbuf_add(result_path, namebuf.buf,
+					   namebuf.len);
+				*mode = 0;
+				retval = 0;
+				goto done;
+			}
+			parent = &parents[parents_nr - 1];
+			free(parent->tree);
+			parents_nr--;
+			parent = &parents[parents_nr - 1];
+			init_tree_desc(&t, parent->tree, parent->size);
+			strbuf_remove(&namebuf, 0, remainder ? 3 : 2);
+			continue;
+		}
+
+		/* We could end up here via a symlink to dir/.. */
+		if (namebuf.buf[0] == '\0') {
+			hashcpy(result, parents[parents_nr - 1].sha1);
+			retval = 0;
+			goto done;
+		}
+
+		/* Look up the first (or only) path component in the tree. */
+		find_result = find_tree_entry(&t, namebuf.buf,
+					      current_tree_sha1, mode);
+		if (find_result) {
+			retval = find_result;
+			goto done;
+		}
+
+		if (S_ISDIR(*mode)) {
+			if (!remainder) {
+				hashcpy(result, current_tree_sha1);
+				retval = 0;
+				goto done;
+			}
+			/* Descend the tree */
+			t.buffer = NULL;
+			strbuf_remove(&namebuf, 0,
+				      1 + first_slash - namebuf.buf);
+		} else if (S_ISREG(*mode)) {
+			if (!remainder) {
+				hashcpy(result, current_tree_sha1);
+				retval = 0;
+			}
+			goto done;
+		} else if (S_ISLNK(*mode)) {
+			/* Follow a symlink */
+			size_t link_len, len;
+			char *contents, *contents_start;
+			struct dir_state *parent;
+			enum object_type type;
+
+			if (follows_remaining-- == 0)
+				/* Too many symlinks followed */
+				goto done;
+
+			contents = read_sha1_file(current_tree_sha1, &type,
+						  &link_len);
+
+			if (!contents)
+				goto done;
+
+			if (contents[0] == '/') {
+				strbuf_addstr(result_path, contents);
+				*mode = 0;
+				retval = 0;
+				goto done;
+			}
+
+			if (remainder)
+				len = first_slash - namebuf.buf;
+			else
+				len = namebuf.len;
+
+			contents_start = contents;
+
+			parent = &parents[parents_nr - 1];
+			init_tree_desc(&t, parent->tree, parent->size);
+			strbuf_splice(&namebuf, 0, len,
+				      contents_start, link_len);
+			if (remainder)
+				namebuf.buf[link_len] = '/';
+			free(contents);
+		}
+	}
+done:
+	for (i = 0; i < parents_nr; i++)
+		free(parents[i].tree);
+	free(parents);
+
+	strbuf_release(&namebuf);
+	return retval;
+}
+
 static int match_entry(const struct pathspec_item *item,
 		       const struct name_entry *entry, int pathlen,
 		       const char *match, int matchlen,
diff --git a/tree-walk.h b/tree-walk.h
index ae7fb3a..d1670c6 100644
--- a/tree-walk.h
+++ b/tree-walk.h
@@ -40,6 +40,8 @@ struct traverse_info;
 typedef int (*traverse_callback_t)(int n, unsigned long mask, unsigned long dirmask, struct name_entry *entry, struct traverse_info *);
 int traverse_trees(int n, struct tree_desc *t, struct traverse_info *info);
 
+int get_tree_entry_follow_symlinks(unsigned char *tree_sha1, const char *name, unsigned char *result, struct strbuf *result_path, unsigned *mode);
+
 struct traverse_info {
 	struct traverse_info *prev;
 	struct name_entry name;
-- 
2.0.4.315.gad8727a-twtrsrc

^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [PATCH v5 2/3] sha1_name: get_sha1_with_context learns to follow symlinks
  2015-05-11 22:50 [PATCH v5 0/3] cat-file --follow-symlinks dturner
  2015-05-11 22:50 ` [PATCH v5 1/3] tree-walk: learn get_tree_entry_follow_symlinks dturner
@ 2015-05-11 22:50 ` dturner
  2015-05-11 22:50 ` [PATCH v5 3/3] cat-file: add --follow-symlinks to --batch dturner
  2 siblings, 0 replies; 20+ messages in thread
From: dturner @ 2015-05-11 22:50 UTC (permalink / raw)
  To: git; +Cc: David Turner

From: David Turner <dturner@twitter.com>

Wire up get_sha1_with_context to call get_tree_entry_follow_symlinks
when GET_SHA1_FOLLOW_SYMLINKS is passed in flags. G_S_FOLLOW_SYMLINKS
is incompatible with G_S_ONLY_TO_DIE because the diagnosis
that ONLY_TO_DIE triggers does not at present consider symlinks, and
it would be a significant amount of additional code to allow it to
do so.

Signed-off-by: David Turner <dturner@twitter.com>
---
 cache.h     | 20 +++++++++++++-------
 sha1_name.c | 20 +++++++++++++++-----
 2 files changed, 28 insertions(+), 12 deletions(-)

diff --git a/cache.h b/cache.h
index 3d3244b..65505d1 100644
--- a/cache.h
+++ b/cache.h
@@ -922,15 +922,21 @@ struct object_context {
 	unsigned char tree[20];
 	char path[PATH_MAX];
 	unsigned mode;
+	/*
+	 * symlink_path is only used by get_tree_entry_follow_symlinks,
+	 * and only for symlinks that point outside the repository.
+	 */
+	struct strbuf symlink_path;
 };
 
-#define GET_SHA1_QUIETLY        01
-#define GET_SHA1_COMMIT         02
-#define GET_SHA1_COMMITTISH     04
-#define GET_SHA1_TREE          010
-#define GET_SHA1_TREEISH       020
-#define GET_SHA1_BLOB	       040
-#define GET_SHA1_ONLY_TO_DIE 04000
+#define GET_SHA1_QUIETLY           01
+#define GET_SHA1_COMMIT            02
+#define GET_SHA1_COMMITTISH        04
+#define GET_SHA1_TREE             010
+#define GET_SHA1_TREEISH          020
+#define GET_SHA1_BLOB             040
+#define GET_SHA1_FOLLOW_SYMLINKS 0100
+#define GET_SHA1_ONLY_TO_DIE    04000
 
 extern int get_sha1(const char *str, unsigned char *sha1);
 extern int get_sha1_commit(const char *str, unsigned char *sha1);
diff --git a/sha1_name.c b/sha1_name.c
index 6d10f05..0c26515 100644
--- a/sha1_name.c
+++ b/sha1_name.c
@@ -1434,11 +1434,19 @@ static int get_sha1_with_context_1(const char *name,
 			new_filename = resolve_relative_path(filename);
 			if (new_filename)
 				filename = new_filename;
-			ret = get_tree_entry(tree_sha1, filename, sha1, &oc->mode);
-			if (ret && only_to_die) {
-				diagnose_invalid_sha1_path(prefix, filename,
-							   tree_sha1,
-							   name, len);
+			if (flags & GET_SHA1_FOLLOW_SYMLINKS) {
+				ret = get_tree_entry_follow_symlinks(tree_sha1,
+					filename, sha1, &oc->symlink_path,
+					&oc->mode);
+			} else {
+				ret = get_tree_entry(tree_sha1, filename,
+						     sha1, &oc->mode);
+				if (ret && only_to_die) {
+					diagnose_invalid_sha1_path(prefix,
+								   filename,
+								   tree_sha1,
+								   name, len);
+				}
 			}
 			hashcpy(oc->tree, tree_sha1);
 			strlcpy(oc->path, filename, sizeof(oc->path));
@@ -1469,5 +1477,7 @@ void maybe_die_on_misspelt_object_name(const char *name, const char *prefix)
 
 int get_sha1_with_context(const char *str, unsigned flags, unsigned char *sha1, struct object_context *orc)
 {
+	if (flags & GET_SHA1_FOLLOW_SYMLINKS && flags & GET_SHA1_ONLY_TO_DIE)
+		die("BUG: incompatible flags for get_sha1_with_context");
 	return get_sha1_with_context_1(str, flags, NULL, sha1, orc);
 }
-- 
2.0.4.315.gad8727a-twtrsrc

^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [PATCH v5 3/3] cat-file: add --follow-symlinks to --batch
  2015-05-11 22:50 [PATCH v5 0/3] cat-file --follow-symlinks dturner
  2015-05-11 22:50 ` [PATCH v5 1/3] tree-walk: learn get_tree_entry_follow_symlinks dturner
  2015-05-11 22:50 ` [PATCH v5 2/3] sha1_name: get_sha1_with_context learns to follow symlinks dturner
@ 2015-05-11 22:50 ` dturner
  2015-05-12 17:34   ` Johannes Sixt
  2015-05-12 18:07   ` Junio C Hamano
  2 siblings, 2 replies; 20+ messages in thread
From: dturner @ 2015-05-11 22:50 UTC (permalink / raw)
  To: git; +Cc: David Turner

From: David Turner <dturner@twitter.com>

This wires the in-repo-symlink following code through to the cat-file
builtin.  In the event of an out-of-repo link, cat-file will print
the link in a new format.

Signed-off-by: David Turner <dturner@twitter.com>
---
 Documentation/git-cat-file.txt |  54 +++++++++++-
 builtin/cat-file.c             |  24 +++++-
 t/t1006-cat-file.sh            | 189 +++++++++++++++++++++++++++++++++++++++++
 3 files changed, 262 insertions(+), 5 deletions(-)

diff --git a/Documentation/git-cat-file.txt b/Documentation/git-cat-file.txt
index f6a16f4..9bdfced 100644
--- a/Documentation/git-cat-file.txt
+++ b/Documentation/git-cat-file.txt
@@ -10,7 +10,7 @@ SYNOPSIS
 --------
 [verse]
 'git cat-file' (-t | -s | -e | -p | <type> | --textconv ) <object>
-'git cat-file' (--batch | --batch-check) < <list-of-objects>
+'git cat-file' (--batch | --batch-check) [--follow-symlinks] < <list-of-objects>
 
 DESCRIPTION
 -----------
@@ -69,6 +69,46 @@ OPTIONS
 	not be combined with any other options or arguments.  See the
 	section `BATCH OUTPUT` below for details.
 
+--follow-symlinks::
+	Follow symlinks inside the repository when requesting objects
+	with extended SHA-1 expressions of the form tree-ish:path-in-tree.
+	Instead of providing output about the link itself, provide output
+	about the linked-to object.  This option requires --batch or
+	--batch-check.  In the event of a symlink loop (or more than
+	40 symlinks in a symlink resolution chain), the file will be
+	treated as missing.  If a symlink points outside the tree-ish
+	(e.g. a link to /foo or a root-level link to ../foo), the
+	portion of the link which is outside the tree will be printed.
+	Follow-symlinks will be silently turned off if <object>
+	specifies an object in the index rather than one in the object
+	database.
+
+	For example, consider the a git repository containing:
+	f: a file containing "hello\n"
+	link: a symlink to f
+	dir/link: a symlink to ../f
+	plink: a symlink to ../f
+	alink: a symlink to /etc/passwd
+
+	echo HEAD:f |git cat-file --batch --follow-symlinks would print
+	ce013625030ba8dba906f756967f9e9ca394464a blob 6
+
+	echo HEAD:link |git cat-file --batch --follow-symlinks would print
+	the same thing, as would HEAD:dir/link.
+	Without follow-symlinks, these would print data about the
+	symlink itself.  In the case of HEAD:link, you would see
+	4d1ae35ba2c8ec712fa2a379db44ad639ca277bd blob 1
+
+	Both plink and alink point outside the tree, so they would
+	respectively print:
+	symlink 4
+	../f
+
+	symlink 11
+	/etc/passwd
+
+
+
 OUTPUT
 ------
 If '-t' is specified, one of the <type>.
@@ -148,6 +188,18 @@ the repository, then `cat-file` will ignore any custom format and print:
 <object> SP missing LF
 ------------
 
+If --follow-symlinks is used, and a symlink in the repository points
+outside the repository, then `cat-file` will ignore any custom format
+and print:
+
+------------
+symlink SP <size> LF <symlink> LF
+------------
+
+The symlink will either be absolute (beginning with a /), or relative
+to the tree root.  For instance, if dir/link points to ../../foo, then
+<symlink> will be ../foo.  <size> is the size of the symlink in bytes.
+
 
 CAVEATS
 -------
diff --git a/builtin/cat-file.c b/builtin/cat-file.c
index df99df4..6bcf9a0 100644
--- a/builtin/cat-file.c
+++ b/builtin/cat-file.c
@@ -224,6 +224,7 @@ static void print_object_or_die(int fd, struct expand_data *data)
 
 struct batch_options {
 	int enabled;
+	int follow_symlinks;
 	int print_contents;
 	const char *format;
 };
@@ -232,16 +233,25 @@ static int batch_one_object(const char *obj_name, struct batch_options *opt,
 			    struct expand_data *data)
 {
 	struct strbuf buf = STRBUF_INIT;
+	struct object_context ctx;
+	int flags = opt->follow_symlinks ? GET_SHA1_FOLLOW_SYMLINKS : 0;
 
 	if (!obj_name)
 	   return 1;
 
-	if (get_sha1(obj_name, data->sha1)) {
+	if (get_sha1_with_context(obj_name, flags, data->sha1, &ctx)) {
 		printf("%s missing\n", obj_name);
 		fflush(stdout);
 		return 0;
 	}
 
+	if (ctx.mode == 0) {
+		printf("symlink %"PRIuMAX"\n%s\n",
+		       (uintmax_t)ctx.symlink_path.len,
+		       ctx.symlink_path.buf);
+		return 0;
+	}
+
 	if (sha1_object_info_extended(data->sha1, &data->info, LOOKUP_REPLACE_OBJECT) < 0) {
 		printf("%s missing\n", obj_name);
 		fflush(stdout);
@@ -342,9 +352,8 @@ static int batch_option_callback(const struct option *opt,
 {
 	struct batch_options *bo = opt->value;
 
-	if (unset) {
-		memset(bo, 0, sizeof(*bo));
-		return 0;
+	if (bo->enabled) {
+		return 1;
 	}
 
 	bo->enabled = 1;
@@ -369,6 +378,9 @@ int cmd_cat_file(int argc, const char **argv, const char *prefix)
 		OPT_SET_INT('p', NULL, &opt, N_("pretty-print object's content"), 'p'),
 		OPT_SET_INT(0, "textconv", &opt,
 			    N_("for blob objects, run textconv on object's content"), 'c'),
+		OPT_SET_INT(0, "follow-symlinks", &batch.follow_symlinks,
+			N_("follow in-repo symlinks; report out-of-repo symlinks (requires --batch or --batch-check)"),
+			    1),
 		{ OPTION_CALLBACK, 0, "batch", &batch, "format",
 			N_("show info and content of objects fed from the standard input"),
 			PARSE_OPT_OPTARG, batch_option_callback },
@@ -402,6 +414,10 @@ int cmd_cat_file(int argc, const char **argv, const char *prefix)
 		usage_with_options(cat_file_usage, options);
 	}
 
+	if (batch.follow_symlinks && !batch.enabled) {
+		usage_with_options(cat_file_usage, options);
+	}
+
 	if (batch.enabled)
 		return batch_objects(&batch);
 
diff --git a/t/t1006-cat-file.sh b/t/t1006-cat-file.sh
index ab36b1e..500cfa9 100755
--- a/t/t1006-cat-file.sh
+++ b/t/t1006-cat-file.sh
@@ -189,6 +189,13 @@ do
     '
 done
 
+for opt in t s e p
+do
+    test_expect_success "Passing -$opt with --follow-symlinks fails" '
+	    test_must_fail git cat-file --follow-symlinks -$opt $hello_sha1
+	'
+done
+
 test_expect_success "--batch-check for a non-existent named object" '
     test "foobar42 missing
 foobar84 missing" = \
@@ -296,4 +303,186 @@ test_expect_success '%(deltabase) reports packed delta bases' '
 	}
 '
 
+# Tests for git cat-file --follow-symlinks
+test_expect_success 'prep for symlink tests' '
+	echo_without_newline "$hello_content" >morx &&
+	test_ln_s_add morx same-dir-link &&
+	test_ln_s_add ../fleem out-of-repo-link &&
+	test_ln_s_add .. out-of-repo-link-dir &&
+	test_ln_s_add same-dir-link link-to-link &&
+	test_ln_s_add nope broken-same-dir-link &&
+	mkdir dir &&
+	test_ln_s_add ../morx dir/parent-dir-link &&
+	test_ln_s_add .. dir/link-dir &&
+	test_ln_s_add ../../escape dir/out-of-repo-link &&
+	test_ln_s_add ../.. dir/out-of-repo-link-dir &&
+	test_ln_s_add nope dir/broken-link-in-dir &&
+	mkdir dir/subdir &&
+	test_ln_s_add ../../morx dir/subdir/grandparent-dir-link &&
+	test_ln_s_add ../../../great-escape dir/subdir/out-of-repo-link &&
+	test_ln_s_add ../../.. dir/subdir/out-of-repo-link-dir &&
+	test_ln_s_add ../../../ dir/subdir/out-of-repo-link-dir-trailing &&
+	test_ln_s_add ../parent-dir-link dir/subdir/parent-dir-link-to-link &&
+	echo_without_newline "$hello_content" >dir/subdir/ind2 &&
+	echo_without_newline "$hello_content" >dir/ind1 &&
+	test_ln_s_add dir dirlink &&
+	test_ln_s_add dir/subdir subdirlink &&
+	test_ln_s_add subdir/ind2 dir/link-to-child &&
+	test_ln_s_add dir/link-to-child link-to-down-link &&
+	test_ln_s_add dir/.. up-down &&
+	test_ln_s_add dir/../ up-down-trailing &&
+	test_ln_s_add dir/../morx up-down-file &&
+	test_ln_s_add dir/../../morx up-up-down-file &&
+	test_ln_s_add subdirlink/../../morx up-two-down-file &&
+	test_ln_s_add loop1 loop2 &&
+	test_ln_s_add loop2 loop1 &&
+	git add morx dir/subdir/ind2 dir/ind1 &&
+	git commit -am "test"
+	echo $hello_sha1 blob $hello_size >found
+'
+
+test_expect_success 'git cat-file --batch-check --follow-symlinks works for non-links' '
+	echo HEAD:morx | git cat-file --batch-check --follow-symlinks >actual &&
+	test_cmp found actual &&
+	echo HEAD:nope missing >expect &&
+	echo HEAD:nope | git cat-file --batch-check --follow-symlinks >actual &&
+	test_cmp expect actual
+'
+
+test_expect_success 'git cat-file --batch-check --follow-symlinks works for in-repo, same-dir links' '
+	echo HEAD:same-dir-link | git cat-file --batch-check --follow-symlinks >actual &&
+	test_cmp found actual
+'
+
+test_expect_success 'git cat-file --batch-check --follow-symlinks works for broken in-repo, same-dir links' '
+	echo HEAD:broken-same-dir-link missing >expect &&
+	echo HEAD:broken-same-dir-link | git cat-file --batch-check --follow-symlinks >actual &&
+	test_cmp expect actual
+'
+
+test_expect_success 'git cat-file --batch-check --follow-symlinks works for same-dir links-to-links' '
+	echo HEAD:link-to-link | git cat-file --batch-check --follow-symlinks >actual &&
+	test_cmp found actual
+'
+
+test_expect_success 'git cat-file --batch-check --follow-symlinks works for parent-dir links' '
+	echo HEAD:dir/parent-dir-link | git cat-file --batch-check --follow-symlinks >actual &&
+	test_cmp found actual &&
+	echo HEAD:dir/parent-dir-link/nope missing >expect &&
+	echo HEAD:dir/parent-dir-link/nope | git cat-file --batch-check --follow-symlinks >actual &&
+	test_cmp expect actual
+'
+
+test_expect_success 'git cat-file --batch-check --follow-symlinks works for .. links' '
+	echo HEAD:dir/link-dir/nope missing >expect &&
+	echo HEAD:dir/link-dir/nope | git cat-file --batch-check --follow-symlinks >actual &&
+	test_cmp expect actual &&
+	echo HEAD:dir/link-dir/morx | git cat-file --batch-check --follow-symlinks >actual &&
+	test_cmp found actual &&
+	echo HEAD:dir/broken-link-in-dir missing >expect &&
+	echo HEAD:dir/broken-link-in-dir | git cat-file --batch-check --follow-symlinks >actual &&
+	test_cmp expect actual
+'
+
+test_expect_success 'git cat-file --batch-check --follow-symlinks works for ../.. links' '
+	echo HEAD:dir/subdir/grandparent-dir-link/nope missing >expect &&
+	echo HEAD:dir/subdir/grandparent-dir-link/nope | git cat-file --batch-check --follow-symlinks >actual &&
+	test_cmp expect actual &&
+	echo HEAD:dir/subdir/grandparent-dir-link | git cat-file --batch-check --follow-symlinks >actual &&
+	test_cmp found actual &&
+	echo HEAD:dir/subdir/parent-dir-link-to-link | git cat-file --batch-check --follow-symlinks >actual &&
+	test_cmp found actual
+'
+
+test_expect_success 'git cat-file --batch-check --follow-symlinks works for dir/ links' '
+	echo HEAD:dirlink/morx missing >expect &&
+	echo HEAD:dirlink/morx | git cat-file --batch-check --follow-symlinks >actual &&
+	test_cmp expect actual &&
+	echo $hello_sha1 blob $hello_size >expect &&
+	echo HEAD:dirlink/ind1 | git cat-file --batch-check --follow-symlinks >actual &&
+	test_cmp expect actual
+'
+
+test_expect_success 'git cat-file --batch-check --follow-symlinks works for dir/subdir links' '
+	echo HEAD:subdirlink/morx missing >expect &&
+	echo HEAD:subdirlink/morx | git cat-file --batch-check --follow-symlinks >actual &&
+	test_cmp expect actual &&
+	echo HEAD:subdirlink/ind2 | git cat-file --batch-check --follow-symlinks >actual &&
+	test_cmp found actual
+'
+
+test_expect_success 'git cat-file --batch-check --follow-symlinks works for dir ->subdir links' '
+	echo HEAD:dir/link-to-child/morx missing >expect &&
+	echo HEAD:dir/link-to-child/morx | git cat-file --batch-check --follow-symlinks >actual &&
+	test_cmp expect actual &&
+	echo HEAD:dir/link-to-child | git cat-file --batch-check --follow-symlinks >actual &&
+	test_cmp found actual &&
+	echo HEAD:link-to-down-link | git cat-file --batch-check --follow-symlinks >actual &&
+	test_cmp found actual
+'
+
+test_expect_success 'git cat-file --batch-check --follow-symlinks works for out-of-repo symlinks' '
+	echo symlink 8 >expect &&
+	echo ../fleem >>expect &&
+	echo HEAD:out-of-repo-link | git cat-file --batch-check --follow-symlinks >actual &&
+	test_cmp expect actual &&
+	echo symlink 2 >expect &&
+	echo .. >>expect &&
+	echo HEAD:out-of-repo-link-dir | git cat-file --batch-check --follow-symlinks >actual &&
+	test_cmp expect actual
+'
+
+test_expect_success 'git cat-file --batch-check --follow-symlinks works for out-of-repo symlinks in dirs' '
+	echo symlink 9 >expect &&
+	echo ../escape >>expect &&
+	echo HEAD:dir/out-of-repo-link | git cat-file --batch-check --follow-symlinks >actual &&
+	test_cmp expect actual &&
+	echo symlink 2 >expect &&
+	echo .. >>expect &&
+	echo HEAD:dir/out-of-repo-link-dir | git cat-file --batch-check --follow-symlinks >actual &&
+	test_cmp expect actual
+'
+
+test_expect_success 'git cat-file --batch-check --follow-symlinks works for out-of-repo symlinks in subdirs' '
+	echo symlink 15 >expect &&
+	echo ../great-escape >>expect &&
+	echo HEAD:dir/subdir/out-of-repo-link | git cat-file --batch-check --follow-symlinks >actual &&
+	test_cmp expect actual &&
+	echo symlink 2 >expect &&
+	echo .. >>expect &&
+	echo HEAD:dir/subdir/out-of-repo-link-dir | git cat-file --batch-check --follow-symlinks >actual &&
+	test_cmp expect actual &&
+	echo symlink 3 >expect &&
+	echo ../ >>expect &&
+	echo HEAD:dir/subdir/out-of-repo-link-dir-trailing | git cat-file --batch-check --follow-symlinks >actual &&
+	test_cmp expect actual
+'
+
+test_expect_success 'git cat-file --batch-check --follow-symlinks works for symlinks with internal ..' '
+	echo HEAD: | git cat-file --batch-check >expect &&
+	echo HEAD:up-down | git cat-file --batch-check --follow-symlinks >actual &&
+	test_cmp expect actual &&
+	echo HEAD:up-down-trailing | git cat-file --batch-check --follow-symlinks >actual &&
+	test_cmp expect actual &&
+	echo HEAD:up-down-file | git cat-file --batch-check --follow-symlinks >actual &&
+	test_cmp found actual &&
+	echo symlink 7 >expect &&
+	echo ../morx >>expect &&
+	echo HEAD:up-up-down-file | git cat-file --batch-check --follow-symlinks >actual &&
+	test_cmp expect actual &&
+	echo HEAD:up-two-down-file | git cat-file --batch-check --follow-symlinks >actual &&
+	test_cmp found actual
+'
+
+test_expect_success 'git cat-file --batch-check --follow-symlink breaks loops' '
+	echo HEAD:loop1 missing >expect &&
+	echo HEAD:loop1 | git cat-file --batch-check --follow-symlinks >actual &&
+	test_cmp expect actual
+'
+
+test_expect_success 'git cat-file --batch --follow-symlink returns correct sha and mode' '
+	echo HEAD:morx | git cat-file --batch >expect &&
+	echo HEAD:morx | git cat-file --batch --follow-symlinks >actual &&
+	test_cmp expect actual
+'
 test_done
-- 
2.0.4.315.gad8727a-twtrsrc

^ permalink raw reply related	[flat|nested] 20+ messages in thread

* Re: [PATCH v5 1/3] tree-walk: learn get_tree_entry_follow_symlinks
  2015-05-11 22:50 ` [PATCH v5 1/3] tree-walk: learn get_tree_entry_follow_symlinks dturner
@ 2015-05-12 17:29   ` Johannes Sixt
  0 siblings, 0 replies; 20+ messages in thread
From: Johannes Sixt @ 2015-05-12 17:29 UTC (permalink / raw)
  To: dturner, git; +Cc: David Turner

Am 12.05.2015 um 00:50 schrieb dturner@twopensource.com:
> +		} else if (S_ISLNK(*mode)) {
> +			/* Follow a symlink */
> +			size_t link_len, len;
> +			char *contents, *contents_start;
> +			struct dir_state *parent;
> +			enum object_type type;
> +
> +			if (follows_remaining-- == 0)
> +				/* Too many symlinks followed */
> +				goto done;
> +
> +			contents = read_sha1_file(current_tree_sha1, &type,
> +						  &link_len);

In this line, I get:

tree-walk.c: In function 'get_tree_entry_follow_symlinks':
tree-walk.c:637: warning: passing argument 3 of 'read_sha1_file' from 
incompatible pointer type
cache.h:885: note: expected 'long unsigned int *' but argument is of 
type 'size_t *'

-- Hannes

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH v5 3/3] cat-file: add --follow-symlinks to --batch
  2015-05-11 22:50 ` [PATCH v5 3/3] cat-file: add --follow-symlinks to --batch dturner
@ 2015-05-12 17:34   ` Johannes Sixt
  2015-05-12 18:07   ` Junio C Hamano
  1 sibling, 0 replies; 20+ messages in thread
From: Johannes Sixt @ 2015-05-12 17:34 UTC (permalink / raw)
  To: dturner, git; +Cc: David Turner

Am 12.05.2015 um 00:50 schrieb dturner@twopensource.com:
> +# Tests for git cat-file --follow-symlinks
> +test_expect_success 'prep for symlink tests' '
> +	echo_without_newline "$hello_content" >morx &&
> +	test_ln_s_add morx same-dir-link &&
> +	test_ln_s_add ../fleem out-of-repo-link &&
> +	test_ln_s_add .. out-of-repo-link-dir &&
> +	test_ln_s_add same-dir-link link-to-link &&
> +	test_ln_s_add nope broken-same-dir-link &&
> +	mkdir dir &&
> +	test_ln_s_add ../morx dir/parent-dir-link &&
> +	test_ln_s_add .. dir/link-dir &&
> +	test_ln_s_add ../../escape dir/out-of-repo-link &&
> +	test_ln_s_add ../.. dir/out-of-repo-link-dir &&
> +	test_ln_s_add nope dir/broken-link-in-dir &&
> +	mkdir dir/subdir &&
> +	test_ln_s_add ../../morx dir/subdir/grandparent-dir-link &&
> +	test_ln_s_add ../../../great-escape dir/subdir/out-of-repo-link &&
> +	test_ln_s_add ../../.. dir/subdir/out-of-repo-link-dir &&
> +	test_ln_s_add ../../../ dir/subdir/out-of-repo-link-dir-trailing &&
> +	test_ln_s_add ../parent-dir-link dir/subdir/parent-dir-link-to-link &&
> +	echo_without_newline "$hello_content" >dir/subdir/ind2 &&
> +	echo_without_newline "$hello_content" >dir/ind1 &&
> +	test_ln_s_add dir dirlink &&
> +	test_ln_s_add dir/subdir subdirlink &&
> +	test_ln_s_add subdir/ind2 dir/link-to-child &&
> +	test_ln_s_add dir/link-to-child link-to-down-link &&
> +	test_ln_s_add dir/.. up-down &&
> +	test_ln_s_add dir/../ up-down-trailing &&
> +	test_ln_s_add dir/../morx up-down-file &&
> +	test_ln_s_add dir/../../morx up-up-down-file &&
> +	test_ln_s_add subdirlink/../../morx up-two-down-file &&
> +	test_ln_s_add loop1 loop2 &&
> +	test_ln_s_add loop2 loop1 &&
> +	git add morx dir/subdir/ind2 dir/ind1 &&
> +	git commit -am "test"

Broken && chain.

> +	echo $hello_sha1 blob $hello_size >found
> +'

-- Hannes

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH v5 3/3] cat-file: add --follow-symlinks to --batch
  2015-05-11 22:50 ` [PATCH v5 3/3] cat-file: add --follow-symlinks to --batch dturner
  2015-05-12 17:34   ` Johannes Sixt
@ 2015-05-12 18:07   ` Junio C Hamano
  2015-05-12 18:36     ` David Turner
  1 sibling, 1 reply; 20+ messages in thread
From: Junio C Hamano @ 2015-05-12 18:07 UTC (permalink / raw)
  To: dturner; +Cc: git, David Turner

dturner@twopensource.com writes:

> +--follow-symlinks::
> +	Follow symlinks inside the repository when requesting objects
> +	with extended SHA-1 expressions of the form tree-ish:path-in-tree.
> +	Instead of providing output about the link itself, provide output
> +	about the linked-to object.  This option requires --batch or
> +	--batch-check.  In the event of a symlink loop (or more than
> +	40 symlinks in a symlink resolution chain), the file will be
> +	treated as missing.  If a symlink points outside the tree-ish
> +	(e.g. a link to /foo or a root-level link to ../foo), the
> +	portion of the link which is outside the tree will be printed.
> +	Follow-symlinks will be silently turned off if <object>
> +	specifies an object in the index rather than one in the object
> +	database.
> +
> +	For example, consider the a git repository containing:
> +	f: a file containing "hello\n"
> +	link: a symlink to f
> +	dir/link: a symlink to ../f
> +	plink: a symlink to ../f
> +	alink: a symlink to /etc/passwd
> +
> +	echo HEAD:f |git cat-file --batch --follow-symlinks would print
> +	ce013625030ba8dba906f756967f9e9ca394464a blob 6
> +
> +	echo HEAD:link |git cat-file --batch --follow-symlinks would print
> +	the same thing, as would HEAD:dir/link.
> +	Without follow-symlinks, these would print data about the
> +	symlink itself.  In the case of HEAD:link, you would see
> +	4d1ae35ba2c8ec712fa2a379db44ad639ca277bd blob 1
> +
> +	Both plink and alink point outside the tree, so they would
> +	respectively print:
> +	symlink 4
> +	../f
> +
> +	symlink 11
> +	/etc/passwd
> +
> +
> +

A few points I noticed:

 * It is not clear that this is (currently) only for --batch and
   --batch-check until you read four lines into the description.

   Perhaps start the description like this instead?

   --follow-symlinks::
           When answering `--batch` or `--batch-check` request,
           follow symlinks inside the repository when requesting objects
           with extended SHA-1 expressions of the form tree-ish:path-in-tree.

   Also I'd lose the "This option requires ..." sentence in the middle
   (I'll come back to the reason why later).

 * Is it fundamental that this is only for --batch family, or is it
   just lack of need by the current implementor and implementation?
   "git cat-file --follow-symlinks blob :RelNotes" does not sound
   a nonsense request to me.

 * I am not sure if HEAD:link that points at HEAD:link should be
   reported as "missing".  It may be better to report the original
   without any dereferencing just like a link that points at outside
   the tree? i.e. "symlink 4 LF link".

 * I think "echo :RelNotes | git cat-file --batch --follow-symlinks"
   that does not follow a symlink is a BUG.  Unless there is
   something fundamental that in-index object should never support
   this feature, that is.  But I do not think of a good reason
   why---it feels that this is just the lack of implementation that
   can be addressed by somebody else in the future who finds the
   need for the support.

I do not necessarily think the latter three need to be addressed in
this 3-patch series, but they should be listed as known bugs, I
would think.  That would invite others to fix them and save time for
users to file unnecessary bug reports.

So (now I came back) the last part of the description may want to
become more like this:

	...
	portion of the link which is outside the tree will be printed.

	This option (currently) cannot be used unless `--batch` or
	`--batch-check` is used.

        Also the option does not (currently) work correctly when an
	object in the index is specified (e.g. `:link` instead of
	`HEAD:link`) rather than one in the tree.

We need to also say something about the "missing" vs "loop" case, if
we choose to leave that part broken.  I'd rather see it fixed, but
that is not a very strong preference.

By the way, the text after your patch would not format well thru
AsciiDoc.  See attached for a suggested mark-up fix that can be
squashed.

Thanks.


diff --git a/Documentation/git-cat-file.txt b/Documentation/git-cat-file.txt
index 9bdfced..3226f3e 100644
--- a/Documentation/git-cat-file.txt
+++ b/Documentation/git-cat-file.txt
@@ -82,31 +82,43 @@ OPTIONS
 	Follow-symlinks will be silently turned off if <object>
 	specifies an object in the index rather than one in the object
 	database.
-
-	For example, consider the a git repository containing:
++
+For example, consider a git repository containing:
++
+--
 	f: a file containing "hello\n"
 	link: a symlink to f
 	dir/link: a symlink to ../f
 	plink: a symlink to ../f
 	alink: a symlink to /etc/passwd
-
-	echo HEAD:f |git cat-file --batch --follow-symlinks would print
+--
++
+For a regular file `f`, `echo HEAD:f | git cat-file --batch` would print
++
+--
 	ce013625030ba8dba906f756967f9e9ca394464a blob 6
-
-	echo HEAD:link |git cat-file --batch --follow-symlinks would print
-	the same thing, as would HEAD:dir/link.
-	Without follow-symlinks, these would print data about the
-	symlink itself.  In the case of HEAD:link, you would see
+--
++
+And `echo HEAD:link | git cat-file --batch --follow-symlinks` would print
+the same thing, as would `HEAD:dir/link`, as they both point at `HEAD:f`.
++
+Without `--follow-symlinks`, these would print data about the
+symlink itself.  In the case of `HEAD:link`, you would see
++
+--
 	4d1ae35ba2c8ec712fa2a379db44ad639ca277bd blob 1
-
-	Both plink and alink point outside the tree, so they would
-	respectively print:
+--
++
+Both `plink` and `alink` point outside the tree, so they would
+respectively print:
++
+--
 	symlink 4
 	../f
 
 	symlink 11
 	/etc/passwd
-
+--
 
 
 OUTPUT
-- 
2.4.0-363-gef77c54

^ permalink raw reply related	[flat|nested] 20+ messages in thread

* Re: [PATCH v5 3/3] cat-file: add --follow-symlinks to --batch
  2015-05-12 18:07   ` Junio C Hamano
@ 2015-05-12 18:36     ` David Turner
  2015-05-12 18:43       ` Junio C Hamano
                         ` (2 more replies)
  0 siblings, 3 replies; 20+ messages in thread
From: David Turner @ 2015-05-12 18:36 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git, David Turner

On Tue, 2015-05-12 at 11:07 -0700, Junio C Hamano wrote:
> > +	Both plink and alink point outside the tree, so they would
> > +	respectively print:
> > +	symlink 4
> > +	../f
> > +
> > +	symlink 11
> > +	/etc/passwd
> > +
> > +
> > +
> 
> A few points I noticed:
> 
>  * It is not clear that this is (currently) only for --batch and
>    --batch-check until you read four lines into the description.
> 
>    Perhaps start the description like this instead?
> 
>    --follow-symlinks::
>            When answering `--batch` or `--batch-check` request,
>            follow symlinks inside the repository when requesting objects
>            with extended SHA-1 expressions of the form tree-ish:path-in-tree.

Will rearrange.

>    Also I'd lose the "This option requires ..." sentence in the middle
>    (I'll come back to the reason why later).
> 
>  * Is it fundamental that this is only for --batch family, or is it
>    just lack of need by the current implementor and implementation?
>    "git cat-file --follow-symlinks blob :RelNotes" does not sound
>    a nonsense request to me.

The reason that --follow-symlinks doesn't work for non-batch requests is
that it is impossible to distinguish out-of-tree symlinks from valid
output in non-batch output. I will add text explaining this. 

>  * I am not sure if HEAD:link that points at HEAD:link should be
>    reported as "missing".  It may be better to report the original
>    without any dereferencing just like a link that points at outside
>    the tree? i.e. "symlink 4 LF link".

Unfortunately, a symlink loop might include relative symlinks
(e.g. ../a).  If we return a relative symlink, the user will
not be able to distinguish it from a non-loop, out-of-tree symlink.  So
I think we may not return symlink 4 LF ../a for these cases.  

We could, I guess, have a separate output like loop <size> LF link
<LF>", but, unless we always save and output the first link in the
chain, we won't know what any link is relative to.  Since reasonable
people do not create symlink loops, and since there are other mechanisms
for symlink loop debugging (e.g. plain cat-file), I think it is OK not
to put special effort into handling loops.

>  * I think "echo :RelNotes | git cat-file --batch --follow-symlinks"
>    that does not follow a symlink is a BUG.  Unless there is
>    something fundamental that in-index object should never support
>    this feature, that is.  But I do not think of a good reason
>    why---it feels that this is just the lack of implementation that
>    can be addressed by somebody else in the future who finds the
>    need for the support.

Yes, this should definitely be addressed in the future.  I didn't see a
straightforward way to generalize this code to also address the index,
so a new version of this function would have to be written.  That's why
I didn't add that feature yet.  The lack of it is definitely a bug,
though.  

>         Also the option does not (currently) work correctly when an
> 	object in the index is specified (e.g. `:link` instead of
> 	`HEAD:link`) rather than one in the tree.
> 
> We need to also say something about the "missing" vs "loop" case, if
> we choose to leave that part broken.  I'd rather see it fixed, but
> that is not a very strong preference.

Will add an example.

> By the way, the text after your patch would not format well thru
> AsciiDoc.  See attached for a suggested mark-up fix that can be
> squashed.

I'll squash that in when I re-roll.  Thanks for the formatting.

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH v5 3/3] cat-file: add --follow-symlinks to --batch
  2015-05-12 18:36     ` David Turner
@ 2015-05-12 18:43       ` Junio C Hamano
  2015-05-12 18:55         ` David Turner
  2015-05-12 20:07       ` Junio C Hamano
  2015-05-14 20:06       ` Junio C Hamano
  2 siblings, 1 reply; 20+ messages in thread
From: Junio C Hamano @ 2015-05-12 18:43 UTC (permalink / raw)
  To: David Turner; +Cc: git, David Turner

David Turner <dturner@twopensource.com> writes:

>> We need to also say something about the "missing" vs "loop" case, if
>> we choose to leave that part broken.  I'd rather see it fixed, but
>> that is not a very strong preference.
>
> Will add an example.

I do not think we need an example.  By "also say", I meant in
addition to "This and that does not currently work", we also need to
say that loops do not work well.  In other words, it is enough to
just mention that it is a current limitation (or a bug, whichever we
choose to call) that loops are reported as missing.

Thanks.

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH v5 3/3] cat-file: add --follow-symlinks to --batch
  2015-05-12 18:43       ` Junio C Hamano
@ 2015-05-12 18:55         ` David Turner
  2015-05-12 20:00           ` Junio C Hamano
  0 siblings, 1 reply; 20+ messages in thread
From: David Turner @ 2015-05-12 18:55 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git, David Turner

On Tue, 2015-05-12 at 11:43 -0700, Junio C Hamano wrote:
> David Turner <dturner@twopensource.com> writes:
> 
> >> We need to also say something about the "missing" vs "loop" case, if
> >> we choose to leave that part broken.  I'd rather see it fixed, but
> >> that is not a very strong preference.
> >
> > Will add an example.
> 
> I do not think we need an example.  By "also say", I meant in
> addition to "This and that does not currently work", we also need to
> say that loops do not work well.  In other words, it is enough to
> just mention that it is a current limitation (or a bug, whichever we
> choose to call) that loops are reported as missing.

The version of the patch that we are commenting on contained the text: 
> +     --batch-check.  In the event of a symlink loop (or more than
> +     40 symlinks in a symlink resolution chain), the file will be
> +     treated as missing.  If a symlink points outside the tree-ish

Is that sufficient?  

Actually, we could simply have a separate output for broken links.
Instead of [original path] SP missing, [original path] SP loop.

I would probably implement this with a special error return code
(SYMLINK_LOOP=-2 instead of the usual -1). 

Does that seem reasonable to you?

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH v5 3/3] cat-file: add --follow-symlinks to --batch
  2015-05-12 18:55         ` David Turner
@ 2015-05-12 20:00           ` Junio C Hamano
  2015-05-12 20:22             ` Junio C Hamano
  0 siblings, 1 reply; 20+ messages in thread
From: Junio C Hamano @ 2015-05-12 20:00 UTC (permalink / raw)
  To: David Turner; +Cc: git, David Turner

David Turner <dturner@twopensource.com> writes:

> On Tue, 2015-05-12 at 11:43 -0700, Junio C Hamano wrote:
>> David Turner <dturner@twopensource.com> writes:
>> 
>> >> We need to also say something about the "missing" vs "loop" case, if
>> >> we choose to leave that part broken.  I'd rather see it fixed, but
>> >> that is not a very strong preference.
>> >
>> > Will add an example.
>> 
>> I do not think we need an example.  By "also say", I meant in
>> addition to "This and that does not currently work", we also need to
>> say that loops do not work well.  In other words, it is enough to
>> just mention that it is a current limitation (or a bug, whichever we
>> choose to call) that loops are reported as missing.
>
> The version of the patch that we are commenting on contained the text: 
>> +     --batch-check.  In the event of a symlink loop (or more than
>> +     40 symlinks in a symlink resolution chain), the file will be
>> +     treated as missing.  If a symlink points outside the tree-ish
>
> Is that sufficient?  

Not really, as I think people would say "treated as missing" is a
bug, and it would be better to mark it as "currently does not work"
in the sense that "you cannot use this feature to tell HEAD:link
that is not in HEAD and HEAD:link that is a symbolic link that
points to itself apart".

> Actually, we could simply have a separate output for broken links.
> Instead of [original path] SP missing, [original path] SP loop.

Hmm, I do not quite see where this resistance against keeping the
original request in order to say "You gave HEAD:link to me, but that
is a symbolic link that leads to 'link'" comes from.  After all,
wouldn't that be more consistent with what you already show for a
link that cannot be resolved, i.e. "You gave HEAD:link to me, that
is a link that points at 'nosuch'" when I do this:

    $ ln -s nosuch link
    $ git add link
    $ echo "$(git write-tree):link" |
      git cat-file --batch --follow-symlinks

Ahh, that would also give us "missing", so in that sense you are
being consistent.

But I do not think that consistency is useful.  Showing just
"missing" instead is losing information and that is what bothers me.

Showing "symlink 6 nosuch" to this "link points at a target that
would be in-tree but there is no such object in the tree" symbolic
link instead of "missing" would make it more useful, and I do not
offhand think of a downside, but maybe I am missing something.

For a link that points outside, the code already gives

    $ ln -s ../outside outlink
    $ git add outlink
    $ echo "$(git write-tree):outlink" |
      git cat-file --batch --follow-symlinks

"symlink ../outside", so the script reading from the batch output
already has to be prepared to handle "symlink" and understand it as
saying "the link does not point an object that is inside the tree".

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH v5 3/3] cat-file: add --follow-symlinks to --batch
  2015-05-12 18:36     ` David Turner
  2015-05-12 18:43       ` Junio C Hamano
@ 2015-05-12 20:07       ` Junio C Hamano
  2015-05-12 21:37         ` David Turner
  2015-05-14 20:06       ` Junio C Hamano
  2 siblings, 1 reply; 20+ messages in thread
From: Junio C Hamano @ 2015-05-12 20:07 UTC (permalink / raw)
  To: David Turner; +Cc: git, David Turner

David Turner <dturner@twopensource.com> writes:

>>  * I am not sure if HEAD:link that points at HEAD:link should be
>>    reported as "missing".  It may be better to report the original
>>    without any dereferencing just like a link that points at outside
>>    the tree? i.e. "symlink 4 LF link".
>
> Unfortunately, a symlink loop might include relative symlinks
> (e.g. ../a).  If we return a relative symlink, the user will
> not be able to distinguish it from a non-loop, out-of-tree symlink.  So
> I think we may not return symlink 4 LF ../a for these cases.  

I do not follow.  Let's start from a shared example.

    HEAD:sub/link is a symbolic link whose value is ../nextlink
    HEAD:nextlink is a symbolic link whose value is sub/link

That's a loop.  Now, I think what I am sugesting is

	$ git cat-file --batch-check --follow-symlinks <<\EOF
	HEAD:sub/link
        HEAD:nextlink
	EOF
	symlink ../nextlink
        symlink sub/link

If you asked about sub/link and then got ../nextlink back, then
isn't it clear for the reading script that it is about nextlink
at the top-level?  Why can't it tell it from out-of-tree link?


	

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH v5 3/3] cat-file: add --follow-symlinks to --batch
  2015-05-12 20:00           ` Junio C Hamano
@ 2015-05-12 20:22             ` Junio C Hamano
  2015-05-12 22:36               ` David Turner
  0 siblings, 1 reply; 20+ messages in thread
From: Junio C Hamano @ 2015-05-12 20:22 UTC (permalink / raw)
  To: David Turner; +Cc: git, David Turner

Junio C Hamano <gitster@pobox.com> writes:

> Ahh, that would also give us "missing", so in that sense you are
> being consistent.
>
> But I do not think that consistency is useful.  Showing just
> "missing" instead is losing information and that is what bothers me.
>
> Showing "symlink 6 nosuch" to this "link points at a target that
> would be in-tree but there is no such object in the tree" symbolic
> link instead of "missing" would make it more useful, and I do not
> offhand think of a downside, but maybe I am missing something.
>
> For a link that points outside, the code already gives
>
>     $ ln -s ../outside outlink
>     $ git add outlink
>     $ echo "$(git write-tree):outlink" |
>       git cat-file --batch --follow-symlinks
>
> "symlink ../outside", so the script reading from the batch output
> already has to be prepared to handle "symlink" and understand it as
> saying "the link does not point an object that is inside the tree".

Having said all that, I think we can make readers' life easier by
classifying various reasons why --follow-symlinks cannot pretend
as if the in-tree pointee were given as its input and signal it
differently:

 * A link points outside the original tree (or the index, once we
   support "cat-file :RelNotes").

   I think your "symlink <size> LF <target> LF" already does this.

 * A link points (directly or indirectly) at itself.

   This would be your "loop <size> LF <target> LF", which I think is
   good.

 * A link does not step outside the original tree, but the pointee
   does not exist in the original tree.

   I think you currently say "<object name for HEAD:link> missing",
   but I do not think it is useful.  That object does exist, but it
   merely cannot be dereferenced.  Perhaps introducing a new one,
   e.g. "dangling <size> LF <target> LF" similar to symlink/loop,
   would help the reader better?

Are there other cases?  The only other case I think of is when the
link resolves cleanly inside the tree, which you already handle.

Thanks.

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH v5 3/3] cat-file: add --follow-symlinks to --batch
  2015-05-12 20:07       ` Junio C Hamano
@ 2015-05-12 21:37         ` David Turner
  2015-05-12 21:42           ` Junio C Hamano
  0 siblings, 1 reply; 20+ messages in thread
From: David Turner @ 2015-05-12 21:37 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git, David Turner

On Tue, 2015-05-12 at 13:07 -0700, Junio C Hamano wrote:
> David Turner <dturner@twopensource.com> writes:
> 
> >>  * I am not sure if HEAD:link that points at HEAD:link should be
> >>    reported as "missing".  It may be better to report the original
> >>    without any dereferencing just like a link that points at outside
> >>    the tree? i.e. "symlink 4 LF link".
> >
> > Unfortunately, a symlink loop might include relative symlinks
> > (e.g. ../a).  If we return a relative symlink, the user will
> > not be able to distinguish it from a non-loop, out-of-tree symlink.  So
> > I think we may not return symlink 4 LF ../a for these cases.  
> 
> I do not follow.  Let's start from a shared example.
> 
>     HEAD:sub/link is a symbolic link whose value is ../nextlink
>     HEAD:nextlink is a symbolic link whose value is sub/link
> 
> That's a loop.  Now, I think what I am sugesting is
> 
> 	$ git cat-file --batch-check --follow-symlinks <<\EOF
> 	HEAD:sub/link
>         HEAD:nextlink
> 	EOF
> 	symlink ../nextlink
>         symlink sub/link
> 
> If you asked about sub/link and then got ../nextlink back, then
> isn't it clear for the reading script that it is about nextlink
> at the top-level?  Why can't it tell it from out-of-tree link?

Because maybe sub/link was actually a link to ../../nextlink.  Or maybe
sub/link was actually a link to ../outlink, and ../outlink was a link
to ../nextlink. 

But I think I'm OK with implementing your proposed solution[1] of
categorizing all of the possible cases.


[1] http://www.spinics.net/lists/git/msg250893.html
xmqqd225fsw8.fsf@gitster.dls.corp.google.com 

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH v5 3/3] cat-file: add --follow-symlinks to --batch
  2015-05-12 21:37         ` David Turner
@ 2015-05-12 21:42           ` Junio C Hamano
  2015-05-12 21:53             ` David Turner
  0 siblings, 1 reply; 20+ messages in thread
From: Junio C Hamano @ 2015-05-12 21:42 UTC (permalink / raw)
  To: David Turner; +Cc: git, David Turner

David Turner <dturner@twopensource.com> writes:

> On Tue, 2015-05-12 at 13:07 -0700, Junio C Hamano wrote:
>> David Turner <dturner@twopensource.com> writes:
>> 
>> >>  * I am not sure if HEAD:link that points at HEAD:link should be
>> >>    reported as "missing".  It may be better to report the original
>> >>    without any dereferencing just like a link that points at outside
>> >>    the tree? i.e. "symlink 4 LF link".
>> >
>> > Unfortunately, a symlink loop might include relative symlinks
>> > (e.g. ../a).  If we return a relative symlink, the user will
>> > not be able to distinguish it from a non-loop, out-of-tree symlink.  So
>> > I think we may not return symlink 4 LF ../a for these cases.  
>> 
>> I do not follow.  Let's start from a shared example.
>> 
>>     HEAD:sub/link is a symbolic link whose value is ../nextlink
>>     HEAD:nextlink is a symbolic link whose value is sub/link
>> 
>> That's a loop.  Now, I think what I am sugesting is
>> 
>> 	$ git cat-file --batch-check --follow-symlinks <<\EOF
>> 	HEAD:sub/link
>>         HEAD:nextlink
>> 	EOF
>> 	symlink ../nextlink
>>         symlink sub/link
>> 
>> If you asked about sub/link and then got ../nextlink back, then
>> isn't it clear for the reading script that it is about nextlink
>> at the top-level?  Why can't it tell it from out-of-tree link?
>
> Because maybe sub/link was actually a link to ../../nextlink.

Then the output would have said "symlink ../../nextlink" and you can
tell these two cases apart, no?  Same for the other case.

> But I think I'm OK with implementing your proposed solution[1] of
> categorizing all of the possible cases.

I think that is the cleanest I can come up with offhand, and would
give the easiest input to scripters.  At least I think so for now,
but others may have even better ideas ;-)

Thanks.

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH v5 3/3] cat-file: add --follow-symlinks to --batch
  2015-05-12 21:42           ` Junio C Hamano
@ 2015-05-12 21:53             ` David Turner
  0 siblings, 0 replies; 20+ messages in thread
From: David Turner @ 2015-05-12 21:53 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git, David Turner

On Tue, 2015-05-12 at 14:42 -0700, Junio C Hamano wrote:
> David Turner <dturner@twopensource.com> writes:
> 
> > On Tue, 2015-05-12 at 13:07 -0700, Junio C Hamano wrote:
> >> David Turner <dturner@twopensource.com> writes:
> >> 
> >> >>  * I am not sure if HEAD:link that points at HEAD:link should be
> >> >>    reported as "missing".  It may be better to report the original
> >> >>    without any dereferencing just like a link that points at outside
> >> >>    the tree? i.e. "symlink 4 LF link".
> >> >
> >> > Unfortunately, a symlink loop might include relative symlinks
> >> > (e.g. ../a).  If we return a relative symlink, the user will
> >> > not be able to distinguish it from a non-loop, out-of-tree symlink.  So
> >> > I think we may not return symlink 4 LF ../a for these cases.  
> >> 
> >> I do not follow.  Let's start from a shared example.
> >> 
> >>     HEAD:sub/link is a symbolic link whose value is ../nextlink
> >>     HEAD:nextlink is a symbolic link whose value is sub/link
> >> 
> >> That's a loop.  Now, I think what I am sugesting is
> >> 
> >> 	$ git cat-file --batch-check --follow-symlinks <<\EOF
> >> 	HEAD:sub/link
> >>         HEAD:nextlink
> >> 	EOF
> >> 	symlink ../nextlink
> >>         symlink sub/link
> >> 
> >> If you asked about sub/link and then got ../nextlink back, then
> >> isn't it clear for the reading script that it is about nextlink
> >> at the top-level?  Why can't it tell it from out-of-tree link?
> >
> > Because maybe sub/link was actually a link to ../../nextlink.
> 
> Then the output would have said "symlink ../../nextlink" and you can
> tell these two cases apart, no?  Same for the other case.

No.  Successfully resolved out-of-tree symlinks must be relative to the
tree root.  Jeff King and I discussed this in the original RFC:
otherwise, after a chain of symlinks which might move you all over the
tree, you have no idea what "../" is relative to.

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH v5 3/3] cat-file: add --follow-symlinks to --batch
  2015-05-12 20:22             ` Junio C Hamano
@ 2015-05-12 22:36               ` David Turner
  2015-05-12 23:02                 ` Junio C Hamano
  0 siblings, 1 reply; 20+ messages in thread
From: David Turner @ 2015-05-12 22:36 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git, David Turner

On Tue, 2015-05-12 at 13:22 -0700, Junio C Hamano wrote:
> Junio C Hamano <gitster@pobox.com> writes:
> 
> > Ahh, that would also give us "missing", so in that sense you are
> > being consistent.
> >
> > But I do not think that consistency is useful.  Showing just
> > "missing" instead is losing information and that is what bothers me.
> >
> > Showing "symlink 6 nosuch" to this "link points at a target that
> > would be in-tree but there is no such object in the tree" symbolic
> > link instead of "missing" would make it more useful, and I do not
> > offhand think of a downside, but maybe I am missing something.
> >
> > For a link that points outside, the code already gives
> >
> >     $ ln -s ../outside outlink
> >     $ git add outlink
> >     $ echo "$(git write-tree):outlink" |
> >       git cat-file --batch --follow-symlinks
> >
> > "symlink ../outside", so the script reading from the batch output
> > already has to be prepared to handle "symlink" and understand it as
> > saying "the link does not point an object that is inside the tree".
> 
> Having said all that, I think we can make readers' life easier by
> classifying various reasons why --follow-symlinks cannot pretend
> as if the in-tree pointee were given as its input and signal it
> differently:
> 
>  * A link points outside the original tree (or the index, once we
>    support "cat-file :RelNotes").
> 
>    I think your "symlink <size> LF <target> LF" already does this.
> 
>  * A link points (directly or indirectly) at itself.
> 
>    This would be your "loop <size> LF <target> LF", which I think is
>    good.
> 
>  * A link does not step outside the original tree, but the pointee
>    does not exist in the original tree.
> 
>    I think you currently say "<object name for HEAD:link> missing",
>    but I do not think it is useful.  That object does exist, but it
>    merely cannot be dereferenced.  Perhaps introducing a new one,
>    e.g. "dangling <size> LF <target> LF" similar to symlink/loop,
>    would help the reader better?
> 
> Are there other cases?  The only other case I think of is when the
> link resolves cleanly inside the tree, which you already handle.
> 
> Thanks.
On Tue, 2015-05-12 at 13:22 -0700, Junio C Hamano wrote:
Junio C Hamano <gitster@pobox.com> writes:
> 
> > Ahh, that would also give us "missing", so in that sense you are
> > being consistent.
> >
> > But I do not think that consistency is useful.  Showing just
> > "missing" instead is losing information and that is what bothers me.
> >
> > Showing "symlink 6 nosuch" to this "link points at a target that
> > would be in-tree but there is no such object in the tree" symbolic
> > link instead of "missing" would make it more useful, and I do not
> > offhand think of a downside, but maybe I am missing something.
> >
> > For a link that points outside, the code already gives
> >
> >     $ ln -s ../outside outlink
> >     $ git add outlink
> >     $ echo "$(git write-tree):outlink" |
> >       git cat-file --batch --follow-symlinks
> >
> > "symlink ../outside", so the script reading from the batch output
> > already has to be prepared to handle "symlink" and understand it as
> > saying "the link does not point an object that is inside the tree".
> 
> Having said all that, I think we can make readers' life easier by
> classifying various reasons why --follow-symlinks cannot pretend
> as if the in-tree pointee were given as its input and signal it
> differently:
> 
>  * A link points outside the original tree (or the index, once we
>    support "cat-file :RelNotes").
> 
>    I think your "symlink <size> LF <target> LF" already does this.
> 
>  * A link points (directly or indirectly) at itself.
> 
>    This would be your "loop <size> LF <target> LF", which I think is
>    good.
> 
>  * A link does not step outside the original tree, but the pointee
>    does not exist in the original tree.
> 
>    I think you currently say "<object name for HEAD:link> missing",
>    but I do not think it is useful.  That object does exist, but it
>    merely cannot be dereferenced.  Perhaps introducing a new one,
>    e.g. "dangling <size> LF <target> LF" similar to symlink/loop,
>    would help the reader better?
> 
> Are there other cases?  The only other case I think of is when the
> link resolves cleanly inside the tree, which you already handle.

While updating the tests, I noticed another two cases:
1. HEAD:broken-link/file

I am inclined to describe this as "dangling" as well.  (It is not useful
to tell users that "file" is the remaining bit to be resolved, because
due to chains of symlinks, users have no idea what file would be
relative to).  I think the filesystem returns ENOENT in the equivalent
case.

2. HEAD:link-to-file/file

This should be "notdir", I think, in that it is a distinct way of
failing that the user might care to know. The filesystem returns ENOTDIR
in the equivalent case.

In addition, I would like to have the format for the dangling, loop, and
notdir cases match the missing case.  In other words, "HEAD:link
missing", "HEAD:link dangling", etc.  Users already need to parse the
missing case, so we might as well make the others match.  It's true that
this parsing is impossible in the case of filenames containing newlines,
but git cat-file --batch cannot accept those filenames as input anyway
(presumably, a -0 flag would be needed, if anyone actually used such
filenames, and at that point we could make the -0 output unambiguously
parseable).

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH v5 3/3] cat-file: add --follow-symlinks to --batch
  2015-05-12 22:36               ` David Turner
@ 2015-05-12 23:02                 ` Junio C Hamano
  2015-05-12 23:06                   ` Junio C Hamano
  0 siblings, 1 reply; 20+ messages in thread
From: Junio C Hamano @ 2015-05-12 23:02 UTC (permalink / raw)
  To: David Turner; +Cc: git, David Turner

David Turner <dturner@twopensource.com> writes:

> On Tue, 2015-05-12 at 13:22 -0700, Junio C Hamano wrote:
>
>> Are there other cases?  The only other case I think of is when the
>> link resolves cleanly inside the tree, which you already handle.

Funny double-inclusion ;-)

> While updating the tests, I noticed another two cases:
> 1. HEAD:broken-link/file
>
> I am inclined to describe this as "dangling" as well.  (It is not useful
> to tell users that "file" is the remaining bit to be resolved, because
> due to chains of symlinks, users have no idea what file would be
> relative to).  I think the filesystem returns ENOENT in the equivalent
> case.
>
> 2. HEAD:link-to-file/file
>
> This should be "notdir", I think, in that it is a distinct way of
> failing that the user might care to know. The filesystem returns ENOTDIR
> in the equivalent case.

Thanks.

> In addition, I would like to have the format for the dangling, loop, and
> notdir cases match the missing case.  In other words, "HEAD:link
> missing", "HEAD:link dangling", etc.  Users already need to parse the
> missing case, so we might as well make the others match.

Interesting to see our opinions differ here, especially because the
previous suggestion was coming from "users already need to parse the
'symlink' case, so we might as well make the others match" ;-).

Between 'missing' and 'symlink', the latter is richer in information
and easier to parse, I would have thought.

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH v5 3/3] cat-file: add --follow-symlinks to --batch
  2015-05-12 23:02                 ` Junio C Hamano
@ 2015-05-12 23:06                   ` Junio C Hamano
  0 siblings, 0 replies; 20+ messages in thread
From: Junio C Hamano @ 2015-05-12 23:06 UTC (permalink / raw)
  To: David Turner; +Cc: git, David Turner

Junio C Hamano <gitster@pobox.com> writes:

> David Turner <dturner@twopensource.com> writes:
>
>> In addition, I would like to have the format for the dangling, loop, and
>> notdir cases match the missing case.  In other words, "HEAD:link
>> missing", "HEAD:link dangling", etc.  Users already need to parse the
>> missing case, so we might as well make the others match.
>
> Interesting to see our opinions differ here, especially because the
> previous suggestion was coming from "users already need to parse the
> 'symlink' case, so we might as well make the others match" ;-).
>
> Between 'missing' and 'symlink', the latter is richer in information
> and easier to parse, I would have thought.

Forgot to conclude the paragraph with final sentence.

... But I do not deeply care either way.

Thanks.

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH v5 3/3] cat-file: add --follow-symlinks to --batch
  2015-05-12 18:36     ` David Turner
  2015-05-12 18:43       ` Junio C Hamano
  2015-05-12 20:07       ` Junio C Hamano
@ 2015-05-14 20:06       ` Junio C Hamano
  2 siblings, 0 replies; 20+ messages in thread
From: Junio C Hamano @ 2015-05-14 20:06 UTC (permalink / raw)
  To: David Turner; +Cc: git, David Turner

David Turner <dturner@twopensource.com> writes:

> On Tue, 2015-05-12 at 11:07 -0700, Junio C Hamano wrote:
>
>>  * Is it fundamental that this is only for --batch family, or is it
>>    just lack of need by the current implementor and implementation?
>>    "git cat-file --follow-symlinks blob :RelNotes" does not sound
>>    a nonsense request to me.
>
> The reason that --follow-symlinks doesn't work for non-batch requests is
> that it is impossible to distinguish out-of-tree symlinks from valid
> output in non-batch output. I will add text explaining this. 

Actually, I do not think that is a valid reason not to consider
supporting "git cat-file --follow-symlinks blob :RelNotes" in future
versions of Git.  "--batch" and "--batch-check" needs to continue,
so it needs to give diagnosis in-line in its output and let the
driving script continue talking with it, but a single-shot request
"git cat-file --follow-symlinks blob :RelNotes" can signal that
the link does not resolve in-tree (or in-index) by erroring out.

So I still view it as "we currently do not need it outside --batch;
if somebody wants to, feel free to add it to non-batch mode".  It
does not have to be called a bug but I would say it is a missing
feature.  And not handling an in-index path e.g. :RelNotes falls
into the same category, I would say.

Just to avoid misunderstanding, I still think this series (at its
9th iteration) is good from the point of new feature without filling
these two missing features.

^ permalink raw reply	[flat|nested] 20+ messages in thread

end of thread, other threads:[~2015-05-14 20:06 UTC | newest]

Thread overview: 20+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-05-11 22:50 [PATCH v5 0/3] cat-file --follow-symlinks dturner
2015-05-11 22:50 ` [PATCH v5 1/3] tree-walk: learn get_tree_entry_follow_symlinks dturner
2015-05-12 17:29   ` Johannes Sixt
2015-05-11 22:50 ` [PATCH v5 2/3] sha1_name: get_sha1_with_context learns to follow symlinks dturner
2015-05-11 22:50 ` [PATCH v5 3/3] cat-file: add --follow-symlinks to --batch dturner
2015-05-12 17:34   ` Johannes Sixt
2015-05-12 18:07   ` Junio C Hamano
2015-05-12 18:36     ` David Turner
2015-05-12 18:43       ` Junio C Hamano
2015-05-12 18:55         ` David Turner
2015-05-12 20:00           ` Junio C Hamano
2015-05-12 20:22             ` Junio C Hamano
2015-05-12 22:36               ` David Turner
2015-05-12 23:02                 ` Junio C Hamano
2015-05-12 23:06                   ` Junio C Hamano
2015-05-12 20:07       ` Junio C Hamano
2015-05-12 21:37         ` David Turner
2015-05-12 21:42           ` Junio C Hamano
2015-05-12 21:53             ` David Turner
2015-05-14 20:06       ` Junio C Hamano

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.