All of lore.kernel.org
 help / color / mirror / Atom feed
From: Brandon Williams <bmwill@google.com>
To: git@vger.kernel.org
Cc: Brandon Williams <bmwill@google.com>,
	sbeller@google.com, peff@peff.net, jacob.keller@gmail.com,
	gitster@pobox.com, ramsay@ramsayjones.plus.com, tboegi@web.de,
	j6t@kdbg.org, pclouds@gmail.com
Subject: [PATCH v2 1/4] real_path: resolve symlinks by hand
Date: Thu,  8 Dec 2016 15:58:11 -0800	[thread overview]
Message-ID: <1481241494-6861-2-git-send-email-bmwill@google.com> (raw)
In-Reply-To: <1481241494-6861-1-git-send-email-bmwill@google.com>

The current implementation of real_path uses chdir() in order to resolve
symlinks.  Unfortunately this isn't thread-safe as chdir() affects a
process as a whole and not just an individual thread.  Instead perform
the symlink resolution by hand so that the calls to chdir() can be
removed, making real_path one step closer to being reentrant.

Signed-off-by: Brandon Williams <bmwill@google.com>
---
 abspath.c | 183 +++++++++++++++++++++++++++++++++++++++++---------------------
 1 file changed, 122 insertions(+), 61 deletions(-)

diff --git a/abspath.c b/abspath.c
index 2825de8..92f2a29 100644
--- a/abspath.c
+++ b/abspath.c
@@ -11,8 +11,38 @@ int is_directory(const char *path)
 	return (!stat(path, &st) && S_ISDIR(st.st_mode));
 }
 
+/* removes the last path component from 'path' except if 'path' is root */
+static void strip_last_component(struct strbuf *path)
+{
+	if (path->len > offset_1st_component(path->buf)) {
+		char *last_slash = find_last_dir_sep(path->buf);
+		strbuf_setlen(path, last_slash - path->buf);
+	}
+}
+
+/* get (and remove) the next component in 'remaining' and place it in 'next' */
+static void get_next_component(struct strbuf *next, struct strbuf *remaining)
+{
+	char *start = NULL;
+	char *end = NULL;
+
+	strbuf_reset(next);
+
+	/* look for the next component */
+	/* Skip sequences of multiple path-separators */
+	for (start = remaining->buf; is_dir_sep(*start); start++)
+		; /* nothing */
+	/* Find end of the path component */
+	for (end = start; *end && !is_dir_sep(*end); end++)
+		; /* nothing */
+
+	strbuf_add(next, start, end - start);
+	/* remove the component from 'remaining' */
+	strbuf_remove(remaining, 0, end - remaining->buf);
+}
+
 /* We allow "recursive" symbolic links. Only within reason, though. */
-#define MAXDEPTH 5
+#define MAXSYMLINKS 5
 
 /*
  * Return the real path (i.e., absolute path, with symlinks resolved
@@ -21,7 +51,6 @@ int is_directory(const char *path)
  * absolute_path().)  The return value is a pointer to a static
  * buffer.
  *
- * The input and all intermediate paths must be shorter than MAX_PATH.
  * The directory part of path (i.e., everything up to the last
  * dir_sep) must denote a valid, existing directory, but the last
  * component need not exist.  If die_on_error is set, then die with an
@@ -33,22 +62,16 @@ int is_directory(const char *path)
  */
 static const char *real_path_internal(const char *path, int die_on_error)
 {
-	static struct strbuf sb = STRBUF_INIT;
+	static struct strbuf resolved = STRBUF_INIT;
+	struct strbuf remaining = STRBUF_INIT;
+	struct strbuf next = STRBUF_INIT;
+	struct strbuf symlink = STRBUF_INIT;
 	char *retval = NULL;
-
-	/*
-	 * If we have to temporarily chdir(), store the original CWD
-	 * here so that we can chdir() back to it at the end of the
-	 * function:
-	 */
-	struct strbuf cwd = STRBUF_INIT;
-
-	int depth = MAXDEPTH;
-	char *last_elem = NULL;
+	int num_symlinks = 0;
 	struct stat st;
 
 	/* We've already done it */
-	if (path == sb.buf)
+	if (path == resolved.buf)
 		return path;
 
 	if (!*path) {
@@ -58,74 +81,112 @@ static const char *real_path_internal(const char *path, int die_on_error)
 			goto error_out;
 	}
 
-	strbuf_reset(&sb);
-	strbuf_addstr(&sb, path);
-
-	while (depth--) {
-		if (!is_directory(sb.buf)) {
-			char *last_slash = find_last_dir_sep(sb.buf);
-			if (last_slash) {
-				last_elem = xstrdup(last_slash + 1);
-				strbuf_setlen(&sb, last_slash - sb.buf + 1);
-			} else {
-				last_elem = xmemdupz(sb.buf, sb.len);
-				strbuf_reset(&sb);
-			}
+	strbuf_reset(&resolved);
+
+	if (is_absolute_path(path)) {
+		/* absolute path; start with only root as being resolved */
+		int offset = offset_1st_component(path);
+		strbuf_add(&resolved, path, offset);
+		strbuf_addstr(&remaining, path + offset);
+	} else {
+		/* relative path; can use CWD as the initial resolved path */
+		if (strbuf_getcwd(&resolved)) {
+			if (die_on_error)
+				die_errno("unable to get current working directory");
+			else
+				goto error_out;
+		}
+		strbuf_addstr(&remaining, path);
+	}
+
+	/* Iterate over the remaining path components */
+	while (remaining.len > 0) {
+		get_next_component(&next, &remaining);
+
+		if (next.len == 0) {
+			continue; /* empty component */
+		} else if (next.len == 1 && !strcmp(next.buf, ".")) {
+			continue; /* '.' component */
+		} else if (next.len == 2 && !strcmp(next.buf, "..")) {
+			/* '..' component; strip the last path component */
+			strip_last_component(&resolved);
+			continue;
 		}
 
-		if (sb.len) {
-			if (!cwd.len && strbuf_getcwd(&cwd)) {
+		/* append the next component and resolve resultant path */
+		if (!is_dir_sep(resolved.buf[resolved.len - 1]))
+			strbuf_addch(&resolved, '/');
+		strbuf_addbuf(&resolved, &next);
+
+		if (lstat(resolved.buf, &st)) {
+			/* error out unless this was the last component */
+			if (!(errno == ENOENT && !remaining.len)) {
 				if (die_on_error)
-					die_errno("Could not get current working directory");
+					die_errno("Invalid path '%s'",
+						  resolved.buf);
 				else
 					goto error_out;
 			}
+		} else if (S_ISLNK(st.st_mode)) {
+			ssize_t len;
+			strbuf_reset(&symlink);
 
-			if (chdir(sb.buf)) {
+			if (num_symlinks++ > MAXSYMLINKS) {
 				if (die_on_error)
-					die_errno("Could not switch to '%s'",
-						  sb.buf);
+					die("More than %d nested symlinks "
+					    "on path '%s'", MAXSYMLINKS, path);
 				else
 					goto error_out;
 			}
-		}
-		if (strbuf_getcwd(&sb)) {
-			if (die_on_error)
-				die_errno("Could not get current working directory");
-			else
-				goto error_out;
-		}
-
-		if (last_elem) {
-			if (sb.len && !is_dir_sep(sb.buf[sb.len - 1]))
-				strbuf_addch(&sb, '/');
-			strbuf_addstr(&sb, last_elem);
-			free(last_elem);
-			last_elem = NULL;
-		}
 
-		if (!lstat(sb.buf, &st) && S_ISLNK(st.st_mode)) {
-			struct strbuf next_sb = STRBUF_INIT;
-			ssize_t len = strbuf_readlink(&next_sb, sb.buf, 0);
+			len = strbuf_readlink(&symlink, resolved.buf,
+					      st.st_size);
 			if (len < 0) {
 				if (die_on_error)
 					die_errno("Invalid symlink '%s'",
-						  sb.buf);
+						  resolved.buf);
 				else
 					goto error_out;
 			}
-			strbuf_swap(&sb, &next_sb);
-			strbuf_release(&next_sb);
-		} else
-			break;
+
+			if (is_absolute_path(symlink.buf)) {
+				/* absolute symlink; set resolved to root */
+				int offset = offset_1st_component(symlink.buf);
+				strbuf_reset(&resolved);
+				strbuf_add(&resolved, symlink.buf, offset);
+				strbuf_remove(&symlink, 0, offset);
+			} else {
+				/*
+				 * relative symlink
+				 * strip off the last component since it will
+				 * be replaced with the contents of the symlink
+				 */
+				strip_last_component(&resolved);
+			}
+
+			/*
+			 * if there are still remaining components to resolve
+			 * then append them to symlink
+			 */
+			if (remaining.len) {
+				strbuf_addch(&symlink, '/');
+				strbuf_addbuf(&symlink, &remaining);
+			}
+
+			/*
+			 * use the symlink as the remaining components that
+			 * need to be resloved
+			 */
+			strbuf_swap(&symlink, &remaining);
+		}
 	}
 
-	retval = sb.buf;
+	retval = resolved.buf;
+
 error_out:
-	free(last_elem);
-	if (cwd.len && chdir(cwd.buf))
-		die_errno("Could not change back to '%s'", cwd.buf);
-	strbuf_release(&cwd);
+	strbuf_release(&remaining);
+	strbuf_release(&next);
+	strbuf_release(&symlink);
 
 	return retval;
 }
-- 
2.8.0.rc3.226.g39d4020


  reply	other threads:[~2016-12-08 23:58 UTC|newest]

Thread overview: 83+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-12-05 18:58 [PATCH] making real_path thread-safe Brandon Williams
2016-12-05 18:58 ` [PATCH] real_path: make " Brandon Williams
2016-12-05 19:57   ` Stefan Beller
2016-12-05 20:12     ` Brandon Williams
2016-12-05 20:38       ` Stefan Beller
2016-12-05 20:14   ` Stefan Beller
2016-12-05 20:16     ` Brandon Williams
2016-12-08  9:41       ` Duy Nguyen
2016-12-08 17:50         ` Brandon Williams
2016-12-06 23:44   ` Junio C Hamano
2016-12-07  0:10     ` Brandon Williams
2016-12-07  1:12       ` Ramsay Jones
2016-12-07 20:14         ` Torsten Bögershausen
2016-12-07 20:32           ` Junio C Hamano
2016-12-07 22:13             ` Brandon Williams
2016-12-08  7:55               ` Torsten Bögershausen
2016-12-08 18:41                 ` Johannes Sixt
2016-12-08 19:02                   ` Brandon Williams
2016-12-07 20:43       ` Johannes Sixt
2016-12-07 22:29         ` Brandon Williams
2016-12-08 11:32           ` Johannes Sixt
2016-12-08 16:54             ` Junio C Hamano
2016-12-08 23:58 ` [PATCH v2 0/4] road to reentrant real_path Brandon Williams
2016-12-08 23:58   ` Brandon Williams [this message]
2016-12-09  1:49     ` [PATCH v2 1/4] real_path: resolve symlinks by hand Jacob Keller
2016-12-09 14:33     ` Johannes Sixt
2016-12-09 20:04       ` Brandon Williams
2016-12-08 23:58   ` [PATCH v2 2/4] real_path: convert real_path_internal to strbuf_realpath Brandon Williams
2016-12-08 23:58   ` [PATCH v2 3/4] real_path: create real_pathdup Brandon Williams
2016-12-09 14:35     ` Johannes Sixt
2016-12-08 23:58   ` [PATCH v2 4/4] real_path: have callers use real_pathdup and strbuf_realpath Brandon Williams
2016-12-09 12:33   ` [PATCH v2 0/4] road to reentrant real_path Duy Nguyen
2016-12-09 19:42     ` Brandon Williams
2016-12-10 11:02       ` Duy Nguyen
2016-12-12 18:16   ` [PATCH v3 " Brandon Williams
2016-12-12 18:16     ` [PATCH v3 1/4] real_path: resolve symlinks by hand Brandon Williams
2016-12-12 22:19       ` Junio C Hamano
2016-12-12 22:50         ` Brandon Williams
2016-12-12 23:32           ` Junio C Hamano
2016-12-12 18:16     ` [PATCH v3 2/4] real_path: convert real_path_internal to strbuf_realpath Brandon Williams
2016-12-12 22:20       ` Junio C Hamano
2016-12-12 18:16     ` [PATCH v3 3/4] real_path: create real_pathdup Brandon Williams
2016-12-12 22:25       ` Junio C Hamano
2016-12-12 18:16     ` [PATCH v3 4/4] real_path: have callers use real_pathdup and strbuf_realpath Brandon Williams
2016-12-12 22:26       ` Junio C Hamano
2016-12-12 23:47         ` Junio C Hamano
2016-12-12 23:58           ` Stefan Beller
2016-12-13  1:15             ` Brandon Williams
2016-12-13  6:39               ` Junio C Hamano
2016-12-21 21:51     ` [PATCH bw/realpath-wo-chdir] real_path: canonicalize directory separators in root parts Johannes Sixt
2016-12-21 22:33       ` Brandon Williams
2016-12-22  6:07         ` Johannes Sixt
2016-12-22 17:33           ` Brandon Williams
2016-12-22 18:54             ` Johannes Sixt
2016-12-22 19:33             ` Junio C Hamano
2017-01-03 19:09     ` [PATCH v4 0/5] road to reentrant real_path Brandon Williams
2017-01-03 19:09       ` [PATCH v4 1/5] real_path: resolve symlinks by hand Brandon Williams
2017-01-03 19:09       ` [PATCH v4 2/5] real_path: convert real_path_internal to strbuf_realpath Brandon Williams
2017-01-03 19:09       ` [PATCH v4 3/5] real_path: create real_pathdup Brandon Williams
2017-01-03 19:09       ` [PATCH v4 4/5] real_path: have callers use real_pathdup and strbuf_realpath Brandon Williams
2017-01-04  1:07         ` Jacob Keller
2017-01-04 18:14           ` Brandon Williams
2017-01-03 19:09       ` [PATCH v4 5/5] real_path: canonicalize directory separators in root parts Brandon Williams
2017-01-04  0:48       ` [PATCH v4 0/5] road to reentrant real_path Jeff King
2017-01-04  6:56         ` Torsten Bögershausen
2017-01-04  7:01           ` Jeff King
2017-01-04 18:13             ` Brandon Williams
2017-01-04 18:22               ` Stefan Beller
2017-01-04 21:46                 ` Jacob Keller
2017-01-04 21:55                   ` Brandon Williams
2017-01-04 22:01       ` [PATCH v5 " Brandon Williams
2017-01-04 22:01         ` [PATCH v5 1/5] real_path: resolve symlinks by hand Brandon Williams
2017-01-04 22:01         ` [PATCH v5 2/5] real_path: convert real_path_internal to strbuf_realpath Brandon Williams
2017-01-04 22:01         ` [PATCH v5 3/5] real_path: create real_pathdup Brandon Williams
2017-01-04 22:01         ` [PATCH v5 4/5] real_path: have callers use real_pathdup and strbuf_realpath Brandon Williams
2017-01-04 22:01         ` [PATCH v5 5/5] real_path: canonicalize directory separators in root parts Brandon Williams
2017-01-08  3:09         ` [PATCH v5 0/5] road to reentrant real_path Junio C Hamano
2017-01-09 18:04           ` Brandon Williams
2017-01-09 18:18             ` Junio C Hamano
2017-01-09 18:24               ` Brandon Williams
2017-01-09 19:26                 ` Junio C Hamano
2017-01-09 18:50               ` [PATCH 1/2] real_path: prevent redefinition of MAXSYMLINKS Brandon Williams
2017-01-09 18:50                 ` [PATCH 2/2] real_path: set errno when max number of symlinks is exceeded Brandon Williams

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1481241494-6861-2-git-send-email-bmwill@google.com \
    --to=bmwill@google.com \
    --cc=git@vger.kernel.org \
    --cc=gitster@pobox.com \
    --cc=j6t@kdbg.org \
    --cc=jacob.keller@gmail.com \
    --cc=pclouds@gmail.com \
    --cc=peff@peff.net \
    --cc=ramsay@ramsayjones.plus.com \
    --cc=sbeller@google.com \
    --cc=tboegi@web.de \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.