All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Nguyễn Thái Ngọc Duy" <pclouds@gmail.com>
To: git@vger.kernel.org
Cc: "Nguyễn Thái Ngọc Duy" <pclouds@gmail.com>
Subject: [PATCH 05/20] copy.c: add copy_dir_recursively()
Date: Wed,  3 Feb 2016 16:35:35 +0700	[thread overview]
Message-ID: <1454492150-10628-6-git-send-email-pclouds@gmail.com> (raw)
In-Reply-To: <1454492150-10628-1-git-send-email-pclouds@gmail.com>

This is busybox's copy_file() [1] modified to fit in Git. Because this
is busybox, the code is likely POSIX-y (or even Linux-y). Windows
support may not be there yet.

[1] in libbb/copy_file.c from the GPL2+ commit
    f2c043acfcf9dad9fd3d65821b81f89986bbe54e (busybox: fix
    uninitialized memory when displaying IPv6 addresses - 2016-01-18)

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
 cache.h |   1 +
 copy.c  | 371 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 372 insertions(+)

diff --git a/cache.h b/cache.h
index c75d13f..3fbb38d 100644
--- a/cache.h
+++ b/cache.h
@@ -1638,6 +1638,7 @@ extern void fprintf_or_die(FILE *, const char *fmt, ...);
 extern int copy_fd(int ifd, int ofd);
 extern int copy_file(const char *dst, const char *src, int mode);
 extern int copy_file_with_time(const char *dst, const char *src, int mode);
+extern int copy_dir_recursively(const char *source, const char *dest);
 
 extern void write_or_die(int fd, const void *buf, size_t count);
 extern int write_or_whine(int fd, const void *buf, size_t count, const char *msg);
diff --git a/copy.c b/copy.c
index 574fa1f..c99d6e5 100644
--- a/copy.c
+++ b/copy.c
@@ -1,4 +1,6 @@
 #include "cache.h"
+#include "dir.h"
+#include "hashmap.h"
 
 int copy_fd(int ifd, int ofd)
 {
@@ -65,3 +67,372 @@ int copy_file_with_time(const char *dst, const char *src, int mode)
 		return copy_times(dst, src);
 	return status;
 }
+
+/*
+ * Identity of one filesystem object. Used on its own as a lookup probe
+ * and as the leading member of every entry stored in the inode hashmap.
+ */
+struct inode_key {
+	/* hashmap bookkeeping; entry.hash is filled from hash_inode() */
+	struct hashmap_entry entry;
+	ino_t ino; /* st_ino of the object */
+	dev_t dev; /* st_dev of the object */
+	/*
+	 * Reportedly, on cramfs a file and a dir can have same ino.
+	 * Need to also remember "file/dir" bit:
+	 */
+	char isdir; /* bool */
+};
+
+/*
+ * Entry stored in the inode hashmap: a key followed by a NUL-terminated
+ * name. Directories are recorded with an empty name; regular files are
+ * recorded with the destination path they were copied to.
+ */
+struct inode_value {
+	struct inode_key key;
+	char name[FLEX_ARRAY]; /* allocated together with the struct */
+};
+
+#define HASH_SIZE      311u   /* Should be prime */
+/* Reduce an inode number to a hash value in the range [0, HASH_SIZE). */
+static inline unsigned hash_inode(ino_t ino)
+{
+	unsigned bucket = ino % HASH_SIZE;
+
+	return bucket;
+}
+
+/*
+ * hashmap comparison callback: returns 0 when the stored entry and the
+ * probe describe the same (ino, dev, isdir) triple, and 1 otherwise.
+ * keydata is not used.
+ */
+static int inode_cmp(const void *entry, const void *entry_or_key,
+		     const void *keydata)
+{
+	const struct inode_value *stored = entry;
+	const struct inode_key *probe = entry_or_key;
+
+	if (stored->key.ino != probe->ino)
+		return 1;
+	if (stored->key.dev != probe->dev)
+		return 1;
+	return stored->key.isdir != probe->isdir;
+}
+
+/*
+ * Look up the object described by st in map.
+ * Returns the name recorded for it by add_to_ino_dev_hashtable(), or
+ * NULL when its (ino, dev, isdir) triple has not been recorded.
+ */
+static const char *is_in_ino_dev_hashtable(const struct hashmap *map,
+					   const struct stat *st)
+{
+	struct inode_key probe;
+	const struct inode_value *found;
+
+	probe.ino = st->st_ino;
+	probe.dev = st->st_dev;
+	probe.isdir = !!S_ISDIR(st->st_mode);
+	probe.entry.hash = hash_inode(probe.ino);
+
+	found = hashmap_get(map, &probe, NULL);
+	if (!found)
+		return NULL;
+	return found->name;
+}
+
+/*
+ * Record the object described by st in map, together with a private
+ * copy of path (the terminating NUL is copied too). The entry is
+ * allocated with xmalloc() and handed to hashmap_add().
+ */
+static void add_to_ino_dev_hashtable(struct hashmap *map,
+				     const struct stat *st,
+				     const char *path)
+{
+	struct inode_value *v;
+	/* size_t, not int: strlen() returns size_t and len feeds a size */
+	size_t len = strlen(path);
+
+	v = xmalloc(offsetof(struct inode_value, name) + len + 1);
+	v->key.entry.hash = hash_inode(st->st_ino);
+	v->key.ino	  = st->st_ino;
+	v->key.dev	  = st->st_dev;
+	v->key.isdir      = !!S_ISDIR(st->st_mode);
+	memcpy(v->name, path, len + 1);
+	hashmap_add(map, v);
+}
+
+/*
+ * Return a pointer to the final character of s when that character
+ * equals c, and NULL otherwise. A NULL or empty s yields NULL, so the
+ * buffer is never read before its start.
+ */
+static inline char *last_char_is(const char *s, int c)
+{
+	const char *tail;
+
+	if (!s || !*s)
+		return NULL;
+	tail = s + (strlen(s) - 1);
+	if ((unsigned char)*tail != c)
+		return NULL;
+	return (char *)tail;
+}
+
+/*
+ * Join path and filename into a newly allocated string (detached from a
+ * strbuf; callers in this file release it with free()). A NULL path is
+ * treated as "". Leading slashes are dropped from filename, and a '/'
+ * is inserted between the two unless path already ends with one.
+ */
+static inline char *concat_path_file(const char *path, const char *filename)
+{
+	struct strbuf joined = STRBUF_INIT;
+	const char *sep;
+
+	if (!path)
+		path = "";
+	sep = last_char_is(path, '/') ? "" : "/";
+	while (*filename == '/')
+		filename++;
+	strbuf_addf(&joined, "%s%s%s", path, sep, filename);
+	return strbuf_detach(&joined, NULL);
+}
+
+/*
+ * Like concat_path_file(), except that nothing is allocated and NULL is
+ * returned when is_dot_or_dotdot() reports f as "." or "..".
+ */
+static char *concat_subpath_file(const char *path, const char *f)
+{
+	if (!f || !is_dot_or_dotdot(f))
+		return concat_path_file(path, f);
+	return NULL;
+}
+
+/*
+ * Remove dest so that the caller can create it afresh.
+ * Returns 0 when unlink() succeeds. Otherwise errno is restored to the
+ * value it had on entry (set by the caller's earlier failure), the
+ * message is printed through sys_error(), and its result is returned.
+ */
+static int do_unlink(const char *dest)
+{
+	const int caller_errno = errno;
+
+	if (unlink(dest) >= 0)
+		return 0;
+
+	errno = caller_errno; /* do not use errno from unlink */
+	return sys_error(_("can't create '%s'"), dest);
+}
+
+/*
+ * Copy the filesystem object at source to dest, recursing into
+ * directories. See busybox.git, libbb/copy_file.c for the original
+ * implementation.
+ *
+ * inode_map records the (dev, ino) of every directory used as a
+ * destination, so that copying a tree into itself is detected, and of
+ * every regular file copied, so that further names of the same inode
+ * are recreated with link() to the first copy.
+ *
+ * source is examined with lstat(): a symlink is recreated as a symlink
+ * and is never followed.
+ *
+ * Returns 0 on success and a negative value when something could not be
+ * copied. Failures to preserve times, ownership or permissions are
+ * reported but do not change the return value.
+ */
+static int copy_dir_1(struct hashmap *inode_map,
+		      const char *source,
+		      const char *dest)
+{
+	/* This is a recursive function, try to minimize stack usage */
+	struct stat source_stat;
+	struct stat dest_stat;
+	int retval = 0;
+	int dest_exists = 0;
+	int ovr;
+
+	/*
+	 * lstat(), not stat(): following symlinks here would make the
+	 * S_ISLNK() handling below unreachable and would descend into
+	 * directories that are merely linked to.
+	 */
+	if (lstat(source, &source_stat) < 0)
+		return sys_error(_("can't stat '%s'"), source);
+
+	if (lstat(dest, &dest_stat) < 0) {
+		if (errno != ENOENT)
+			return sys_error(_("can't stat '%s'"), dest);
+	} else {
+		/* no syscall failed here, so errno must not be reported */
+		if (source_stat.st_dev == dest_stat.st_dev &&
+		    source_stat.st_ino == dest_stat.st_ino)
+			return error(_("'%s' and '%s' are the same file"), source, dest);
+		dest_exists = 1;
+	}
+
+	if (S_ISDIR(source_stat.st_mode)) {
+		DIR *dp;
+		const char *tp;
+		struct dirent *d;
+		mode_t saved_umask = 0;
+
+		/* Did we ever create source ourselves before? */
+		tp = is_in_ino_dev_hashtable(inode_map, &source_stat);
+		if (tp)
+			/* We did! it's a recursion! man the lifeboats... */
+			return error(_("recursion detected, omitting directory '%s'"),
+				     source);
+
+		if (dest_exists) {
+			/* not a syscall failure: plain error(), no strerror */
+			if (!S_ISDIR(dest_stat.st_mode))
+				return error(_("target '%s' is not a directory"), dest);
+			/*
+			 * race here: user can substitute a symlink between
+			 * this check and actual creation of files inside dest
+			 */
+		} else {
+			/* Create DEST */
+			mode_t mode;
+			saved_umask = umask(0);
+
+			mode = source_stat.st_mode;
+			/* Allow owner to access new dir (at least for now) */
+			mode |= S_IRWXU;
+			if (mkdir(dest, mode) < 0) {
+				umask(saved_umask);
+				return sys_error(_("can't create directory '%s'"), dest);
+			}
+			umask(saved_umask);
+			/* need stat info for add_to_ino_dev_hashtable */
+			if (lstat(dest, &dest_stat) < 0)
+				return sys_error(_("can't stat '%s'"), dest);
+		}
+
+		/*
+		 * remember (dev,inode) of each created dir. name is
+		 * not remembered
+		 */
+		add_to_ino_dev_hashtable(inode_map, &dest_stat, "");
+
+		/* Recursively copy files in SOURCE */
+		dp = opendir(source);
+		if (!dp) {
+			/* say why the directory's contents are missing */
+			sys_error(_("can't open directory '%s'"), source);
+			retval = -1;
+			goto preserve_mode_ugid_time;
+		}
+
+		while ((d = readdir(dp))) {
+			char *new_source, *new_dest;
+
+			/* NULL means "." or "..": nothing to copy */
+			new_source = concat_subpath_file(source, d->d_name);
+			if (!new_source)
+				continue;
+			new_dest = concat_path_file(dest, d->d_name);
+			/* keep going after a failure, but remember it */
+			if (copy_dir_1(inode_map, new_source, new_dest) < 0)
+				retval = -1;
+			free(new_source);
+			free(new_dest);
+		}
+		closedir(dp);
+
+		if (!dest_exists &&
+		    chmod(dest, source_stat.st_mode & ~saved_umask) < 0) {
+			sys_error(_("can't preserve permissions of '%s'"), dest);
+			/* retval = -1; - WRONG! copy *WAS* made */
+		}
+		goto preserve_mode_ugid_time;
+	}
+
+	/* "cp [-opts] regular_file thing2" */
+	if (S_ISREG(source_stat.st_mode)) {
+		const char *link_target;
+		int src_fd;
+		int dst_fd;
+		mode_t new_mode = source_stat.st_mode;
+
+		/* Seen this inode before? Then recreate it as a hardlink. */
+		link_target = is_in_ino_dev_hashtable(inode_map, &source_stat);
+		if (link_target) {
+			if (link(link_target, dest) < 0) {
+				ovr = do_unlink(dest);
+				if (ovr < 0)
+					return ovr;
+				if (link(link_target, dest) < 0) {
+					sys_error(_("can't create link '%s'"), dest);
+					return -1;
+				}
+			}
+			return 0;
+		}
+		add_to_ino_dev_hashtable(inode_map, &source_stat, dest);
+
+		src_fd = open(source, O_RDONLY);
+		if (src_fd < 0)
+			return sys_error(_("can't open '%s'"), source);
+
+		dst_fd = open(dest, O_WRONLY|O_CREAT|O_EXCL, new_mode);
+		if (dst_fd == -1) {
+			ovr = do_unlink(dest);
+			if (ovr < 0) {
+				close(src_fd);
+				return ovr;
+			}
+			/* It shouldn't exist. If it exists, do not open (symlink attack?) */
+			dst_fd = open(dest, O_WRONLY|O_CREAT|O_EXCL, new_mode);
+			if (dst_fd < 0) {
+				close(src_fd);
+				return sys_error(_("can't open '%s'"), dest);
+			}
+		}
+
+		switch (copy_fd(src_fd, dst_fd)) {
+		case COPY_READ_ERROR:
+			error(_("copy-fd: read returned %s"), strerror(errno));
+			retval = -1;
+			break;
+		case COPY_WRITE_ERROR:
+			error(_("copy-fd: write returned %s"), strerror(errno));
+			retval = -1;
+			break;
+		}
+
+		/* Careful with writing... */
+		if (close(dst_fd) < 0)
+			retval = sys_error(_("error writing to '%s'"), dest);
+		/* ...read errors were already reported after copy_fd() */
+		close(src_fd);
+		goto preserve_mode_ugid_time;
+	}
+
+	/* Source is a symlink or a special file */
+	/* We are lazy here, a bit lax with races... */
+	if (dest_exists) {
+		/* do_unlink() reports the errno it is entered with */
+		errno = EEXIST;
+		ovr = do_unlink(dest);
+		if (ovr < 0)
+			return ovr;
+	}
+	if (S_ISLNK(source_stat.st_mode)) {
+		struct strbuf lpath = STRBUF_INIT;
+
+		if (strbuf_readlink(&lpath, source, 0)) {
+			/* EINVAL => "file: Invalid argument" => puzzled user */
+			const char *errmsg = _("not a symlink");
+			int err = errno;
+
+			if (err != EINVAL)
+				errmsg = strerror(err);
+			error(_("%s: cannot read link: %s"), source, errmsg);
+			strbuf_release(&lpath);
+			/* the link was not copied, so do not claim success */
+			return -1;
+		}
+		if (symlink(lpath.buf, dest) < 0) {
+			int err = errno;
+
+			strbuf_release(&lpath);
+			errno = err; /* report symlink()'s failure, not free()'s */
+			return sys_error(_("can't create symlink '%s'"), dest);
+		}
+		strbuf_release(&lpath);
+		if (lchown(dest, source_stat.st_uid, source_stat.st_gid) < 0)
+			sys_error(_("can't preserve %s of '%s'"), "ownership", dest);
+		/*
+		 * _Not_ jumping to preserve_mode_ugid_time: symlinks
+		 * don't have those
+		 */
+		return 0;
+	}
+	if (S_ISBLK(source_stat.st_mode) ||
+	    S_ISCHR(source_stat.st_mode) ||
+	    S_ISSOCK(source_stat.st_mode) ||
+	    S_ISFIFO(source_stat.st_mode)) {
+		if (mknod(dest, source_stat.st_mode, source_stat.st_rdev) < 0)
+			return sys_error(_("can't create '%s'"), dest);
+	} else
+		/* no syscall failed; cast because %x expects unsigned */
+		return error(_("unrecognized file '%s' with mode %x"),
+			     source, (unsigned)source_stat.st_mode);
+
+preserve_mode_ugid_time:
+
+	if (1 /*FILEUTILS_PRESERVE_STATUS*/) {
+		struct timeval times[2];
+
+		times[1].tv_sec = times[0].tv_sec = source_stat.st_mtime;
+		times[1].tv_usec = times[0].tv_usec = 0;
+		/* BTW, utimes sets usec-precision time - just FYI */
+		if (utimes(dest, times) < 0)
+			sys_error(_("can't preserve %s of '%s'"), "times", dest);
+		if (chown(dest, source_stat.st_uid, source_stat.st_gid) < 0) {
+			/* ownership differs: do not carry setuid/setgid over */
+			source_stat.st_mode &= ~(S_ISUID | S_ISGID);
+			sys_error(_("can't preserve %s of '%s'"), "ownership", dest);
+		}
+		if (chmod(dest, source_stat.st_mode) < 0)
+			sys_error(_("can't preserve %s of '%s'"), "permissions", dest);
+	}
+
+	return retval;
+}
+
+/*
+ * Recursively copy source to dest via copy_dir_1(), using a hashmap of
+ * inodes that lives only for the duration of this call.
+ *
+ * Return:
+ * -1 error, copy not made
+ *  0 copy is made
+ *
+ * Failures to preserve mode/owner/times are reported but do not show up
+ * in the return value. There is no support for preserving SELinux
+ * security context. Regular files that share an inode are recreated as
+ * hardlinks; how other file types are handled is up to copy_dir_1().
+ */
+int copy_dir_recursively(const char *source, const char *dest)
+{
+	struct hashmap seen_inodes;
+	int status;
+
+	hashmap_init(&seen_inodes, inode_cmp, 1024);
+	status = copy_dir_1(&seen_inodes, source, dest);
+	/* presumably the 1 asks hashmap_free() to free the entries too */
+	hashmap_free(&seen_inodes, 1);
+	return status;
+}
-- 
2.7.0.377.g4cd97dd

  parent reply	other threads:[~2016-02-03  9:36 UTC|newest]

Thread overview: 50+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-02-03  9:35 [PATCH 00/20] "git worktree move" preview Nguyễn Thái Ngọc Duy
2016-02-03  9:35 ` [PATCH 01/20] usage.c: move format processing out of die_errno() Nguyễn Thái Ngọc Duy
2016-02-03  9:35 ` [PATCH 02/20] usage.c: add sys_error() that prints strerror() automatically Nguyễn Thái Ngọc Duy
2016-02-03  9:35 ` [PATCH 03/20] path.c: add git_common_path() and strbuf_git_common_path() Nguyễn Thái Ngọc Duy
2016-02-03  9:35 ` [PATCH 04/20] path.c: add is_git_path_shared() Nguyễn Thái Ngọc Duy
2016-02-03  9:35 ` Nguyễn Thái Ngọc Duy [this message]
2016-02-03  9:35 ` [PATCH 06/20] worktree.c: use is_dot_or_dotdot() Nguyễn Thái Ngọc Duy
2016-02-03  9:35 ` [PATCH 07/20] worktree.c: store "id" instead of "git_dir" Nguyễn Thái Ngọc Duy
2016-02-03  9:35 ` [PATCH 08/20] worktree.c: add clear_worktree() Nguyễn Thái Ngọc Duy
2016-02-03  9:35 ` [PATCH 09/20] worktree.c: add find_worktree_by_path() Nguyễn Thái Ngọc Duy
2016-02-03  9:35 ` [PATCH 10/20] worktree.c: add is_main_worktree() Nguyễn Thái Ngọc Duy
2016-02-03  9:35 ` [PATCH 11/20] worktree.c: recognize no main worktree Nguyễn Thái Ngọc Duy
2016-02-03  9:35 ` [PATCH 12/20] worktree.c: add update_worktree_location() Nguyễn Thái Ngọc Duy
2016-02-03  9:35 ` [PATCH 13/20] worktree.c: add update_worktree_gitfile() Nguyễn Thái Ngọc Duy
2016-02-03  9:35 ` [PATCH 14/20] worktree.c: add collect_per_worktree_git_paths() Nguyễn Thái Ngọc Duy
2016-02-03  9:35 ` [PATCH 15/20] worktree: avoid 0{40}, too many zeroes, hard to read Nguyễn Thái Ngọc Duy
2016-02-03  9:35 ` [PATCH 16/20] worktree: simplify prefixing paths Nguyễn Thái Ngọc Duy
2016-02-03  9:35 ` [PATCH 17/20] worktree: add "move" commmand Nguyễn Thái Ngọc Duy
2016-02-03  9:35 ` [PATCH 18/20] worktree: refactor add_worktree() Nguyễn Thái Ngọc Duy
2016-02-03  9:35 ` [PATCH 19/20] worktree: move repo, simple case Nguyễn Thái Ngọc Duy
2016-02-03  9:35 ` [PATCH 20/20] worktree: move repo, convert main worktree Nguyễn Thái Ngọc Duy
2016-02-16 13:29 ` [PATCH v2 00/26] worktree lock, move, remove and unlock Nguyễn Thái Ngọc Duy
2016-02-16 13:29   ` [PATCH v2 01/26] usage.c: move format processing out of die_errno() Nguyễn Thái Ngọc Duy
2016-02-16 13:29   ` [PATCH v2 02/26] usage.c: add sys_error() that prints strerror() automatically Nguyễn Thái Ngọc Duy
2016-02-16 13:29   ` [PATCH v2 03/26] copy.c: import copy_file() from busybox Nguyễn Thái Ngọc Duy
2016-02-16 13:29   ` [PATCH v2 04/26] copy.c: delete unused code in copy_file() Nguyễn Thái Ngọc Duy
2016-02-16 13:29   ` [PATCH v2 05/26] copy.c: convert bb_(p)error_msg to (sys_)error Nguyễn Thái Ngọc Duy
2016-02-16 13:29   ` [PATCH v2 06/26] copy.c: style fix Nguyễn Thái Ngọc Duy
2016-02-16 13:29   ` [PATCH v2 07/26] copy.c: convert copy_file() to copy_dir_recursively() Nguyễn Thái Ngọc Duy
2016-02-16 13:29   ` [PATCH v2 08/26] completion: support git-worktree Nguyễn Thái Ngọc Duy
2016-02-16 13:29   ` [PATCH v2 09/26] git-worktree.txt: keep subcommand listing in alphabetical order Nguyễn Thái Ngọc Duy
2016-02-16 13:29   ` [PATCH v2 10/26] wrapper.c: allow to create an empty file with write_file() Nguyễn Thái Ngọc Duy
2016-02-17 22:29     ` Junio C Hamano
2016-02-18  0:49       ` Duy Nguyen
2016-02-16 13:29   ` [PATCH v2 11/26] path.c: add git_common_path() and strbuf_git_common_path() Nguyễn Thái Ngọc Duy
2016-02-16 13:29   ` [PATCH v2 12/26] worktree.c: use is_dot_or_dotdot() Nguyễn Thái Ngọc Duy
2016-02-16 13:29   ` [PATCH v2 13/26] worktree.c: store "id" instead of "git_dir" Nguyễn Thái Ngọc Duy
2016-02-16 13:29   ` [PATCH v2 14/26] worktree.c: add clear_worktree() Nguyễn Thái Ngọc Duy
2016-02-16 13:29   ` [PATCH v2 15/26] worktree.c: add find_worktree_by_path() Nguyễn Thái Ngọc Duy
2016-02-16 13:29   ` [PATCH v2 16/26] worktree.c: add is_main_worktree() Nguyễn Thái Ngọc Duy
2016-02-16 13:29   ` [PATCH v2 17/26] worktree.c: add validate_worktree() Nguyễn Thái Ngọc Duy
2016-02-16 13:29   ` [PATCH v2 18/26] worktree.c: add update_worktree_location() Nguyễn Thái Ngọc Duy
2016-02-16 13:29   ` [PATCH v2 19/26] worktree.c: add is_worktree_locked() Nguyễn Thái Ngọc Duy
2016-02-16 13:29   ` [PATCH v2 20/26] worktree: avoid 0{40}, too many zeroes, hard to read Nguyễn Thái Ngọc Duy
2016-02-16 13:29   ` [PATCH v2 21/26] worktree: simplify prefixing paths Nguyễn Thái Ngọc Duy
2016-02-16 13:29   ` [PATCH v2 22/26] worktree: add "lock" command Nguyễn Thái Ngọc Duy
2016-02-16 13:29   ` [PATCH v2 23/26] worktree: add "unlock" command Nguyễn Thái Ngọc Duy
2016-02-16 13:29   ` [PATCH v2 24/26] worktree: add "move" commmand Nguyễn Thái Ngọc Duy
2016-02-16 13:29   ` [PATCH v2 25/26] worktree move: accept destination as directory Nguyễn Thái Ngọc Duy
2016-02-16 13:29   ` [PATCH v2 26/26] worktree: add "remove" command Nguyễn Thái Ngọc Duy

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1454492150-10628-6-git-send-email-pclouds@gmail.com \
    --to=pclouds@gmail.com \
    --cc=git@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.