All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Nguyễn Thái Ngọc Duy" <pclouds@gmail.com>
To: git@vger.kernel.org
Cc: "Nguyễn Thái Ngọc Duy" <pclouds@gmail.com>
Subject: [PATCH 07/25] copy.c: convert copy_file() to copy_dir_recursively()
Date: Wed, 13 Apr 2016 20:15:28 +0700	[thread overview]
Message-ID: <1460553346-12985-8-git-send-email-pclouds@gmail.com> (raw)
In-Reply-To: <1460553346-12985-1-git-send-email-pclouds@gmail.com>

This finally enables busybox's copy_file() code under a new name
(because "copy_file" is already taken in the Git code base). Because
this comes from busybox, POSIXy (or even Linuxy) behavior is expected.
More changes may be needed for Windows support.

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
 cache.h |   1 +
 copy.c  | 216 ++++++++++++++++++++++++++++++++++++++++++++++++++++------------
 2 files changed, 179 insertions(+), 38 deletions(-)

diff --git a/cache.h b/cache.h
index 9f09540..213a8d3 100644
--- a/cache.h
+++ b/cache.h
@@ -1677,6 +1677,7 @@ extern void fprintf_or_die(FILE *, const char *fmt, ...);
 extern int copy_fd(int ifd, int ofd);
 extern int copy_file(const char *dst, const char *src, int mode);
 extern int copy_file_with_time(const char *dst, const char *src, int mode);
+extern int copy_dir_recursively(const char *source, const char *dest);
 
 extern void write_or_die(int fd, const void *buf, size_t count);
 extern int write_or_whine(int fd, const void *buf, size_t count, const char *msg);
diff --git a/copy.c b/copy.c
index 00f8349..f04ac87 100644
--- a/copy.c
+++ b/copy.c
@@ -1,4 +1,6 @@
 #include "cache.h"
+#include "dir.h"
+#include "hashmap.h"
 
 int copy_fd(int ifd, int ofd)
 {
@@ -66,21 +68,126 @@ int copy_file_with_time(const char *dst, const char *src, int mode)
 	return status;
 }
 
-#if 0
-/* Return:
- * -1 error, copy not made
- *  0 copy is made or user answered "no" in interactive mode
- *    (failures to preserve mode/owner/times are not reported in exit code)
+struct inode_key {
+	struct hashmap_entry entry;
+	ino_t ino;
+	dev_t dev;
+	/*
+	 * Reportedly, on cramfs a file and a dir can have same ino.
+	 * Need to also remember "file/dir" bit:
+	 */
+	char isdir; /* bool */
+};
+
+struct inode_value {
+	struct inode_key key;
+	char name[FLEX_ARRAY];
+};
+
+#define HASH_SIZE      311u   /* Should be prime */
+static inline unsigned hash_inode(ino_t i)
+{
+	return i % HASH_SIZE;
+}
+
+static int inode_cmp(const void *entry, const void *entry_or_key,
+		     const void *keydata)
+{
+	const struct inode_value *inode = entry;
+	const struct inode_key   *key   = entry_or_key;
+
+	return !(inode->key.ino   == key->ino &&
+		 inode->key.dev   == key->dev &&
+		 inode->key.isdir == key->isdir);
+}
+
+static const char *is_in_ino_dev_hashtable(const struct hashmap *map,
+					   const struct stat *st)
+{
+	struct inode_key key;
+	struct inode_value *value;
+
+	key.entry.hash = hash_inode(st->st_ino);
+	key.ino	       = st->st_ino;
+	key.dev	       = st->st_dev;
+	key.isdir      = !!S_ISDIR(st->st_mode);
+	value	       = hashmap_get(map, &key, NULL);
+	return value ? value->name : NULL;
+}
+
+static void add_to_ino_dev_hashtable(struct hashmap *map,
+				     const struct stat *st,
+				     const char *path)
+{
+	struct inode_value *v;
+	int len = strlen(path);
+
+	v = xmalloc(offsetof(struct inode_value, name) + len + 1);
+	v->key.entry.hash = hash_inode(st->st_ino);
+	v->key.ino	  = st->st_ino;
+	v->key.dev	  = st->st_dev;
+	v->key.isdir      = !!S_ISDIR(st->st_mode);
+	memcpy(v->name, path, len + 1);
+	hashmap_add(map, v);
+}
+
+/*
+ * Find out if the last character of a string matches the one given.
+ * Don't underrun the buffer if the string length is 0.
  */
-int FAST_FUNC copy_file(const char *source, const char *dest, int flags)
+static inline char *last_char_is(const char *s, int c)
+{
+	if (s && *s) {
+		size_t sz = strlen(s) - 1;
+		s += sz;
+		if ( (unsigned char)*s == c)
+			return (char*)s;
+	}
+	return NULL;
+}
+
+static inline char *concat_path_file(const char *path, const char *filename)
+{
+	struct strbuf sb = STRBUF_INIT;
+	char *lc;
+
+	if (!path)
+		path = "";
+	lc = last_char_is(path, '/');
+	while (*filename == '/')
+		filename++;
+	strbuf_addf(&sb, "%s%s%s", path, (lc==NULL ? "/" : ""), filename);
+	return strbuf_detach(&sb, NULL);
+}
+
+static char *concat_subpath_file(const char *path, const char *f)
+{
+	if (f && is_dot_or_dotdot(f))
+		return NULL;
+	return concat_path_file(path, f);
+}
+
+static int do_unlink(const char *dest)
+{
+	int e = errno;
+
+	if (unlink(dest) < 0) {
+		errno = e; /* do not use errno from unlink */
+		return sys_error(_("can't create '%s'"), dest);
+	}
+	return 0;
+}
+
+static int copy_dir_1(struct hashmap *inode_map,
+		      const char *source,
+		      const char *dest)
 {
 	/* This is a recursive function, try to minimize stack usage */
-	/* NB: each struct stat is ~100 bytes */
 	struct stat source_stat;
 	struct stat dest_stat;
-	smallint retval = 0;
-	smallint dest_exists = 0;
-	smallint ovr;
+	int retval = 0;
+	int dest_exists = 0;
+	int ovr;
 
 	if (lstat(source, &source_stat) < 0)
 		return sys_error(_("can't stat '%s'"), source);
@@ -102,7 +209,7 @@ int FAST_FUNC copy_file(const char *source, const char *dest, int flags)
 		mode_t saved_umask = 0;
 
 		/* Did we ever create source ourself before? */
-		tp = is_in_ino_dev_hashtable(&source_stat);
+		tp = is_in_ino_dev_hashtable(inode_map, &source_stat);
 		if (tp)
 			/* We did! it's a recursion! man the lifeboats... */
 			return error(_("recursion detected, omitting directory '%s'"),
@@ -132,11 +239,12 @@ int FAST_FUNC copy_file(const char *source, const char *dest, int flags)
 			if (lstat(dest, &dest_stat) < 0)
 				return sys_error(_("can't stat '%s'"), dest);
 		}
+
 		/*
 		 * remember (dev,inode) of each created dir. name is
 		 * not remembered
 		 */
-		add_to_ino_dev_hashtable(&dest_stat, NULL);
+		add_to_ino_dev_hashtable(inode_map, &dest_stat, "");
 
 		/* Recursively copy files in SOURCE */
 		dp = opendir(source);
@@ -152,7 +260,7 @@ int FAST_FUNC copy_file(const char *source, const char *dest, int flags)
 			if (!new_source)
 				continue;
 			new_dest = concat_path_file(dest, d->d_name);
-			if (copy_file(new_source, new_dest, flags & ~FILEUTILS_DEREFERENCE_L0) < 0)
+			if (copy_dir_1(inode_map, new_source, new_dest) < 0)
 				retval = -1;
 			free(new_source);
 			free(new_dest);
@@ -177,53 +285,57 @@ int FAST_FUNC copy_file(const char *source, const char *dest, int flags)
 			goto dont_cat;
 		}
 
-		if (ENABLE_FEATURE_PRESERVE_HARDLINKS) {
+		if (1 /*ENABLE_FEATURE_PRESERVE_HARDLINKS*/) {
 			const char *link_target;
-			link_target = is_in_ino_dev_hashtable(&source_stat);
+			link_target = is_in_ino_dev_hashtable(inode_map, &source_stat);
 			if (link_target) {
 				if (link(link_target, dest) < 0) {
-					ovr = ask_and_unlink(dest, flags);
-					if (ovr <= 0)
+					ovr = do_unlink(dest);
+					if (ovr < 0)
 						return ovr;
 					if (link(link_target, dest) < 0)
 						return sys_error(_("can't create link '%s'"), dest);
 				}
 				return 0;
 			}
-			add_to_ino_dev_hashtable(&source_stat, dest);
+			add_to_ino_dev_hashtable(inode_map, &source_stat, dest);
 		}
 
-		src_fd = open_or_warn(source, O_RDONLY);
+		src_fd = open(source, O_RDONLY);
 		if (src_fd < 0)
-			return -1;
+			return sys_error(_("can't open '%s'"), source);
 
 		/* Do not try to open with weird mode fields */
 		new_mode = source_stat.st_mode;
 		if (!S_ISREG(source_stat.st_mode))
 			new_mode = 0666;
 
-		/* POSIX way is a security problem versus (sym)link attacks */
-		if (!ENABLE_FEATURE_NON_POSIX_CP) {
-			dst_fd = open(dest, O_WRONLY|O_CREAT|O_TRUNC, new_mode);
-		} else { /* safe way: */
-			dst_fd = open(dest, O_WRONLY|O_CREAT|O_EXCL, new_mode);
-		}
+		dst_fd = open(dest, O_WRONLY|O_CREAT|O_EXCL, new_mode);
 		if (dst_fd == -1) {
-			ovr = ask_and_unlink(dest, flags);
-			if (ovr <= 0) {
+			ovr = do_unlink(dest);
+			if (ovr < 0) {
 				close(src_fd);
 				return ovr;
 			}
 			/* It shouldn't exist. If it exists, do not open (symlink attack?) */
-			dst_fd = open3_or_warn(dest, O_WRONLY|O_CREAT|O_EXCL, new_mode);
+			dst_fd = open(dest, O_WRONLY|O_CREAT|O_EXCL, new_mode);
 			if (dst_fd < 0) {
 				close(src_fd);
-				return -1;
+				return sys_error(_("can't open '%s'"), dest);
 			}
 		}
 
-		if (bb_copyfd_eof(src_fd, dst_fd) == -1)
+		switch (copy_fd(src_fd, dst_fd)) {
+		case COPY_READ_ERROR:
+			error(_("copy-fd: read returned %s"), strerror(errno));
 			retval = -1;
+			break;
+		case COPY_WRITE_ERROR:
+			error(_("copy-fd: write returned %s"), strerror(errno));
+			retval = -1;
+			break;
+		}
+
 		/* Careful with writing... */
 		if (close(dst_fd) < 0)
 			retval = sys_error(_("error writing to '%s'"), dest);
@@ -243,19 +355,28 @@ dont_cat:
 	/* We are lazy here, a bit lax with races... */
 	if (dest_exists) {
 		errno = EEXIST;
-		ovr = ask_and_unlink(dest, flags);
-		if (ovr <= 0)
+		ovr = do_unlink(dest);
+		if (ovr < 0)
 			return ovr;
 	}
 	if (S_ISLNK(source_stat.st_mode)) {
-		char *lpath = xmalloc_readlink_or_warn(source);
-		if (lpath) {
-			int r = symlink(lpath, dest);
-			free(lpath);
+		struct strbuf lpath = STRBUF_INIT;
+		if (!strbuf_readlink(&lpath, source, 0)) {
+			int r = symlink(lpath.buf, dest);
+			strbuf_release(&lpath);
 			if (r < 0)
 				return sys_error(_("can't create symlink '%s'"), dest);
 			if (lchown(dest, source_stat.st_uid, source_stat.st_gid) < 0)
 				sys_error(_("can't preserve %s of '%s'"), "ownership", dest);
+		} else {
+			/* EINVAL => "file: Invalid argument" => puzzled user */
+			const char *errmsg = _("not a symlink");
+			int err = errno;
+
+			if (err != EINVAL)
+				errmsg = strerror(err);
+			error(_("%s: cannot read link: %s"), source, errmsg);
+			strbuf_release(&lpath);
 		}
 		/*
 		 * _Not_ jumping to preserve_mode_ugid_time: symlinks
@@ -293,4 +414,23 @@ preserve_mode_ugid_time:
 
 	return retval;
 }
-#endif
+
+/*
+ * Return:
+ * -1 error, copy not made
+ *  0 copy is made
+ *
+ * Failures to preserve mode/owner/times are not reported in exit
+ * code. No support for preserving SELinux security context. Symlinks
+ * and hardlinks are preserved.
+ */
+int copy_dir_recursively(const char *source, const char *dest)
+{
+	int ret;
+	struct hashmap inode_map;
+
+	hashmap_init(&inode_map, inode_cmp, 1024);
+	ret = copy_dir_1(&inode_map, source, dest);
+	hashmap_free(&inode_map, 1);
+	return ret;
+}
-- 
2.8.0.rc0.210.gd302cd2

  parent reply	other threads:[~2016-04-13 13:16 UTC|newest]

Thread overview: 35+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-04-13 13:15 [PATCH 00/25] worktree lock, move, remove and unlock Nguyễn Thái Ngọc Duy
2016-04-13 13:15 ` [PATCH 01/25] usage.c: move format processing out of die_errno() Nguyễn Thái Ngọc Duy
2016-04-13 13:15 ` [PATCH 02/25] usage.c: add sys_error() that prints strerror() automatically Nguyễn Thái Ngọc Duy
2016-04-13 13:15 ` [PATCH 03/25] copy.c: import copy_file() from busybox Nguyễn Thái Ngọc Duy
2016-04-13 13:15 ` [PATCH 04/25] copy.c: delete unused code in copy_file() Nguyễn Thái Ngọc Duy
2016-04-13 13:15 ` [PATCH 05/25] copy.c: convert bb_(p)error_msg to (sys_)error Nguyễn Thái Ngọc Duy
2016-04-13 13:15 ` [PATCH 06/25] copy.c: style fix Nguyễn Thái Ngọc Duy
2016-04-13 13:15 ` Nguyễn Thái Ngọc Duy [this message]
2016-04-13 13:15 ` [PATCH 08/25] completion: support git-worktree Nguyễn Thái Ngọc Duy
2016-04-13 13:15 ` [PATCH 09/25] git-worktree.txt: keep subcommand listing in alphabetical order Nguyễn Thái Ngọc Duy
2016-04-13 13:15 ` [PATCH 10/25] path.c: add git_common_path() and strbuf_git_common_path() Nguyễn Thái Ngọc Duy
2016-04-13 13:15 ` [PATCH 11/25] worktree.c: use is_dot_or_dotdot() Nguyễn Thái Ngọc Duy
2016-04-13 13:15 ` [PATCH 12/25] worktree.c: store "id" instead of "git_dir" Nguyễn Thái Ngọc Duy
2016-04-13 13:15 ` [PATCH 13/25] worktree.c: add clear_worktree() Nguyễn Thái Ngọc Duy
2016-04-13 13:15 ` [PATCH 14/25] worktree.c: add find_worktree_by_path() Nguyễn Thái Ngọc Duy
2016-04-13 13:15 ` [PATCH 15/25] worktree.c: add is_main_worktree() Nguyễn Thái Ngọc Duy
2016-04-13 13:15 ` [PATCH 16/25] worktree.c: add validate_worktree() Nguyễn Thái Ngọc Duy
2016-04-13 13:15 ` [PATCH 17/25] worktree.c: add update_worktree_location() Nguyễn Thái Ngọc Duy
2016-04-13 13:15 ` [PATCH 18/25] worktree.c: add is_worktree_locked() Nguyễn Thái Ngọc Duy
2016-04-13 13:15 ` [PATCH 19/25] worktree: avoid 0{40}, too many zeroes, hard to read Nguyễn Thái Ngọc Duy
2016-04-13 13:15 ` [PATCH 20/25] worktree: simplify prefixing paths Nguyễn Thái Ngọc Duy
2016-04-13 13:15 ` [PATCH 21/25] worktree: add "lock" command Nguyễn Thái Ngọc Duy
2016-04-13 13:15 ` [PATCH 22/25] worktree: add "unlock" command Nguyễn Thái Ngọc Duy
2016-04-13 13:15 ` [PATCH 23/25] worktree: add "move" commmand Nguyễn Thái Ngọc Duy
2016-04-13 13:15 ` [PATCH 24/25] worktree move: accept destination as directory Nguyễn Thái Ngọc Duy
2016-05-11  4:43   ` Eric Sunshine
2016-05-11 13:34     ` Duy Nguyen
2016-05-11 17:32       ` Eric Sunshine
2016-05-11 18:32         ` Johannes Sixt
2016-05-11 21:34           ` Junio C Hamano
2016-05-12  5:58             ` Johannes Sixt
2016-04-13 13:15 ` [PATCH 25/25] worktree: add "remove" command Nguyễn Thái Ngọc Duy
2016-04-14 16:08 ` [PATCH 00/25] worktree lock, move, remove and unlock Junio C Hamano
2016-04-15  0:40   ` Duy Nguyen
2016-04-15  1:21     ` Junio C Hamano

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1460553346-12985-8-git-send-email-pclouds@gmail.com \
    --to=pclouds@gmail.com \
    --cc=git@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

  Be sure your reply has a Subject: header at the top and a blank line
  before the message body.
This is an external index of several public inboxes;
see the mirroring instructions for how to clone and mirror
all data and code used by this external index.