All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/3] ksmbd: remove internal.h include
@ 2022-04-27  2:32 Namjae Jeon
  2022-04-27  2:32 ` [PATCH 2/3] fs: introduce lock_rename_child() helper Namjae Jeon
  2022-04-27  2:32 ` [PATCH 3/3] ksmbd: fix racy issue from using ->d_parent and ->d_name Namjae Jeon
  0 siblings, 2 replies; 7+ messages in thread
From: Namjae Jeon @ 2022-04-27  2:32 UTC (permalink / raw)
  To: linux-cifs, linux-fsdevel
  Cc: smfrench, hyc.lee, senozhatsky, Namjae Jeon, Al Viro, Steve French

Since vfs_path_lookup is exported, It should not be internal.
Move vfs_path_lookup prototype in internal.h to linux/namei.h.

Suggested-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Namjae Jeon <linkinjeon@kernel.org>
Signed-off-by: Steve French <stfrench@microsoft.com>
---
 fs/internal.h         | 2 --
 fs/ksmbd/vfs.c        | 2 --
 include/linux/namei.h | 2 ++
 3 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/fs/internal.h b/fs/internal.h
index 08503dc68d2b..a73eda6f9f63 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -58,8 +58,6 @@ extern int finish_clean_context(struct fs_context *fc);
  */
 extern int filename_lookup(int dfd, struct filename *name, unsigned flags,
 			   struct path *path, struct path *root);
-extern int vfs_path_lookup(struct dentry *, struct vfsmount *,
-			   const char *, unsigned int, struct path *);
 int do_rmdir(int dfd, struct filename *name);
 int do_unlinkat(int dfd, struct filename *name);
 int may_linkat(struct user_namespace *mnt_userns, struct path *link);
diff --git a/fs/ksmbd/vfs.c b/fs/ksmbd/vfs.c
index dcdd07c6efff..f79dfcebe480 100644
--- a/fs/ksmbd/vfs.c
+++ b/fs/ksmbd/vfs.c
@@ -18,8 +18,6 @@
 #include <linux/sched/xacct.h>
 #include <linux/crc32c.h>
 
-#include "../internal.h"	/* for vfs_path_lookup */
-
 #include "glob.h"
 #include "oplock.h"
 #include "connection.h"
diff --git a/include/linux/namei.h b/include/linux/namei.h
index e89329bb3134..4858c3cdf7c6 100644
--- a/include/linux/namei.h
+++ b/include/linux/namei.h
@@ -63,6 +63,8 @@ extern struct dentry *kern_path_create(int, const char *, struct path *, unsigne
 extern struct dentry *user_path_create(int, const char __user *, struct path *, unsigned int);
 extern void done_path_create(struct path *, struct dentry *);
 extern struct dentry *kern_path_locked(const char *, struct path *);
+int vfs_path_lookup(struct dentry *, struct vfsmount *, const char *,
+		    unsigned int, struct path *);
 
 extern struct dentry *try_lookup_one_len(const char *, struct dentry *, int);
 extern struct dentry *lookup_one_len(const char *, struct dentry *, int);
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH 2/3] fs: introduce lock_rename_child() helper
  2022-04-27  2:32 [PATCH 1/3] ksmbd: remove internal.h include Namjae Jeon
@ 2022-04-27  2:32 ` Namjae Jeon
  2022-04-27  2:32 ` [PATCH 3/3] ksmbd: fix racy issue from using ->d_parent and ->d_name Namjae Jeon
  1 sibling, 0 replies; 7+ messages in thread
From: Namjae Jeon @ 2022-04-27  2:32 UTC (permalink / raw)
  To: linux-cifs, linux-fsdevel
  Cc: smfrench, hyc.lee, senozhatsky, Al Viro, Namjae Jeon

From: Al Viro <viro@zeniv.linux.org.uk>

Pass the dentry of a source file and the dentry of a destination directory
to lock parent inodes for rename. As soon as this function returns,
->d_parent of the source file dentry is stable and inodes are properly
locked for calling vfs-rename. This helper is needed for ksmbd server.
rename request of SMB protocol has to rename an opened file, no matter
which directory it's in.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Namjae Jeon <linkinjeon@kernel.org>
---
 fs/namei.c            | 46 ++++++++++++++++++++++++++++++++-----------
 include/linux/namei.h |  1 +
 2 files changed, 36 insertions(+), 11 deletions(-)

diff --git a/fs/namei.c b/fs/namei.c
index 3f1829b3ab5b..516b8d147744 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -2954,20 +2954,10 @@ static inline int may_create(struct user_namespace *mnt_userns,
 	return inode_permission(mnt_userns, dir, MAY_WRITE | MAY_EXEC);
 }
 
-/*
- * p1 and p2 should be directories on the same fs.
- */
-struct dentry *lock_rename(struct dentry *p1, struct dentry *p2)
+static struct dentry *lock_two_directories(struct dentry *p1, struct dentry *p2)
 {
 	struct dentry *p;
 
-	if (p1 == p2) {
-		inode_lock_nested(p1->d_inode, I_MUTEX_PARENT);
-		return NULL;
-	}
-
-	mutex_lock(&p1->d_sb->s_vfs_rename_mutex);
-
 	p = d_ancestor(p2, p1);
 	if (p) {
 		inode_lock_nested(p2->d_inode, I_MUTEX_PARENT);
@@ -2986,8 +2976,42 @@ struct dentry *lock_rename(struct dentry *p1, struct dentry *p2)
 	inode_lock_nested(p2->d_inode, I_MUTEX_PARENT2);
 	return NULL;
 }
+
+/*
+ * p1 and p2 should be directories on the same fs.
+ */
+struct dentry *lock_rename(struct dentry *p1, struct dentry *p2)
+{
+	if (p1 == p2) {
+		inode_lock_nested(p1->d_inode, I_MUTEX_PARENT);
+		return NULL;
+	}
+
+	mutex_lock(&p1->d_sb->s_vfs_rename_mutex);
+	return lock_two_directories(p1, p2);
+}
 EXPORT_SYMBOL(lock_rename);
 
+struct dentry *lock_rename_child(struct dentry *c1, struct dentry *p2)
+{
+	if (READ_ONCE(c1->d_parent) == p2) {
+		inode_lock_nested(p2->d_inode, I_MUTEX_PARENT);
+		if (likely(c1->d_parent == p2))
+			return NULL;
+
+		inode_unlock(p2->d_inode);
+	}
+
+	mutex_lock(&c1->d_sb->s_vfs_rename_mutex);
+	if (likely(c1->d_parent != p2))
+		return lock_two_directories(c1->d_parent, p2);
+
+	inode_lock_nested(p2->d_inode, I_MUTEX_PARENT);
+	mutex_unlock(&c1->d_sb->s_vfs_rename_mutex);
+	return NULL;
+}
+EXPORT_SYMBOL(lock_rename_child);
+
 void unlock_rename(struct dentry *p1, struct dentry *p2)
 {
 	inode_unlock(p1->d_inode);
diff --git a/include/linux/namei.h b/include/linux/namei.h
index 4858c3cdf7c6..e6949d4ba188 100644
--- a/include/linux/namei.h
+++ b/include/linux/namei.h
@@ -77,6 +77,7 @@ extern int follow_down(struct path *);
 extern int follow_up(struct path *);
 
 extern struct dentry *lock_rename(struct dentry *, struct dentry *);
+extern struct dentry *lock_rename_child(struct dentry *, struct dentry *);
 extern void unlock_rename(struct dentry *, struct dentry *);
 
 extern int __must_check nd_jump_link(struct path *path);
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH 3/3] ksmbd: fix racy issue from using ->d_parent and ->d_name
  2022-04-27  2:32 [PATCH 1/3] ksmbd: remove internal.h include Namjae Jeon
  2022-04-27  2:32 ` [PATCH 2/3] fs: introduce lock_rename_child() helper Namjae Jeon
@ 2022-04-27  2:32 ` Namjae Jeon
  2022-05-15 11:09   ` Namjae Jeon
  2022-05-17 21:21   ` Al Viro
  1 sibling, 2 replies; 7+ messages in thread
From: Namjae Jeon @ 2022-04-27  2:32 UTC (permalink / raw)
  To: linux-cifs, linux-fsdevel
  Cc: smfrench, hyc.lee, senozhatsky, Namjae Jeon, Al Viro

Al pointed out that ksmbd has racy issue from using ->d_parent and ->d_name
in ksmbd_vfs_unlink and smb2_vfs_rename(). and use new lock_rename_child()
to lock stable parent while underlying rename racy.
Introduce vfs_path_parent_lookup helper to avoid out of share access and
export vfs functions like the following ones to use
vfs_path_parent_lookup().
 - export __lookup_hash().
 - export getname_kernel() and putname().

vfs_path_parent_lookup() is used for parent lookup of destination file
using absolute pathname given from FILE_RENAME_INFORMATION request.

Cc: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Namjae Jeon <linkinjeon@kernel.org>
---
 fs/ksmbd/smb2pdu.c    | 105 ++-------------
 fs/ksmbd/vfs.c        | 302 ++++++++++++++++--------------------------
 fs/ksmbd/vfs.h        |  10 +-
 fs/ksmbd/vfs_cache.c  |   5 +-
 fs/namei.c            |  41 +++++-
 include/linux/namei.h |   5 +
 6 files changed, 172 insertions(+), 296 deletions(-)

diff --git a/fs/ksmbd/smb2pdu.c b/fs/ksmbd/smb2pdu.c
index 16c803a9d996..d3e29f60fa2e 100644
--- a/fs/ksmbd/smb2pdu.c
+++ b/fs/ksmbd/smb2pdu.c
@@ -5362,44 +5362,19 @@ int smb2_echo(struct ksmbd_work *work)
 
 static int smb2_rename(struct ksmbd_work *work,
 		       struct ksmbd_file *fp,
-		       struct user_namespace *user_ns,
 		       struct smb2_file_rename_info *file_info,
 		       struct nls_table *local_nls)
 {
 	struct ksmbd_share_config *share = fp->tcon->share_conf;
-	char *new_name = NULL, *abs_oldname = NULL, *old_name = NULL;
-	char *pathname = NULL;
-	struct path path;
-	bool file_present = true;
-	int rc;
+	char *new_name = NULL;
+	int rc, flags = 0;
 
 	ksmbd_debug(SMB, "setting FILE_RENAME_INFO\n");
-	pathname = kmalloc(PATH_MAX, GFP_KERNEL);
-	if (!pathname)
-		return -ENOMEM;
-
-	abs_oldname = d_path(&fp->filp->f_path, pathname, PATH_MAX);
-	if (IS_ERR(abs_oldname)) {
-		rc = -EINVAL;
-		goto out;
-	}
-	old_name = strrchr(abs_oldname, '/');
-	if (old_name && old_name[1] != '\0') {
-		old_name++;
-	} else {
-		ksmbd_debug(SMB, "can't get last component in path %s\n",
-			    abs_oldname);
-		rc = -ENOENT;
-		goto out;
-	}
-
 	new_name = smb2_get_name(file_info->FileName,
 				 le32_to_cpu(file_info->FileNameLength),
 				 local_nls);
-	if (IS_ERR(new_name)) {
-		rc = PTR_ERR(new_name);
-		goto out;
-	}
+	if (IS_ERR(new_name))
+		return PTR_ERR(new_name);
 
 	if (strchr(new_name, ':')) {
 		int s_type;
@@ -5425,7 +5400,7 @@ static int smb2_rename(struct ksmbd_work *work,
 		if (rc)
 			goto out;
 
-		rc = ksmbd_vfs_setxattr(user_ns,
+		rc = ksmbd_vfs_setxattr(file_mnt_user_ns(fp->filp),
 					fp->filp->f_path.dentry,
 					xattr_stream_name,
 					NULL, 0, 0);
@@ -5440,47 +5415,18 @@ static int smb2_rename(struct ksmbd_work *work,
 	}
 
 	ksmbd_debug(SMB, "new name %s\n", new_name);
-	rc = ksmbd_vfs_kern_path(work, new_name, LOOKUP_NO_SYMLINKS, &path, 1);
-	if (rc) {
-		if (rc != -ENOENT)
-			goto out;
-		file_present = false;
-	} else {
-		path_put(&path);
-	}
-
 	if (ksmbd_share_veto_filename(share, new_name)) {
 		rc = -ENOENT;
 		ksmbd_debug(SMB, "Can't rename vetoed file: %s\n", new_name);
 		goto out;
 	}
 
-	if (file_info->ReplaceIfExists) {
-		if (file_present) {
-			rc = ksmbd_vfs_remove_file(work, new_name);
-			if (rc) {
-				if (rc != -ENOTEMPTY)
-					rc = -EINVAL;
-				ksmbd_debug(SMB, "cannot delete %s, rc %d\n",
-					    new_name, rc);
-				goto out;
-			}
-		}
-	} else {
-		if (file_present &&
-		    strncmp(old_name, path.dentry->d_name.name, strlen(old_name))) {
-			rc = -EEXIST;
-			ksmbd_debug(SMB,
-				    "cannot rename already existing file\n");
-			goto out;
-		}
-	}
+	if (!file_info->ReplaceIfExists)
+		flags = RENAME_NOREPLACE;
 
-	rc = ksmbd_vfs_fp_rename(work, fp, new_name);
+	rc = ksmbd_vfs_rename(work, &fp->filp->f_path, new_name, flags);
 out:
-	kfree(pathname);
-	if (!IS_ERR(new_name))
-		kfree(new_name);
+	kfree(new_name);
 	return rc;
 }
 
@@ -5728,12 +5674,6 @@ static int set_rename_info(struct ksmbd_work *work, struct ksmbd_file *fp,
 			   struct smb2_file_rename_info *rename_info,
 			   unsigned int buf_len)
 {
-	struct user_namespace *user_ns;
-	struct ksmbd_file *parent_fp;
-	struct dentry *parent;
-	struct dentry *dentry = fp->filp->f_path.dentry;
-	int ret;
-
 	if (!(fp->daccess & FILE_DELETE_LE)) {
 		pr_err("no right to delete : 0x%x\n", fp->daccess);
 		return -EACCES;
@@ -5743,32 +5683,7 @@ static int set_rename_info(struct ksmbd_work *work, struct ksmbd_file *fp,
 			le32_to_cpu(rename_info->FileNameLength))
 		return -EINVAL;
 
-	user_ns = file_mnt_user_ns(fp->filp);
-	if (ksmbd_stream_fd(fp))
-		goto next;
-
-	parent = dget_parent(dentry);
-	ret = ksmbd_vfs_lock_parent(user_ns, parent, dentry);
-	if (ret) {
-		dput(parent);
-		return ret;
-	}
-
-	parent_fp = ksmbd_lookup_fd_inode(d_inode(parent));
-	inode_unlock(d_inode(parent));
-	dput(parent);
-
-	if (parent_fp) {
-		if (parent_fp->daccess & FILE_DELETE_LE) {
-			pr_err("parent dir is opened with delete access\n");
-			ksmbd_fd_put(work, parent_fp);
-			return -ESHARE;
-		}
-		ksmbd_fd_put(work, parent_fp);
-	}
-next:
-	return smb2_rename(work, fp, user_ns, rename_info,
-			   work->sess->conn->local_nls);
+	return smb2_rename(work, fp, rename_info, work->sess->conn->local_nls);
 }
 
 static int set_file_disposition_info(struct ksmbd_file *fp,
diff --git a/fs/ksmbd/vfs.c b/fs/ksmbd/vfs.c
index f79dfcebe480..93d3625bf6d1 100644
--- a/fs/ksmbd/vfs.c
+++ b/fs/ksmbd/vfs.c
@@ -17,6 +17,7 @@
 #include <linux/vmalloc.h>
 #include <linux/sched/xacct.h>
 #include <linux/crc32c.h>
+#include <linux/namei.h>
 
 #include "glob.h"
 #include "oplock.h"
@@ -34,19 +35,6 @@
 #include "mgmt/user_session.h"
 #include "mgmt/user_config.h"
 
-static char *extract_last_component(char *path)
-{
-	char *p = strrchr(path, '/');
-
-	if (p && p[1] != '\0') {
-		*p = '\0';
-		p++;
-	} else {
-		p = NULL;
-	}
-	return p;
-}
-
 static void ksmbd_vfs_inherit_owner(struct ksmbd_work *work,
 				    struct inode *parent_inode,
 				    struct inode *inode)
@@ -60,38 +48,20 @@ static void ksmbd_vfs_inherit_owner(struct ksmbd_work *work,
 
 /**
  * ksmbd_vfs_lock_parent() - lock parent dentry if it is stable
- *
- * the parent dentry got by dget_parent or @parent could be
- * unstable, we try to lock a parent inode and lookup the
- * child dentry again.
- *
- * the reference count of @parent isn't incremented.
  */
-int ksmbd_vfs_lock_parent(struct user_namespace *user_ns, struct dentry *parent,
-			  struct dentry *child)
+struct dentry *ksmbd_vfs_lock_parent(struct dentry *child)
 {
-	struct dentry *dentry;
-	int ret = 0;
+	struct dentry *parent;
 
+	parent = dget(child->d_parent);
 	inode_lock_nested(d_inode(parent), I_MUTEX_PARENT);
-	dentry = lookup_one(user_ns, child->d_name.name, parent,
-			    child->d_name.len);
-	if (IS_ERR(dentry)) {
-		ret = PTR_ERR(dentry);
-		goto out_err;
-	}
-
-	if (dentry != child) {
-		ret = -ESTALE;
-		dput(dentry);
-		goto out_err;
+	if (child->d_parent != parent) {
+		dput(parent);
+		inode_unlock(d_inode(parent));
+		return ERR_PTR(-EINVAL);
 	}
 
-	dput(dentry);
-	return 0;
-out_err:
-	inode_unlock(d_inode(parent));
-	return ret;
+	return parent;
 }
 
 int ksmbd_vfs_may_delete(struct user_namespace *user_ns,
@@ -100,12 +70,9 @@ int ksmbd_vfs_may_delete(struct user_namespace *user_ns,
 	struct dentry *parent;
 	int ret;
 
-	parent = dget_parent(dentry);
-	ret = ksmbd_vfs_lock_parent(user_ns, parent, dentry);
-	if (ret) {
-		dput(parent);
-		return ret;
-	}
+	parent = ksmbd_vfs_lock_parent(dentry);
+	if (IS_ERR(parent))
+		return PTR_ERR(parent);
 
 	ret = inode_permission(user_ns, d_inode(parent),
 			       MAY_EXEC | MAY_WRITE);
@@ -135,12 +102,9 @@ int ksmbd_vfs_query_maximal_access(struct user_namespace *user_ns,
 	if (!inode_permission(user_ns, d_inode(dentry), MAY_OPEN | MAY_EXEC))
 		*daccess |= FILE_EXECUTE_LE;
 
-	parent = dget_parent(dentry);
-	ret = ksmbd_vfs_lock_parent(user_ns, parent, dentry);
-	if (ret) {
-		dput(parent);
-		return ret;
-	}
+	parent = ksmbd_vfs_lock_parent(dentry);
+	if (IS_ERR(parent))
+		return PTR_ERR(parent);
 
 	if (!inode_permission(user_ns, d_inode(parent), MAY_EXEC | MAY_WRITE))
 		*daccess |= FILE_DELETE_LE;
@@ -599,14 +563,11 @@ int ksmbd_vfs_remove_file(struct ksmbd_work *work, char *name)
 		return err;
 	}
 
-	user_ns = mnt_user_ns(path.mnt);
-	parent = dget_parent(path.dentry);
-	err = ksmbd_vfs_lock_parent(user_ns, parent, path.dentry);
-	if (err) {
-		dput(parent);
+	parent = ksmbd_vfs_lock_parent(path.dentry);
+	if (IS_ERR(parent)) {
 		path_put(&path);
 		ksmbd_revert_fsids(work);
-		return err;
+		return PTR_ERR(parent);
 	}
 
 	if (!d_inode(path.dentry)->i_nlink) {
@@ -614,6 +575,7 @@ int ksmbd_vfs_remove_file(struct ksmbd_work *work, char *name)
 		goto out_err;
 	}
 
+	user_ns = mnt_user_ns(path.mnt);
 	if (S_ISDIR(d_inode(path.dentry)->i_mode)) {
 		err = vfs_rmdir(user_ns, d_inode(parent), path.dentry);
 		if (err && err != -ENOTEMPTY)
@@ -688,149 +650,114 @@ int ksmbd_vfs_link(struct ksmbd_work *work, const char *oldname,
 	return err;
 }
 
-static int ksmbd_validate_entry_in_use(struct dentry *src_dent)
+int ksmbd_vfs_rename(struct ksmbd_work *work, struct path *old_path,
+		     char *newname, int flags)
 {
-	struct dentry *dst_dent;
-
-	spin_lock(&src_dent->d_lock);
-	list_for_each_entry(dst_dent, &src_dent->d_subdirs, d_child) {
-		struct ksmbd_file *child_fp;
+	struct dentry *old_parent, *new_dentry, *trap;
+	struct dentry *old_child = old_path->dentry;
+	struct path new_path;
+	struct qstr new_last;
+	struct renamedata rd;
+	struct filename *to;
+	struct ksmbd_share_config *share_conf = work->tcon->share_conf;
+	struct ksmbd_file *parent_fp;
+	int new_type;
+	int err, lookup_flags = LOOKUP_NO_SYMLINKS;
 
-		if (d_really_is_negative(dst_dent))
-			continue;
+	if (ksmbd_override_fsids(work))
+		return -ENOMEM;
 
-		child_fp = ksmbd_lookup_fd_inode(d_inode(dst_dent));
-		if (child_fp) {
-			spin_unlock(&src_dent->d_lock);
-			ksmbd_debug(VFS, "Forbid rename, sub file/dir is in use\n");
-			return -EACCES;
-		}
+	to = getname_kernel(newname);
+	if (IS_ERR(to)) {
+		err = PTR_ERR(to);
+		goto revert_fsids;
 	}
-	spin_unlock(&src_dent->d_lock);
 
-	return 0;
-}
+retry:
+	err = vfs_path_parent_lookup(to, lookup_flags | LOOKUP_BENEATH,
+				     &new_path, &new_last, &new_type,
+				     &share_conf->vfs_path);
+	if (err)
+		goto out1;
 
-static int __ksmbd_vfs_rename(struct ksmbd_work *work,
-			      struct user_namespace *src_user_ns,
-			      struct dentry *src_dent_parent,
-			      struct dentry *src_dent,
-			      struct user_namespace *dst_user_ns,
-			      struct dentry *dst_dent_parent,
-			      struct dentry *trap_dent,
-			      char *dst_name)
-{
-	struct dentry *dst_dent;
-	int err;
+	if (old_path->mnt != new_path.mnt) {
+		err = -EXDEV;
+		goto out2;
+	}
 
-	if (!work->tcon->posix_extensions) {
-		err = ksmbd_validate_entry_in_use(src_dent);
-		if (err)
-			return err;
+	trap = lock_rename_child(old_child, new_path.dentry);
+
+	old_parent = dget(old_child->d_parent);
+	if (d_unhashed(old_child)) {
+		err = -EINVAL;
+		goto out3;
 	}
 
-	if (d_really_is_negative(src_dent_parent))
-		return -ENOENT;
-	if (d_really_is_negative(dst_dent_parent))
-		return -ENOENT;
-	if (d_really_is_negative(src_dent))
-		return -ENOENT;
-	if (src_dent == trap_dent)
-		return -EINVAL;
+	parent_fp = ksmbd_lookup_fd_inode(d_inode(old_child->d_parent));
+	if (parent_fp) {
+		if (parent_fp->daccess & FILE_DELETE_LE) {
+			pr_err("parent dir is opened with delete access\n");
+			err = -ESHARE;
+			ksmbd_fd_put(work, parent_fp);
+			goto out3;
+		}
+		ksmbd_fd_put(work, parent_fp);
+	}
 
-	if (ksmbd_override_fsids(work))
-		return -ENOMEM;
+	new_dentry = __lookup_hash(&new_last, new_path.dentry,
+				   lookup_flags | LOOKUP_RENAME_TARGET);
+	if (IS_ERR(new_dentry)) {
+		err = PTR_ERR(new_dentry);
+		goto out3;
+	}
 
-	dst_dent = lookup_one(dst_user_ns, dst_name, dst_dent_parent,
-			      strlen(dst_name));
-	err = PTR_ERR(dst_dent);
-	if (IS_ERR(dst_dent)) {
-		pr_err("lookup failed %s [%d]\n", dst_name, err);
-		goto out;
+	if (d_is_symlink(new_dentry)) {
+		err = -EACCES;
+		goto out4;
 	}
 
-	err = -ENOTEMPTY;
-	if (dst_dent != trap_dent && !d_really_is_positive(dst_dent)) {
-		struct renamedata rd = {
-			.old_mnt_userns	= src_user_ns,
-			.old_dir	= d_inode(src_dent_parent),
-			.old_dentry	= src_dent,
-			.new_mnt_userns	= dst_user_ns,
-			.new_dir	= d_inode(dst_dent_parent),
-			.new_dentry	= dst_dent,
-		};
-		err = vfs_rename(&rd);
+	if ((flags & RENAME_NOREPLACE) && d_is_positive(new_dentry)) {
+		err = -EEXIST;
+		goto out4;
 	}
-	if (err)
-		pr_err("vfs_rename failed err %d\n", err);
-	if (dst_dent)
-		dput(dst_dent);
-out:
-	ksmbd_revert_fsids(work);
-	return err;
-}
 
-int ksmbd_vfs_fp_rename(struct ksmbd_work *work, struct ksmbd_file *fp,
-			char *newname)
-{
-	struct user_namespace *user_ns;
-	struct path dst_path;
-	struct dentry *src_dent_parent, *dst_dent_parent;
-	struct dentry *src_dent, *trap_dent, *src_child;
-	char *dst_name;
-	int err;
+	if (old_child == trap) {
+		err = -EINVAL;
+		goto out4;
+	}
 
-	dst_name = extract_last_component(newname);
-	if (!dst_name) {
-		dst_name = newname;
-		newname = "";
+	if (new_dentry == trap) {
+		err = -ENOTEMPTY;
+		goto out4;
 	}
 
-	src_dent_parent = dget_parent(fp->filp->f_path.dentry);
-	src_dent = fp->filp->f_path.dentry;
+	rd.old_mnt_userns	= mnt_user_ns(old_path->mnt),
+	rd.old_dir		= d_inode(old_parent),
+	rd.old_dentry		= old_child,
+	rd.new_mnt_userns	= mnt_user_ns(new_path.mnt),
+	rd.new_dir		= new_path.dentry->d_inode,
+	rd.new_dentry		= new_dentry,
+	rd.flags		= flags,
+	err = vfs_rename(&rd);
+	if (err)
+		ksmbd_debug(VFS, "vfs_rename failed err %d\n", err);
 
-	err = ksmbd_vfs_kern_path(work, newname,
-				  LOOKUP_NO_SYMLINKS | LOOKUP_DIRECTORY,
-				  &dst_path, false);
-	if (err) {
-		ksmbd_debug(VFS, "Cannot get path for %s [%d]\n", newname, err);
-		goto out;
+out4:
+	dput(new_dentry);
+out3:
+	dput(old_parent);
+	unlock_rename(old_parent, new_path.dentry);
+out2:
+	path_put(&new_path);
+
+	if (retry_estale(err, lookup_flags)) {
+		lookup_flags |= LOOKUP_REVAL;
+		goto retry;
 	}
-	dst_dent_parent = dst_path.dentry;
-
-	trap_dent = lock_rename(src_dent_parent, dst_dent_parent);
-	dget(src_dent);
-	dget(dst_dent_parent);
-	user_ns = file_mnt_user_ns(fp->filp);
-	src_child = lookup_one(user_ns, src_dent->d_name.name, src_dent_parent,
-			       src_dent->d_name.len);
-	if (IS_ERR(src_child)) {
-		err = PTR_ERR(src_child);
-		goto out_lock;
-	}
-
-	if (src_child != src_dent) {
-		err = -ESTALE;
-		dput(src_child);
-		goto out_lock;
-	}
-	dput(src_child);
-
-	err = __ksmbd_vfs_rename(work,
-				 user_ns,
-				 src_dent_parent,
-				 src_dent,
-				 mnt_user_ns(dst_path.mnt),
-				 dst_dent_parent,
-				 trap_dent,
-				 dst_name);
-out_lock:
-	dput(src_dent);
-	dput(dst_dent_parent);
-	unlock_rename(src_dent_parent, dst_dent_parent);
-	path_put(&dst_path);
-out:
-	dput(src_dent_parent);
+out1:
+	putname(to);
+revert_fsids:
+	ksmbd_revert_fsids(work);
 	return err;
 }
 
@@ -1079,14 +1006,17 @@ int ksmbd_vfs_remove_xattr(struct user_namespace *user_ns,
 	return vfs_removexattr(user_ns, dentry, attr_name);
 }
 
-int ksmbd_vfs_unlink(struct user_namespace *user_ns,
-		     struct dentry *dir, struct dentry *dentry)
+int ksmbd_vfs_unlink(struct file *filp)
 {
 	int err = 0;
+	struct dentry *dir, *dentry = filp->f_path.dentry;
+	struct user_namespace *user_ns = file_mnt_user_ns(filp);
 
-	err = ksmbd_vfs_lock_parent(user_ns, dir, dentry);
-	if (err)
-		return err;
+	dir = ksmbd_vfs_lock_parent(dentry);
+	if (IS_ERR(dir)) {
+		err = PTR_ERR(dir);
+		goto out;
+	}
 	dget(dentry);
 
 	if (S_ISDIR(d_inode(dentry)->i_mode))
@@ -1098,6 +1028,8 @@ int ksmbd_vfs_unlink(struct user_namespace *user_ns,
 	inode_unlock(d_inode(dir));
 	if (err)
 		ksmbd_debug(VFS, "failed to delete, err %d\n", err);
+out:
+	dput(dir);
 
 	return err;
 }
diff --git a/fs/ksmbd/vfs.h b/fs/ksmbd/vfs.h
index 8c37aaf936ab..f79e73b6f01b 100644
--- a/fs/ksmbd/vfs.h
+++ b/fs/ksmbd/vfs.h
@@ -69,8 +69,7 @@ struct ksmbd_kstat {
 	__le32			file_attributes;
 };
 
-int ksmbd_vfs_lock_parent(struct user_namespace *user_ns, struct dentry *parent,
-			  struct dentry *child);
+struct dentry *ksmbd_vfs_lock_parent(struct dentry *child);
 int ksmbd_vfs_may_delete(struct user_namespace *user_ns, struct dentry *dentry);
 int ksmbd_vfs_query_maximal_access(struct user_namespace *user_ns,
 				   struct dentry *dentry, __le32 *daccess);
@@ -86,8 +85,8 @@ int ksmbd_vfs_remove_file(struct ksmbd_work *work, char *name);
 int ksmbd_vfs_link(struct ksmbd_work *work,
 		   const char *oldname, const char *newname);
 int ksmbd_vfs_getattr(struct path *path, struct kstat *stat);
-int ksmbd_vfs_fp_rename(struct ksmbd_work *work, struct ksmbd_file *fp,
-			char *newname);
+int ksmbd_vfs_rename(struct ksmbd_work *work, struct path *old_path,
+		     char *newname, int flags);
 int ksmbd_vfs_truncate(struct ksmbd_work *work,
 		       struct ksmbd_file *fp, loff_t size);
 struct srv_copychunk;
@@ -129,8 +128,7 @@ struct file_allocated_range_buffer;
 int ksmbd_vfs_fqar_lseek(struct ksmbd_file *fp, loff_t start, loff_t length,
 			 struct file_allocated_range_buffer *ranges,
 			 unsigned int in_count, unsigned int *out_count);
-int ksmbd_vfs_unlink(struct user_namespace *user_ns,
-		     struct dentry *dir, struct dentry *dentry);
+int ksmbd_vfs_unlink(struct file *filp);
 void *ksmbd_vfs_init_kstat(char **p, struct ksmbd_kstat *ksmbd_kstat);
 int ksmbd_vfs_fill_dentry_attrs(struct ksmbd_work *work,
 				struct user_namespace *user_ns,
diff --git a/fs/ksmbd/vfs_cache.c b/fs/ksmbd/vfs_cache.c
index c4d59d2735f0..df600eb04552 100644
--- a/fs/ksmbd/vfs_cache.c
+++ b/fs/ksmbd/vfs_cache.c
@@ -243,7 +243,6 @@ void ksmbd_release_inode_hash(void)
 
 static void __ksmbd_inode_close(struct ksmbd_file *fp)
 {
-	struct dentry *dir, *dentry;
 	struct ksmbd_inode *ci = fp->f_ci;
 	int err;
 	struct file *filp;
@@ -262,11 +261,9 @@ static void __ksmbd_inode_close(struct ksmbd_file *fp)
 	if (atomic_dec_and_test(&ci->m_count)) {
 		write_lock(&ci->m_lock);
 		if (ci->m_flags & (S_DEL_ON_CLS | S_DEL_PENDING)) {
-			dentry = filp->f_path.dentry;
-			dir = dentry->d_parent;
 			ci->m_flags &= ~(S_DEL_ON_CLS | S_DEL_PENDING);
 			write_unlock(&ci->m_lock);
-			ksmbd_vfs_unlink(file_mnt_user_ns(filp), dir, dentry);
+			ksmbd_vfs_unlink(filp);
 			write_lock(&ci->m_lock);
 		}
 		write_unlock(&ci->m_lock);
diff --git a/fs/namei.c b/fs/namei.c
index 516b8d147744..1c0de16f9cb9 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -252,6 +252,7 @@ getname_kernel(const char * filename)
 
 	return result;
 }
+EXPORT_SYMBOL(getname_kernel);
 
 void putname(struct filename *name)
 {
@@ -269,6 +270,7 @@ void putname(struct filename *name)
 	} else
 		__putname(name);
 }
+EXPORT_SYMBOL(putname);
 
 /**
  * check_acl - perform ACL permission checking
@@ -1587,8 +1589,8 @@ static struct dentry *lookup_dcache(const struct qstr *name,
  * when directory is guaranteed to have no in-lookup children
  * at all.
  */
-static struct dentry *__lookup_hash(const struct qstr *name,
-		struct dentry *base, unsigned int flags)
+struct dentry *__lookup_hash(const struct qstr *name, struct dentry *base,
+			     unsigned int flags)
 {
 	struct dentry *dentry = lookup_dcache(name, base, flags);
 	struct dentry *old;
@@ -1612,6 +1614,7 @@ static struct dentry *__lookup_hash(const struct qstr *name,
 	}
 	return dentry;
 }
+EXPORT_SYMBOL(__lookup_hash);
 
 static struct dentry *lookup_fast(struct nameidata *nd,
 				  struct inode **inode,
@@ -2556,16 +2559,17 @@ static int path_parentat(struct nameidata *nd, unsigned flags,
 }
 
 /* Note: this does not consume "name" */
-static int filename_parentat(int dfd, struct filename *name,
-			     unsigned int flags, struct path *parent,
-			     struct qstr *last, int *type)
+static int __filename_parentat(int dfd, struct filename *name,
+			       unsigned int flags, struct path *parent,
+			       struct qstr *last, int *type,
+			       const struct path *root)
 {
 	int retval;
 	struct nameidata nd;
 
 	if (IS_ERR(name))
 		return PTR_ERR(name);
-	set_nameidata(&nd, dfd, name, NULL);
+	set_nameidata(&nd, dfd, name, root);
 	retval = path_parentat(&nd, flags | LOOKUP_RCU, parent);
 	if (unlikely(retval == -ECHILD))
 		retval = path_parentat(&nd, flags, parent);
@@ -2580,6 +2584,13 @@ static int filename_parentat(int dfd, struct filename *name,
 	return retval;
 }
 
+static int filename_parentat(int dfd, struct filename *name,
+			     unsigned int flags, struct path *parent,
+			     struct qstr *last, int *type)
+{
+	return __filename_parentat(dfd, name, flags, parent, last, type, NULL);
+}
+
 /* does lookup, returns the object with parent locked */
 static struct dentry *__kern_path_locked(struct filename *name, struct path *path)
 {
@@ -2623,6 +2634,24 @@ int kern_path(const char *name, unsigned int flags, struct path *path)
 }
 EXPORT_SYMBOL(kern_path);
 
+/**
+ * vfs_path_parent_lookup - lookup a parent path relative to a dentry-vfsmount pair
+ * @filename: filename structure
+ * @flags: lookup flags
+ * @parent: pointer to struct path to fill
+ * @last: last component
+ * @type: type of the last component
+ * @root: pointer to struct path of the base directory
+ */
+int vfs_path_parent_lookup(struct filename *filename, unsigned int flags,
+			   struct path *parent, struct qstr *last, int *type,
+			   const struct path *root)
+{
+	return  __filename_parentat(AT_FDCWD, filename, flags, parent, last,
+				    type, root);
+}
+EXPORT_SYMBOL(vfs_path_parent_lookup);
+
 /**
  * vfs_path_lookup - lookup a file path relative to a dentry-vfsmount pair
  * @dentry:  pointer to dentry of the base directory
diff --git a/include/linux/namei.h b/include/linux/namei.h
index e6949d4ba188..f9be815f12b0 100644
--- a/include/linux/namei.h
+++ b/include/linux/namei.h
@@ -57,12 +57,17 @@ static inline int user_path_at(int dfd, const char __user *name, unsigned flags,
 	return user_path_at_empty(dfd, name, flags, path, NULL);
 }
 
+struct dentry *__lookup_hash(const struct qstr *name, struct dentry *base,
+			     unsigned int flags);
 extern int kern_path(const char *, unsigned, struct path *);
 
 extern struct dentry *kern_path_create(int, const char *, struct path *, unsigned int);
 extern struct dentry *user_path_create(int, const char __user *, struct path *, unsigned int);
 extern void done_path_create(struct path *, struct dentry *);
 extern struct dentry *kern_path_locked(const char *, struct path *);
+int vfs_path_parent_lookup(struct filename *filename, unsigned int flags,
+			   struct path *parent, struct qstr *last, int *type,
+			   const struct path *root);
 int vfs_path_lookup(struct dentry *, struct vfsmount *, const char *,
 		    unsigned int, struct path *);
 
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH 3/3] ksmbd: fix racy issue from using ->d_parent and ->d_name
  2022-04-27  2:32 ` [PATCH 3/3] ksmbd: fix racy issue from using ->d_parent and ->d_name Namjae Jeon
@ 2022-05-15 11:09   ` Namjae Jeon
  2022-05-17 21:21   ` Al Viro
  1 sibling, 0 replies; 7+ messages in thread
From: Namjae Jeon @ 2022-05-15 11:09 UTC (permalink / raw)
  To: Al Viro
  Cc: smfrench, hyc.lee, senozhatsky, Namjae Jeon, linux-cifs, linux-fsdevel

2022-04-27 11:32 GMT+09:00, Namjae Jeon <linkinjeon@kernel.org>:
Hi Al,

Could you please review this patch-set ?

Thanks!
> Al pointed out that ksmbd has racy issue from using ->d_parent and ->d_name
> in ksmbd_vfs_unlink and smb2_vfs_rename(). and use new lock_rename_child()
> to lock stable parent while underlying rename racy.
> Introduce vfs_path_parent_lookup helper to avoid out of share access and
> export vfs functions like the following ones to use
> vfs_path_parent_lookup().
>  - export __lookup_hash().
>  - export getname_kernel() and putname().
>
> vfs_path_parent_lookup() is used for parent lookup of destination file
> using absolute pathname given from FILE_RENAME_INFORMATION request.
>
> Cc: Al Viro <viro@zeniv.linux.org.uk>
> Signed-off-by: Namjae Jeon <linkinjeon@kernel.org>
> ---
>  fs/ksmbd/smb2pdu.c    | 105 ++-------------
>  fs/ksmbd/vfs.c        | 302 ++++++++++++++++--------------------------
>  fs/ksmbd/vfs.h        |  10 +-
>  fs/ksmbd/vfs_cache.c  |   5 +-
>  fs/namei.c            |  41 +++++-
>  include/linux/namei.h |   5 +
>  6 files changed, 172 insertions(+), 296 deletions(-)
>
> diff --git a/fs/ksmbd/smb2pdu.c b/fs/ksmbd/smb2pdu.c
> index 16c803a9d996..d3e29f60fa2e 100644
> --- a/fs/ksmbd/smb2pdu.c
> +++ b/fs/ksmbd/smb2pdu.c
> @@ -5362,44 +5362,19 @@ int smb2_echo(struct ksmbd_work *work)
>
>  static int smb2_rename(struct ksmbd_work *work,
>  		       struct ksmbd_file *fp,
> -		       struct user_namespace *user_ns,
>  		       struct smb2_file_rename_info *file_info,
>  		       struct nls_table *local_nls)
>  {
>  	struct ksmbd_share_config *share = fp->tcon->share_conf;
> -	char *new_name = NULL, *abs_oldname = NULL, *old_name = NULL;
> -	char *pathname = NULL;
> -	struct path path;
> -	bool file_present = true;
> -	int rc;
> +	char *new_name = NULL;
> +	int rc, flags = 0;
>
>  	ksmbd_debug(SMB, "setting FILE_RENAME_INFO\n");
> -	pathname = kmalloc(PATH_MAX, GFP_KERNEL);
> -	if (!pathname)
> -		return -ENOMEM;
> -
> -	abs_oldname = d_path(&fp->filp->f_path, pathname, PATH_MAX);
> -	if (IS_ERR(abs_oldname)) {
> -		rc = -EINVAL;
> -		goto out;
> -	}
> -	old_name = strrchr(abs_oldname, '/');
> -	if (old_name && old_name[1] != '\0') {
> -		old_name++;
> -	} else {
> -		ksmbd_debug(SMB, "can't get last component in path %s\n",
> -			    abs_oldname);
> -		rc = -ENOENT;
> -		goto out;
> -	}
> -
>  	new_name = smb2_get_name(file_info->FileName,
>  				 le32_to_cpu(file_info->FileNameLength),
>  				 local_nls);
> -	if (IS_ERR(new_name)) {
> -		rc = PTR_ERR(new_name);
> -		goto out;
> -	}
> +	if (IS_ERR(new_name))
> +		return PTR_ERR(new_name);
>
>  	if (strchr(new_name, ':')) {
>  		int s_type;
> @@ -5425,7 +5400,7 @@ static int smb2_rename(struct ksmbd_work *work,
>  		if (rc)
>  			goto out;
>
> -		rc = ksmbd_vfs_setxattr(user_ns,
> +		rc = ksmbd_vfs_setxattr(file_mnt_user_ns(fp->filp),
>  					fp->filp->f_path.dentry,
>  					xattr_stream_name,
>  					NULL, 0, 0);
> @@ -5440,47 +5415,18 @@ static int smb2_rename(struct ksmbd_work *work,
>  	}
>
>  	ksmbd_debug(SMB, "new name %s\n", new_name);
> -	rc = ksmbd_vfs_kern_path(work, new_name, LOOKUP_NO_SYMLINKS, &path, 1);
> -	if (rc) {
> -		if (rc != -ENOENT)
> -			goto out;
> -		file_present = false;
> -	} else {
> -		path_put(&path);
> -	}
> -
>  	if (ksmbd_share_veto_filename(share, new_name)) {
>  		rc = -ENOENT;
>  		ksmbd_debug(SMB, "Can't rename vetoed file: %s\n", new_name);
>  		goto out;
>  	}
>
> -	if (file_info->ReplaceIfExists) {
> -		if (file_present) {
> -			rc = ksmbd_vfs_remove_file(work, new_name);
> -			if (rc) {
> -				if (rc != -ENOTEMPTY)
> -					rc = -EINVAL;
> -				ksmbd_debug(SMB, "cannot delete %s, rc %d\n",
> -					    new_name, rc);
> -				goto out;
> -			}
> -		}
> -	} else {
> -		if (file_present &&
> -		    strncmp(old_name, path.dentry->d_name.name, strlen(old_name))) {
> -			rc = -EEXIST;
> -			ksmbd_debug(SMB,
> -				    "cannot rename already existing file\n");
> -			goto out;
> -		}
> -	}
> +	if (!file_info->ReplaceIfExists)
> +		flags = RENAME_NOREPLACE;
>
> -	rc = ksmbd_vfs_fp_rename(work, fp, new_name);
> +	rc = ksmbd_vfs_rename(work, &fp->filp->f_path, new_name, flags);
>  out:
> -	kfree(pathname);
> -	if (!IS_ERR(new_name))
> -		kfree(new_name);
> +	kfree(new_name);
>  	return rc;
>  }
>
> @@ -5728,12 +5674,6 @@ static int set_rename_info(struct ksmbd_work *work,
> struct ksmbd_file *fp,
>  			   struct smb2_file_rename_info *rename_info,
>  			   unsigned int buf_len)
>  {
> -	struct user_namespace *user_ns;
> -	struct ksmbd_file *parent_fp;
> -	struct dentry *parent;
> -	struct dentry *dentry = fp->filp->f_path.dentry;
> -	int ret;
> -
>  	if (!(fp->daccess & FILE_DELETE_LE)) {
>  		pr_err("no right to delete : 0x%x\n", fp->daccess);
>  		return -EACCES;
> @@ -5743,32 +5683,7 @@ static int set_rename_info(struct ksmbd_work *work,
> struct ksmbd_file *fp,
>  			le32_to_cpu(rename_info->FileNameLength))
>  		return -EINVAL;
>
> -	user_ns = file_mnt_user_ns(fp->filp);
> -	if (ksmbd_stream_fd(fp))
> -		goto next;
> -
> -	parent = dget_parent(dentry);
> -	ret = ksmbd_vfs_lock_parent(user_ns, parent, dentry);
> -	if (ret) {
> -		dput(parent);
> -		return ret;
> -	}
> -
> -	parent_fp = ksmbd_lookup_fd_inode(d_inode(parent));
> -	inode_unlock(d_inode(parent));
> -	dput(parent);
> -
> -	if (parent_fp) {
> -		if (parent_fp->daccess & FILE_DELETE_LE) {
> -			pr_err("parent dir is opened with delete access\n");
> -			ksmbd_fd_put(work, parent_fp);
> -			return -ESHARE;
> -		}
> -		ksmbd_fd_put(work, parent_fp);
> -	}
> -next:
> -	return smb2_rename(work, fp, user_ns, rename_info,
> -			   work->sess->conn->local_nls);
> +	return smb2_rename(work, fp, rename_info, work->sess->conn->local_nls);
>  }
>
>  static int set_file_disposition_info(struct ksmbd_file *fp,
> diff --git a/fs/ksmbd/vfs.c b/fs/ksmbd/vfs.c
> index f79dfcebe480..93d3625bf6d1 100644
> --- a/fs/ksmbd/vfs.c
> +++ b/fs/ksmbd/vfs.c
> @@ -17,6 +17,7 @@
>  #include <linux/vmalloc.h>
>  #include <linux/sched/xacct.h>
>  #include <linux/crc32c.h>
> +#include <linux/namei.h>
>
>  #include "glob.h"
>  #include "oplock.h"
> @@ -34,19 +35,6 @@
>  #include "mgmt/user_session.h"
>  #include "mgmt/user_config.h"
>
> -static char *extract_last_component(char *path)
> -{
> -	char *p = strrchr(path, '/');
> -
> -	if (p && p[1] != '\0') {
> -		*p = '\0';
> -		p++;
> -	} else {
> -		p = NULL;
> -	}
> -	return p;
> -}
> -
>  static void ksmbd_vfs_inherit_owner(struct ksmbd_work *work,
>  				    struct inode *parent_inode,
>  				    struct inode *inode)
> @@ -60,38 +48,20 @@ static void ksmbd_vfs_inherit_owner(struct ksmbd_work
> *work,
>
>  /**
>   * ksmbd_vfs_lock_parent() - lock parent dentry if it is stable
> - *
> - * the parent dentry got by dget_parent or @parent could be
> - * unstable, we try to lock a parent inode and lookup the
> - * child dentry again.
> - *
> - * the reference count of @parent isn't incremented.
>   */
> -int ksmbd_vfs_lock_parent(struct user_namespace *user_ns, struct dentry
> *parent,
> -			  struct dentry *child)
> +struct dentry *ksmbd_vfs_lock_parent(struct dentry *child)
>  {
> -	struct dentry *dentry;
> -	int ret = 0;
> +	struct dentry *parent;
>
> +	parent = dget(child->d_parent);
>  	inode_lock_nested(d_inode(parent), I_MUTEX_PARENT);
> -	dentry = lookup_one(user_ns, child->d_name.name, parent,
> -			    child->d_name.len);
> -	if (IS_ERR(dentry)) {
> -		ret = PTR_ERR(dentry);
> -		goto out_err;
> -	}
> -
> -	if (dentry != child) {
> -		ret = -ESTALE;
> -		dput(dentry);
> -		goto out_err;
> +	if (child->d_parent != parent) {
> +		dput(parent);
> +		inode_unlock(d_inode(parent));
> +		return ERR_PTR(-EINVAL);
>  	}
>
> -	dput(dentry);
> -	return 0;
> -out_err:
> -	inode_unlock(d_inode(parent));
> -	return ret;
> +	return parent;
>  }
>
>  int ksmbd_vfs_may_delete(struct user_namespace *user_ns,
> @@ -100,12 +70,9 @@ int ksmbd_vfs_may_delete(struct user_namespace
> *user_ns,
>  	struct dentry *parent;
>  	int ret;
>
> -	parent = dget_parent(dentry);
> -	ret = ksmbd_vfs_lock_parent(user_ns, parent, dentry);
> -	if (ret) {
> -		dput(parent);
> -		return ret;
> -	}
> +	parent = ksmbd_vfs_lock_parent(dentry);
> +	if (IS_ERR(parent))
> +		return PTR_ERR(parent);
>
>  	ret = inode_permission(user_ns, d_inode(parent),
>  			       MAY_EXEC | MAY_WRITE);
> @@ -135,12 +102,9 @@ int ksmbd_vfs_query_maximal_access(struct
> user_namespace *user_ns,
>  	if (!inode_permission(user_ns, d_inode(dentry), MAY_OPEN | MAY_EXEC))
>  		*daccess |= FILE_EXECUTE_LE;
>
> -	parent = dget_parent(dentry);
> -	ret = ksmbd_vfs_lock_parent(user_ns, parent, dentry);
> -	if (ret) {
> -		dput(parent);
> -		return ret;
> -	}
> +	parent = ksmbd_vfs_lock_parent(dentry);
> +	if (IS_ERR(parent))
> +		return PTR_ERR(parent);
>
>  	if (!inode_permission(user_ns, d_inode(parent), MAY_EXEC | MAY_WRITE))
>  		*daccess |= FILE_DELETE_LE;
> @@ -599,14 +563,11 @@ int ksmbd_vfs_remove_file(struct ksmbd_work *work,
> char *name)
>  		return err;
>  	}
>
> -	user_ns = mnt_user_ns(path.mnt);
> -	parent = dget_parent(path.dentry);
> -	err = ksmbd_vfs_lock_parent(user_ns, parent, path.dentry);
> -	if (err) {
> -		dput(parent);
> +	parent = ksmbd_vfs_lock_parent(path.dentry);
> +	if (IS_ERR(parent)) {
>  		path_put(&path);
>  		ksmbd_revert_fsids(work);
> -		return err;
> +		return PTR_ERR(parent);
>  	}
>
>  	if (!d_inode(path.dentry)->i_nlink) {
> @@ -614,6 +575,7 @@ int ksmbd_vfs_remove_file(struct ksmbd_work *work, char
> *name)
>  		goto out_err;
>  	}
>
> +	user_ns = mnt_user_ns(path.mnt);
>  	if (S_ISDIR(d_inode(path.dentry)->i_mode)) {
>  		err = vfs_rmdir(user_ns, d_inode(parent), path.dentry);
>  		if (err && err != -ENOTEMPTY)
> @@ -688,149 +650,114 @@ int ksmbd_vfs_link(struct ksmbd_work *work, const
> char *oldname,
>  	return err;
>  }
>
> -static int ksmbd_validate_entry_in_use(struct dentry *src_dent)
> +int ksmbd_vfs_rename(struct ksmbd_work *work, struct path *old_path,
> +		     char *newname, int flags)
>  {
> -	struct dentry *dst_dent;
> -
> -	spin_lock(&src_dent->d_lock);
> -	list_for_each_entry(dst_dent, &src_dent->d_subdirs, d_child) {
> -		struct ksmbd_file *child_fp;
> +	struct dentry *old_parent, *new_dentry, *trap;
> +	struct dentry *old_child = old_path->dentry;
> +	struct path new_path;
> +	struct qstr new_last;
> +	struct renamedata rd;
> +	struct filename *to;
> +	struct ksmbd_share_config *share_conf = work->tcon->share_conf;
> +	struct ksmbd_file *parent_fp;
> +	int new_type;
> +	int err, lookup_flags = LOOKUP_NO_SYMLINKS;
>
> -		if (d_really_is_negative(dst_dent))
> -			continue;
> +	if (ksmbd_override_fsids(work))
> +		return -ENOMEM;
>
> -		child_fp = ksmbd_lookup_fd_inode(d_inode(dst_dent));
> -		if (child_fp) {
> -			spin_unlock(&src_dent->d_lock);
> -			ksmbd_debug(VFS, "Forbid rename, sub file/dir is in use\n");
> -			return -EACCES;
> -		}
> +	to = getname_kernel(newname);
> +	if (IS_ERR(to)) {
> +		err = PTR_ERR(to);
> +		goto revert_fsids;
>  	}
> -	spin_unlock(&src_dent->d_lock);
>
> -	return 0;
> -}
> +retry:
> +	err = vfs_path_parent_lookup(to, lookup_flags | LOOKUP_BENEATH,
> +				     &new_path, &new_last, &new_type,
> +				     &share_conf->vfs_path);
> +	if (err)
> +		goto out1;
>
> -static int __ksmbd_vfs_rename(struct ksmbd_work *work,
> -			      struct user_namespace *src_user_ns,
> -			      struct dentry *src_dent_parent,
> -			      struct dentry *src_dent,
> -			      struct user_namespace *dst_user_ns,
> -			      struct dentry *dst_dent_parent,
> -			      struct dentry *trap_dent,
> -			      char *dst_name)
> -{
> -	struct dentry *dst_dent;
> -	int err;
> +	if (old_path->mnt != new_path.mnt) {
> +		err = -EXDEV;
> +		goto out2;
> +	}
>
> -	if (!work->tcon->posix_extensions) {
> -		err = ksmbd_validate_entry_in_use(src_dent);
> -		if (err)
> -			return err;
> +	trap = lock_rename_child(old_child, new_path.dentry);
> +
> +	old_parent = dget(old_child->d_parent);
> +	if (d_unhashed(old_child)) {
> +		err = -EINVAL;
> +		goto out3;
>  	}
>
> -	if (d_really_is_negative(src_dent_parent))
> -		return -ENOENT;
> -	if (d_really_is_negative(dst_dent_parent))
> -		return -ENOENT;
> -	if (d_really_is_negative(src_dent))
> -		return -ENOENT;
> -	if (src_dent == trap_dent)
> -		return -EINVAL;
> +	parent_fp = ksmbd_lookup_fd_inode(d_inode(old_child->d_parent));
> +	if (parent_fp) {
> +		if (parent_fp->daccess & FILE_DELETE_LE) {
> +			pr_err("parent dir is opened with delete access\n");
> +			err = -ESHARE;
> +			ksmbd_fd_put(work, parent_fp);
> +			goto out3;
> +		}
> +		ksmbd_fd_put(work, parent_fp);
> +	}
>
> -	if (ksmbd_override_fsids(work))
> -		return -ENOMEM;
> +	new_dentry = __lookup_hash(&new_last, new_path.dentry,
> +				   lookup_flags | LOOKUP_RENAME_TARGET);
> +	if (IS_ERR(new_dentry)) {
> +		err = PTR_ERR(new_dentry);
> +		goto out3;
> +	}
>
> -	dst_dent = lookup_one(dst_user_ns, dst_name, dst_dent_parent,
> -			      strlen(dst_name));
> -	err = PTR_ERR(dst_dent);
> -	if (IS_ERR(dst_dent)) {
> -		pr_err("lookup failed %s [%d]\n", dst_name, err);
> -		goto out;
> +	if (d_is_symlink(new_dentry)) {
> +		err = -EACCES;
> +		goto out4;
>  	}
>
> -	err = -ENOTEMPTY;
> -	if (dst_dent != trap_dent && !d_really_is_positive(dst_dent)) {
> -		struct renamedata rd = {
> -			.old_mnt_userns	= src_user_ns,
> -			.old_dir	= d_inode(src_dent_parent),
> -			.old_dentry	= src_dent,
> -			.new_mnt_userns	= dst_user_ns,
> -			.new_dir	= d_inode(dst_dent_parent),
> -			.new_dentry	= dst_dent,
> -		};
> -		err = vfs_rename(&rd);
> +	if ((flags & RENAME_NOREPLACE) && d_is_positive(new_dentry)) {
> +		err = -EEXIST;
> +		goto out4;
>  	}
> -	if (err)
> -		pr_err("vfs_rename failed err %d\n", err);
> -	if (dst_dent)
> -		dput(dst_dent);
> -out:
> -	ksmbd_revert_fsids(work);
> -	return err;
> -}
>
> -int ksmbd_vfs_fp_rename(struct ksmbd_work *work, struct ksmbd_file *fp,
> -			char *newname)
> -{
> -	struct user_namespace *user_ns;
> -	struct path dst_path;
> -	struct dentry *src_dent_parent, *dst_dent_parent;
> -	struct dentry *src_dent, *trap_dent, *src_child;
> -	char *dst_name;
> -	int err;
> +	if (old_child == trap) {
> +		err = -EINVAL;
> +		goto out4;
> +	}
>
> -	dst_name = extract_last_component(newname);
> -	if (!dst_name) {
> -		dst_name = newname;
> -		newname = "";
> +	if (new_dentry == trap) {
> +		err = -ENOTEMPTY;
> +		goto out4;
>  	}
>
> -	src_dent_parent = dget_parent(fp->filp->f_path.dentry);
> -	src_dent = fp->filp->f_path.dentry;
> +	rd.old_mnt_userns	= mnt_user_ns(old_path->mnt),
> +	rd.old_dir		= d_inode(old_parent),
> +	rd.old_dentry		= old_child,
> +	rd.new_mnt_userns	= mnt_user_ns(new_path.mnt),
> +	rd.new_dir		= new_path.dentry->d_inode,
> +	rd.new_dentry		= new_dentry,
> +	rd.flags		= flags,
> +	err = vfs_rename(&rd);
> +	if (err)
> +		ksmbd_debug(VFS, "vfs_rename failed err %d\n", err);
>
> -	err = ksmbd_vfs_kern_path(work, newname,
> -				  LOOKUP_NO_SYMLINKS | LOOKUP_DIRECTORY,
> -				  &dst_path, false);
> -	if (err) {
> -		ksmbd_debug(VFS, "Cannot get path for %s [%d]\n", newname, err);
> -		goto out;
> +out4:
> +	dput(new_dentry);
> +out3:
> +	dput(old_parent);
> +	unlock_rename(old_parent, new_path.dentry);
> +out2:
> +	path_put(&new_path);
> +
> +	if (retry_estale(err, lookup_flags)) {
> +		lookup_flags |= LOOKUP_REVAL;
> +		goto retry;
>  	}
> -	dst_dent_parent = dst_path.dentry;
> -
> -	trap_dent = lock_rename(src_dent_parent, dst_dent_parent);
> -	dget(src_dent);
> -	dget(dst_dent_parent);
> -	user_ns = file_mnt_user_ns(fp->filp);
> -	src_child = lookup_one(user_ns, src_dent->d_name.name, src_dent_parent,
> -			       src_dent->d_name.len);
> -	if (IS_ERR(src_child)) {
> -		err = PTR_ERR(src_child);
> -		goto out_lock;
> -	}
> -
> -	if (src_child != src_dent) {
> -		err = -ESTALE;
> -		dput(src_child);
> -		goto out_lock;
> -	}
> -	dput(src_child);
> -
> -	err = __ksmbd_vfs_rename(work,
> -				 user_ns,
> -				 src_dent_parent,
> -				 src_dent,
> -				 mnt_user_ns(dst_path.mnt),
> -				 dst_dent_parent,
> -				 trap_dent,
> -				 dst_name);
> -out_lock:
> -	dput(src_dent);
> -	dput(dst_dent_parent);
> -	unlock_rename(src_dent_parent, dst_dent_parent);
> -	path_put(&dst_path);
> -out:
> -	dput(src_dent_parent);
> +out1:
> +	putname(to);
> +revert_fsids:
> +	ksmbd_revert_fsids(work);
>  	return err;
>  }
>
> @@ -1079,14 +1006,17 @@ int ksmbd_vfs_remove_xattr(struct user_namespace
> *user_ns,
>  	return vfs_removexattr(user_ns, dentry, attr_name);
>  }
>
> -int ksmbd_vfs_unlink(struct user_namespace *user_ns,
> -		     struct dentry *dir, struct dentry *dentry)
> +int ksmbd_vfs_unlink(struct file *filp)
>  {
>  	int err = 0;
> +	struct dentry *dir, *dentry = filp->f_path.dentry;
> +	struct user_namespace *user_ns = file_mnt_user_ns(filp);
>
> -	err = ksmbd_vfs_lock_parent(user_ns, dir, dentry);
> -	if (err)
> -		return err;
> +	dir = ksmbd_vfs_lock_parent(dentry);
> +	if (IS_ERR(dir)) {
> +		err = PTR_ERR(dir);
> +		goto out;
> +	}
>  	dget(dentry);
>
>  	if (S_ISDIR(d_inode(dentry)->i_mode))
> @@ -1098,6 +1028,8 @@ int ksmbd_vfs_unlink(struct user_namespace *user_ns,
>  	inode_unlock(d_inode(dir));
>  	if (err)
>  		ksmbd_debug(VFS, "failed to delete, err %d\n", err);
> +out:
> +	dput(dir);
>
>  	return err;
>  }
> diff --git a/fs/ksmbd/vfs.h b/fs/ksmbd/vfs.h
> index 8c37aaf936ab..f79e73b6f01b 100644
> --- a/fs/ksmbd/vfs.h
> +++ b/fs/ksmbd/vfs.h
> @@ -69,8 +69,7 @@ struct ksmbd_kstat {
>  	__le32			file_attributes;
>  };
>
> -int ksmbd_vfs_lock_parent(struct user_namespace *user_ns, struct dentry
> *parent,
> -			  struct dentry *child);
> +struct dentry *ksmbd_vfs_lock_parent(struct dentry *child);
>  int ksmbd_vfs_may_delete(struct user_namespace *user_ns, struct dentry
> *dentry);
>  int ksmbd_vfs_query_maximal_access(struct user_namespace *user_ns,
>  				   struct dentry *dentry, __le32 *daccess);
> @@ -86,8 +85,8 @@ int ksmbd_vfs_remove_file(struct ksmbd_work *work, char
> *name);
>  int ksmbd_vfs_link(struct ksmbd_work *work,
>  		   const char *oldname, const char *newname);
>  int ksmbd_vfs_getattr(struct path *path, struct kstat *stat);
> -int ksmbd_vfs_fp_rename(struct ksmbd_work *work, struct ksmbd_file *fp,
> -			char *newname);
> +int ksmbd_vfs_rename(struct ksmbd_work *work, struct path *old_path,
> +		     char *newname, int flags);
>  int ksmbd_vfs_truncate(struct ksmbd_work *work,
>  		       struct ksmbd_file *fp, loff_t size);
>  struct srv_copychunk;
> @@ -129,8 +128,7 @@ struct file_allocated_range_buffer;
>  int ksmbd_vfs_fqar_lseek(struct ksmbd_file *fp, loff_t start, loff_t
> length,
>  			 struct file_allocated_range_buffer *ranges,
>  			 unsigned int in_count, unsigned int *out_count);
> -int ksmbd_vfs_unlink(struct user_namespace *user_ns,
> -		     struct dentry *dir, struct dentry *dentry);
> +int ksmbd_vfs_unlink(struct file *filp);
>  void *ksmbd_vfs_init_kstat(char **p, struct ksmbd_kstat *ksmbd_kstat);
>  int ksmbd_vfs_fill_dentry_attrs(struct ksmbd_work *work,
>  				struct user_namespace *user_ns,
> diff --git a/fs/ksmbd/vfs_cache.c b/fs/ksmbd/vfs_cache.c
> index c4d59d2735f0..df600eb04552 100644
> --- a/fs/ksmbd/vfs_cache.c
> +++ b/fs/ksmbd/vfs_cache.c
> @@ -243,7 +243,6 @@ void ksmbd_release_inode_hash(void)
>
>  static void __ksmbd_inode_close(struct ksmbd_file *fp)
>  {
> -	struct dentry *dir, *dentry;
>  	struct ksmbd_inode *ci = fp->f_ci;
>  	int err;
>  	struct file *filp;
> @@ -262,11 +261,9 @@ static void __ksmbd_inode_close(struct ksmbd_file *fp)
>  	if (atomic_dec_and_test(&ci->m_count)) {
>  		write_lock(&ci->m_lock);
>  		if (ci->m_flags & (S_DEL_ON_CLS | S_DEL_PENDING)) {
> -			dentry = filp->f_path.dentry;
> -			dir = dentry->d_parent;
>  			ci->m_flags &= ~(S_DEL_ON_CLS | S_DEL_PENDING);
>  			write_unlock(&ci->m_lock);
> -			ksmbd_vfs_unlink(file_mnt_user_ns(filp), dir, dentry);
> +			ksmbd_vfs_unlink(filp);
>  			write_lock(&ci->m_lock);
>  		}
>  		write_unlock(&ci->m_lock);
> diff --git a/fs/namei.c b/fs/namei.c
> index 516b8d147744..1c0de16f9cb9 100644
> --- a/fs/namei.c
> +++ b/fs/namei.c
> @@ -252,6 +252,7 @@ getname_kernel(const char * filename)
>
>  	return result;
>  }
> +EXPORT_SYMBOL(getname_kernel);
>
>  void putname(struct filename *name)
>  {
> @@ -269,6 +270,7 @@ void putname(struct filename *name)
>  	} else
>  		__putname(name);
>  }
> +EXPORT_SYMBOL(putname);
>
>  /**
>   * check_acl - perform ACL permission checking
> @@ -1587,8 +1589,8 @@ static struct dentry *lookup_dcache(const struct qstr
> *name,
>   * when directory is guaranteed to have no in-lookup children
>   * at all.
>   */
> -static struct dentry *__lookup_hash(const struct qstr *name,
> -		struct dentry *base, unsigned int flags)
> +struct dentry *__lookup_hash(const struct qstr *name, struct dentry *base,
> +			     unsigned int flags)
>  {
>  	struct dentry *dentry = lookup_dcache(name, base, flags);
>  	struct dentry *old;
> @@ -1612,6 +1614,7 @@ static struct dentry *__lookup_hash(const struct qstr
> *name,
>  	}
>  	return dentry;
>  }
> +EXPORT_SYMBOL(__lookup_hash);
>
>  static struct dentry *lookup_fast(struct nameidata *nd,
>  				  struct inode **inode,
> @@ -2556,16 +2559,17 @@ static int path_parentat(struct nameidata *nd,
> unsigned flags,
>  }
>
>  /* Note: this does not consume "name" */
> -static int filename_parentat(int dfd, struct filename *name,
> -			     unsigned int flags, struct path *parent,
> -			     struct qstr *last, int *type)
> +static int __filename_parentat(int dfd, struct filename *name,
> +			       unsigned int flags, struct path *parent,
> +			       struct qstr *last, int *type,
> +			       const struct path *root)
>  {
>  	int retval;
>  	struct nameidata nd;
>
>  	if (IS_ERR(name))
>  		return PTR_ERR(name);
> -	set_nameidata(&nd, dfd, name, NULL);
> +	set_nameidata(&nd, dfd, name, root);
>  	retval = path_parentat(&nd, flags | LOOKUP_RCU, parent);
>  	if (unlikely(retval == -ECHILD))
>  		retval = path_parentat(&nd, flags, parent);
> @@ -2580,6 +2584,13 @@ static int filename_parentat(int dfd, struct filename
> *name,
>  	return retval;
>  }
>
> +static int filename_parentat(int dfd, struct filename *name,
> +			     unsigned int flags, struct path *parent,
> +			     struct qstr *last, int *type)
> +{
> +	return __filename_parentat(dfd, name, flags, parent, last, type, NULL);
> +}
> +
>  /* does lookup, returns the object with parent locked */
>  static struct dentry *__kern_path_locked(struct filename *name, struct path
> *path)
>  {
> @@ -2623,6 +2634,24 @@ int kern_path(const char *name, unsigned int flags,
> struct path *path)
>  }
>  EXPORT_SYMBOL(kern_path);
>
> +/**
> + * vfs_path_parent_lookup - lookup a parent path relative to a
> dentry-vfsmount pair
> + * @filename: filename structure
> + * @flags: lookup flags
> + * @parent: pointer to struct path to fill
> + * @last: last component
> + * @type: type of the last component
> + * @root: pointer to struct path of the base directory
> + */
> +int vfs_path_parent_lookup(struct filename *filename, unsigned int flags,
> +			   struct path *parent, struct qstr *last, int *type,
> +			   const struct path *root)
> +{
> +	return  __filename_parentat(AT_FDCWD, filename, flags, parent, last,
> +				    type, root);
> +}
> +EXPORT_SYMBOL(vfs_path_parent_lookup);
> +
>  /**
>   * vfs_path_lookup - lookup a file path relative to a dentry-vfsmount pair
>   * @dentry:  pointer to dentry of the base directory
> diff --git a/include/linux/namei.h b/include/linux/namei.h
> index e6949d4ba188..f9be815f12b0 100644
> --- a/include/linux/namei.h
> +++ b/include/linux/namei.h
> @@ -57,12 +57,17 @@ static inline int user_path_at(int dfd, const char
> __user *name, unsigned flags,
>  	return user_path_at_empty(dfd, name, flags, path, NULL);
>  }
>
> +struct dentry *__lookup_hash(const struct qstr *name, struct dentry *base,
> +			     unsigned int flags);
>  extern int kern_path(const char *, unsigned, struct path *);
>
>  extern struct dentry *kern_path_create(int, const char *, struct path *,
> unsigned int);
>  extern struct dentry *user_path_create(int, const char __user *, struct
> path *, unsigned int);
>  extern void done_path_create(struct path *, struct dentry *);
>  extern struct dentry *kern_path_locked(const char *, struct path *);
> +int vfs_path_parent_lookup(struct filename *filename, unsigned int flags,
> +			   struct path *parent, struct qstr *last, int *type,
> +			   const struct path *root);
>  int vfs_path_lookup(struct dentry *, struct vfsmount *, const char *,
>  		    unsigned int, struct path *);
>
> --
> 2.25.1
>
>

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH 3/3] ksmbd: fix racy issue from using ->d_parent and ->d_name
  2022-04-27  2:32 ` [PATCH 3/3] ksmbd: fix racy issue from using ->d_parent and ->d_name Namjae Jeon
  2022-05-15 11:09   ` Namjae Jeon
@ 2022-05-17 21:21   ` Al Viro
  2022-05-18 14:27     ` Christian Brauner
  2022-05-19  0:39     ` Namjae Jeon
  1 sibling, 2 replies; 7+ messages in thread
From: Al Viro @ 2022-05-17 21:21 UTC (permalink / raw)
  To: Namjae Jeon; +Cc: linux-cifs, linux-fsdevel, smfrench, hyc.lee, senozhatsky

On Wed, Apr 27, 2022 at 11:32:45AM +0900, Namjae Jeon wrote:
> Al pointed out that ksmbd has racy issue from using ->d_parent and ->d_name
> in ksmbd_vfs_unlink and smb2_vfs_rename(). and use new lock_rename_child()
> to lock stable parent while underlying rename racy.
> Introduce vfs_path_parent_lookup helper to avoid out of share access and
> export vfs functions like the following ones to use
> vfs_path_parent_lookup().
>  - export __lookup_hash().
>  - export getname_kernel() and putname().
> 
> vfs_path_parent_lookup() is used for parent lookup of destination file
> using absolute pathname given from FILE_RENAME_INFORMATION request.

First of all, this is seriously broken:

> -int ksmbd_vfs_lock_parent(struct user_namespace *user_ns, struct dentry *parent,
> -			  struct dentry *child)
> +struct dentry *ksmbd_vfs_lock_parent(struct dentry *child)
>  {
> -	struct dentry *dentry;
> -	int ret = 0;
> +	struct dentry *parent;
>  
> +	parent = dget(child->d_parent);
>  	inode_lock_nested(d_inode(parent), I_MUTEX_PARENT);

Do that in parallel with host rename() and you are risking this:

you: fetch child->d_parent
	get preempted away
another thread: move child elsewhere
another thread: drop (the last) reference to old parent
	memory pressure evicts that dentry and reuses memory
you: regain the timeslice and bump what you think is parent->d_count.
	In reality, it's 4 bytes in completely different data structure.
	At that point you already have a memory corruptor.  Worse,
	there's a decent chance that subsequent code will revert the
	corruption, so it would be hell to debug - you need a race
	to reproduce the thing in the first place *and* you need
	something else to notice the temporary memory corruption.

you: fetch what you think is ->d_inode of that dentry.  It actually
	isn't anything of that sort.
you: grab rwsem at hell knows what address (might or might not point
	to an rwsem).  Here's another chance to get something
	reproducible - e.g. if what you thought was ->d_inode actually
	points to unmapped memory, you'll get an oops here.  Won't
	be consistent, though.

> +	if (child->d_parent != parent) {
you:	->d_parent doesn't point there anymore

> +		dput(parent);

you:	decrement those 4 bytes in whatever object it is; if you are
	lucky, it won't hit zero and nobody had noticed the temporary
	increment.  If you are not, well...

> +		inode_unlock(d_inode(parent));

you:	fetch ->d_inode of parent (mind you, it's a bug in its own right -
	even if parent hadn't gotten freed before your dget(), after dput()
	above it's fair game for getting freed; placing that dput()
	before unlocking d_inode() is wrong).  Assuming you've got
	the same pointer as the first time around, you proceed to
	drop rwsem at the same address where you've grabbed it.

IOW, you really don't want that in the tree in this form.

It *might* be partially recoverable if you replace the first dget() with
dget_parent() and reorder dput() and inode_unlock() in failure case, but...
some of the callers of that thing are also rather dubious.

Look: you have smb2_open() calling ksmbd_vfs_may_delete(), which calls
that thing.  Downstream of this:
	if (!file_present) {
	...
	} else if (!already_permitted) {

If the parent is *NOT* already locked by that point, just how much is
your 'file_present' worth?  And if it is, you'd obviously deadlock
right there and then...

I'm not sure I like what you've done with added exports - e.g.
__lookup_hash had been OK as a name of static function, but exporting
it is asking for clashes.  And honestly, what would you say when running
into a name like that?  OK, it sounds like it's a (probably low-level)
lookup in some hash table.  _Maybe_ it would've been fine if we had one
and only implementation of hash tables in the entire tree and that
had been a part of it, but it's nothing of that sort.  And "hash" in
the name is not about doing a hash lookup as opposed to some other
work (it *does* handle hash misses, allocating dentry, asking filesystem
to do real on-disk lookup, etc.) - it's actually about "hash function
of the name is already calculated".  My fault, that - predecessor of
that thing had been called lookup_one(); it took a string, calculated
its length and computed hash, then proceeded to do lookups.  The latter
part could be reused in handling of rmdir et.al., where we already had
the component length and hash precalculated, so the tail of lookup_one()
had been carved out into a separate helper.  Circa 2.3.99...

Anyway, the name is _not_ fit for an export; I'm not sure what to call
it - lookup_one_qstr(), perhaps?  Additional fun is due to the fact
that these days it is slightly different from the lookup_one() et.al.
Those can be called with directory held shared; that allows parallel
lookups, but it's not free of cost - if we run into a cache miss and need
to allocate a new dentry and talk to filesystem, we have to recheck the
hash table after allocation.  __lookup_hash() is called only with parent
held exclusive and it can skip that fun - hash miss is going to remain
a miss; nobody else will be able to insert stuff into dcache in that
directory until we unlock it.

What I'm worried about is that renaming it to lookup_one_qstr() will
be an invitation for "oh, we happen to have hash/len already known by the
time of that lookup_one() call; let's just convert it to lookup_one_qsrt()"
and if that happens in a place where the parent is held only shared, we'll
be in trouble.  OTOH, lookup_one_qstr_excl() sounds like an invitation to
do something painful to whoever's responsible for such name...

Suggestions, anyone?

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH 3/3] ksmbd: fix racy issue from using ->d_parent and ->d_name
  2022-05-17 21:21   ` Al Viro
@ 2022-05-18 14:27     ` Christian Brauner
  2022-05-19  0:39     ` Namjae Jeon
  1 sibling, 0 replies; 7+ messages in thread
From: Christian Brauner @ 2022-05-18 14:27 UTC (permalink / raw)
  To: Al Viro
  Cc: Namjae Jeon, linux-cifs, linux-fsdevel, smfrench, hyc.lee, senozhatsky

On Tue, May 17, 2022 at 09:21:14PM +0000, Al Viro wrote:
> On Wed, Apr 27, 2022 at 11:32:45AM +0900, Namjae Jeon wrote:
> > Al pointed out that ksmbd has racy issue from using ->d_parent and ->d_name
> > in ksmbd_vfs_unlink and smb2_vfs_rename(). and use new lock_rename_child()
> > to lock stable parent while underlying rename racy.
> > Introduce vfs_path_parent_lookup helper to avoid out of share access and
> > export vfs functions like the following ones to use
> > vfs_path_parent_lookup().
> >  - export __lookup_hash().
> >  - export getname_kernel() and putname().
> > 
> > vfs_path_parent_lookup() is used for parent lookup of destination file
> > using absolute pathname given from FILE_RENAME_INFORMATION request.
> 
> First of all, this is seriously broken:
> 
> > -int ksmbd_vfs_lock_parent(struct user_namespace *user_ns, struct dentry *parent,
> > -			  struct dentry *child)
> > +struct dentry *ksmbd_vfs_lock_parent(struct dentry *child)
> >  {
> > -	struct dentry *dentry;
> > -	int ret = 0;
> > +	struct dentry *parent;
> >  
> > +	parent = dget(child->d_parent);
> >  	inode_lock_nested(d_inode(parent), I_MUTEX_PARENT);
> 
> Do that in parallel with host rename() and you are risking this:
> 
> you: fetch child->d_parent
> 	get preempted away
> another thread: move child elsewhere
> another thread: drop (the last) reference to old parent
> 	memory pressure evicts that dentry and reuses memory
> you: regain the timeslice and bump what you think is parent->d_count.
> 	In reality, it's 4 bytes in completely different data structure.
> 	At that point you already have a memory corruptor.  Worse,
> 	there's a decent chance that subsequent code will revert the
> 	corruption, so it would be hell to debug - you need a race
> 	to reproduce the thing in the first place *and* you need
> 	something else to notice the temporary memory corruption.
> 
> you: fetch what you think is ->d_inode of that dentry.  It actually
> 	isn't anything of that sort.
> you: grab rwsem at hell knows what address (might or might not point
> 	to an rwsem).  Here's another chance to get something
> 	reproducible - e.g. if what you thought was ->d_inode actually
> 	points to unmapped memory, you'll get an oops here.  Won't
> 	be consistent, though.
> 
> > +	if (child->d_parent != parent) {
> you:	->d_parent doesn't point there anymore
> 
> > +		dput(parent);
> 
> you:	decrement those 4 bytes in whatever object it is; if you are
> 	lucky, it won't hit zero and nobody had noticed the temporary
> 	increment.  If you are not, well...
> 
> > +		inode_unlock(d_inode(parent));
> 
> you:	fetch ->d_inode of parent (mind you, it's a bug in its own right -
> 	even if parent hadn't gotten freed before your dget(), after dput()
> 	above it's fair game for getting freed; placing that dput()
> 	before unlocking d_inode() is wrong).  Assuming you've got
> 	the same pointer as the first time around, you proceed to
> 	drop rwsem at the same address where you've grabbed it.
> 
> IOW, you really don't want that in the tree in this form.
> 
> It *might* be partially recoverable if you replace the first dget() with
> dget_parent() and reorder dput() and inode_unlock() in failure case, but...
> some of the callers of that thing are also rather dubious.
> 
> Look: you have smb2_open() calling ksmbd_vfs_may_delete(), which calls
> that thing.  Downstream of this:
> 	if (!file_present) {
> 	...
> 	} else if (!already_permitted) {
> 
> If the parent is *NOT* already locked by that point, just how much is
> your 'file_present' worth?  And if it is, you'd obviously deadlock
> right there and then...
> 
> I'm not sure I like what you've done with added exports - e.g.
> __lookup_hash had been OK as a name of static function, but exporting
> it is asking for clashes.  And honestly, what would you say when running
> into a name like that?  OK, it sounds like it's a (probably low-level)
> lookup in some hash table.  _Maybe_ it would've been fine if we had one
> and only implementation of hash tables in the entire tree and that
> had been a part of it, but it's nothing of that sort.  And "hash" in
> the name is not about doing a hash lookup as opposed to some other
> work (it *does* handle hash misses, allocating dentry, asking filesystem
> to do real on-disk lookup, etc.) - it's actually about "hash function
> of the name is already calculated".  My fault, that - predecessor of
> that thing had been called lookup_one(); it took a string, calculated
> its length and computed hash, then proceeded to do lookups.  The latter
> part could be reused in handling of rmdir et.al., where we already had
> the component length and hash precalculated, so the tail of lookup_one()
> had been carved out into a separate helper.  Circa 2.3.99...
> 
> Anyway, the name is _not_ fit for an export; I'm not sure what to call
> it - lookup_one_qstr(), perhaps?  Additional fun is due to the fact
> that these days it is slightly different from the lookup_one() et.al.
> Those can be called with directory held shared; that allows parallel
> lookups, but it's not free of cost - if we run into a cache miss and need
> to allocate a new dentry and talk to filesystem, we have to recheck the
> hash table after allocation.  __lookup_hash() is called only with parent
> held exclusive and it can skip that fun - hash miss is going to remain
> a miss; nobody else will be able to insert stuff into dcache in that
> directory until we unlock it.
> 
> What I'm worried about is that renaming it to lookup_one_qstr() will
> be an invitation for "oh, we happen to have hash/len already known by the
> time of that lookup_one() call; let's just convert it to lookup_one_qsrt()"
> and if that happens in a place where the parent is held only shared, we'll
> be in trouble.  OTOH, lookup_one_qstr_excl() sounds like an invitation to
> do something painful to whoever's responsible for such name...
> 
> Suggestions, anyone?

Plus, __lookup_hash() doesn't do permission checking so naming it
lookup_one_qstr() seems problematic from that perspective as well...

Don't kill me but:

__excl_qstr_lookup_noperm()

?

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH 3/3] ksmbd: fix racy issue from using ->d_parent and ->d_name
  2022-05-17 21:21   ` Al Viro
  2022-05-18 14:27     ` Christian Brauner
@ 2022-05-19  0:39     ` Namjae Jeon
  1 sibling, 0 replies; 7+ messages in thread
From: Namjae Jeon @ 2022-05-19  0:39 UTC (permalink / raw)
  To: Al Viro; +Cc: linux-cifs, linux-fsdevel, smfrench, hyc.lee, senozhatsky

2022-05-18 6:21 GMT+09:00, Al Viro <viro@zeniv.linux.org.uk>:
> On Wed, Apr 27, 2022 at 11:32:45AM +0900, Namjae Jeon wrote:
>> Al pointed out that ksmbd has racy issue from using ->d_parent and
>> ->d_name
>> in ksmbd_vfs_unlink and smb2_vfs_rename(). and use new
>> lock_rename_child()
>> to lock stable parent while underlying rename racy.
>> Introduce vfs_path_parent_lookup helper to avoid out of share access and
>> export vfs functions like the following ones to use
>> vfs_path_parent_lookup().
>>  - export __lookup_hash().
>>  - export getname_kernel() and putname().
>>
>> vfs_path_parent_lookup() is used for parent lookup of destination file
>> using absolute pathname given from FILE_RENAME_INFORMATION request.
>
> First of all, this is seriously broken:
>
>> -int ksmbd_vfs_lock_parent(struct user_namespace *user_ns, struct dentry
>> *parent,
>> -			  struct dentry *child)
>> +struct dentry *ksmbd_vfs_lock_parent(struct dentry *child)
>>  {
>> -	struct dentry *dentry;
>> -	int ret = 0;
>> +	struct dentry *parent;
>>
>> +	parent = dget(child->d_parent);
>>  	inode_lock_nested(d_inode(parent), I_MUTEX_PARENT);
>
> Do that in parallel with host rename() and you are risking this:
>
> you: fetch child->d_parent
> 	get preempted away
> another thread: move child elsewhere
> another thread: drop (the last) reference to old parent
> 	memory pressure evicts that dentry and reuses memory
> you: regain the timeslice and bump what you think is parent->d_count.
> 	In reality, it's 4 bytes in completely different data structure.
> 	At that point you already have a memory corruptor.  Worse,
> 	there's a decent chance that subsequent code will revert the
> 	corruption, so it would be hell to debug - you need a race
> 	to reproduce the thing in the first place *and* you need
> 	something else to notice the temporary memory corruption.
>
> you: fetch what you think is ->d_inode of that dentry.  It actually
> 	isn't anything of that sort.
> you: grab rwsem at hell knows what address (might or might not point
> 	to an rwsem).  Here's another chance to get something
> 	reproducible - e.g. if what you thought was ->d_inode actually
> 	points to unmapped memory, you'll get an oops here.  Won't
> 	be consistent, though.
>
>> +	if (child->d_parent != parent) {
> you:	->d_parent doesn't point there anymore
>
>> +		dput(parent);
>
> you:	decrement those 4 bytes in whatever object it is; if you are
> 	lucky, it won't hit zero and nobody had noticed the temporary
> 	increment.  If you are not, well...
>
>> +		inode_unlock(d_inode(parent));
>
> you:	fetch ->d_inode of parent (mind you, it's a bug in its own right -
> 	even if parent hadn't gotten freed before your dget(), after dput()
> 	above it's fair game for getting freed; placing that dput()
> 	before unlocking d_inode() is wrong).  Assuming you've got
> 	the same pointer as the first time around, you proceed to
> 	drop rwsem at the same address where you've grabbed it.
>
> IOW, you really don't want that in the tree in this form.
>
> It *might* be partially recoverable if you replace the first dget() with
> dget_parent() and reorder dput() and inode_unlock() in failure case, but...
> some of the callers of that thing are also rather dubious.
Okay.

>
> Look: you have smb2_open() calling ksmbd_vfs_may_delete(), which calls
> that thing.  Downstream of this:
> 	if (!file_present) {
> 	...
> 	} else if (!already_permitted) {
>
> If the parent is *NOT* already locked by that point, just how much is
> your 'file_present' worth?  And if it is, you'd obviously deadlock
> right there and then...
It doesn't lock the parent for this. I'll improve it on another patch.

>
> I'm not sure I like what you've done with added exports - e.g.
> __lookup_hash had been OK as a name of static function, but exporting
> it is asking for clashes.  And honestly, what would you say when running
> into a name like that?  OK, it sounds like it's a (probably low-level)
> lookup in some hash table.  _Maybe_ it would've been fine if we had one
> and only implementation of hash tables in the entire tree and that
> had been a part of it, but it's nothing of that sort.  And "hash" in
> the name is not about doing a hash lookup as opposed to some other
> work (it *does* handle hash misses, allocating dentry, asking filesystem
> to do real on-disk lookup, etc.) - it's actually about "hash function
> of the name is already calculated".  My fault, that - predecessor of
> that thing had been called lookup_one(); it took a string, calculated
> its length and computed hash, then proceeded to do lookups.  The latter
> part could be reused in handling of rmdir et.al., where we already had
> the component length and hash precalculated, so the tail of lookup_one()
> had been carved out into a separate helper.  Circa 2.3.99...
>
> Anyway, the name is _not_ fit for an export; I'm not sure what to call
> it - lookup_one_qstr(), perhaps?  Additional fun is due to the fact
> that these days it is slightly different from the lookup_one() et.al.
> Those can be called with directory held shared; that allows parallel
> lookups, but it's not free of cost - if we run into a cache miss and need
> to allocate a new dentry and talk to filesystem, we have to recheck the
> hash table after allocation.  __lookup_hash() is called only with parent
> held exclusive and it can skip that fun - hash miss is going to remain
> a miss; nobody else will be able to insert stuff into dcache in that
> directory until we unlock it.
Okay.
>
> What I'm worried about is that renaming it to lookup_one_qstr() will
> be an invitation for "oh, we happen to have hash/len already known by the
> time of that lookup_one() call; let's just convert it to lookup_one_qsrt()"
> and if that happens in a place where the parent is held only shared, we'll
> be in trouble.  OTOH, lookup_one_qstr_excl() sounds like an invitation to
> do something painful to whoever's responsible for such name...
lookup_one_qstr_excl() doesn't look bad... but I'll use it if another
better name is suggested.
>
> Suggestions, anyone?
>

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2022-05-19  0:40 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-04-27  2:32 [PATCH 1/3] ksmbd: remove internal.h include Namjae Jeon
2022-04-27  2:32 ` [PATCH 2/3] fs: introduce lock_rename_child() helper Namjae Jeon
2022-04-27  2:32 ` [PATCH 3/3] ksmbd: fix racy issue from using ->d_parent and ->d_name Namjae Jeon
2022-05-15 11:09   ` Namjae Jeon
2022-05-17 21:21   ` Al Viro
2022-05-18 14:27     ` Christian Brauner
2022-05-19  0:39     ` Namjae Jeon

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.