All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v3 1/7] fs/inode: move sgid strip operation from inode_init_owner into inode_sgid_strip
@ 2022-04-15 11:02 Yang Xu
  2022-04-15 11:02 ` [PATCH v3 2/7] fs/namei.c: Add missing umask strip in vfs_tmpfile Yang Xu
                   ` (6 more replies)
  0 siblings, 7 replies; 18+ messages in thread
From: Yang Xu @ 2022-04-15 11:02 UTC (permalink / raw)
  To: david, djwong, brauner
  Cc: linux-fsdevel, ceph-devel, linux-nfs, linux-xfs, viro, jlayton, Yang Xu

This has no functional change. Just create and export inode_sgid_strip api for
the subsequent patch. This function is used to strip S_ISGID mode when init
a new inode.

Signed-off-by: Yang Xu <xuyang2018.jy@fujitsu.com>
---
v2->v3:
1.Use const struct inode * instead of struct inode *
2.replace sgid strip with inode_sgid_strip in a single patch
 fs/inode.c         | 24 ++++++++++++++++++++----
 include/linux/fs.h |  3 ++-
 2 files changed, 22 insertions(+), 5 deletions(-)

diff --git a/fs/inode.c b/fs/inode.c
index 9d9b422504d1..1b569ad882ce 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -2246,10 +2246,8 @@ void inode_init_owner(struct user_namespace *mnt_userns, struct inode *inode,
 		/* Directories are special, and always inherit S_ISGID */
 		if (S_ISDIR(mode))
 			mode |= S_ISGID;
-		else if ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP) &&
-			 !in_group_p(i_gid_into_mnt(mnt_userns, dir)) &&
-			 !capable_wrt_inode_uidgid(mnt_userns, dir, CAP_FSETID))
-			mode &= ~S_ISGID;
+		else
+			inode_sgid_strip(mnt_userns, dir, &mode);
 	} else
 		inode_fsgid_set(inode, mnt_userns);
 	inode->i_mode = mode;
@@ -2405,3 +2403,21 @@ struct timespec64 current_time(struct inode *inode)
 	return timestamp_truncate(now, inode);
 }
 EXPORT_SYMBOL(current_time);
+
+void inode_sgid_strip(struct user_namespace *mnt_userns,
+		      const struct inode *dir, umode_t *mode)
+{
+	if (!dir || !(dir->i_mode & S_ISGID))
+		return;
+	if ((*mode & (S_ISGID | S_IXGRP)) != (S_ISGID | S_IXGRP))
+		return;
+	if (S_ISDIR(*mode))
+		return;
+	if (in_group_p(i_gid_into_mnt(mnt_userns, dir)))
+		return;
+	if (capable_wrt_inode_uidgid(mnt_userns, dir, CAP_FSETID))
+		return;
+
+	*mode &= ~S_ISGID;
+}
+EXPORT_SYMBOL(inode_sgid_strip);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index bbde95387a23..4a617aaab6f6 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1897,7 +1897,8 @@ extern long compat_ptr_ioctl(struct file *file, unsigned int cmd,
 void inode_init_owner(struct user_namespace *mnt_userns, struct inode *inode,
 		      const struct inode *dir, umode_t mode);
 extern bool may_open_dev(const struct path *path);
-
+void inode_sgid_strip(struct user_namespace *mnt_userns,
+		      const struct inode *dir, umode_t *mode);
 /*
  * This is the "filldir" function type, used by readdir() to let
  * the kernel specify what kind of dirent layout it wants to have.
-- 
2.27.0


^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH v3 2/7] fs/namei.c: Add missing umask strip in vfs_tmpfile
  2022-04-15 11:02 [PATCH v3 1/7] fs/inode: move sgid strip operation from inode_init_owner into inode_sgid_strip Yang Xu
@ 2022-04-15 11:02 ` Yang Xu
  2022-04-15 14:17   ` Christian Brauner
  2022-04-15 11:02 ` [PATCH v3 3/7] xfs: Only do posix acl setup/release operation under CONFIG_XFS_POSIX_ACL Yang Xu
                   ` (5 subsequent siblings)
  6 siblings, 1 reply; 18+ messages in thread
From: Yang Xu @ 2022-04-15 11:02 UTC (permalink / raw)
  To: david, djwong, brauner
  Cc: linux-fsdevel, ceph-devel, linux-nfs, linux-xfs, viro, jlayton, Yang Xu

If underflying filesystem doesn't enable own CONFIG_FS_POSIX_ACL, then
posix_acl_create can't be called. So we will miss umask strip, ie
use ext4 with noacl or disblae CONFIG_EXT4_FS_POSIX_ACL.

Reported-by: Christian Brauner (Microsoft) <brauner@kernel.org>
Signed-off-by: Yang Xu <xuyang2018.jy@fujitsu.com>
---
 fs/namei.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/fs/namei.c b/fs/namei.c
index 3f1829b3ab5b..bbc7c950bbdc 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -3521,6 +3521,8 @@ struct dentry *vfs_tmpfile(struct user_namespace *mnt_userns,
 	child = d_alloc(dentry, &slash_name);
 	if (unlikely(!child))
 		goto out_err;
+	if (!IS_POSIXACL(dir))
+		mode &= ~current_umask();
 	error = dir->i_op->tmpfile(mnt_userns, dir, child, mode);
 	if (error)
 		goto out_err;
-- 
2.27.0


^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH v3 3/7] xfs: Only do posix acl setup/release operation under CONFIG_XFS_POSIX_ACL
  2022-04-15 11:02 [PATCH v3 1/7] fs/inode: move sgid strip operation from inode_init_owner into inode_sgid_strip Yang Xu
  2022-04-15 11:02 ` [PATCH v3 2/7] fs/namei.c: Add missing umask strip in vfs_tmpfile Yang Xu
@ 2022-04-15 11:02 ` Yang Xu
  2022-04-15 11:02 ` [PATCH v3 4/7] nfs3: Only do posix acl setup/release operation under CONFIG_NFS_V3_ACL Yang Xu
                   ` (4 subsequent siblings)
  6 siblings, 0 replies; 18+ messages in thread
From: Yang Xu @ 2022-04-15 11:02 UTC (permalink / raw)
  To: david, djwong, brauner
  Cc: linux-fsdevel, ceph-devel, linux-nfs, linux-xfs, viro, jlayton, Yang Xu

Usually, filesystem will use a function named as fs_init_acl function that belong
to acl.c and this function is externed in acl.h by using CONFIG_FS_POSIX_ACL.

If filesystem disable this switch, we should not call xfs_set_acl also not call
posix_acl_create/posix_acl_release because it is useless(We have do umask
strip in vfs).

Signed-off-by: Yang Xu <xuyang2018.jy@fujitsu.com>
---
 fs/xfs/xfs_iops.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index b34e8e4344a8..9487e68bdd3d 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -146,10 +146,12 @@ xfs_create_need_xattr(
 	struct posix_acl *default_acl,
 	struct posix_acl *acl)
 {
+#ifdef CONFIG_XFS_POSIX_ACL
 	if (acl)
 		return true;
 	if (default_acl)
 		return true;
+#endif
 #if IS_ENABLED(CONFIG_SECURITY)
 	if (dir->i_sb->s_security)
 		return true;
@@ -184,9 +186,11 @@ xfs_generic_create(
 		rdev = 0;
 	}
 
+#ifdef CONFIG_XFS_POSIX_ACL
 	error = posix_acl_create(dir, &mode, &default_acl, &acl);
 	if (error)
 		return error;
+#endif
 
 	/* Verify mode is valid also for tmpfile case */
 	error = xfs_dentry_mode_to_name(&name, dentry, mode);
@@ -241,8 +245,10 @@ xfs_generic_create(
 	xfs_finish_inode_setup(ip);
 
  out_free_acl:
+#ifdef CONFIG_XFS_POSIX_ACL
 	posix_acl_release(default_acl);
 	posix_acl_release(acl);
+#endif
 	return error;
 
  out_cleanup_inode:
-- 
2.27.0


^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH v3 4/7] nfs3: Only do posix acl setup/release operation under CONFIG_NFS_V3_ACL
  2022-04-15 11:02 [PATCH v3 1/7] fs/inode: move sgid strip operation from inode_init_owner into inode_sgid_strip Yang Xu
  2022-04-15 11:02 ` [PATCH v3 2/7] fs/namei.c: Add missing umask strip in vfs_tmpfile Yang Xu
  2022-04-15 11:02 ` [PATCH v3 3/7] xfs: Only do posix acl setup/release operation under CONFIG_XFS_POSIX_ACL Yang Xu
@ 2022-04-15 11:02 ` Yang Xu
  2022-04-15 11:02 ` [PATCH v3 5/7] fs: Add new helper prepare_mode Yang Xu
                   ` (3 subsequent siblings)
  6 siblings, 0 replies; 18+ messages in thread
From: Yang Xu @ 2022-04-15 11:02 UTC (permalink / raw)
  To: david, djwong, brauner
  Cc: linux-fsdevel, ceph-devel, linux-nfs, linux-xfs, viro, jlayton, Yang Xu

Usually, filesystem will use a function named as fs_init_acl function that belong
to acl.c and this function is externed in acl.h by using CONFIG_FS_POSIX_ACL.

If filesystem disable this switch, we should not call nfs3_proc_setacls also not
call posix_acl_create/posix_acl_release because it is useless(We have do umask
strip in vfs).

Signed-off-by: Yang Xu <xuyang2018.jy@fujitsu.com>
---
 fs/nfs/nfs3proc.c | 29 +++++++++++++++++++++++++++--
 1 file changed, 27 insertions(+), 2 deletions(-)

diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index 1597eef40d54..55789a625d18 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -337,7 +337,9 @@ static int
 nfs3_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
 		 int flags)
 {
+#ifdef CONFIG_NFS_V3_ACL
 	struct posix_acl *default_acl, *acl;
+#endif
 	struct nfs3_createdata *data;
 	struct dentry *d_alias;
 	int status = -ENOMEM;
@@ -361,9 +363,11 @@ nfs3_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
 		data->arg.create.verifier[1] = cpu_to_be32(current->pid);
 	}
 
+#ifdef CONFIG_NFS_V3_ACL
 	status = posix_acl_create(dir, &sattr->ia_mode, &default_acl, &acl);
 	if (status)
 		goto out;
+#endif
 
 	for (;;) {
 		d_alias = nfs3_do_create(dir, dentry, data);
@@ -415,13 +419,18 @@ nfs3_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
 			goto out_dput;
 	}
 
+#ifdef CONFIG_NFS_V3_ACL
 	status = nfs3_proc_setacls(d_inode(dentry), acl, default_acl);
+#endif
 
 out_dput:
 	dput(d_alias);
+
 out_release_acls:
+#ifdef CONFIG_NFS_V3_ACL
 	posix_acl_release(acl);
 	posix_acl_release(default_acl);
+#endif
 out:
 	nfs3_free_createdata(data);
 	dprintk("NFS reply create: %d\n", status);
@@ -580,7 +589,9 @@ nfs3_proc_symlink(struct inode *dir, struct dentry *dentry, struct page *page,
 static int
 nfs3_proc_mkdir(struct inode *dir, struct dentry *dentry, struct iattr *sattr)
 {
+#ifdef CONFIG_NFS_V3_ACL
 	struct posix_acl *default_acl, *acl;
+#endif
 	struct nfs3_createdata *data;
 	struct dentry *d_alias;
 	int status = -ENOMEM;
@@ -591,9 +602,11 @@ nfs3_proc_mkdir(struct inode *dir, struct dentry *dentry, struct iattr *sattr)
 	if (data == NULL)
 		goto out;
 
+#ifdef CONFIG_NFS_V3_ACL
 	status = posix_acl_create(dir, &sattr->ia_mode, &default_acl, &acl);
 	if (status)
 		goto out;
+#endif
 
 	data->msg.rpc_proc = &nfs3_procedures[NFS3PROC_MKDIR];
 	data->arg.mkdir.fh = NFS_FH(dir);
@@ -610,12 +623,16 @@ nfs3_proc_mkdir(struct inode *dir, struct dentry *dentry, struct iattr *sattr)
 	if (d_alias)
 		dentry = d_alias;
 
+#ifdef CONFIG_NFS_V3_ACL
 	status = nfs3_proc_setacls(d_inode(dentry), acl, default_acl);
-
+#endif
 	dput(d_alias);
+
 out_release_acls:
+#ifdef CONFIG_NFS_V3_ACL
 	posix_acl_release(acl);
 	posix_acl_release(default_acl);
+#endif
 out:
 	nfs3_free_createdata(data);
 	dprintk("NFS reply mkdir: %d\n", status);
@@ -711,7 +728,9 @@ static int
 nfs3_proc_mknod(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
 		dev_t rdev)
 {
+#ifdef CONFIG_NFS_V3_ACL
 	struct posix_acl *default_acl, *acl;
+#endif
 	struct nfs3_createdata *data;
 	struct dentry *d_alias;
 	int status = -ENOMEM;
@@ -723,9 +742,11 @@ nfs3_proc_mknod(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
 	if (data == NULL)
 		goto out;
 
+#ifdef CONFIG_NFS_V3_ACL
 	status = posix_acl_create(dir, &sattr->ia_mode, &default_acl, &acl);
 	if (status)
 		goto out;
+#endif
 
 	data->msg.rpc_proc = &nfs3_procedures[NFS3PROC_MKNOD];
 	data->arg.mknod.fh = NFS_FH(dir);
@@ -760,12 +781,16 @@ nfs3_proc_mknod(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
 	if (d_alias)
 		dentry = d_alias;
 
+#ifdef CONFIG_NFS_V3_ACL
 	status = nfs3_proc_setacls(d_inode(dentry), acl, default_acl);
-
+#endif
 	dput(d_alias);
+
 out_release_acls:
+#ifdef CONFIG_NFS_V3_ACL
 	posix_acl_release(acl);
 	posix_acl_release(default_acl);
+#endif
 out:
 	nfs3_free_createdata(data);
 	dprintk("NFS reply mknod: %d\n", status);
-- 
2.27.0


^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH v3 5/7] fs: Add new helper prepare_mode
  2022-04-15 11:02 [PATCH v3 1/7] fs/inode: move sgid strip operation from inode_init_owner into inode_sgid_strip Yang Xu
                   ` (2 preceding siblings ...)
  2022-04-15 11:02 ` [PATCH v3 4/7] nfs3: Only do posix acl setup/release operation under CONFIG_NFS_V3_ACL Yang Xu
@ 2022-04-15 11:02 ` Yang Xu
  2022-04-15 14:19   ` Christian Brauner
  2022-04-15 11:02 ` [PATCH v3 6/7] fs: strip file's S_ISGID mode on vfs instead of on underlying filesystem Yang Xu
                   ` (2 subsequent siblings)
  6 siblings, 1 reply; 18+ messages in thread
From: Yang Xu @ 2022-04-15 11:02 UTC (permalink / raw)
  To: david, djwong, brauner
  Cc: linux-fsdevel, ceph-devel, linux-nfs, linux-xfs, viro, jlayton, Yang Xu

As Christian Brauner suggested, add a new helper calls inode_sgid_strip()
and does the umask stripping as well and then call it in all these places.

This api is introduced to support strip file's S_ISGID mode on vfs instead
of on underlying filesystem.

Suggested-by: Christian Brauner (Microsoft) <brauner@kernel.org>
Signed-off-by: Yang Xu <xuyang2018.jy@fujitsu.com>
---
 include/linux/fs.h | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 4a617aaab6f6..8c2f4cde974b 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -3458,6 +3458,15 @@ static inline bool dir_relax_shared(struct inode *inode)
 	return !IS_DEADDIR(inode);
 }
 
+static inline void prepare_mode(struct user_namespace *mnt_userns,
+				const struct inode *dir, umode_t *mode)
+{
+	inode_sgid_strip(mnt_userns, dir, mode);
+
+	if (!IS_POSIXACL(dir))
+		*mode &= ~current_umask();
+}
+
 extern bool path_noexec(const struct path *path);
 extern void inode_nohighmem(struct inode *inode);
 
-- 
2.27.0


^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH v3 6/7] fs: strip file's S_ISGID mode on vfs instead of on underlying filesystem
  2022-04-15 11:02 [PATCH v3 1/7] fs/inode: move sgid strip operation from inode_init_owner into inode_sgid_strip Yang Xu
                   ` (3 preceding siblings ...)
  2022-04-15 11:02 ` [PATCH v3 5/7] fs: Add new helper prepare_mode Yang Xu
@ 2022-04-15 11:02 ` Yang Xu
  2022-04-15 14:24   ` Christian Brauner
  2022-04-15 11:02 ` [PATCH v3 7/7] ceph: Remove S_ISGID clear code in ceph_finish_async_create Yang Xu
  2022-04-15 14:09 ` [PATCH v3 1/7] fs/inode: move sgid strip operation from inode_init_owner into inode_sgid_strip Christian Brauner
  6 siblings, 1 reply; 18+ messages in thread
From: Yang Xu @ 2022-04-15 11:02 UTC (permalink / raw)
  To: david, djwong, brauner
  Cc: linux-fsdevel, ceph-devel, linux-nfs, linux-xfs, viro, jlayton, Yang Xu

Currently, vfs only passes mode argument to filesystem, then use inode_init_owner()
to strip S_ISGID. Some filesystem(ie ext4/btrfs) will call inode_init_owner
firstly, then posxi acl setup, but xfs uses the contrary order. It will affect
S_ISGID clear especially we filter S_IXGRP by umask or acl.

Regardless of which filesystem is in use, failure to strip the SGID correctly is
considered a security failure that needs to be fixed. The current VFS infrastructure
requires the filesystem to do everything right and not step on any landmines to
strip the SGID bit, when in fact it can easily be done at the VFS and the filesystems
then don't even need to be aware that the SGID needs to be (or has been stripped) by
the operation the user asked to be done.

Vfs has all the info it needs - it doesn't need the filesystems to do everything
correctly with the mode and ensuring that they order things like posix acl setup
functions correctly with inode_init_owner() to strip the SGID bit.

Just strip the SGID bit at the VFS, and then the filesystems can't get it wrong.

Also, the inode_sgid_strip() api should be used before IS_POSIXACL() because
this api may change mode.

Only the following places use inode_init_owner
"hugetlbfs/inode.c:846:          inode_init_owner(&init_user_ns, inode, dir, mode);
 nilfs2/inode.c:354:     inode_init_owner(&init_user_ns, inode, dir, mode);
 zonefs/super.c:1289:    inode_init_owner(&init_user_ns, inode, parent, S_IFDIR | 0555);
 reiserfs/namei.c:619:   inode_init_owner(&init_user_ns, inode, dir, mode);
 jfs/jfs_inode.c:67:     inode_init_owner(&init_user_ns, inode, parent, mode);
 f2fs/namei.c:50:        inode_init_owner(mnt_userns, inode, dir, mode);
 ext2/ialloc.c:549:              inode_init_owner(&init_user_ns, inode, dir, mode);
 overlayfs/dir.c:643:    inode_init_owner(&init_user_ns, inode, dentry->d_parent->d_inode, mode);
 ufs/ialloc.c:292:       inode_init_owner(&init_user_ns, inode, dir, mode);
 ntfs3/inode.c:1283:     inode_init_owner(mnt_userns, inode, dir, mode);
 ramfs/inode.c:64:               inode_init_owner(&init_user_ns, inode, dir, mode);
 9p/vfs_inode.c:263:     inode_init_owner(&init_user_ns, inode, NULL, mode);
 btrfs/tests/btrfs-tests.c:65:   inode_init_owner(&init_user_ns, inode, NULL, S_IFREG);
 btrfs/inode.c:6215:     inode_init_owner(mnt_userns, inode, dir, mode);
 sysv/ialloc.c:166:      inode_init_owner(&init_user_ns, inode, dir, mode);
 omfs/inode.c:51:        inode_init_owner(&init_user_ns, inode, NULL, mode);
 ubifs/dir.c:97: inode_init_owner(&init_user_ns, inode, dir, mode);
 udf/ialloc.c:108:       inode_init_owner(&init_user_ns, inode, dir, mode);
 ext4/ialloc.c:979:              inode_init_owner(mnt_userns, inode, dir, mode);
 hfsplus/inode.c:393:    inode_init_owner(&init_user_ns, inode, dir, mode);
 xfs/xfs_inode.c:840:            inode_init_owner(mnt_userns, inode, dir, mode);
 ocfs2/dlmfs/dlmfs.c:331:                inode_init_owner(&init_user_ns, inode, NULL, mode);
 ocfs2/dlmfs/dlmfs.c:354:        inode_init_owner(&init_user_ns, inode, parent, mode);
 ocfs2/namei.c:200:      inode_init_owner(&init_user_ns, inode, dir, mode);
 minix/bitmap.c:255:     inode_init_owner(&init_user_ns, inode, dir, mode);
 bfs/dir.c:99:   inode_init_owner(&init_user_ns, inode, dir, mode);
"

They are used in filesystem init new inode function and these init inode functions are used
by following operations:
mkdir
symlink
mknod
create
tmpfile
rename

We don't care about mkdir because we don't strip SGID bit for directory except fs.xfs.irix_sgid_inherit.
But we even call it in do_mkdirat() since inode_sgid_strip() will skip directories anyway. This will
enforce the  same ordering for all relevant operations and it will make the code more uniform and
easier to understand by using prepare_mode().

symlink and rename only use valid mode that doesn't have SGID bit.

We have added inode_sgid_strip api for the remaining operations.

In addition to the above six operations, two filesystems has a little difference
1) btrfs has btrfs_create_subvol_root to create new inode but used non SGID bit mode and can ignore
2) ocfs2 reflink function should add inode_sgid_strip api manually because we don't add it in vfs

This patch also changed grpid behaviour for ext4/xfs because the mode passed to them may been
changed by inode_sgid_strip.

Also as Christian Brauner said"
The patch itself is useful as it would move a security sensitive operation that is currently burried in
individual filesystems into the vfs layer. But it has a decent regression  potential since it might strip
filesystems that have so far relied on getting the S_ISGID bit with a mode argument. So this needs a lot
of testing and long exposure in -next for at least one full kernel cycle."

Suggested-by: Dave Chinner <david@fromorbit.com>
Signed-off-by: Yang Xu <xuyang2018.jy@fujitsu.com>
---
v2->v3:
1.use new helper prepare_mode to do inode sgid strip and umask strip
2.also use prepare_mode() for mkdirat
 fs/inode.c       |  2 --
 fs/namei.c       | 14 +++++---------
 fs/ocfs2/namei.c |  1 +
 3 files changed, 6 insertions(+), 11 deletions(-)

diff --git a/fs/inode.c b/fs/inode.c
index 1b569ad882ce..a250aa01d3c3 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -2246,8 +2246,6 @@ void inode_init_owner(struct user_namespace *mnt_userns, struct inode *inode,
 		/* Directories are special, and always inherit S_ISGID */
 		if (S_ISDIR(mode))
 			mode |= S_ISGID;
-		else
-			inode_sgid_strip(mnt_userns, dir, &mode);
 	} else
 		inode_fsgid_set(inode, mnt_userns);
 	inode->i_mode = mode;
diff --git a/fs/namei.c b/fs/namei.c
index bbc7c950bbdc..0fadc884af7f 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -3287,8 +3287,7 @@ static struct dentry *lookup_open(struct nameidata *nd, struct file *file,
 	if (open_flag & O_CREAT) {
 		if (open_flag & O_EXCL)
 			open_flag &= ~O_TRUNC;
-		if (!IS_POSIXACL(dir->d_inode))
-			mode &= ~current_umask();
+		prepare_mode(mnt_userns, dir->d_inode, &mode);
 		if (likely(got_write))
 			create_error = may_o_create(mnt_userns, &nd->path,
 						    dentry, mode);
@@ -3521,8 +3520,7 @@ struct dentry *vfs_tmpfile(struct user_namespace *mnt_userns,
 	child = d_alloc(dentry, &slash_name);
 	if (unlikely(!child))
 		goto out_err;
-	if (!IS_POSIXACL(dir))
-		mode &= ~current_umask();
+	prepare_mode(mnt_userns, dir, &mode);
 	error = dir->i_op->tmpfile(mnt_userns, dir, child, mode);
 	if (error)
 		goto out_err;
@@ -3852,13 +3850,12 @@ static int do_mknodat(int dfd, struct filename *name, umode_t mode,
 	if (IS_ERR(dentry))
 		goto out1;
 
-	if (!IS_POSIXACL(path.dentry->d_inode))
-		mode &= ~current_umask();
+	mnt_userns = mnt_user_ns(path.mnt);
+	prepare_mode(mnt_userns, path.dentry->d_inode, &mode);
 	error = security_path_mknod(&path, dentry, mode, dev);
 	if (error)
 		goto out2;
 
-	mnt_userns = mnt_user_ns(path.mnt);
 	switch (mode & S_IFMT) {
 		case 0: case S_IFREG:
 			error = vfs_create(mnt_userns, path.dentry->d_inode,
@@ -3952,12 +3949,11 @@ int do_mkdirat(int dfd, struct filename *name, umode_t mode)
 	if (IS_ERR(dentry))
 		goto out_putname;
 
-	if (!IS_POSIXACL(path.dentry->d_inode))
-		mode &= ~current_umask();
 	error = security_path_mkdir(&path, dentry, mode);
 	if (!error) {
 		struct user_namespace *mnt_userns;
 		mnt_userns = mnt_user_ns(path.mnt);
+		prepare_mode(mnt_userns, path.dentry->d_inode, &mode);
 		error = vfs_mkdir(mnt_userns, path.dentry->d_inode, dentry,
 				  mode);
 	}
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index c75fd54b9185..c81b8e0847aa 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -198,6 +198,7 @@ static struct inode *ocfs2_get_init_inode(struct inode *dir, umode_t mode)
 	if (S_ISDIR(mode))
 		set_nlink(inode, 2);
 	inode_init_owner(&init_user_ns, inode, dir, mode);
+	inode_sgid_strip(&init_user_ns, dir, &mode);
 	status = dquot_initialize(inode);
 	if (status)
 		return ERR_PTR(status);
-- 
2.27.0


^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH v3 7/7] ceph: Remove S_ISGID clear code in ceph_finish_async_create
  2022-04-15 11:02 [PATCH v3 1/7] fs/inode: move sgid strip operation from inode_init_owner into inode_sgid_strip Yang Xu
                   ` (4 preceding siblings ...)
  2022-04-15 11:02 ` [PATCH v3 6/7] fs: strip file's S_ISGID mode on vfs instead of on underlying filesystem Yang Xu
@ 2022-04-15 11:02 ` Yang Xu
  2022-04-18  3:04   ` Xiubo Li
  2022-04-15 14:09 ` [PATCH v3 1/7] fs/inode: move sgid strip operation from inode_init_owner into inode_sgid_strip Christian Brauner
  6 siblings, 1 reply; 18+ messages in thread
From: Yang Xu @ 2022-04-15 11:02 UTC (permalink / raw)
  To: david, djwong, brauner
  Cc: linux-fsdevel, ceph-devel, linux-nfs, linux-xfs, viro, jlayton, Yang Xu

Since vfs has stripped S_ISGID, we don't need this code any more.

Signed-off-by: Yang Xu <xuyang2018.jy@fujitsu.com>
---
 fs/ceph/file.c | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 6c9e837aa1d3..8e3b99853333 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -651,10 +651,6 @@ static int ceph_finish_async_create(struct inode *dir, struct dentry *dentry,
 		/* Directories always inherit the setgid bit. */
 		if (S_ISDIR(mode))
 			mode |= S_ISGID;
-		else if ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP) &&
-			 !in_group_p(dir->i_gid) &&
-			 !capable_wrt_inode_uidgid(&init_user_ns, dir, CAP_FSETID))
-			mode &= ~S_ISGID;
 	} else {
 		in.gid = cpu_to_le32(from_kgid(&init_user_ns, current_fsgid()));
 	}
-- 
2.27.0


^ permalink raw reply related	[flat|nested] 18+ messages in thread

* Re: [PATCH v3 1/7] fs/inode: move sgid strip operation from inode_init_owner into inode_sgid_strip
  2022-04-15 11:02 [PATCH v3 1/7] fs/inode: move sgid strip operation from inode_init_owner into inode_sgid_strip Yang Xu
                   ` (5 preceding siblings ...)
  2022-04-15 11:02 ` [PATCH v3 7/7] ceph: Remove S_ISGID clear code in ceph_finish_async_create Yang Xu
@ 2022-04-15 14:09 ` Christian Brauner
  2022-04-18  2:08   ` xuyang2018.jy
  2022-04-18  3:08   ` Matthew Wilcox
  6 siblings, 2 replies; 18+ messages in thread
From: Christian Brauner @ 2022-04-15 14:09 UTC (permalink / raw)
  To: Yang Xu
  Cc: david, djwong, linux-fsdevel, ceph-devel, linux-nfs, linux-xfs,
	viro, jlayton

On Fri, Apr 15, 2022 at 07:02:17PM +0800, Yang Xu wrote:
> This has no functional change. Just create and export inode_sgid_strip api for
> the subsequent patch. This function is used to strip S_ISGID mode when init
> a new inode.
> 
> Signed-off-by: Yang Xu <xuyang2018.jy@fujitsu.com>
> ---
> v2->v3:
> 1.Use const struct inode * instead of struct inode *
> 2.replace sgid strip with inode_sgid_strip in a single patch
>  fs/inode.c         | 24 ++++++++++++++++++++----
>  include/linux/fs.h |  3 ++-
>  2 files changed, 22 insertions(+), 5 deletions(-)
> 
> diff --git a/fs/inode.c b/fs/inode.c
> index 9d9b422504d1..1b569ad882ce 100644
> --- a/fs/inode.c
> +++ b/fs/inode.c
> @@ -2246,10 +2246,8 @@ void inode_init_owner(struct user_namespace *mnt_userns, struct inode *inode,
>  		/* Directories are special, and always inherit S_ISGID */
>  		if (S_ISDIR(mode))
>  			mode |= S_ISGID;
> -		else if ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP) &&
> -			 !in_group_p(i_gid_into_mnt(mnt_userns, dir)) &&
> -			 !capable_wrt_inode_uidgid(mnt_userns, dir, CAP_FSETID))
> -			mode &= ~S_ISGID;
> +		else
> +			inode_sgid_strip(mnt_userns, dir, &mode);
>  	} else
>  		inode_fsgid_set(inode, mnt_userns);
>  	inode->i_mode = mode;
> @@ -2405,3 +2403,21 @@ struct timespec64 current_time(struct inode *inode)
>  	return timestamp_truncate(now, inode);
>  }
>  EXPORT_SYMBOL(current_time);
> +
> +void inode_sgid_strip(struct user_namespace *mnt_userns,
> +		      const struct inode *dir, umode_t *mode)
> +{
> +	if (!dir || !(dir->i_mode & S_ISGID))
> +		return;
> +	if ((*mode & (S_ISGID | S_IXGRP)) != (S_ISGID | S_IXGRP))
> +		return;
> +	if (S_ISDIR(*mode))
> +		return;

I'd place that check first as this whole function is really only
relevant for non-directories.

Otherwise I can live with *mode being a pointer although I still find
this unpleasant API wise but the bikeshed does it's job without having
my color. :)

I'd like to do some good testing on this.

Acked-by: Christian Brauner (Microsoft) <brauner@kernel.org>

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v3 2/7] fs/namei.c: Add missing umask strip in vfs_tmpfile
  2022-04-15 11:02 ` [PATCH v3 2/7] fs/namei.c: Add missing umask strip in vfs_tmpfile Yang Xu
@ 2022-04-15 14:17   ` Christian Brauner
  2022-04-18  2:55     ` xuyang2018.jy
  0 siblings, 1 reply; 18+ messages in thread
From: Christian Brauner @ 2022-04-15 14:17 UTC (permalink / raw)
  To: Yang Xu
  Cc: david, djwong, linux-fsdevel, ceph-devel, linux-nfs, linux-xfs,
	viro, jlayton

On Fri, Apr 15, 2022 at 07:02:18PM +0800, Yang Xu wrote:
> If underflying filesystem doesn't enable own CONFIG_FS_POSIX_ACL, then
> posix_acl_create can't be called. So we will miss umask strip, ie
> use ext4 with noacl or disblae CONFIG_EXT4_FS_POSIX_ACL.

Hm, maybe:

"All creation paths except for O_TMPFILE handle umask in the vfs
directly if the filesystem doesn't support or enable POSIX ACLs. If the
filesystem does then umask handling is deferred until
posix_acl_create().
Because, O_TMPFILE misses umask handling in the vfs it will not honor
umask settings. Fix this by adding the missing umask handling."

> 
> Reported-by: Christian Brauner (Microsoft) <brauner@kernel.org>
> Signed-off-by: Yang Xu <xuyang2018.jy@fujitsu.com>
> ---

Acked-by: Christian Brauner (Microsoft) <brauner@kernel.org>

>  fs/namei.c | 2 ++
>  1 file changed, 2 insertions(+)
> 
> diff --git a/fs/namei.c b/fs/namei.c
> index 3f1829b3ab5b..bbc7c950bbdc 100644
> --- a/fs/namei.c
> +++ b/fs/namei.c
> @@ -3521,6 +3521,8 @@ struct dentry *vfs_tmpfile(struct user_namespace *mnt_userns,
>  	child = d_alloc(dentry, &slash_name);
>  	if (unlikely(!child))
>  		goto out_err;
> +	if (!IS_POSIXACL(dir))
> +		mode &= ~current_umask();
>  	error = dir->i_op->tmpfile(mnt_userns, dir, child, mode);
>  	if (error)
>  		goto out_err;
> -- 
> 2.27.0
> 

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v3 5/7] fs: Add new helper prepare_mode
  2022-04-15 11:02 ` [PATCH v3 5/7] fs: Add new helper prepare_mode Yang Xu
@ 2022-04-15 14:19   ` Christian Brauner
  0 siblings, 0 replies; 18+ messages in thread
From: Christian Brauner @ 2022-04-15 14:19 UTC (permalink / raw)
  To: Yang Xu
  Cc: david, djwong, linux-fsdevel, ceph-devel, linux-nfs, linux-xfs,
	viro, jlayton

On Fri, Apr 15, 2022 at 07:02:21PM +0800, Yang Xu wrote:
> As Christian Brauner suggested, add a new helper calls inode_sgid_strip()
> and does the umask stripping as well and then call it in all these places.
> 
> This api is introduced to support strip file's S_ISGID mode on vfs instead
> of on underlying filesystem.
> 
> Suggested-by: Christian Brauner (Microsoft) <brauner@kernel.org>
> Signed-off-by: Yang Xu <xuyang2018.jy@fujitsu.com>
> ---

I don't think this needs to be a separate patch especially since the
helper is not in any header file. So just squah patch 5 and 6 imho.

>  include/linux/fs.h | 9 +++++++++
>  1 file changed, 9 insertions(+)
> 
> diff --git a/include/linux/fs.h b/include/linux/fs.h
> index 4a617aaab6f6..8c2f4cde974b 100644
> --- a/include/linux/fs.h
> +++ b/include/linux/fs.h
> @@ -3458,6 +3458,15 @@ static inline bool dir_relax_shared(struct inode *inode)
>  	return !IS_DEADDIR(inode);
>  }
>  
> +static inline void prepare_mode(struct user_namespace *mnt_userns,
> +				const struct inode *dir, umode_t *mode)
> +{
> +	inode_sgid_strip(mnt_userns, dir, mode);
> +
> +	if (!IS_POSIXACL(dir))
> +		*mode &= ~current_umask();
> +}
> +
>  extern bool path_noexec(const struct path *path);
>  extern void inode_nohighmem(struct inode *inode);
>  
> -- 
> 2.27.0
> 

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v3 6/7] fs: strip file's S_ISGID mode on vfs instead of on underlying filesystem
  2022-04-15 11:02 ` [PATCH v3 6/7] fs: strip file's S_ISGID mode on vfs instead of on underlying filesystem Yang Xu
@ 2022-04-15 14:24   ` Christian Brauner
  2022-04-18  3:05     ` xuyang2018.jy
  0 siblings, 1 reply; 18+ messages in thread
From: Christian Brauner @ 2022-04-15 14:24 UTC (permalink / raw)
  To: Yang Xu
  Cc: david, djwong, linux-fsdevel, ceph-devel, linux-nfs, linux-xfs,
	viro, jlayton

On Fri, Apr 15, 2022 at 07:02:22PM +0800, Yang Xu wrote:
> Currently, vfs only passes mode argument to filesystem, then use inode_init_owner()
> to strip S_ISGID. Some filesystem(ie ext4/btrfs) will call inode_init_owner
> firstly, then posxi acl setup, but xfs uses the contrary order. It will affect
> S_ISGID clear especially we filter S_IXGRP by umask or acl.
> 
> Regardless of which filesystem is in use, failure to strip the SGID correctly is
> considered a security failure that needs to be fixed. The current VFS infrastructure
> requires the filesystem to do everything right and not step on any landmines to
> strip the SGID bit, when in fact it can easily be done at the VFS and the filesystems
> then don't even need to be aware that the SGID needs to be (or has been stripped) by
> the operation the user asked to be done.
> 
> Vfs has all the info it needs - it doesn't need the filesystems to do everything
> correctly with the mode and ensuring that they order things like posix acl setup
> functions correctly with inode_init_owner() to strip the SGID bit.
> 
> Just strip the SGID bit at the VFS, and then the filesystems can't get it wrong.
> 
> Also, the inode_sgid_strip() api should be used before IS_POSIXACL() because
> this api may change mode.
> 
> Only the following places use inode_init_owner
> "hugetlbfs/inode.c:846:          inode_init_owner(&init_user_ns, inode, dir, mode);
>  nilfs2/inode.c:354:     inode_init_owner(&init_user_ns, inode, dir, mode);
>  zonefs/super.c:1289:    inode_init_owner(&init_user_ns, inode, parent, S_IFDIR | 0555);
>  reiserfs/namei.c:619:   inode_init_owner(&init_user_ns, inode, dir, mode);
>  jfs/jfs_inode.c:67:     inode_init_owner(&init_user_ns, inode, parent, mode);
>  f2fs/namei.c:50:        inode_init_owner(mnt_userns, inode, dir, mode);
>  ext2/ialloc.c:549:              inode_init_owner(&init_user_ns, inode, dir, mode);
>  overlayfs/dir.c:643:    inode_init_owner(&init_user_ns, inode, dentry->d_parent->d_inode, mode);
>  ufs/ialloc.c:292:       inode_init_owner(&init_user_ns, inode, dir, mode);
>  ntfs3/inode.c:1283:     inode_init_owner(mnt_userns, inode, dir, mode);
>  ramfs/inode.c:64:               inode_init_owner(&init_user_ns, inode, dir, mode);
>  9p/vfs_inode.c:263:     inode_init_owner(&init_user_ns, inode, NULL, mode);
>  btrfs/tests/btrfs-tests.c:65:   inode_init_owner(&init_user_ns, inode, NULL, S_IFREG);
>  btrfs/inode.c:6215:     inode_init_owner(mnt_userns, inode, dir, mode);
>  sysv/ialloc.c:166:      inode_init_owner(&init_user_ns, inode, dir, mode);
>  omfs/inode.c:51:        inode_init_owner(&init_user_ns, inode, NULL, mode);
>  ubifs/dir.c:97: inode_init_owner(&init_user_ns, inode, dir, mode);
>  udf/ialloc.c:108:       inode_init_owner(&init_user_ns, inode, dir, mode);
>  ext4/ialloc.c:979:              inode_init_owner(mnt_userns, inode, dir, mode);
>  hfsplus/inode.c:393:    inode_init_owner(&init_user_ns, inode, dir, mode);
>  xfs/xfs_inode.c:840:            inode_init_owner(mnt_userns, inode, dir, mode);
>  ocfs2/dlmfs/dlmfs.c:331:                inode_init_owner(&init_user_ns, inode, NULL, mode);
>  ocfs2/dlmfs/dlmfs.c:354:        inode_init_owner(&init_user_ns, inode, parent, mode);
>  ocfs2/namei.c:200:      inode_init_owner(&init_user_ns, inode, dir, mode);
>  minix/bitmap.c:255:     inode_init_owner(&init_user_ns, inode, dir, mode);
>  bfs/dir.c:99:   inode_init_owner(&init_user_ns, inode, dir, mode);
> "
> 
> They are used in filesystem init new inode function and these init inode functions are used
> by following operations:
> mkdir
> symlink
> mknod
> create
> tmpfile
> rename
> 
> We don't care about mkdir because we don't strip SGID bit for directory except fs.xfs.irix_sgid_inherit.
> But we even call it in do_mkdirat() since inode_sgid_strip() will skip directories anyway. This will
> enforce the  same ordering for all relevant operations and it will make the code more uniform and
> easier to understand by using prepare_mode().
> 
> symlink and rename only use valid mode that doesn't have SGID bit.
> 
> We have added inode_sgid_strip api for the remaining operations.
> 
> In addition to the above six operations, two filesystems has a little difference
> 1) btrfs has btrfs_create_subvol_root to create new inode but used non SGID bit mode and can ignore
> 2) ocfs2 reflink function should add inode_sgid_strip api manually because we don't add it in vfs
> 
> This patch also changed grpid behaviour for ext4/xfs because the mode passed to them may been
> changed by inode_sgid_strip.
> 
> Also as Christian Brauner said"
> The patch itself is useful as it would move a security sensitive operation that is currently burried in
> individual filesystems into the vfs layer. But it has a decent regression  potential since it might strip
> filesystems that have so far relied on getting the S_ISGID bit with a mode argument. So this needs a lot
> of testing and long exposure in -next for at least one full kernel cycle."
> 
> Suggested-by: Dave Chinner <david@fromorbit.com>
> Signed-off-by: Yang Xu <xuyang2018.jy@fujitsu.com>
> ---
> v2->v3:
> 1.use new helper prepare_mode to do inode sgid strip and umask strip
> 2.also use prepare_mode() for mkdirat
>  fs/inode.c       |  2 --
>  fs/namei.c       | 14 +++++---------
>  fs/ocfs2/namei.c |  1 +
>  3 files changed, 6 insertions(+), 11 deletions(-)
> 
> diff --git a/fs/inode.c b/fs/inode.c
> index 1b569ad882ce..a250aa01d3c3 100644
> --- a/fs/inode.c
> +++ b/fs/inode.c
> @@ -2246,8 +2246,6 @@ void inode_init_owner(struct user_namespace *mnt_userns, struct inode *inode,
>  		/* Directories are special, and always inherit S_ISGID */
>  		if (S_ISDIR(mode))
>  			mode |= S_ISGID;
> -		else
> -			inode_sgid_strip(mnt_userns, dir, &mode);
>  	} else
>  		inode_fsgid_set(inode, mnt_userns);
>  	inode->i_mode = mode;
> diff --git a/fs/namei.c b/fs/namei.c
> index bbc7c950bbdc..0fadc884af7f 100644
> --- a/fs/namei.c
> +++ b/fs/namei.c
> @@ -3287,8 +3287,7 @@ static struct dentry *lookup_open(struct nameidata *nd, struct file *file,
>  	if (open_flag & O_CREAT) {
>  		if (open_flag & O_EXCL)
>  			open_flag &= ~O_TRUNC;
> -		if (!IS_POSIXACL(dir->d_inode))
> -			mode &= ~current_umask();
> +		prepare_mode(mnt_userns, dir->d_inode, &mode);
>  		if (likely(got_write))
>  			create_error = may_o_create(mnt_userns, &nd->path,
>  						    dentry, mode);
> @@ -3521,8 +3520,7 @@ struct dentry *vfs_tmpfile(struct user_namespace *mnt_userns,
>  	child = d_alloc(dentry, &slash_name);
>  	if (unlikely(!child))
>  		goto out_err;
> -	if (!IS_POSIXACL(dir))
> -		mode &= ~current_umask();
> +	prepare_mode(mnt_userns, dir, &mode);
>  	error = dir->i_op->tmpfile(mnt_userns, dir, child, mode);
>  	if (error)
>  		goto out_err;
> @@ -3852,13 +3850,12 @@ static int do_mknodat(int dfd, struct filename *name, umode_t mode,
>  	if (IS_ERR(dentry))
>  		goto out1;
>  
> -	if (!IS_POSIXACL(path.dentry->d_inode))
> -		mode &= ~current_umask();
> +	mnt_userns = mnt_user_ns(path.mnt);
> +	prepare_mode(mnt_userns, path.dentry->d_inode, &mode);
>  	error = security_path_mknod(&path, dentry, mode, dev);
>  	if (error)
>  		goto out2;
>  
> -	mnt_userns = mnt_user_ns(path.mnt);
>  	switch (mode & S_IFMT) {
>  		case 0: case S_IFREG:
>  			error = vfs_create(mnt_userns, path.dentry->d_inode,
> @@ -3952,12 +3949,11 @@ int do_mkdirat(int dfd, struct filename *name, umode_t mode)
>  	if (IS_ERR(dentry))
>  		goto out_putname;
>  
> -	if (!IS_POSIXACL(path.dentry->d_inode))
> -		mode &= ~current_umask();
>  	error = security_path_mkdir(&path, dentry, mode);

Your changes causes the security and the filesystem layer to potentially
see different values for mode.

You need to change the patch so prepare_mode() is called before the mode
is passed to the security layer. This will ensure that both the security
layer and the vfs see the same mode.

>  	if (!error) {
>  		struct user_namespace *mnt_userns;
>  		mnt_userns = mnt_user_ns(path.mnt);
> +		prepare_mode(mnt_userns, path.dentry->d_inode, &mode);
>  		error = vfs_mkdir(mnt_userns, path.dentry->d_inode, dentry,
>  				  mode);
>  	}
> diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
> index c75fd54b9185..c81b8e0847aa 100644
> --- a/fs/ocfs2/namei.c
> +++ b/fs/ocfs2/namei.c
> @@ -198,6 +198,7 @@ static struct inode *ocfs2_get_init_inode(struct inode *dir, umode_t mode)
>  	if (S_ISDIR(mode))
>  		set_nlink(inode, 2);
>  	inode_init_owner(&init_user_ns, inode, dir, mode);
> +	inode_sgid_strip(&init_user_ns, dir, &mode);
>  	status = dquot_initialize(inode);
>  	if (status)
>  		return ERR_PTR(status);
> -- 
> 2.27.0
> 

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v3 1/7] fs/inode: move sgid strip operation from inode_init_owner into inode_sgid_strip
  2022-04-15 14:09 ` [PATCH v3 1/7] fs/inode: move sgid strip operation from inode_init_owner into inode_sgid_strip Christian Brauner
@ 2022-04-18  2:08   ` xuyang2018.jy
  2022-04-18  3:08   ` Matthew Wilcox
  1 sibling, 0 replies; 18+ messages in thread
From: xuyang2018.jy @ 2022-04-18  2:08 UTC (permalink / raw)
  To: Christian Brauner
  Cc: david, djwong, linux-fsdevel, ceph-devel, linux-nfs, linux-xfs,
	viro, jlayton

on 2022/4/15 22:09, Christian Brauner wrote:
> On Fri, Apr 15, 2022 at 07:02:17PM +0800, Yang Xu wrote:
>> This has no functional change. Just create and export inode_sgid_strip api for
>> the subsequent patch. This function is used to strip S_ISGID mode when init
>> a new inode.
>>
>> Signed-off-by: Yang Xu<xuyang2018.jy@fujitsu.com>
>> ---
>> v2->v3:
>> 1.Use const struct inode * instead of struct inode *
>> 2.replace sgid strip with inode_sgid_strip in a single patch
>>   fs/inode.c         | 24 ++++++++++++++++++++----
>>   include/linux/fs.h |  3 ++-
>>   2 files changed, 22 insertions(+), 5 deletions(-)
>>
>> diff --git a/fs/inode.c b/fs/inode.c
>> index 9d9b422504d1..1b569ad882ce 100644
>> --- a/fs/inode.c
>> +++ b/fs/inode.c
>> @@ -2246,10 +2246,8 @@ void inode_init_owner(struct user_namespace *mnt_userns, struct inode *inode,
>>   		/* Directories are special, and always inherit S_ISGID */
>>   		if (S_ISDIR(mode))
>>   			mode |= S_ISGID;
>> -		else if ((mode&  (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP)&&
>> -			 !in_group_p(i_gid_into_mnt(mnt_userns, dir))&&
>> -			 !capable_wrt_inode_uidgid(mnt_userns, dir, CAP_FSETID))
>> -			mode&= ~S_ISGID;
>> +		else
>> +			inode_sgid_strip(mnt_userns, dir,&mode);
>>   	} else
>>   		inode_fsgid_set(inode, mnt_userns);
>>   	inode->i_mode = mode;
>> @@ -2405,3 +2403,21 @@ struct timespec64 current_time(struct inode *inode)
>>   	return timestamp_truncate(now, inode);
>>   }
>>   EXPORT_SYMBOL(current_time);
>> +
>> +void inode_sgid_strip(struct user_namespace *mnt_userns,
>> +		      const struct inode *dir, umode_t *mode)
>> +{
>> +	if (!dir || !(dir->i_mode&  S_ISGID))
>> +		return;
>> +	if ((*mode&  (S_ISGID | S_IXGRP)) != (S_ISGID | S_IXGRP))
>> +		return;
>> +	if (S_ISDIR(*mode))
>> +		return;
>
> I'd place that check first as this whole function is really only
> relevant for non-directories.
Sound reasonable.

Best Regards
Yang Xu
>
> Otherwise I can live with *mode being a pointer although I still find
> this unpleasant API wise but the bikeshed does it's job without having
> my color. :)
>
> I'd like to do some good testing on this.
>
> Acked-by: Christian Brauner (Microsoft)<brauner@kernel.org>

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v3 2/7] fs/namei.c: Add missing umask strip in vfs_tmpfile
  2022-04-15 14:17   ` Christian Brauner
@ 2022-04-18  2:55     ` xuyang2018.jy
  0 siblings, 0 replies; 18+ messages in thread
From: xuyang2018.jy @ 2022-04-18  2:55 UTC (permalink / raw)
  To: Christian Brauner
  Cc: david, djwong, linux-fsdevel, ceph-devel, linux-nfs, linux-xfs,
	viro, jlayton

on 2022/4/15 22:17, Christian Brauner wrote:
> On Fri, Apr 15, 2022 at 07:02:18PM +0800, Yang Xu wrote:
>> If underflying filesystem doesn't enable own CONFIG_FS_POSIX_ACL, then
>> posix_acl_create can't be called. So we will miss umask strip, ie
>> use ext4 with noacl or disblae CONFIG_EXT4_FS_POSIX_ACL.
>
> Hm, maybe:
>
> "All creation paths except for O_TMPFILE handle umask in the vfs
> directly if the filesystem doesn't support or enable POSIX ACLs. If the
> filesystem does then umask handling is deferred until
> posix_acl_create().
> Because, O_TMPFILE misses umask handling in the vfs it will not honor
> umask settings. Fix this by adding the missing umask handling."
OK, will do it on v4.

Best Regards
Yang Xu
>
>>
>> Reported-by: Christian Brauner (Microsoft)<brauner@kernel.org>
>> Signed-off-by: Yang Xu<xuyang2018.jy@fujitsu.com>
>> ---
>
> Acked-by: Christian Brauner (Microsoft)<brauner@kernel.org>
>
>>   fs/namei.c | 2 ++
>>   1 file changed, 2 insertions(+)
>>
>> diff --git a/fs/namei.c b/fs/namei.c
>> index 3f1829b3ab5b..bbc7c950bbdc 100644
>> --- a/fs/namei.c
>> +++ b/fs/namei.c
>> @@ -3521,6 +3521,8 @@ struct dentry *vfs_tmpfile(struct user_namespace *mnt_userns,
>>   	child = d_alloc(dentry,&slash_name);
>>   	if (unlikely(!child))
>>   		goto out_err;
>> +	if (!IS_POSIXACL(dir))
>> +		mode&= ~current_umask();
>>   	error = dir->i_op->tmpfile(mnt_userns, dir, child, mode);
>>   	if (error)
>>   		goto out_err;
>> --
>> 2.27.0
>>

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v3 7/7] ceph: Remove S_ISGID clear code in ceph_finish_async_create
  2022-04-15 11:02 ` [PATCH v3 7/7] ceph: Remove S_ISGID clear code in ceph_finish_async_create Yang Xu
@ 2022-04-18  3:04   ` Xiubo Li
  2022-04-18  3:12     ` xuyang2018.jy
  0 siblings, 1 reply; 18+ messages in thread
From: Xiubo Li @ 2022-04-18  3:04 UTC (permalink / raw)
  To: Yang Xu, david, djwong, brauner
  Cc: linux-fsdevel, ceph-devel, linux-nfs, linux-xfs, viro, jlayton


On 4/15/22 7:02 PM, Yang Xu wrote:
> Since vfs has stripped S_ISGID, we don't need this code any more.
>
> Signed-off-by: Yang Xu <xuyang2018.jy@fujitsu.com>
> ---
>   fs/ceph/file.c | 4 ----
>   1 file changed, 4 deletions(-)
>
> diff --git a/fs/ceph/file.c b/fs/ceph/file.c
> index 6c9e837aa1d3..8e3b99853333 100644
> --- a/fs/ceph/file.c
> +++ b/fs/ceph/file.c
> @@ -651,10 +651,6 @@ static int ceph_finish_async_create(struct inode *dir, struct dentry *dentry,
>   		/* Directories always inherit the setgid bit. */
>   		if (S_ISDIR(mode))
>   			mode |= S_ISGID;
> -		else if ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP) &&
> -			 !in_group_p(dir->i_gid) &&
> -			 !capable_wrt_inode_uidgid(&init_user_ns, dir, CAP_FSETID))
> -			mode &= ~S_ISGID;

Could you point me where has done this for ceph ?

-- Xiubo


>   	} else {
>   		in.gid = cpu_to_le32(from_kgid(&init_user_ns, current_fsgid()));
>   	}


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v3 6/7] fs: strip file's S_ISGID mode on vfs instead of on underlying filesystem
  2022-04-15 14:24   ` Christian Brauner
@ 2022-04-18  3:05     ` xuyang2018.jy
  0 siblings, 0 replies; 18+ messages in thread
From: xuyang2018.jy @ 2022-04-18  3:05 UTC (permalink / raw)
  To: Christian Brauner
  Cc: david, djwong, linux-fsdevel, ceph-devel, linux-nfs, linux-xfs,
	viro, jlayton

on 2022/4/15 22:24, Christian Brauner wrote:
> On Fri, Apr 15, 2022 at 07:02:22PM +0800, Yang Xu wrote:
>> Currently, vfs only passes mode argument to filesystem, then use inode_init_owner()
>> to strip S_ISGID. Some filesystem(ie ext4/btrfs) will call inode_init_owner
>> firstly, then posxi acl setup, but xfs uses the contrary order. It will affect
>> S_ISGID clear especially we filter S_IXGRP by umask or acl.
>>
>> Regardless of which filesystem is in use, failure to strip the SGID correctly is
>> considered a security failure that needs to be fixed. The current VFS infrastructure
>> requires the filesystem to do everything right and not step on any landmines to
>> strip the SGID bit, when in fact it can easily be done at the VFS and the filesystems
>> then don't even need to be aware that the SGID needs to be (or has been stripped) by
>> the operation the user asked to be done.
>>
>> Vfs has all the info it needs - it doesn't need the filesystems to do everything
>> correctly with the mode and ensuring that they order things like posix acl setup
>> functions correctly with inode_init_owner() to strip the SGID bit.
>>
>> Just strip the SGID bit at the VFS, and then the filesystems can't get it wrong.
>>
>> Also, the inode_sgid_strip() api should be used before IS_POSIXACL() because
>> this api may change mode.
>>
>> Only the following places use inode_init_owner
>> "hugetlbfs/inode.c:846:          inode_init_owner(&init_user_ns, inode, dir, mode);
>>   nilfs2/inode.c:354:     inode_init_owner(&init_user_ns, inode, dir, mode);
>>   zonefs/super.c:1289:    inode_init_owner(&init_user_ns, inode, parent, S_IFDIR | 0555);
>>   reiserfs/namei.c:619:   inode_init_owner(&init_user_ns, inode, dir, mode);
>>   jfs/jfs_inode.c:67:     inode_init_owner(&init_user_ns, inode, parent, mode);
>>   f2fs/namei.c:50:        inode_init_owner(mnt_userns, inode, dir, mode);
>>   ext2/ialloc.c:549:              inode_init_owner(&init_user_ns, inode, dir, mode);
>>   overlayfs/dir.c:643:    inode_init_owner(&init_user_ns, inode, dentry->d_parent->d_inode, mode);
>>   ufs/ialloc.c:292:       inode_init_owner(&init_user_ns, inode, dir, mode);
>>   ntfs3/inode.c:1283:     inode_init_owner(mnt_userns, inode, dir, mode);
>>   ramfs/inode.c:64:               inode_init_owner(&init_user_ns, inode, dir, mode);
>>   9p/vfs_inode.c:263:     inode_init_owner(&init_user_ns, inode, NULL, mode);
>>   btrfs/tests/btrfs-tests.c:65:   inode_init_owner(&init_user_ns, inode, NULL, S_IFREG);
>>   btrfs/inode.c:6215:     inode_init_owner(mnt_userns, inode, dir, mode);
>>   sysv/ialloc.c:166:      inode_init_owner(&init_user_ns, inode, dir, mode);
>>   omfs/inode.c:51:        inode_init_owner(&init_user_ns, inode, NULL, mode);
>>   ubifs/dir.c:97: inode_init_owner(&init_user_ns, inode, dir, mode);
>>   udf/ialloc.c:108:       inode_init_owner(&init_user_ns, inode, dir, mode);
>>   ext4/ialloc.c:979:              inode_init_owner(mnt_userns, inode, dir, mode);
>>   hfsplus/inode.c:393:    inode_init_owner(&init_user_ns, inode, dir, mode);
>>   xfs/xfs_inode.c:840:            inode_init_owner(mnt_userns, inode, dir, mode);
>>   ocfs2/dlmfs/dlmfs.c:331:                inode_init_owner(&init_user_ns, inode, NULL, mode);
>>   ocfs2/dlmfs/dlmfs.c:354:        inode_init_owner(&init_user_ns, inode, parent, mode);
>>   ocfs2/namei.c:200:      inode_init_owner(&init_user_ns, inode, dir, mode);
>>   minix/bitmap.c:255:     inode_init_owner(&init_user_ns, inode, dir, mode);
>>   bfs/dir.c:99:   inode_init_owner(&init_user_ns, inode, dir, mode);
>> "
>>
>> They are used in filesystem init new inode function and these init inode functions are used
>> by following operations:
>> mkdir
>> symlink
>> mknod
>> create
>> tmpfile
>> rename
>>
>> We don't care about mkdir because we don't strip SGID bit for directory except fs.xfs.irix_sgid_inherit.
>> But we even call it in do_mkdirat() since inode_sgid_strip() will skip directories anyway. This will
>> enforce the  same ordering for all relevant operations and it will make the code more uniform and
>> easier to understand by using prepare_mode().
>>
>> symlink and rename only use valid mode that doesn't have SGID bit.
>>
>> We have added inode_sgid_strip api for the remaining operations.
>>
>> In addition to the above six operations, two filesystems has a little difference
>> 1) btrfs has btrfs_create_subvol_root to create new inode but used non SGID bit mode and can ignore
>> 2) ocfs2 reflink function should add inode_sgid_strip api manually because we don't add it in vfs
>>
>> This patch also changed grpid behaviour for ext4/xfs because the mode passed to them may been
>> changed by inode_sgid_strip.
>>
>> Also as Christian Brauner said"
>> The patch itself is useful as it would move a security sensitive operation that is currently burried in
>> individual filesystems into the vfs layer. But it has a decent regression  potential since it might strip
>> filesystems that have so far relied on getting the S_ISGID bit with a mode argument. So this needs a lot
>> of testing and long exposure in -next for at least one full kernel cycle."
>>
>> Suggested-by: Dave Chinner<david@fromorbit.com>
>> Signed-off-by: Yang Xu<xuyang2018.jy@fujitsu.com>
>> ---
>> v2->v3:
>> 1.use new helper prepare_mode to do inode sgid strip and umask strip
>> 2.also use prepare_mode() for mkdirat
>>   fs/inode.c       |  2 --
>>   fs/namei.c       | 14 +++++---------
>>   fs/ocfs2/namei.c |  1 +
>>   3 files changed, 6 insertions(+), 11 deletions(-)
>>
>> diff --git a/fs/inode.c b/fs/inode.c
>> index 1b569ad882ce..a250aa01d3c3 100644
>> --- a/fs/inode.c
>> +++ b/fs/inode.c
>> @@ -2246,8 +2246,6 @@ void inode_init_owner(struct user_namespace *mnt_userns, struct inode *inode,
>>   		/* Directories are special, and always inherit S_ISGID */
>>   		if (S_ISDIR(mode))
>>   			mode |= S_ISGID;
>> -		else
>> -			inode_sgid_strip(mnt_userns, dir,&mode);
>>   	} else
>>   		inode_fsgid_set(inode, mnt_userns);
>>   	inode->i_mode = mode;
>> diff --git a/fs/namei.c b/fs/namei.c
>> index bbc7c950bbdc..0fadc884af7f 100644
>> --- a/fs/namei.c
>> +++ b/fs/namei.c
>> @@ -3287,8 +3287,7 @@ static struct dentry *lookup_open(struct nameidata *nd, struct file *file,
>>   	if (open_flag&  O_CREAT) {
>>   		if (open_flag&  O_EXCL)
>>   			open_flag&= ~O_TRUNC;
>> -		if (!IS_POSIXACL(dir->d_inode))
>> -			mode&= ~current_umask();
>> +		prepare_mode(mnt_userns, dir->d_inode,&mode);
>>   		if (likely(got_write))
>>   			create_error = may_o_create(mnt_userns,&nd->path,
>>   						    dentry, mode);
>> @@ -3521,8 +3520,7 @@ struct dentry *vfs_tmpfile(struct user_namespace *mnt_userns,
>>   	child = d_alloc(dentry,&slash_name);
>>   	if (unlikely(!child))
>>   		goto out_err;
>> -	if (!IS_POSIXACL(dir))
>> -		mode&= ~current_umask();
>> +	prepare_mode(mnt_userns, dir,&mode);
>>   	error = dir->i_op->tmpfile(mnt_userns, dir, child, mode);
>>   	if (error)
>>   		goto out_err;
>> @@ -3852,13 +3850,12 @@ static int do_mknodat(int dfd, struct filename *name, umode_t mode,
>>   	if (IS_ERR(dentry))
>>   		goto out1;
>>
>> -	if (!IS_POSIXACL(path.dentry->d_inode))
>> -		mode&= ~current_umask();
>> +	mnt_userns = mnt_user_ns(path.mnt);
>> +	prepare_mode(mnt_userns, path.dentry->d_inode,&mode);
>>   	error = security_path_mknod(&path, dentry, mode, dev);
>>   	if (error)
>>   		goto out2;
>>
>> -	mnt_userns = mnt_user_ns(path.mnt);
>>   	switch (mode&  S_IFMT) {
>>   		case 0: case S_IFREG:
>>   			error = vfs_create(mnt_userns, path.dentry->d_inode,
>> @@ -3952,12 +3949,11 @@ int do_mkdirat(int dfd, struct filename *name, umode_t mode)
>>   	if (IS_ERR(dentry))
>>   		goto out_putname;
>>
>> -	if (!IS_POSIXACL(path.dentry->d_inode))
>> -		mode&= ~current_umask();
>>   	error = security_path_mkdir(&path, dentry, mode);
>
> Your changes causes the security and the filesystem layer to potentially
> see different values for mode.
>
> You need to change the patch so prepare_mode() is called before the mode
> is passed to the security layer. This will ensure that both the security
> layer and the vfs see the same mode.

Will move it before security_path_mkdir.

Best Regards
Yang Xu
>
>>   	if (!error) {
>>   		struct user_namespace *mnt_userns;
>>   		mnt_userns = mnt_user_ns(path.mnt);
>> +		prepare_mode(mnt_userns, path.dentry->d_inode,&mode);
>>   		error = vfs_mkdir(mnt_userns, path.dentry->d_inode, dentry,
>>   				  mode);
>>   	}
>> diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
>> index c75fd54b9185..c81b8e0847aa 100644
>> --- a/fs/ocfs2/namei.c
>> +++ b/fs/ocfs2/namei.c
>> @@ -198,6 +198,7 @@ static struct inode *ocfs2_get_init_inode(struct inode *dir, umode_t mode)
>>   	if (S_ISDIR(mode))
>>   		set_nlink(inode, 2);
>>   	inode_init_owner(&init_user_ns, inode, dir, mode);
>> +	inode_sgid_strip(&init_user_ns, dir,&mode);
>>   	status = dquot_initialize(inode);
>>   	if (status)
>>   		return ERR_PTR(status);
>> --
>> 2.27.0
>>

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v3 1/7] fs/inode: move sgid strip operation from inode_init_owner into inode_sgid_strip
  2022-04-15 14:09 ` [PATCH v3 1/7] fs/inode: move sgid strip operation from inode_init_owner into inode_sgid_strip Christian Brauner
  2022-04-18  2:08   ` xuyang2018.jy
@ 2022-04-18  3:08   ` Matthew Wilcox
  2022-04-18  8:39     ` xuyang2018.jy
  1 sibling, 1 reply; 18+ messages in thread
From: Matthew Wilcox @ 2022-04-18  3:08 UTC (permalink / raw)
  To: Christian Brauner
  Cc: Yang Xu, david, djwong, linux-fsdevel, ceph-devel, linux-nfs,
	linux-xfs, viro, jlayton

On Fri, Apr 15, 2022 at 04:09:24PM +0200, Christian Brauner wrote:
> > +			inode_sgid_strip(mnt_userns, dir, &mode);
> >  	} else
> >  		inode_fsgid_set(inode, mnt_userns);
> >  	inode->i_mode = mode;
> > @@ -2405,3 +2403,21 @@ struct timespec64 current_time(struct inode *inode)
> >  	return timestamp_truncate(now, inode);
> >  }
> >  EXPORT_SYMBOL(current_time);
> > +
> > +void inode_sgid_strip(struct user_namespace *mnt_userns,
> > +		      const struct inode *dir, umode_t *mode)
> > +{
> > +	if (!dir || !(dir->i_mode & S_ISGID))
> > +		return;
> > +	if ((*mode & (S_ISGID | S_IXGRP)) != (S_ISGID | S_IXGRP))
> > +		return;
> > +	if (S_ISDIR(*mode))
> > +		return;
> 
> I'd place that check first as this whole function is really only
> relevant for non-directories.
> 
> Otherwise I can live with *mode being a pointer although I still find
> this unpleasant API wise but the bikeshed does it's job without having
> my color. :)

No, I think your instincts are correct.  This should be

umode_t inode_sgid_strip(struct user_namespace *mnt_userns,
		const struct inode *dir, umode_t mode)
{
	if (S_ISDIR(mode) || !dir || !(dir->i_mode & S_ISGID))
		return mode;
	if (mode & (S_ISGID | S_IXGRP) != (S_ISGID | S_IXGRP))
		return mode;
...

and the same for prepare_mode().

And really, I think this should be called inode_strip_sgid().  Right?

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v3 7/7] ceph: Remove S_ISGID clear code in ceph_finish_async_create
  2022-04-18  3:04   ` Xiubo Li
@ 2022-04-18  3:12     ` xuyang2018.jy
  0 siblings, 0 replies; 18+ messages in thread
From: xuyang2018.jy @ 2022-04-18  3:12 UTC (permalink / raw)
  To: Xiubo Li
  Cc: david, djwong, brauner, linux-fsdevel, ceph-devel, linux-nfs,
	linux-xfs, viro, jlayton

on 2022/4/18 11:04, Xiubo Li wrote:
>
> On 4/15/22 7:02 PM, Yang Xu wrote:
>> Since vfs has stripped S_ISGID, we don't need this code any more.
>>
>> Signed-off-by: Yang Xu <xuyang2018.jy@fujitsu.com>
>> ---
>> fs/ceph/file.c | 4 ----
>> 1 file changed, 4 deletions(-)
>>
>> diff --git a/fs/ceph/file.c b/fs/ceph/file.c
>> index 6c9e837aa1d3..8e3b99853333 100644
>> --- a/fs/ceph/file.c
>> +++ b/fs/ceph/file.c
>> @@ -651,10 +651,6 @@ static int ceph_finish_async_create(struct inode
>> *dir, struct dentry *dentry,
>> /* Directories always inherit the setgid bit. */
>> if (S_ISDIR(mode))
>> mode |= S_ISGID;
>> - else if ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP) &&
>> - !in_group_p(dir->i_gid) &&
>> - !capable_wrt_inode_uidgid(&init_user_ns, dir, CAP_FSETID))
>> - mode &= ~S_ISGID;
>
> Could you point me where has done this for ceph ?

You can see the 6th patch, it added prepare_mode for tmpfile, open, 
mknodat, mkdirat in vfs. The prepare_mode does inode sgid strip and 
umask strip.

Best Regards
Yang Xu
>
> -- Xiubo
>
>
>> } else {
>> in.gid = cpu_to_le32(from_kgid(&init_user_ns, current_fsgid()));
>> }
>

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v3 1/7] fs/inode: move sgid strip operation from inode_init_owner into inode_sgid_strip
  2022-04-18  3:08   ` Matthew Wilcox
@ 2022-04-18  8:39     ` xuyang2018.jy
  0 siblings, 0 replies; 18+ messages in thread
From: xuyang2018.jy @ 2022-04-18  8:39 UTC (permalink / raw)
  To: Matthew Wilcox
  Cc: Christian Brauner, david, djwong, linux-fsdevel, ceph-devel,
	linux-nfs, linux-xfs, viro, jlayton

on 2022/4/18 11:08, Matthew Wilcox wrote:
> On Fri, Apr 15, 2022 at 04:09:24PM +0200, Christian Brauner wrote:
>>> +			inode_sgid_strip(mnt_userns, dir,&mode);
>>>   	} else
>>>   		inode_fsgid_set(inode, mnt_userns);
>>>   	inode->i_mode = mode;
>>> @@ -2405,3 +2403,21 @@ struct timespec64 current_time(struct inode *inode)
>>>   	return timestamp_truncate(now, inode);
>>>   }
>>>   EXPORT_SYMBOL(current_time);
>>> +
>>> +void inode_sgid_strip(struct user_namespace *mnt_userns,
>>> +		      const struct inode *dir, umode_t *mode)
>>> +{
>>> +	if (!dir || !(dir->i_mode&  S_ISGID))
>>> +		return;
>>> +	if ((*mode&  (S_ISGID | S_IXGRP)) != (S_ISGID | S_IXGRP))
>>> +		return;
>>> +	if (S_ISDIR(*mode))
>>> +		return;
>>
>> I'd place that check first as this whole function is really only
>> relevant for non-directories.
>>
>> Otherwise I can live with *mode being a pointer although I still find
>> this unpleasant API wise but the bikeshed does it's job without having
>> my color. :)
>
> No, I think your instincts are correct.  This should be
I can't understand why returning umode_t is better. So Does kernel have 
some rules for adding new function I don't notice before? Just need a 
reason.

ps: I will decide whether use pointer or use return umode_t value before 
I send v4.

Best Regards
Yang Xu
>
> umode_t inode_sgid_strip(struct user_namespace *mnt_userns,
> 		const struct inode *dir, umode_t mode)
> {
> 	if (S_ISDIR(mode) || !dir || !(dir->i_mode&  S_ISGID))
> 		return mode;
> 	if (mode&  (S_ISGID | S_IXGRP) != (S_ISGID | S_IXGRP))
> 		return mode;
> ...
>
> and the same for prepare_mode().
>
> And really, I think this should be called inode_strip_sgid().  Right?

^ permalink raw reply	[flat|nested] 18+ messages in thread

end of thread, other threads:[~2022-04-18  8:39 UTC | newest]

Thread overview: 18+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-04-15 11:02 [PATCH v3 1/7] fs/inode: move sgid strip operation from inode_init_owner into inode_sgid_strip Yang Xu
2022-04-15 11:02 ` [PATCH v3 2/7] fs/namei.c: Add missing umask strip in vfs_tmpfile Yang Xu
2022-04-15 14:17   ` Christian Brauner
2022-04-18  2:55     ` xuyang2018.jy
2022-04-15 11:02 ` [PATCH v3 3/7] xfs: Only do posix acl setup/release operation under CONFIG_XFS_POSIX_ACL Yang Xu
2022-04-15 11:02 ` [PATCH v3 4/7] nfs3: Only do posix acl setup/release operation under CONFIG_NFS_V3_ACL Yang Xu
2022-04-15 11:02 ` [PATCH v3 5/7] fs: Add new helper prepare_mode Yang Xu
2022-04-15 14:19   ` Christian Brauner
2022-04-15 11:02 ` [PATCH v3 6/7] fs: strip file's S_ISGID mode on vfs instead of on underlying filesystem Yang Xu
2022-04-15 14:24   ` Christian Brauner
2022-04-18  3:05     ` xuyang2018.jy
2022-04-15 11:02 ` [PATCH v3 7/7] ceph: Remove S_ISGID clear code in ceph_finish_async_create Yang Xu
2022-04-18  3:04   ` Xiubo Li
2022-04-18  3:12     ` xuyang2018.jy
2022-04-15 14:09 ` [PATCH v3 1/7] fs/inode: move sgid strip operation from inode_init_owner into inode_sgid_strip Christian Brauner
2022-04-18  2:08   ` xuyang2018.jy
2022-04-18  3:08   ` Matthew Wilcox
2022-04-18  8:39     ` xuyang2018.jy

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.