All of lore.kernel.org
 help / color / mirror / Atom feed
* [RFC RESEND] xfs: fix up non-directory creation in SGID directories when umask S_IXGRP
@ 2022-03-22  8:04 Yang Xu
  2022-03-22  8:43 ` Dave Chinner
  0 siblings, 1 reply; 5+ messages in thread
From: Yang Xu @ 2022-03-22  8:04 UTC (permalink / raw)
  To: linux-xfs; +Cc: djwong, pvorel, Yang Xu

Petr reported a problem that S_ISGID bit was not clean when testing ltp
case create09[1] by using umask(077).

It fails because xfs will call posix_acl_create before xfs_init_new_node
calls inode_init_owner, so S_IXGRP mode will be clear when enable CONFIG_XFS_POSIXACL
and doesn't set acl or default acl on dir, then inode_init_owner will not clear
S_ISGID bit.

The calltrace as below:

use inode_init_owner(mnt_userns, inode)
[  296.760675]  xfs_init_new_inode+0x10e/0x6c0
[  296.760678]  xfs_create+0x401/0x610
use posix_acl_create(dir, &mode, &default_acl, &acl);
[  296.760681]  xfs_generic_create+0x123/0x2e0
[  296.760684]  ? _raw_spin_unlock+0x16/0x30
[  296.760687]  path_openat+0xfb8/0x1210
[  296.760689]  do_filp_open+0xb4/0x120
[  296.760691]  ? file_tty_write.isra.31+0x203/0x340
[  296.760697]  ? __check_object_size+0x150/0x170
[  296.760699]  do_sys_openat2+0x242/0x310
[  296.760702]  do_sys_open+0x4b/0x80
[  296.760704]  do_syscall_64+0x3a/0x80

Fix this is simple, we can call posix_acl_create after xfs_init_new_inode completed,
so inode_init_owner can clear S_ISGID bit correctly like ext4 or btrfs does.

But commit e6a688c33238 ("xfs: initialise attr fork on inode create") has created
attr fork in advance according to acl, so a better solution is that moving these
functions into xfs_init_new_inode.

Convert init_attrs into init_acl because xfs_create/xfs_create_tmpfile in
xfs_generic_create will call posix_acl_create, and use nlink to decide whether
initialise attr fork on inode create.

[1]https://github.com/linux-test-project/ltp/blob/master/testcases/kernel/syscalls/creat/creat09.c

Reported-by: Petr Vorel <pvorel@suse.cz>
Signed-off-by: Yang Xu <xuyang2018.jy@fujitsu.com>
---
 fs/xfs/xfs_inode.c | 64 +++++++++++++++++++++++++++++++++++++++++-----
 fs/xfs/xfs_inode.h |  2 +-
 fs/xfs/xfs_iops.c  | 63 ++++-----------------------------------------
 3 files changed, 64 insertions(+), 65 deletions(-)

diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 26227d26f274..935f28c08bbc 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -4,6 +4,7 @@
  * All Rights Reserved.
  */
 #include <linux/iversion.h>
+#include <linux/posix_acl.h>
 
 #include "xfs.h"
 #include "xfs_fs.h"
@@ -36,6 +37,7 @@
 #include "xfs_reflink.h"
 #include "xfs_ag.h"
 #include "xfs_log_priv.h"
+#include "xfs_acl.h"
 
 struct kmem_cache *xfs_inode_cache;
 
@@ -781,6 +783,36 @@ xfs_inode_inherit_flags2(
 	}
 }
 
+/*
+ * Check to see if we are likely to need an extended attribute to be added to
+ * the inode we are about to allocate. This allows the attribute fork to be
+ * created during the inode allocation, reducing the number of transactions we
+ * need to do in this fast path.
+ *
+ * The security checks are optimistic, but not guaranteed. The two LSMs that
+ * require xattrs to be added here (selinux and smack) are also the only two
+ * LSMs that add a sb->s_security structure to the superblock. Hence if security
+ * is enabled and sb->s_security is set, we have a pretty good idea that we are
+ * going to be asked to add a security xattr immediately after allocating the
+ * xfs inode and instantiating the VFS inode.
+ */
+static inline bool
+xfs_create_need_xattr(
+	struct inode    *dir,
+	struct posix_acl *default_acl,
+	struct posix_acl *acl)
+{
+	if (acl)
+		return true;
+	if (default_acl)
+		return true;
+#if IS_ENABLED(CONFIG_SECURITY)
+	if (dir->i_sb->s_security)
+		return true;
+#endif
+	return false;
+}
+
 /*
  * Initialise a newly allocated inode and return the in-core inode to the
  * caller locked exclusively.
@@ -795,7 +827,7 @@ xfs_init_new_inode(
 	xfs_nlink_t		nlink,
 	dev_t			rdev,
 	prid_t			prid,
-	bool			init_xattrs,
+	bool			init_acl,
 	struct xfs_inode	**ipp)
 {
 	struct inode		*dir = pip ? VFS_I(pip) : NULL;
@@ -805,6 +837,7 @@ xfs_init_new_inode(
 	int			error;
 	struct timespec64	tv;
 	struct inode		*inode;
+	struct posix_acl	*default_acl = NULL, *acl = NULL;
 
 	/*
 	 * Protect against obviously corrupt allocation btree records. Later
@@ -893,6 +926,11 @@ xfs_init_new_inode(
 		ASSERT(0);
 	}
 
+	if (init_acl) {
+		error = posix_acl_create(dir, &inode->i_mode, &default_acl, &acl);
+		if (error)
+			return error;
+	}
 	/*
 	 * If we need to create attributes immediately after allocating the
 	 * inode, initialise an empty attribute fork right now. We use the
@@ -902,7 +940,10 @@ xfs_init_new_inode(
 	 * this saves us from needing to run a separate transaction to set the
 	 * fork offset in the immediate future.
 	 */
-	if (init_xattrs && xfs_has_attr(mp)) {
+	if (init_acl &&
+	    nlink &&
+	    xfs_create_need_xattr(dir, default_acl, acl) &&
+	    xfs_has_attr(mp)) {
 		ip->i_forkoff = xfs_default_attroffset(ip) >> 3;
 		ip->i_afp = xfs_ifork_alloc(XFS_DINODE_FMT_EXTENTS, 0);
 	}
@@ -916,8 +957,19 @@ xfs_init_new_inode(
 	/* now that we have an i_mode we can setup the inode structure */
 	xfs_setup_inode(ip);
 
+#ifdef CONFIG_XFS_POSIX_ACL
+	if (default_acl) {
+		error = __xfs_set_acl(inode, default_acl, ACL_TYPE_DEFAULT);
+		posix_acl_release(default_acl);
+	}
+	if (acl) {
+		if (!error)
+			error = __xfs_set_acl(inode, acl, ACL_TYPE_ACCESS);
+		posix_acl_release(acl);
+	}
+#endif
 	*ipp = ip;
-	return 0;
+	return error;
 }
 
 /*
@@ -962,7 +1014,7 @@ xfs_create(
 	struct xfs_name		*name,
 	umode_t			mode,
 	dev_t			rdev,
-	bool			init_xattrs,
+	bool			init_acl,
 	xfs_inode_t		**ipp)
 {
 	int			is_dir = S_ISDIR(mode);
@@ -1037,7 +1089,7 @@ xfs_create(
 	error = xfs_dialloc(&tp, dp->i_ino, mode, &ino);
 	if (!error)
 		error = xfs_init_new_inode(mnt_userns, tp, dp, ino, mode,
-				is_dir ? 2 : 1, rdev, prid, init_xattrs, &ip);
+				is_dir ? 2 : 1, rdev, prid, init_acl, &ip);
 	if (error)
 		goto out_trans_cancel;
 
@@ -1161,7 +1213,7 @@ xfs_create_tmpfile(
 	error = xfs_dialloc(&tp, dp->i_ino, mode, &ino);
 	if (!error)
 		error = xfs_init_new_inode(mnt_userns, tp, dp, ino, mode,
-				0, 0, prid, false, &ip);
+				0, 0, prid, true, &ip);
 	if (error)
 		goto out_trans_cancel;
 
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 740ab13d1aa2..b0cae7b48ea9 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -448,7 +448,7 @@ xfs_extlen_t	xfs_get_cowextsz_hint(struct xfs_inode *ip);
 
 int xfs_init_new_inode(struct user_namespace *mnt_userns, struct xfs_trans *tp,
 		struct xfs_inode *pip, xfs_ino_t ino, umode_t mode,
-		xfs_nlink_t nlink, dev_t rdev, prid_t prid, bool init_xattrs,
+		xfs_nlink_t nlink, dev_t rdev, prid_t prid, bool init_acl,
 		struct xfs_inode **ipp);
 
 static inline int
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index b34e8e4344a8..5f9fcb6e7cf8 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -127,37 +127,6 @@ xfs_cleanup_inode(
 	xfs_remove(XFS_I(dir), &teardown, XFS_I(inode));
 }
 
-/*
- * Check to see if we are likely to need an extended attribute to be added to
- * the inode we are about to allocate. This allows the attribute fork to be
- * created during the inode allocation, reducing the number of transactions we
- * need to do in this fast path.
- *
- * The security checks are optimistic, but not guaranteed. The two LSMs that
- * require xattrs to be added here (selinux and smack) are also the only two
- * LSMs that add a sb->s_security structure to the superblock. Hence if security
- * is enabled and sb->s_security is set, we have a pretty good idea that we are
- * going to be asked to add a security xattr immediately after allocating the
- * xfs inode and instantiating the VFS inode.
- */
-static inline bool
-xfs_create_need_xattr(
-	struct inode	*dir,
-	struct posix_acl *default_acl,
-	struct posix_acl *acl)
-{
-	if (acl)
-		return true;
-	if (default_acl)
-		return true;
-#if IS_ENABLED(CONFIG_SECURITY)
-	if (dir->i_sb->s_security)
-		return true;
-#endif
-	return false;
-}
-
-
 STATIC int
 xfs_generic_create(
 	struct user_namespace	*mnt_userns,
@@ -169,7 +138,6 @@ xfs_generic_create(
 {
 	struct inode	*inode;
 	struct xfs_inode *ip = NULL;
-	struct posix_acl *default_acl, *acl;
 	struct xfs_name	name;
 	int		error;
 
@@ -184,24 +152,19 @@ xfs_generic_create(
 		rdev = 0;
 	}
 
-	error = posix_acl_create(dir, &mode, &default_acl, &acl);
-	if (error)
-		return error;
-
 	/* Verify mode is valid also for tmpfile case */
 	error = xfs_dentry_mode_to_name(&name, dentry, mode);
 	if (unlikely(error))
-		goto out_free_acl;
+		return error;
 
 	if (!tmpfile) {
 		error = xfs_create(mnt_userns, XFS_I(dir), &name, mode, rdev,
-				xfs_create_need_xattr(dir, default_acl, acl),
-				&ip);
+				true, &ip);
 	} else {
 		error = xfs_create_tmpfile(mnt_userns, XFS_I(dir), mode, &ip);
 	}
 	if (unlikely(error))
-		goto out_free_acl;
+		return error;
 
 	inode = VFS_I(ip);
 
@@ -209,19 +172,6 @@ xfs_generic_create(
 	if (unlikely(error))
 		goto out_cleanup_inode;
 
-#ifdef CONFIG_XFS_POSIX_ACL
-	if (default_acl) {
-		error = __xfs_set_acl(inode, default_acl, ACL_TYPE_DEFAULT);
-		if (error)
-			goto out_cleanup_inode;
-	}
-	if (acl) {
-		error = __xfs_set_acl(inode, acl, ACL_TYPE_ACCESS);
-		if (error)
-			goto out_cleanup_inode;
-	}
-#endif
-
 	xfs_setup_iops(ip);
 
 	if (tmpfile) {
@@ -240,17 +190,14 @@ xfs_generic_create(
 
 	xfs_finish_inode_setup(ip);
 
- out_free_acl:
-	posix_acl_release(default_acl);
-	posix_acl_release(acl);
-	return error;
+	return 0;
 
  out_cleanup_inode:
 	xfs_finish_inode_setup(ip);
 	if (!tmpfile)
 		xfs_cleanup_inode(dir, inode, dentry);
 	xfs_irele(ip);
-	goto out_free_acl;
+	return error;
 }
 
 STATIC int
-- 
2.27.0


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [RFC RESEND] xfs: fix up non-directory creation in SGID directories when umask S_IXGRP
  2022-03-22  8:04 [RFC RESEND] xfs: fix up non-directory creation in SGID directories when umask S_IXGRP Yang Xu
@ 2022-03-22  8:43 ` Dave Chinner
  2022-03-22  9:23   ` Dave Chinner
  2022-03-22  9:42   ` xuyang2018.jy
  0 siblings, 2 replies; 5+ messages in thread
From: Dave Chinner @ 2022-03-22  8:43 UTC (permalink / raw)
  To: Yang Xu; +Cc: linux-xfs, djwong, pvorel

On Tue, Mar 22, 2022 at 04:04:17PM +0800, Yang Xu wrote:
> Petr reported a problem that S_ISGID bit was not clean when testing ltp
> case create09[1] by using umask(077).

Ok. So what is the failure message from the test?

When did the test start failing? Is this a recent failure or
something that has been around for years? If it's recent, what
commit broke it?

> It fails because xfs will call posix_acl_create before xfs_init_new_node
> calls inode_init_owner, so S_IXGRP mode will be clear when enable CONFIG_XFS_POSIXACL
> and doesn't set acl or default acl on dir, then inode_init_owner will not clear
> S_ISGID bit.

I don't really follow what you are saying is the problem here - the
rule we are supposed to be following is not clear to me, nor how XFS
is behaving contrary to the rule. Can you explain the rule (e.g.
from the test failure results) rather than try to explain where the
code goes wrong, please?

> The calltrace as below:
> 
> use inode_init_owner(mnt_userns, inode)
> [  296.760675]  xfs_init_new_inode+0x10e/0x6c0
> [  296.760678]  xfs_create+0x401/0x610
> use posix_acl_create(dir, &mode, &default_acl, &acl);
> [  296.760681]  xfs_generic_create+0x123/0x2e0
> [  296.760684]  ? _raw_spin_unlock+0x16/0x30
> [  296.760687]  path_openat+0xfb8/0x1210
> [  296.760689]  do_filp_open+0xb4/0x120
> [  296.760691]  ? file_tty_write.isra.31+0x203/0x340
> [  296.760697]  ? __check_object_size+0x150/0x170
> [  296.760699]  do_sys_openat2+0x242/0x310
> [  296.760702]  do_sys_open+0x4b/0x80
> [  296.760704]  do_syscall_64+0x3a/0x80
> 
> Fix this is simple, we can call posix_acl_create after xfs_init_new_inode completed,
> so inode_init_owner can clear S_ISGID bit correctly like ext4 or btrfs does.
>
> But commit e6a688c33238 ("xfs: initialise attr fork on inode create") has created
> attr fork in advance according to acl, so a better solution is that moving these
> functions into xfs_init_new_inode.

No, you can't do that. Xattrs cannot be created within the
transaction context of the create operation because they require
their own transaction context to run under. We cannot nest
transaction contexts in XFS, so the ACL and other security xattrs
must be created after the inode create has completed.

Commit e6a688c33238 only initialises the inode attribute fork in the
create transaction rather than requiring a separate transaction to
do it before the xattrs are then created. It does not allow xattrs
to be created from within the create transaction context.

Hence regardless of where the problem lies, a different fix will be
required to address it.

Cheers,

Dave.
-- 
Dave Chinner
david@fromorbit.com

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [RFC RESEND] xfs: fix up non-directory creation in SGID directories when umask S_IXGRP
  2022-03-22  8:43 ` Dave Chinner
@ 2022-03-22  9:23   ` Dave Chinner
  2022-03-23  7:18     ` xuyang2018.jy
  2022-03-22  9:42   ` xuyang2018.jy
  1 sibling, 1 reply; 5+ messages in thread
From: Dave Chinner @ 2022-03-22  9:23 UTC (permalink / raw)
  To: Yang Xu; +Cc: linux-xfs, djwong, pvorel

On Tue, Mar 22, 2022 at 07:43:20PM +1100, Dave Chinner wrote:
> On Tue, Mar 22, 2022 at 04:04:17PM +0800, Yang Xu wrote:
> > Petr reported a problem that S_ISGID bit was not clean when testing ltp
> > case create09[1] by using umask(077).
> 
> Ok. So what is the failure message from the test?
> 
> When did the test start failing? Is this a recent failure or
> something that has been around for years? If it's recent, what
> commit broke it?

Ok, I went and looked at the test, and it confirmed my suspicion.  I
can't find the patch that introduced this change on lore.kernel.org.
Looks like one of those silent security fixes that nobody gets told
about, gets no real review, has no test cases written for it, etc.

And nobody wrote a test for until August 2021 and that's when people
started to notice broken filesystems.

This is the commit that failed to fix several filesystems:

commit 0fa3ecd87848c9c93c2c828ef4c3a8ca36ce46c7
Author: Linus Torvalds <torvalds@linux-foundation.org>
Date:   Tue Jul 3 17:10:19 2018 -0700

    Fix up non-directory creation in SGID directories
    
    sgid directories have special semantics, making newly created files in
    the directory belong to the group of the directory, and newly created
    subdirectories will also become sgid.  This is historically used for
    group-shared directories.
    
    But group directories writable by non-group members should not imply
    that such non-group members can magically join the group, so make sure
    to clear the sgid bit on non-directories for non-members (but remember
    that sgid without group execute means "mandatory locking", just to
    confuse things even more).
    
    Reported-by: Jann Horn <jannh@google.com>
    Cc: Andy Lutomirski <luto@kernel.org>
    Cc: Al Viro <viro@zeniv.linux.org.uk>
    Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/fs/inode.c b/fs/inode.c
index 2c300e981796..8c86c809ca17 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -1999,8 +1999,14 @@ void inode_init_owner(struct inode *inode, const struct inode *dir,
        inode->i_uid = current_fsuid();
        if (dir && dir->i_mode & S_ISGID) {
                inode->i_gid = dir->i_gid;
+
+               /* Directories are special, and always inherit S_ISGID */
                if (S_ISDIR(mode))
                        mode |= S_ISGID;
+               else if ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP) &&
+                        !in_group_p(inode->i_gid) &&
+                        !capable_wrt_inode_uidgid(dir, CAP_FSETID))
+                       mode &= ~S_ISGID;
        } else
                inode->i_gid = current_fsgid();
        inode->i_mode = mode;

The problem is it takes away mode bits that the VFS passed to us
deep in the VFS inode initialisation done during on-disk inode
initialisation, and it's hidden well away from sight of the
filesystems.

Oh, what a mess - this in_group_p() && capable_wrt_inode_uidgid()
check is splattered all over filesystems in random places to clear
SGID bits. e.g ceph_finish_async_create() is an open coded
inode_init_owner() call. There's special case
code in fuse_set_acl() to clear SGID. There's a special case in
ovl_posix_acl_xattr_set() for ACL xattrs to clear SGID. And so on.

No consistency anywhere - shouldn't the VFS just be stripping the
SGID bit before it passes the mode down to filesystems? It has all
the info it needs - it doesn't need the filesystems to do everything
correctly with the mode and ensuring that they order things like
posix acl setup functions correctly with inode_init_owner() to strip
the SGID bit...

Just strip the SGID bit at the VFS, and then the filesystems can't
get it wrong...

Cheers,

Dave.
-- 
Dave Chinner
david@fromorbit.com

^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [RFC RESEND] xfs: fix up non-directory creation in SGID directories when umask S_IXGRP
  2022-03-22  8:43 ` Dave Chinner
  2022-03-22  9:23   ` Dave Chinner
@ 2022-03-22  9:42   ` xuyang2018.jy
  1 sibling, 0 replies; 5+ messages in thread
From: xuyang2018.jy @ 2022-03-22  9:42 UTC (permalink / raw)
  To: Dave Chinner; +Cc: linux-xfs, djwong, pvorel

on 2022/3/22 16:43, Dave Chinner wrote:
> On Tue, Mar 22, 2022 at 04:04:17PM +0800, Yang Xu wrote:
>> Petr reported a problem that S_ISGID bit was not clean when testing ltp
>> case create09[1] by using umask(077).
>
> Ok. So what is the failure message from the test?
>
> When did the test start failing? Is this a recent failure or
> something that has been around for years? If it's recent, what
> commit broke it?
You have known this.
>
>> It fails because xfs will call posix_acl_create before xfs_init_new_node
>> calls inode_init_owner, so S_IXGRP mode will be clear when enable CONFIG_XFS_POSIXACL
>> and doesn't set acl or default acl on dir, then inode_init_owner will not clear
>> S_ISGID bit.
>
> I don't really follow what you are saying is the problem here - the
> rule we are supposed to be following is not clear to me, nor how XFS
> is behaving contrary to the rule. Can you explain the rule (e.g.
> from the test failure results) rather than try to explain where the
> code goes wrong, please?
>
>> The calltrace as below:
>>
>> use inode_init_owner(mnt_userns, inode)
>> [  296.760675]  xfs_init_new_inode+0x10e/0x6c0
>> [  296.760678]  xfs_create+0x401/0x610
>> use posix_acl_create(dir,&mode,&default_acl,&acl);
>> [  296.760681]  xfs_generic_create+0x123/0x2e0
>> [  296.760684]  ? _raw_spin_unlock+0x16/0x30
>> [  296.760687]  path_openat+0xfb8/0x1210
>> [  296.760689]  do_filp_open+0xb4/0x120
>> [  296.760691]  ? file_tty_write.isra.31+0x203/0x340
>> [  296.760697]  ? __check_object_size+0x150/0x170
>> [  296.760699]  do_sys_openat2+0x242/0x310
>> [  296.760702]  do_sys_open+0x4b/0x80
>> [  296.760704]  do_syscall_64+0x3a/0x80
>>
>> Fix this is simple, we can call posix_acl_create after xfs_init_new_inode completed,
>> so inode_init_owner can clear S_ISGID bit correctly like ext4 or btrfs does.
>>
>> But commit e6a688c33238 ("xfs: initialise attr fork on inode create") has created
>> attr fork in advance according to acl, so a better solution is that moving these
>> functions into xfs_init_new_inode.
>
> No, you can't do that. Xattrs cannot be created within the
> transaction context of the create operation because they require
> their own transaction context to run under. We cannot nest
> transaction contexts in XFS, so the ACL and other security xattrs
> must be created after the inode create has completed.
>
> Commit e6a688c33238 only initialises the inode attribute fork in the
> create transaction rather than requiring a separate transaction to
> do it before the xattrs are then created. It does not allow xattrs
> to be created from within the create transaction context.
Thanks for your reply, now, I know this.

Best Regards
Yang Xu
>
> Hence regardless of where the problem lies, a different fix will be
> required to address it.
>
> Cheers,
>
> Dave.

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [RFC RESEND] xfs: fix up non-directory creation in SGID directories when umask S_IXGRP
  2022-03-22  9:23   ` Dave Chinner
@ 2022-03-23  7:18     ` xuyang2018.jy
  0 siblings, 0 replies; 5+ messages in thread
From: xuyang2018.jy @ 2022-03-23  7:18 UTC (permalink / raw)
  To: Dave Chinner; +Cc: linux-xfs, djwong, pvorel

on 2022/3/22 17:23, Dave Chinner wrote:
> On Tue, Mar 22, 2022 at 07:43:20PM +1100, Dave Chinner wrote:
>> On Tue, Mar 22, 2022 at 04:04:17PM +0800, Yang Xu wrote:
>>> Petr reported a problem that S_ISGID bit was not clean when testing ltp
>>> case create09[1] by using umask(077).
>>
>> Ok. So what is the failure message from the test?
>>
>> When did the test start failing? Is this a recent failure or
>> something that has been around for years? If it's recent, what
>> commit broke it?
>
> Ok, I went and looked at the test, and it confirmed my suspicion.  I
> can't find the patch that introduced this change on lore.kernel.org.
> Looks like one of those silent security fixes that nobody gets told
> about, gets no real review, has no test cases written for it, etc.
>
> And nobody wrote a test for until August 2021 and that's when people
> started to notice broken filesystems.
>
> This is the commit that failed to fix several filesystems:
>
> commit 0fa3ecd87848c9c93c2c828ef4c3a8ca36ce46c7
> Author: Linus Torvalds<torvalds@linux-foundation.org>
> Date:   Tue Jul 3 17:10:19 2018 -0700
>
>      Fix up non-directory creation in SGID directories
>
>      sgid directories have special semantics, making newly created files in
>      the directory belong to the group of the directory, and newly created
>      subdirectories will also become sgid.  This is historically used for
>      group-shared directories.
>
>      But group directories writable by non-group members should not imply
>      that such non-group members can magically join the group, so make sure
>      to clear the sgid bit on non-directories for non-members (but remember
>      that sgid without group execute means "mandatory locking", just to
>      confuse things even more).
>
>      Reported-by: Jann Horn<jannh@google.com>
>      Cc: Andy Lutomirski<luto@kernel.org>
>      Cc: Al Viro<viro@zeniv.linux.org.uk>
>      Signed-off-by: Linus Torvalds<torvalds@linux-foundation.org>
>
> diff --git a/fs/inode.c b/fs/inode.c
> index 2c300e981796..8c86c809ca17 100644
> --- a/fs/inode.c
> +++ b/fs/inode.c
> @@ -1999,8 +1999,14 @@ void inode_init_owner(struct inode *inode, const struct inode *dir,
>          inode->i_uid = current_fsuid();
>          if (dir&&  dir->i_mode&  S_ISGID) {
>                  inode->i_gid = dir->i_gid;
> +
> +               /* Directories are special, and always inherit S_ISGID */
>                  if (S_ISDIR(mode))
>                          mode |= S_ISGID;
> +               else if ((mode&  (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP)&&
> +                        !in_group_p(inode->i_gid)&&
> +                        !capable_wrt_inode_uidgid(dir, CAP_FSETID))
> +                       mode&= ~S_ISGID;
>          } else
>                  inode->i_gid = current_fsgid();
>          inode->i_mode = mode;
>
> The problem is it takes away mode bits that the VFS passed to us
> deep in the VFS inode initialisation done during on-disk inode
> initialisation, and it's hidden well away from sight of the
> filesystems.
>
> Oh, what a mess - this in_group_p()&&  capable_wrt_inode_uidgid()
> check is splattered all over filesystems in random places to clear
> SGID bits. e.g ceph_finish_async_create() is an open coded
> inode_init_owner() call. There's special case
> code in fuse_set_acl() to clear SGID. There's a special case in
> ovl_posix_acl_xattr_set() for ACL xattrs to clear SGID. And so on.
>
> No consistency anywhere - shouldn't the VFS just be stripping the
> SGID bit before it passes the mode down to filesystems? It has all
> the info it needs - it doesn't need the filesystems to do everything
> correctly with the mode and ensuring that they order things like
> posix acl setup functions correctly with inode_init_owner() to strip
> the SGID bit...
>
> Just strip the SGID bit at the VFS, and then the filesystems can't
> get it wrong...
Thanks for your reply.

Sound reasonable, but I am not sure I have the ability to do this.
Will try ...


Best Regards
Yang Xu
>
> Cheers,
>
> Dave.

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2022-03-23  7:20 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-03-22  8:04 [RFC RESEND] xfs: fix up non-directory creation in SGID directories when umask S_IXGRP Yang Xu
2022-03-22  8:43 ` Dave Chinner
2022-03-22  9:23   ` Dave Chinner
2022-03-23  7:18     ` xuyang2018.jy
2022-03-22  9:42   ` xuyang2018.jy

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.