All of lore.kernel.org
 help / color / mirror / Atom feed
* xfs: fix inode uid/gid initialization
@ 2017-02-13 17:46 James Bottomley
  2017-02-13 19:43 ` Christoph Hellwig
  0 siblings, 1 reply; 17+ messages in thread
From: James Bottomley @ 2017-02-13 17:46 UTC (permalink / raw)
  To: linux-xfs, linux-fsdevel; +Cc: Eric W. Biederman, Seth Forshee

I was debugging a creation failure using a vfs shifting patch set and
discovered that xfs itself doesn't actually respect the superblock
namespace in a couple of places (these showed up as files with the
wrong ownership in my tests).  The fix is to convert xfs away from hand
rolling inode_init_owner() and to use the i_uid/gid_read/write
functions.

The rule should be that we use the i_uid/gid_read/write() functions
when converting to or from the filesystem di_uid and di_gid view.

Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

---
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index de32f0f..1b0d97e 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -814,10 +814,10 @@ xfs_ialloc(
 	if (ip->i_d.di_version == 1)
 		ip->i_d.di_version = 2;
 
-	inode->i_mode = mode;
+	inode_init_owner(inode, pip ? VFS_I(pip) : NULL, mode);
 	set_nlink(inode, nlink);
-	ip->i_d.di_uid = xfs_kuid_to_uid(current_fsuid());
-	ip->i_d.di_gid = xfs_kgid_to_gid(current_fsgid());
+	ip->i_d.di_uid = i_uid_read(inode);
+	ip->i_d.di_gid = i_gid_read(inode);
 	xfs_set_projid(ip, prid);
 
 	if (pip && XFS_INHERIT_GID(pip)) {
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index 22c1615..306766d 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -643,8 +643,8 @@ xfs_setattr_nonsize(
 		 */
 		ASSERT(udqp == NULL);
 		ASSERT(gdqp == NULL);
-		error = xfs_qm_vop_dqalloc(ip, xfs_kuid_to_uid(uid),
-					   xfs_kgid_to_gid(gid),
+		error = xfs_qm_vop_dqalloc(ip, from_kuid(inode->i_sb->s_user_ns, uid),
+					   from_kgid(inode->i_sb->s_user_ns, gid),
 					   xfs_get_projid(ip),
 					   qflags, &udqp, &gdqp, NULL);
 		if (error)
@@ -714,8 +714,9 @@ xfs_setattr_nonsize(
 				olddquot1 = xfs_qm_vop_chown(tp, ip,
 							&ip->i_udquot, udqp);
 			}
-			ip->i_d.di_uid = xfs_kuid_to_uid(uid);
 			inode->i_uid = uid;
+			ip->i_d.di_uid = i_uid_read(inode);
+
 		}
 		if (!gid_eq(igid, gid)) {
 			if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_GQUOTA_ON(mp)) {
@@ -726,8 +727,8 @@ xfs_setattr_nonsize(
 				olddquot2 = xfs_qm_vop_chown(tp, ip,
 							&ip->i_gdquot, gdqp);
 			}
-			ip->i_d.di_gid = xfs_kgid_to_gid(gid);
 			inode->i_gid = gid;
+			ip->i_d.di_gid = i_gid_read(inode);
 		}
 	}
 
@@ -1213,8 +1214,8 @@ xfs_setup_inode(
 	/* make the inode look hashed for the writeback code */
 	hlist_add_fake(&inode->i_hash);
 
-	inode->i_uid    = xfs_uid_to_kuid(ip->i_d.di_uid);
-	inode->i_gid    = xfs_gid_to_kgid(ip->i_d.di_gid);
+	i_uid_write(inode, ip->i_d.di_uid);
+	i_gid_write(inode, ip->i_d.di_gid);
 
 	switch (inode->i_mode & S_IFMT) {
 	case S_IFBLK:

^ permalink raw reply related	[flat|nested] 17+ messages in thread

* Re: xfs: fix inode uid/gid initialization
  2017-02-13 17:46 xfs: fix inode uid/gid initialization James Bottomley
@ 2017-02-13 19:43 ` Christoph Hellwig
  2017-02-13 20:33   ` James Bottomley
  2017-02-13 21:34   ` Dave Chinner
  0 siblings, 2 replies; 17+ messages in thread
From: Christoph Hellwig @ 2017-02-13 19:43 UTC (permalink / raw)
  To: James Bottomley; +Cc: linux-xfs, linux-fsdevel, Eric W. Biederman, Seth Forshee

On Mon, Feb 13, 2017 at 09:46:41AM -0800, James Bottomley wrote:
> I was debugging a creation failure using a vfs shifting patch set and
> discovered that xfs itself doesn't actually respect the superblock
> namespace in a couple of places (these showed up as files with the
> wrong ownership in my tests).

Can you submit your test case to xfstests?  It would be good to have
testing for this in the regular test runs.

> The fix is to convert xfs away from hand
> rolling inode_init_owner() and to use the i_uid/gid_read/write
> functions.

What about the various quota users of xfs_kuid_to_uid/gid in
the create / symlink path?  I suspect they should be handled the same.

Also with your patch the di_uid/gid fields should probably just
go away as they are pointless now.  Something like the patch below,
although it still doesn't take care of the quota issues pointed out
above.

diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c
index d93f9d918cfc..dfe9b02a62bd 100644
--- a/fs/xfs/libxfs/xfs_inode_buf.c
+++ b/fs/xfs/libxfs/xfs_inode_buf.c
@@ -233,8 +233,8 @@ xfs_inode_from_disk(
 	}
 
 	to->di_format = from->di_format;
-	to->di_uid = be32_to_cpu(from->di_uid);
-	to->di_gid = be32_to_cpu(from->di_gid);
+	i_uid_write(inode, be32_to_cpu(from->di_uid));
+	i_gid_write(inode, be32_to_cpu(from->di_gid));
 	to->di_flushiter = be16_to_cpu(from->di_flushiter);
 
 	/*
@@ -286,8 +286,8 @@ xfs_inode_to_disk(
 
 	to->di_version = from->di_version;
 	to->di_format = from->di_format;
-	to->di_uid = cpu_to_be32(from->di_uid);
-	to->di_gid = cpu_to_be32(from->di_gid);
+	to->di_uid = cpu_to_be32(i_uid_read(inode));
+	to->di_gid = cpu_to_be32(i_gid_read(inode));
 	to->di_projid_lo = cpu_to_be16(from->di_projid_lo);
 	to->di_projid_hi = cpu_to_be16(from->di_projid_hi);
 
diff --git a/fs/xfs/libxfs/xfs_inode_buf.h b/fs/xfs/libxfs/xfs_inode_buf.h
index 6848a0afbce7..a4c5502351c4 100644
--- a/fs/xfs/libxfs/xfs_inode_buf.h
+++ b/fs/xfs/libxfs/xfs_inode_buf.h
@@ -31,8 +31,6 @@ struct xfs_icdinode {
 	__int8_t	di_version;	/* inode version */
 	__int8_t	di_format;	/* format of di_c data */
 	__uint16_t	di_flushiter;	/* incremented on flush */
-	__uint32_t	di_uid;		/* owner's user id */
-	__uint32_t	di_gid;		/* owner's group id */
 	__uint16_t	di_projid_lo;	/* lower part of owner's project id */
 	__uint16_t	di_projid_hi;	/* higher part of owner's project id */
 	xfs_fsize_t	di_size;	/* number of bytes in file */
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index de32f0fe47c8..3b0b09a6b15d 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -814,18 +814,10 @@ xfs_ialloc(
 	if (ip->i_d.di_version == 1)
 		ip->i_d.di_version = 2;
 
-	inode->i_mode = mode;
+	inode_init_owner(inode, pip ? VFS_I(pip) : NULL, mode);
 	set_nlink(inode, nlink);
-	ip->i_d.di_uid = xfs_kuid_to_uid(current_fsuid());
-	ip->i_d.di_gid = xfs_kgid_to_gid(current_fsgid());
 	xfs_set_projid(ip, prid);
 
-	if (pip && XFS_INHERIT_GID(pip)) {
-		ip->i_d.di_gid = pip->i_d.di_gid;
-		if ((VFS_I(pip)->i_mode & S_ISGID) && S_ISDIR(mode))
-			inode->i_mode |= S_ISGID;
-	}
-
 	/*
 	 * If the group ID of the new file does not match the effective group
 	 * ID or one of the supplementary group IDs, the S_ISGID bit is cleared
@@ -833,7 +825,7 @@ xfs_ialloc(
 	 */
 	if ((irix_sgid_inherit) &&
 	    (inode->i_mode & S_ISGID) &&
-	    (!in_group_p(xfs_gid_to_kgid(ip->i_d.di_gid))))
+	    (!in_group_p(inode->i_gid)))
 		inode->i_mode &= ~S_ISGID;
 
 	ip->i_d.di_size = 0;
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index d90e7811ccdd..ed9529e3babf 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -335,8 +335,8 @@ xfs_inode_to_log_dinode(
 
 	to->di_version = from->di_version;
 	to->di_format = from->di_format;
-	to->di_uid = from->di_uid;
-	to->di_gid = from->di_gid;
+	to->di_uid = i_uid_read(inode);
+	to->di_gid = i_gid_read(inode);
 	to->di_projid_lo = from->di_projid_lo;
 	to->di_projid_hi = from->di_projid_hi;
 
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index c67cfb451fd3..92bb1317536a 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -1333,8 +1333,8 @@ xfs_ioctl_setattr(
 	 * because the i_*dquot fields will get updated anyway.
 	 */
 	if (XFS_IS_QUOTA_ON(mp)) {
-		code = xfs_qm_vop_dqalloc(ip, ip->i_d.di_uid,
-					 ip->i_d.di_gid, fa->fsx_projid,
+		code = xfs_qm_vop_dqalloc(ip, i_uid_read(VFS_I(ip)),
+					 i_gid_read(VFS_I(ip)), fa->fsx_projid,
 					 XFS_QMOPT_PQUOTA, &udqp, NULL, &pdqp);
 		if (code)
 			return code;
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index 22c16155f1b4..c3807a7ae466 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -643,8 +643,8 @@ xfs_setattr_nonsize(
 		 */
 		ASSERT(udqp == NULL);
 		ASSERT(gdqp == NULL);
-		error = xfs_qm_vop_dqalloc(ip, xfs_kuid_to_uid(uid),
-					   xfs_kgid_to_gid(gid),
+		error = xfs_qm_vop_dqalloc(ip, from_kuid(inode->i_sb->s_user_ns, uid),
+					   from_kgid(inode->i_sb->s_user_ns, gid),
 					   xfs_get_projid(ip),
 					   qflags, &udqp, &gdqp, NULL);
 		if (error)
@@ -714,8 +714,8 @@ xfs_setattr_nonsize(
 				olddquot1 = xfs_qm_vop_chown(tp, ip,
 							&ip->i_udquot, udqp);
 			}
-			ip->i_d.di_uid = xfs_kuid_to_uid(uid);
 			inode->i_uid = uid;
+
 		}
 		if (!gid_eq(igid, gid)) {
 			if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_GQUOTA_ON(mp)) {
@@ -726,7 +726,6 @@ xfs_setattr_nonsize(
 				olddquot2 = xfs_qm_vop_chown(tp, ip,
 							&ip->i_gdquot, gdqp);
 			}
-			ip->i_d.di_gid = xfs_kgid_to_gid(gid);
 			inode->i_gid = gid;
 		}
 	}
@@ -1213,9 +1212,6 @@ xfs_setup_inode(
 	/* make the inode look hashed for the writeback code */
 	hlist_add_fake(&inode->i_hash);
 
-	inode->i_uid    = xfs_uid_to_kuid(ip->i_d.di_uid);
-	inode->i_gid    = xfs_gid_to_kgid(ip->i_d.di_gid);
-
 	switch (inode->i_mode & S_IFMT) {
 	case S_IFBLK:
 	case S_IFCHR:
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c
index 66e881790c17..bcfc7f38e8e2 100644
--- a/fs/xfs/xfs_itable.c
+++ b/fs/xfs/xfs_itable.c
@@ -88,8 +88,8 @@ xfs_bulkstat_one_int(
 	buf->bs_projid_lo = dic->di_projid_lo;
 	buf->bs_projid_hi = dic->di_projid_hi;
 	buf->bs_ino = ino;
-	buf->bs_uid = dic->di_uid;
-	buf->bs_gid = dic->di_gid;
+	buf->bs_uid = i_uid_read(inode);
+	buf->bs_gid = i_gid_read(inode);
 	buf->bs_size = dic->di_size;
 
 	buf->bs_nlink = inode->i_nlink;
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c
index b669b123287b..0f3692123267 100644
--- a/fs/xfs/xfs_qm.c
+++ b/fs/xfs/xfs_qm.c
@@ -341,18 +341,18 @@ xfs_qm_dqattach_locked(
 	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
 
 	if (XFS_IS_UQUOTA_ON(mp) && !ip->i_udquot) {
-		error = xfs_qm_dqattach_one(ip, ip->i_d.di_uid, XFS_DQ_USER,
-						flags & XFS_QMOPT_DQALLOC,
-						&ip->i_udquot);
+		error = xfs_qm_dqattach_one(ip, i_uid_read(VFS_I(ip)),
+				XFS_DQ_USER, flags & XFS_QMOPT_DQALLOC,
+				&ip->i_udquot);
 		if (error)
 			goto done;
 		ASSERT(ip->i_udquot);
 	}
 
 	if (XFS_IS_GQUOTA_ON(mp) && !ip->i_gdquot) {
-		error = xfs_qm_dqattach_one(ip, ip->i_d.di_gid, XFS_DQ_GROUP,
-						flags & XFS_QMOPT_DQALLOC,
-						&ip->i_gdquot);
+		error = xfs_qm_dqattach_one(ip, i_gid_read(VFS_I(ip)),
+				XFS_DQ_GROUP, flags & XFS_QMOPT_DQALLOC,
+				&ip->i_gdquot);
 		if (error)
 			goto done;
 		ASSERT(ip->i_gdquot);
@@ -1210,14 +1210,14 @@ xfs_qm_dqusage_adjust(
 	 * and quotaoffs don't race. (Quotachecks happen at mount time only).
 	 */
 	if (XFS_IS_UQUOTA_ON(mp)) {
-		error = xfs_qm_quotacheck_dqadjust(ip, ip->i_d.di_uid,
+		error = xfs_qm_quotacheck_dqadjust(ip, i_uid_read(VFS_I(ip)),
 						   XFS_DQ_USER, nblks, rtblks);
 		if (error)
 			goto error0;
 	}
 
 	if (XFS_IS_GQUOTA_ON(mp)) {
-		error = xfs_qm_quotacheck_dqadjust(ip, ip->i_d.di_gid,
+		error = xfs_qm_quotacheck_dqadjust(ip, i_gid_read(VFS_I(ip)),
 						   XFS_DQ_GROUP, nblks, rtblks);
 		if (error)
 			goto error0;
@@ -1650,7 +1650,7 @@ xfs_qm_vop_dqalloc(
 	xfs_ilock(ip, lockflags);
 
 	if ((flags & XFS_QMOPT_INHERIT) && XFS_INHERIT_GID(ip))
-		gid = ip->i_d.di_gid;
+		gid = i_gid_read(VFS_I(ip));
 
 	/*
 	 * Attach the dquot(s) to this inode, doing a dquot allocation
@@ -1665,7 +1665,7 @@ xfs_qm_vop_dqalloc(
 	}
 
 	if ((flags & XFS_QMOPT_UQUOTA) && XFS_IS_UQUOTA_ON(mp)) {
-		if (ip->i_d.di_uid != uid) {
+		if (i_uid_read(VFS_I(ip)) != uid) {
 			/*
 			 * What we need is the dquot that has this uid, and
 			 * if we send the inode to dqget, the uid of the inode
@@ -1701,7 +1701,7 @@ xfs_qm_vop_dqalloc(
 		}
 	}
 	if ((flags & XFS_QMOPT_GQUOTA) && XFS_IS_GQUOTA_ON(mp)) {
-		if (ip->i_d.di_gid != gid) {
+		if (i_gid_read(VFS_I(ip)) != gid) {
 			xfs_iunlock(ip, lockflags);
 			error = xfs_qm_dqget(mp, NULL, gid,
 						 XFS_DQ_GROUP,
@@ -1835,7 +1835,7 @@ xfs_qm_vop_chown_reserve(
 			XFS_QMOPT_RES_RTBLKS : XFS_QMOPT_RES_REGBLKS;
 
 	if (XFS_IS_UQUOTA_ON(mp) && udqp &&
-	    ip->i_d.di_uid != be32_to_cpu(udqp->q_core.d_id)) {
+	    i_uid_read(VFS_I(ip)) != be32_to_cpu(udqp->q_core.d_id)) {
 		udq_delblks = udqp;
 		/*
 		 * If there are delayed allocation blocks, then we have to
@@ -1848,7 +1848,7 @@ xfs_qm_vop_chown_reserve(
 		}
 	}
 	if (XFS_IS_GQUOTA_ON(ip->i_mount) && gdqp &&
-	    ip->i_d.di_gid != be32_to_cpu(gdqp->q_core.d_id)) {
+	    i_gid_read(VFS_I(ip)) != be32_to_cpu(gdqp->q_core.d_id)) {
 		gdq_delblks = gdqp;
 		if (delblks) {
 			ASSERT(ip->i_gdquot);
@@ -1945,14 +1945,14 @@ xfs_qm_vop_create_dqattach(
 
 	if (udqp && XFS_IS_UQUOTA_ON(mp)) {
 		ASSERT(ip->i_udquot == NULL);
-		ASSERT(ip->i_d.di_uid == be32_to_cpu(udqp->q_core.d_id));
+		ASSERT(i_uid_read(VFS_I(ip)) == be32_to_cpu(udqp->q_core.d_id));
 
 		ip->i_udquot = xfs_qm_dqhold(udqp);
 		xfs_trans_mod_dquot(tp, udqp, XFS_TRANS_DQ_ICOUNT, 1);
 	}
 	if (gdqp && XFS_IS_GQUOTA_ON(mp)) {
 		ASSERT(ip->i_gdquot == NULL);
-		ASSERT(ip->i_d.di_gid == be32_to_cpu(gdqp->q_core.d_id));
+		ASSERT(i_gid_read(VFS_I(ip)) == be32_to_cpu(gdqp->q_core.d_id));
 		ip->i_gdquot = xfs_qm_dqhold(gdqp);
 		xfs_trans_mod_dquot(tp, gdqp, XFS_TRANS_DQ_ICOUNT, 1);
 	}

^ permalink raw reply related	[flat|nested] 17+ messages in thread

* Re: xfs: fix inode uid/gid initialization
  2017-02-13 19:43 ` Christoph Hellwig
@ 2017-02-13 20:33   ` James Bottomley
  2017-02-13 21:34   ` Dave Chinner
  1 sibling, 0 replies; 17+ messages in thread
From: James Bottomley @ 2017-02-13 20:33 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: linux-xfs, linux-fsdevel, Eric W. Biederman, Seth Forshee

On Mon, 2017-02-13 at 11:43 -0800, Christoph Hellwig wrote:
> On Mon, Feb 13, 2017 at 09:46:41AM -0800, James Bottomley wrote:
> > I was debugging a creation failure using a vfs shifting patch set
> > and
> > discovered that xfs itself doesn't actually respect the superblock
> > namespace in a couple of places (these showed up as files with the
> > wrong ownership in my tests).
> 
> Can you submit your test case to xfstests?  It would be good to have
> testing for this in the regular test runs.

I will eventually ... I'm planning on adding a whole set.  This issue
was just found by untarring a container image and then finding the ids
were wrong ... 

> > The fix is to convert xfs away from hand
> > rolling inode_init_owner() and to use the i_uid/gid_read/write
> > functions.
> 
> What about the various quota users of xfs_kuid_to_uid/gid in
> the create / symlink path?

Yes, looking at it again, xfs_qm_vop_dqalloc() is in terms of the
filesystem view, so current_fsuid(), which gives the uid in the kernel
view, needs to be transformed through the s_user_ns to get it into that
view.

Probably there needs to be an inode_fsuid/fsgid() (similar to
i_uid/gid_read()) that returns the filesystem view of fsuid/fsgid.

>   I suspect they should be handled the same.
> 
> Also with your patch the di_uid/gid fields should probably just
> go away as they are pointless now.  Something like the patch below,
> although it still doesn't take care of the quota issues pointed out
> above.

Yes, I'll go for that.

James

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: xfs: fix inode uid/gid initialization
  2017-02-13 19:43 ` Christoph Hellwig
  2017-02-13 20:33   ` James Bottomley
@ 2017-02-13 21:34   ` Dave Chinner
  2017-02-14  6:08     ` Christoph Hellwig
  1 sibling, 1 reply; 17+ messages in thread
From: Dave Chinner @ 2017-02-13 21:34 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: James Bottomley, linux-xfs, linux-fsdevel, Eric W. Biederman,
	Seth Forshee

On Mon, Feb 13, 2017 at 11:43:37AM -0800, Christoph Hellwig wrote:
> --- a/fs/xfs/xfs_inode.c
> +++ b/fs/xfs/xfs_inode.c
> @@ -814,18 +814,10 @@ xfs_ialloc(
>  	if (ip->i_d.di_version == 1)
>  		ip->i_d.di_version = 2;
>  
> -	inode->i_mode = mode;
> +	inode_init_owner(inode, pip ? VFS_I(pip) : NULL, mode);
>  	set_nlink(inode, nlink);
> -	ip->i_d.di_uid = xfs_kuid_to_uid(current_fsuid());
> -	ip->i_d.di_gid = xfs_kgid_to_gid(current_fsgid());
>  	xfs_set_projid(ip, prid);
>  
> -	if (pip && XFS_INHERIT_GID(pip)) {
> -		ip->i_d.di_gid = pip->i_d.di_gid;
> -		if ((VFS_I(pip)->i_mode & S_ISGID) && S_ISDIR(mode))
> -			inode->i_mode |= S_ISGID;
> -	}
> -

Doesn't this hunk break the "nogrpid" mount option?

Cheers,

Dave.
-- 
Dave Chinner
david@fromorbit.com

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: xfs: fix inode uid/gid initialization
  2017-02-13 21:34   ` Dave Chinner
@ 2017-02-14  6:08     ` Christoph Hellwig
  2017-02-14  6:27       ` James Bottomley
  0 siblings, 1 reply; 17+ messages in thread
From: Christoph Hellwig @ 2017-02-14  6:08 UTC (permalink / raw)
  To: Dave Chinner
  Cc: Christoph Hellwig, James Bottomley, linux-xfs, linux-fsdevel,
	Eric W. Biederman, Seth Forshee

On Tue, Feb 14, 2017 at 08:34:16AM +1100, Dave Chinner wrote:
> >  
> > -	if (pip && XFS_INHERIT_GID(pip)) {
> > -		ip->i_d.di_gid = pip->i_d.di_gid;
> > -		if ((VFS_I(pip)->i_mode & S_ISGID) && S_ISDIR(mode))
> > -			inode->i_mode |= S_ISGID;
> > -	}
> > -
> 
> Doesn't this hunk break the "nogrpid" mount option?

It does.

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: xfs: fix inode uid/gid initialization
  2017-02-14  6:08     ` Christoph Hellwig
@ 2017-02-14  6:27       ` James Bottomley
  2017-02-14  6:28         ` [PATCH 1/2] fs: add inode helpers for fsuid and fsgid James Bottomley
                           ` (2 more replies)
  0 siblings, 3 replies; 17+ messages in thread
From: James Bottomley @ 2017-02-14  6:27 UTC (permalink / raw)
  To: Christoph Hellwig, Dave Chinner
  Cc: linux-xfs, linux-fsdevel, Eric W. Biederman, Seth Forshee

On Mon, 2017-02-13 at 22:08 -0800, Christoph Hellwig wrote:
> On Tue, Feb 14, 2017 at 08:34:16AM +1100, Dave Chinner wrote:
> > >  
> > > -	if (pip && XFS_INHERIT_GID(pip)) {
> > > -		ip->i_d.di_gid = pip->i_d.di_gid;
> > > -		if ((VFS_I(pip)->i_mode & S_ISGID) &&
> > > S_ISDIR(mode))
> > > -			inode->i_mode |= S_ISGID;
> > > -	}
> > > -
> > 
> > Doesn't this hunk break the "nogrpid" mount option?
> 
> It does.

OK, so I'll fix up the s_user_ns problems and I'll let you sort out
the internals of removing the di_uid/gid if you wish to.  I checked the
quota code and I think there are only a couple of places you're using
the kernel view of the ids where you should be using the filesystem
view.  They're all identified by current_fsuid/fsgid(), so I think
(with the helper in patch 1) that this is the fix.

James

^ permalink raw reply	[flat|nested] 17+ messages in thread

* [PATCH 1/2] fs: add inode helpers for fsuid and fsgid
  2017-02-14  6:27       ` James Bottomley
@ 2017-02-14  6:28         ` James Bottomley
  2017-02-14  7:46           ` Eric W. Biederman
  2017-02-14  6:29         ` [PATCH 2/2] xfs: fix inode uid/gid initialization James Bottomley
  2017-02-14  7:58         ` Christoph Hellwig
  2 siblings, 1 reply; 17+ messages in thread
From: James Bottomley @ 2017-02-14  6:28 UTC (permalink / raw)
  To: Christoph Hellwig, Dave Chinner
  Cc: linux-xfs, linux-fsdevel, Eric W. Biederman, Seth Forshee

Now that we have two different views of filesystem ids (the filesystem
view and the kernel view), we have a problem in that
current_fsuid/fsgid() return the kernel view but are sometimes used in
filesystem code where the filesystem view should be used.  This patch
introduces helpers to produce the filesystem view of current fsuid and
fsgid.

Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/include/linux/cred.h b/include/linux/cred.h
index f0e70a1..18e9c41 100644
--- a/include/linux/cred.h
+++ b/include/linux/cred.h
@@ -399,4 +399,9 @@ do {						\
 	*(_fsgid) = __cred->fsgid;		\
 } while(0)
 
+/* return the current id in the filesystem view */
+#define i_fsuid(i) from_kuid((i)->i_sb->s_user_ns, current_fsuid())
+#define i_fsgid(i) from_kgid((i)->i_sb->s_user_ns, current_fsgid())
+
+
 #endif /* _LINUX_CRED_H */

^ permalink raw reply related	[flat|nested] 17+ messages in thread

* [PATCH 2/2] xfs: fix inode uid/gid initialization
  2017-02-14  6:27       ` James Bottomley
  2017-02-14  6:28         ` [PATCH 1/2] fs: add inode helpers for fsuid and fsgid James Bottomley
@ 2017-02-14  6:29         ` James Bottomley
  2017-02-14  7:58         ` Christoph Hellwig
  2 siblings, 0 replies; 17+ messages in thread
From: James Bottomley @ 2017-02-14  6:29 UTC (permalink / raw)
  To: Christoph Hellwig, Dave Chinner
  Cc: linux-xfs, linux-fsdevel, Eric W. Biederman, Seth Forshee

I was debugging a creation failure using a vfs shifting patch set and
discovered that xfs itself doesn't actually respect the superblock
namespace in a couple of places (these showed up as files with the
wrong ownership in my tests).  The fix is to convert xfs away from hand
rolling inode_init_owner() and to use the i_uid/gid_read/write
functions.

The rule should be that we use the i_uid/gid_read/write() functions
when converting to or from the filesystem di_uid and di_gid view.

Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index de32f0f..291a3ac 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -814,10 +814,10 @@ xfs_ialloc(
 	if (ip->i_d.di_version == 1)
 		ip->i_d.di_version = 2;
 
-	inode->i_mode = mode;
+	inode_init_owner(inode, pip ? VFS_I(pip) : NULL, mode);
 	set_nlink(inode, nlink);
-	ip->i_d.di_uid = xfs_kuid_to_uid(current_fsuid());
-	ip->i_d.di_gid = xfs_kgid_to_gid(current_fsgid());
+	ip->i_d.di_uid = i_uid_read(inode);
+	ip->i_d.di_gid = i_gid_read(inode);
 	xfs_set_projid(ip, prid);
 
 	if (pip && XFS_INHERIT_GID(pip)) {
@@ -1172,10 +1172,9 @@ xfs_create(
 	/*
 	 * Make sure that we have allocated dquot(s) on disk.
 	 */
-	error = xfs_qm_vop_dqalloc(dp, xfs_kuid_to_uid(current_fsuid()),
-					xfs_kgid_to_gid(current_fsgid()), prid,
-					XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT,
-					&udqp, &gdqp, &pdqp);
+	error = xfs_qm_vop_dqalloc(dp, i_fsuid(VFS_I(dp)), i_fsgid(VFS_I(dp)),
+				   prid, XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT,
+				   &udqp, &gdqp, &pdqp);
 	if (error)
 		return error;
 
@@ -1347,10 +1346,9 @@ xfs_create_tmpfile(
 	/*
 	 * Make sure that we have allocated dquot(s) on disk.
 	 */
-	error = xfs_qm_vop_dqalloc(dp, xfs_kuid_to_uid(current_fsuid()),
-				xfs_kgid_to_gid(current_fsgid()), prid,
-				XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT,
-				&udqp, &gdqp, &pdqp);
+	error = xfs_qm_vop_dqalloc(dp, i_fsuid(VFS_I(dp)), i_fsgid(VFS_I(dp)),
+				   prid, XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT,
+				   &udqp, &gdqp, &pdqp);
 	if (error)
 		return error;
 
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index 22c1615..306766d 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -643,8 +643,8 @@ xfs_setattr_nonsize(
 		 */
 		ASSERT(udqp == NULL);
 		ASSERT(gdqp == NULL);
-		error = xfs_qm_vop_dqalloc(ip, xfs_kuid_to_uid(uid),
-					   xfs_kgid_to_gid(gid),
+		error = xfs_qm_vop_dqalloc(ip, from_kuid(inode->i_sb->s_user_ns, uid),
+					   from_kgid(inode->i_sb->s_user_ns, gid),
 					   xfs_get_projid(ip),
 					   qflags, &udqp, &gdqp, NULL);
 		if (error)
@@ -714,8 +714,9 @@ xfs_setattr_nonsize(
 				olddquot1 = xfs_qm_vop_chown(tp, ip,
 							&ip->i_udquot, udqp);
 			}
-			ip->i_d.di_uid = xfs_kuid_to_uid(uid);
 			inode->i_uid = uid;
+			ip->i_d.di_uid = i_uid_read(inode);
+
 		}
 		if (!gid_eq(igid, gid)) {
 			if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_GQUOTA_ON(mp)) {
@@ -726,8 +727,8 @@ xfs_setattr_nonsize(
 				olddquot2 = xfs_qm_vop_chown(tp, ip,
 							&ip->i_gdquot, gdqp);
 			}
-			ip->i_d.di_gid = xfs_kgid_to_gid(gid);
 			inode->i_gid = gid;
+			ip->i_d.di_gid = i_gid_read(inode);
 		}
 	}
 
@@ -1213,8 +1214,8 @@ xfs_setup_inode(
 	/* make the inode look hashed for the writeback code */
 	hlist_add_fake(&inode->i_hash);
 
-	inode->i_uid    = xfs_uid_to_kuid(ip->i_d.di_uid);
-	inode->i_gid    = xfs_gid_to_kgid(ip->i_d.di_gid);
+	i_uid_write(inode, ip->i_d.di_uid);
+	i_gid_write(inode, ip->i_d.di_gid);
 
 	switch (inode->i_mode & S_IFMT) {
 	case S_IFBLK:
diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c
index f2cb45e..49278e1 100644
--- a/fs/xfs/xfs_symlink.c
+++ b/fs/xfs/xfs_symlink.c
@@ -211,11 +211,9 @@ xfs_symlink(
 	/*
 	 * Make sure that we have allocated dquot(s) on disk.
 	 */
-	error = xfs_qm_vop_dqalloc(dp,
-			xfs_kuid_to_uid(current_fsuid()),
-			xfs_kgid_to_gid(current_fsgid()), prid,
-			XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT,
-			&udqp, &gdqp, &pdqp);
+	error = xfs_qm_vop_dqalloc(dp, i_fsuid(VFS_I(dp)), i_fsgid(VFS_I(dp)),
+				   prid, XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT,
+				   &udqp, &gdqp, &pdqp);
 	if (error)
 		return error;
 

^ permalink raw reply related	[flat|nested] 17+ messages in thread

* Re: [PATCH 1/2] fs: add inode helpers for fsuid and fsgid
  2017-02-14  6:28         ` [PATCH 1/2] fs: add inode helpers for fsuid and fsgid James Bottomley
@ 2017-02-14  7:46           ` Eric W. Biederman
  2017-02-14  8:00             ` Christoph Hellwig
  2017-02-14 16:09             ` James Bottomley
  0 siblings, 2 replies; 17+ messages in thread
From: Eric W. Biederman @ 2017-02-14  7:46 UTC (permalink / raw)
  To: James Bottomley
  Cc: Christoph Hellwig, Dave Chinner, linux-xfs, linux-fsdevel, Seth Forshee

James Bottomley <James.Bottomley@HansenPartnership.com> writes:

> Now that we have two different views of filesystem ids (the filesystem
> view and the kernel view), we have a problem in that
> current_fsuid/fsgid() return the kernel view but are sometimes used in
> filesystem code where the filesystem view should be used.  This patch
> introduces helpers to produce the filesystem view of current fsuid and
> fsgid.

If I am reading this right what we are seeing is that xfs explicitly
opted out of type safety with predictable results.   Accidentally
confusing kuids and uids, which is potentially a security issue.

All of that said where are you getting sb->s_user_ns != &init_user_ns
for an xfs filesystem?  There are quite a few xfs interfaces that are
not ready for that.   xfs has a very wide userspace interface of ioctls
that all needs to be looked at and addressed carefully if there is
anything like this going on.

I think we really need to ask if we should use kuids and kgids for the
xfs internal quota code.  At the end of the day that is going to be
a whole lot less error prone.

> Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>
>
> diff --git a/include/linux/cred.h b/include/linux/cred.h
> index f0e70a1..18e9c41 100644
> --- a/include/linux/cred.h
> +++ b/include/linux/cred.h
> @@ -399,4 +399,9 @@ do {						\
>  	*(_fsgid) = __cred->fsgid;		\
>  } while(0)
>  
> +/* return the current id in the filesystem view */
> +#define i_fsuid(i) from_kuid((i)->i_sb->s_user_ns, current_fsuid())
> +#define i_fsgid(i) from_kgid((i)->i_sb->s_user_ns, current_fsgid())

Could we please place these helpers in fs.h?
That should allow them to become inline functions and live with the
existing filesystem helpers in there.

My gut says the names disk_fsuid(i) and disk_fsgid(i) would be clearer.

Of course all of this has the challenge of error handling in the case
when current_fsuid or current_fsgid do not map into the current
filesystem.

Eric

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: xfs: fix inode uid/gid initialization
  2017-02-14  6:27       ` James Bottomley
  2017-02-14  6:28         ` [PATCH 1/2] fs: add inode helpers for fsuid and fsgid James Bottomley
  2017-02-14  6:29         ` [PATCH 2/2] xfs: fix inode uid/gid initialization James Bottomley
@ 2017-02-14  7:58         ` Christoph Hellwig
  2 siblings, 0 replies; 17+ messages in thread
From: Christoph Hellwig @ 2017-02-14  7:58 UTC (permalink / raw)
  To: James Bottomley
  Cc: Christoph Hellwig, Dave Chinner, linux-xfs, linux-fsdevel,
	Eric W. Biederman, Seth Forshee

On Mon, Feb 13, 2017 at 10:27:31PM -0800, James Bottomley wrote:
> OK, so I'll fix up the s_user_mount problems and I'll let you sort out
> the internals of removing the di_uid/gid if you wish to.

We'll need to sort that out, and I'd rather do that than reinterpreting
the fields.  But I can take care of it by amending or redoing your
patch if you don't feel like poking too deep into XFS.

What we need now is test cases exercising a non-standard s_user_ns
so that we have test coverage for these changes.

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH 1/2] fs: add inode helpers for fsuid and fsgid
  2017-02-14  7:46           ` Eric W. Biederman
@ 2017-02-14  8:00             ` Christoph Hellwig
  2017-02-14 16:09             ` James Bottomley
  1 sibling, 0 replies; 17+ messages in thread
From: Christoph Hellwig @ 2017-02-14  8:00 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: James Bottomley, Christoph Hellwig, Dave Chinner, linux-xfs,
	linux-fsdevel, Seth Forshee

On Tue, Feb 14, 2017 at 08:46:32PM +1300, Eric W. Biederman wrote:
> All of that said where are you getting sb->s_user_ns != &init_user_ns
> for an xfs filesystem?  There are quite a few xfs interfaces that are
> not ready for that.   xfs has a very wide userspace interface of ioctls
> that all needs to be looked at and addressed carefully if there is
> anything like this going on.

The only thing exposing uids/gid is the bulkstat code, and that's
easy to cover.

> > +/* return the current id in the filesystem view */
> > +#define i_fsuid(i) from_kuid((i)->i_sb->s_user_ns, current_fsuid())
> > +#define i_fsgid(i) from_kgid((i)->i_sb->s_user_ns, current_fsgid())
> 
> Could we please place these helpers in fs.h?
> That should allow them to become inline functions and live with the
> existing filesystem helpers in there.

And give them better names, i_* is rather cryptic.

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH 1/2] fs: add inode helpers for fsuid and fsgid
  2017-02-14  7:46           ` Eric W. Biederman
  2017-02-14  8:00             ` Christoph Hellwig
@ 2017-02-14 16:09             ` James Bottomley
  2017-02-15  2:29               ` Eric W. Biederman
  1 sibling, 1 reply; 17+ messages in thread
From: James Bottomley @ 2017-02-14 16:09 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: Christoph Hellwig, Dave Chinner, linux-xfs, linux-fsdevel, Seth Forshee

On Tue, 2017-02-14 at 20:46 +1300, Eric W. Biederman wrote:
> James Bottomley <James.Bottomley@HansenPartnership.com> writes:
> 
> > Now that we have two different views of filesystem ids (the 
> > filesystem view and the kernel view), we have a problem in that
> > current_fsuid/fsgid() return the kernel view but are sometimes used 
> > in filesystem code where the filesystem view shoud be used.  This
> > patch introduces helpers to produce the filesystem view of current 
> > fsuid and fsgid.
> 
> If I am reading this right what we are seeing is that xfs explicitly
> opted out of type safety with predictable results.   Accidentally
> confusing kuids and uids, which is potentially security issue.
> 
> All of that said where are you getting sb->s_user_ns != &init_user_ns
> for an xfs filesystem?  There are quite a few xfs interfaces that are
> not ready for that.   xfs has a very wide userspace interface of 
> ioctls that all needs to be looked at and addressed carefully if 
> there is anything like this going on.
> 
> I think we really need to ask if we should use kuids and kgids for 
> the xfs internal quota code.

That question devolves to who administers quota operations in
containers.   The answer is usually that apparent root in the container
needs to be able to administer quotas as though they were real root
outside, so transforming the user quota calculations is correct to
first order.

To second order we need a way of controlling the container's quota which is why we've had a flurry of two level quota patches over the years.  We've finally settled on group or project quotas and, if you look at xfs, you'll see the project quota will work even in the face of uid shifts in the user quota, so I think it's all working.

>   At the end of the day that is going to be a whole lot less error
> prone.

It would make the job of the filesystem writer harder: a lot of quota
code is very close to the disk, so they'd need a whole lot of
transforms to kernel view.

> > Signed-off-by: James Bottomley <
> > James.Bottomley@HansenPartnership.com>
> > 
> > diff --git a/include/linux/cred.h b/include/linux/cred.h
> > index f0e70a1..18e9c41 100644
> > --- a/include/linux/cred.h
> > +++ b/include/linux/cred.h
> > @@ -399,4 +399,9 @@ do {						
> > \
> >  	*(_fsgid) = __cred->fsgid;		\
> >  } while(0)
> >  
> > +/* return the current id in the filesystem view */
> > +#define i_fsuid(i) from_kuid((i)->i_sb->s_user_ns,
> > current_fsuid())
> > +#define i_fsgid(i) from_kgid((i)->i_sb->s_user_ns,
> > current_fsgid())
> 
> Could we please place these helpers in fs.h?

We could ... the current_ helpers are in cred.h, which is why I put the
new ones there, but I've no strong feelings either way.

> That should allow them to become inline functions and live with the
> existing filesystem helpers in there.

I don't believe they did.  There's code in most filesystems (usually in
quota) where they need to perform calculations with the current user
id.  The problem is that with s_user_ns, they can't use current_fsuid()
because it's the kernel view and the places where the filesystem is
using it are often in the filesystem view.

> My gut says the names disk_fsuid(i) and disk_fsgid(i) would be
> clearer.

I chose i_fsuid/fsgid for two reasons

   1. because it takes an inode as an argument.
   2. to be consistent with i_uid_read/write() which are the other
      namespace shifting primitives for filesystems.

I think 2. is quite compelling, so if you want a different name for
this, we should rename i_uid/gid_read/write() as well.

> Of course all of this has the challenge of error handling in the case
> when current_fsuid or current_fsgid do not map into the current
> filesystem.

Yes, I think it actually fails in the quota case because unmapped
usually gives uid/gid -1 which has no quota set, so you can bust out of
your quota with the right s_user_ns.  On the other hand if you can set
up s_user_ns then you should be admin for that quota and it's caveat
emptor.

James

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH 1/2] fs: add inode helpers for fsuid and fsgid
  2017-02-14 16:09             ` James Bottomley
@ 2017-02-15  2:29               ` Eric W. Biederman
  2017-02-16 15:43                 ` James Bottomley
  0 siblings, 1 reply; 17+ messages in thread
From: Eric W. Biederman @ 2017-02-15  2:29 UTC (permalink / raw)
  To: James Bottomley
  Cc: Christoph Hellwig, Dave Chinner, linux-xfs, linux-fsdevel, Seth Forshee

James Bottomley <James.Bottomley@HansenPartnership.com> writes:

> On Tue, 2017-02-14 at 20:46 +1300, Eric W. Biederman wrote:
>> James Bottomley <James.Bottomley@HansenPartnership.com> writes:
>> 
>> > Now that we have two different views of filesystem ids (the 
>> > filesystem view and the kernel view), we have a problem in that
>> > current_fsuid/fsgid() return the kernel view but are sometimes used 
>> > in filesystem code where the filesystem view shoud be used.  This
>> > patch introduces helpers to produce the filesystem view of current 
>> > fsuid and fsgid.
>> 
>> If I am reading this right what we are seeing is that xfs explicitly
>> opted out of type safety with predictable results.   Accidentally
>> confusing kuids and uids, which is potentially security issue.
>> 
>> All of that said where are you getting sb->s_user_ns != &init_user_ns
>> for an xfs filesystem?

James please answer this question:

 Where are you getting sb->s_user_ns != &init_user_ns for an xfs filesystem?

None of this matters if sb->s_user_ns == &init_user_ns.

This is significant because only xfs keeps any in-core data structure
in its on-disk encoding.  So this problem is xfs specific.   So
understanding how you are getting xfs to have sb->s_user_ns !=
&init_user_ns is important for discussing which direction we go with
helper functions here.

xfs with sb->s_user_ns == &init_user_ns is perfectly fine and as such no
fixes are needed.

Eric

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH 1/2] fs: add inode helpers for fsuid and fsgid
  2017-02-15  2:29               ` Eric W. Biederman
@ 2017-02-16 15:43                 ` James Bottomley
  2017-02-17  1:15                   ` Eric W. Biederman
  0 siblings, 1 reply; 17+ messages in thread
From: James Bottomley @ 2017-02-16 15:43 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: Christoph Hellwig, Dave Chinner, linux-xfs, linux-fsdevel, Seth Forshee

On Wed, 2017-02-15 at 15:29 +1300, Eric W. Biederman wrote:
> James Bottomley <James.Bottomley@HansenPartnership.com> writes:
> 
> > On Tue, 2017-02-14 at 20:46 +1300, Eric W. Biederman wrote:
> > > James Bottomley <James.Bottomley@HansenPartnership.com> writes:
> > > 
> > > > Now that we have two different views of filesystem ids (the 
> > > > filesystem view and the kernel view), we have a problem in that
> > > > current_fsuid/fsgid() return the kernel view but are sometimes
> > > > used 
> > > > in filesystem code where the filesystem view shoud be used. 
> > > >  This
> > > > patch introduces helpers to produce the filesystem view of
> > > > current 
> > > > fsuid and fsgid.
> > > 
> > > If I am reading this right what we are seeing is that xfs
> > > explicitly
> > > opted out of type safety with predictable results.   Accidentally
> > > confusing kuids and uids, which is potentially security issue.
> > > 
> > > All of that said where are you getting sb->s_user_ns !=
> > > &init_user_ns
> > > for an xfs filesystem?
> 
> James please answer this question:
> 
>  Where are you getting sb->s_user_ns != &init_user_ns for an xfs
> filesystem?

I'm playing with a patch that allows host admin to set up an
unprivileged container for a guest to use.  One of the extensions is to
allow anything possessing capability(CAP_SYS_ADMIN) to make s_user_ns
follow mnt_ns->user_ns for new mounts (as an option).  The idea was to
see if root could set up an id shifted container with just the current
s_user_ns infrastructure.

> None of this matters if sb->s_user_ns == &init_user_ns.
> 
> This is signification because only xfs keeps any in-core data 
> structure in it's on-disk encoding.  So this problem is xfs specific.
>    So understanding how you are getting xfs to have sb->s_user_ns !=
> &init_user_ns is important for discussing which direction we go with
> helper functions here.
> 
> xfs with sb->s_user_ns == &init_user_ns is perfectly fine and as such 
> no fixes are needed.

So what you're saying is that unless the unprivileged container could
mount the filesystem itself (i.e. only those possessing the
FS_USERNS_MOUNT flag) the filesystems are going to be full of problems
like this.  I suppose whether it's worthwhile trying to fix them all
depends on whether the ability of the administrator to set up an id
shifted container is useful or not.

James

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH 1/2] fs: add inode helpers for fsuid and fsgid
  2017-02-16 15:43                 ` James Bottomley
@ 2017-02-17  1:15                   ` Eric W. Biederman
  2017-02-17 17:12                     ` James Bottomley
  0 siblings, 1 reply; 17+ messages in thread
From: Eric W. Biederman @ 2017-02-17  1:15 UTC (permalink / raw)
  To: James Bottomley
  Cc: Christoph Hellwig, Dave Chinner, linux-xfs, linux-fsdevel, Seth Forshee

James Bottomley <James.Bottomley@HansenPartnership.com> writes:

> On Wed, 2017-02-15 at 15:29 +1300, Eric W. Biederman wrote:
>> James Bottomley <James.Bottomley@HansenPartnership.com> writes:
>> 
>> > On Tue, 2017-02-14 at 20:46 +1300, Eric W. Biederman wrote:
>> > > James Bottomley <James.Bottomley@HansenPartnership.com> writes:
>> > > 
>> > > > Now that we have two different views of filesystem ids (the 
>> > > > filesystem view and the kernel view), we have a problem in that
>> > > > current_fsuid/fsgid() return the kernel view but are sometimes
>> > > > used 
>> > > > in filesystem code where the filesystem view shoud be used. 
>> > > >  This
>> > > > patch introduces helpers to produce the filesystem view of
>> > > > current 
>> > > > fsuid and fsgid.
>> > > 
>> > > If I am reading this right what we are seeing is that xfs
>> > > explicitly
>> > > opted out of type safety with predictable results.   Accidentally
>> > > confusing kuids and uids, which is potentially security issue.
>> > > 
>> > > All of that said where are you getting sb->s_user_ns !=
>> > > &init_user_ns
>> > > for an xfs filesystem?
>> 
>> James please answer this question:
>> 
>>  Where are you getting sb->s_user_ns != &init_user_ns for an xfs
>> filesystem?
>
> I'm playing with a patch that allows host admin to set up an
> unprivileged container for a guest to use.  One of the extensions is to
> allow anything possessing capability(CAP_SYS_ADMIN) to make s_user_ns
> follow mnt_ns->user_ns for new mounts (as an option).  The idea was to
> see if root could set up an id shifted container with just the current
> s_user_ns infrastructure.
>
>> None of this matters if sb->s_user_ns == &init_user_ns.
>> 
>> This is signification because only xfs keeps any in-core data 
>> structure in it's on-disk encoding.  So this problem is xfs specific.
>>    So understanding how you are getting xfs to have sb->s_user_ns !=
>> &init_user_ns is important for discussing which direction we go with
>> helper functions here.
>> 
>> xfs with sb->s_user_ns == &init_user_ns is perfectly fine and as such 
>> no fixes are needed.
>
> So what you're saying is that unless the unprivileged container could
> mount the filesystem itself (i.e. only those possessing the
> FS_USERNS_MOUNT flag) the filesystems are going to be full of problems
> like this.  I suppose whether it's worthwhile trying to fix them all
> depends on whether the ability of the administrator to set up an id
> shifted container is useful or not.

Yes.  Setting s_user_ns and expecting everything to work without a
review/test cycle of the filesystem to shake out any rough edges is
likely to be problematic.  For historical reasons I actually expect xfs
is especially bad in this regard.  So in practice I would definitely
start a feature like that with another filesystem.

I would be happy to have a FS_S_USER_NS flag to say all that is well,
and the filesystem supports s_user_ns != &init_user_ns.  The bar is much
lower if a trusted user with CAP_SYS_ADMIN is mounting the filesystem
than if an unprivileged user is mounting the filesystem.  As we don't
have to worry about specially crafted malicious filesystem images.

In practice I think I would have passed in the user namespace via a file
descriptor to mount rather than inheriting it from the mount namespace
(more flexibility for roughly the same amount of code).

Eric

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH 1/2] fs: add inode helpers for fsuid and fsgid
  2017-02-17  1:15                   ` Eric W. Biederman
@ 2017-02-17 17:12                     ` James Bottomley
  2017-02-20  4:56                       ` Eric W. Biederman
  0 siblings, 1 reply; 17+ messages in thread
From: James Bottomley @ 2017-02-17 17:12 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: Christoph Hellwig, Dave Chinner, linux-xfs, linux-fsdevel, Seth Forshee

On Fri, 2017-02-17 at 14:15 +1300, Eric W. Biederman wrote:
> James Bottomley <James.Bottomley@HansenPartnership.com> writes:
> 
> > On Wed, 2017-02-15 at 15:29 +1300, Eric W. Biederman wrote:
> > > James Bottomley <James.Bottomley@HansenPartnership.com> writes:
> > > 
> > > > On Tue, 2017-02-14 at 20:46 +1300, Eric W. Biederman wrote:
> > > > > James Bottomley <James.Bottomley@HansenPartnership.com>
> > > > > writes:
> > > > > 
> > > > > > Now that we have two different views of filesystem ids (the
> > > > > > filesystem view and the kernel view), we have a problem in 
> > > > > > that current_fsuid/fsgid() return the kernel view but are
> > > > > > sometimes used in filesystem code where the filesystem view 
> > > > > > shoud be used.  This patch introduces helpers to produce 
> > > > > > the filesystem view of current fsuid and fsgid.
> > > > > 
> > > > > If I am reading this right what we are seeing is that xfs
> > > > > explicitly opted out of type safety with predictable results.
> > > > >  Accidentally confusing kuids and uids, which is potentially 
> > > > > security issue.
> > > > > 
> > > > > All of that said where are you getting sb->s_user_ns !=
> > > > > &init_user_ns for an xfs filesystem?
> > > 
> > > James please answer this question:
> > > 
> > >  Where are you getting sb->s_user_ns != &init_user_ns for an xfs
> > > filesystem?
> > 
> > I'm playing with a patch that allows host admin to set up an
> > unprivileged container for a guest to use.  One of the extensions 
> > is to allow anything possessing capability(CAP_SYS_ADMIN) to make
> > s_user_ns follow mnt_ns->user_ns for new mounts (as an option). 
> >  The idea was to see if root could set up an id shifted container 
> > with just the current s_user_ns infrastructure.
> > 
> > > None of this matters if sb->s_user_ns == &init_user_ns.
> > > 
> > > This is signification because only xfs keeps any in-core data 
> > > structure in it's on-disk encoding.  So this problem is xfs
> > > specific.
> > >    So understanding how you are getting xfs to have sb->s_user_ns 
> > > != &init_user_ns is important for discussing which direction we 
> > > go with helper functions here.
> > > 
> > > xfs with sb->s_user_ns == &init_user_ns is perfectly fine and as 
> > > such no fixes are needed.
> > 
> > So what you're saying is that unless the unprivileged container 
> > could mount the filesystem itself (i.e. only those possessing the
> > FS_USERNS_MOUNT flag) the filesystems are going to be full of 
> > problems like this.  I suppose whether it's worthwhile trying to 
> > fix them all depends on whether the ability of the administrator to 
> > set up an id shifted container is useful or not.
> 
> Yes.  Setting s_user_ns and expecting everything to work with a
> review/test cycle of the filesystem to shake out any rough edges is
> likely to be problematic.  For historical reasons I actually expect 
> xfs is especially bad in this regard.  So in practice I would 
> definitely start a feature like that with another filesystem.

It's a pragmatic choice: xfs is the filesystem on my current laptop.  I
know xfs was once very problematic for the user namespace, but having
looked through the code several times, the namespace shifts are now
nicely abstracted and easy to identify, so I don't anticipate any extra
difficulty today.

> I would be happy to have a FS_S_USER_NS flag to say all that is well,
> and the filesystem supports s_user_ns != &init_user_ns.  The bar is 
> much lower if a trusted user with CAP_SYS_ADMIN is mounting the 
> filesystem than if an unprivileged user is mounting the filesystem. 
>  As we don't have to worry about specially crafted malicious
> filesystem images.
> 
> In practice I think I would have passed in the user namespace via a 
> file descriptor to mount rather than inheriting it from the mount
> namespace (more flexibility for roughly the same amount of code).

I agree on this, but let's leave the implementation details on the side
for a while and examine the "should we do this?" question.

I can see two reasons why we might need to have this functionality

   1. Orchestration system use case: the orchestration system wants to
      build an unprivileged container root from an image file or overlay
      (I think this covers docker).
   2. USB (or other) device insertion redirected to container.  In this
      case, we'd like the mount on insertion to follow the container
      user_ns.

The reason I could see not bothering with this is that it doesn't fix
the shift on a subtree issue and fixing that gives a system which can
also be used to solve both cases above.

James

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH 1/2] fs: add inode helpers for fsuid and fsgid
  2017-02-17 17:12                     ` James Bottomley
@ 2017-02-20  4:56                       ` Eric W. Biederman
  0 siblings, 0 replies; 17+ messages in thread
From: Eric W. Biederman @ 2017-02-20  4:56 UTC (permalink / raw)
  To: James Bottomley
  Cc: Christoph Hellwig, Dave Chinner, linux-xfs, linux-fsdevel, Seth Forshee

James Bottomley <James.Bottomley@HansenPartnership.com> writes:

> On Fri, 2017-02-17 at 14:15 +1300, Eric W. Biederman wrote:
>> James Bottomley <James.Bottomley@HansenPartnership.com> writes:
>> 
>> > On Wed, 2017-02-15 at 15:29 +1300, Eric W. Biederman wrote:
>> > > James Bottomley <James.Bottomley@HansenPartnership.com> writes:
>> > > 
>> > > > On Tue, 2017-02-14 at 20:46 +1300, Eric W. Biederman wrote:
>> > > > > James Bottomley <James.Bottomley@HansenPartnership.com>
>> > > > > writes:
>> > > > > 
>> > > > > > Now that we have two different views of filesystem ids (the
>> > > > > > filesystem view and the kernel view), we have a problem in 
>> > > > > > that current_fsuid/fsgid() return the kernel view but are
>> > > > > > sometimes used in filesystem code where the filesystem view 
>> > > > > > shoud be used.  This patch introduces helpers to produce 
>> > > > > > the filesystem view of current fsuid and fsgid.
>> > > > > 
>> > > > > If I am reading this right what we are seeing is that xfs
>> > > > > explicitly opted out of type safety with predictable results.
>> > > > >  Accidentally confusing kuids and uids, which is potentially 
>> > > > > security issue.
>> > > > > 
>> > > > > All of that said where are you getting sb->s_user_ns !=
>> > > > > &init_user_ns for an xfs filesystem?
>> > > 
>> > > James please answer this question:
>> > > 
>> > >  Where are you getting sb->s_user_ns != &init_user_ns for an xfs
>> > > filesystem?
>> > 
>> > I'm playing with a patch that allows host admin to set up an
>> > unprivileged container for a guest to use.  One of the extensions 
>> > is to allow anything possessing capability(CAP_SYS_ADMIN) to make
>> > s_user_ns follow mnt_ns->user_ns for new mounts (as an option). 
>> >  The idea was to see if root could set up an id shifted container 
>> > with just the current s_user_ns infrastructure.
>> > 
>> > > None of this matters if sb->s_user_ns == &init_user_ns.
>> > > 
>> > > This is signification because only xfs keeps any in-core data 
>> > > structure in it's on-disk encoding.  So this problem is xfs
>> > > specific.
>> > >    So understanding how you are getting xfs to have sb->s_user_ns 
>> > > != &init_user_ns is important for discussing which direction we 
>> > > go with helper functions here.
>> > > 
>> > > xfs with sb->s_user_ns == &init_user_ns is perfectly fine and as 
>> > > such no fixes are needed.
>> > 
>> > So what you're saying is that unless the unprivileged container 
>> > could mount the filesystem itself (i.e. only those possessing the
>> > FS_USERNS_MOUNT flag) the filesystems are going to be full of 
>> > problems like this.  I suppose whether it's worthwhile trying to 
>> > fix them all depends on whether the ability of the administrator to 
>> > set up an id shifted container is useful or not.
>> 
>> Yes.  Setting s_user_ns and expecting everything to work with a
>> review/test cycle of the filesystem to shake out any rough edges is
>> likely to be problematic.  For historical reasons I actually expect 
>> xfs is especially bad in this regard.  So in practice I would 
>> definitely start a feature like that with another filesystem.
>
> It's a pragmatic choice: xfs is the filesystem on my current laptop.  I
> know xfs was once very problematic for the user namespace, but having
> looked through the code several times, the namespace shifts are now
> nicely abstracted and easy to identify, so I don't anticipate any extra
> difficulty today.

I think you have already encountered the extra difficulty.  For xfs a
couple of little things need to be fixed.  I expect most filesystems
will pretty much work out of the box.

>> I would be happy to have a FS_S_USER_NS flag to say all that is well,
>> and the filesystem supports s_user_ns != &init_user_ns.  The bar is 
>> much lower if a trusted user with CAP_SYS_ADMIN is mounting the 
>> filesystem than if an unprivileged user is mounting the filesystem. 
>>  As we don't have to worry about specially crafted malicious
>> filesystem images.
>> 
>> In practice I think I would have passed in the user namespace via a 
>> file descriptor to mount rather than inheriting it from the mount
>> namespace (more flexibility for roughly the same amount of code).
>
> I agree on this, but lets leave the implementation details on the side
> for a while and examine the "should we do this?" question.
>
> I can see two reasons why we might need to have this functionality
>
>    1. Orchestration system use case: the orchestration system wants to
>       build an unprivileged container root from an image file or overlay
>       (I think this covers docker).
>    2. USB (or other) device insertion redirected to container.  In this
>       case, we'd like the mount on insertion to follow the container
>       user_ns.

I think those are valid.

The Docker/runc cases that I am familiar with really want the sharing of
base images between containers.  To share the base image between
containers requires having a different mapping per container to separate
them.  The savings on disk space and vfs cache sharing is important for
them.

I am torn on the fact that this sneaks up on the issue of what happens
when someone injects a malicious disk image into this process.  If we
have a full solution to handling malicious disk images we can just set
FS_USERNS_MOUNT.  All of these use cases look like cases where
it would be very easy for the mounter of the filesystem to skip
ensuring they trust the path that generated the filesystem.  On the
other hand that is nothing new.

> The reason I could see not bothering with this is that it doesn't fix
> the shift on a subtree issue and fixing that gives a system which can
> also be used to solve both cases above.

The only reasons I have been not bothering with this are:
- Different mappings into different containers.
- Its closeness to S_USER_NS.
- A focus on getting fuse and the generic vfs bits covered and merged.

But at this point I think a generic vfs option that would set s_user_ns
and work on filesystems that opt in would be perfectly reasonable.
Especially since (a) we want to be able to display which user namespace
s_user_ns is in, and a generic mount option seems like a way to sneak it
into existing proc files, and (b) we want the file descriptor parsing code
for shiftfs.

So it seems like we might as well implement the functionality as a
generic mount option and let the filesystems opt in with FS_USERNS_MOUNT
or FS_S_USER_NS if the filesystem is not up to a full unprivileged
mount.

Eric

^ permalink raw reply	[flat|nested] 17+ messages in thread

end of thread, other threads:[~2017-02-20  5:01 UTC | newest]

Thread overview: 17+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-02-13 17:46 xfs: fix inode uid/gid initialization James Bottomley
2017-02-13 19:43 ` Christoph Hellwig
2017-02-13 20:33   ` James Bottomley
2017-02-13 21:34   ` Dave Chinner
2017-02-14  6:08     ` Christoph Hellwig
2017-02-14  6:27       ` James Bottomley
2017-02-14  6:28         ` [PATCH 1/2] fs: add inode helpers for fsuid and fsgid James Bottomley
2017-02-14  7:46           ` Eric W. Biederman
2017-02-14  8:00             ` Christoph Hellwig
2017-02-14 16:09             ` James Bottomley
2017-02-15  2:29               ` Eric W. Biederman
2017-02-16 15:43                 ` James Bottomley
2017-02-17  1:15                   ` Eric W. Biederman
2017-02-17 17:12                     ` James Bottomley
2017-02-20  4:56                       ` Eric W. Biederman
2017-02-14  6:29         ` [PATCH 2/2] xfs: fix inode uid/gid initialization James Bottomley
2017-02-14  7:58         ` Christoph Hellwig

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.