[V11,11/11] fs/xfs: Update xfs_ioctl_setattr_dax_invalidate()
diff mbox series

Message ID 20200428002142.404144-12-ira.weiny@intel.com
State New
Headers show
Series
  • XFS - Enable per-file/per-directory DAX operations V11
Related show

Commit Message

Ira Weiny April 28, 2020, 12:21 a.m. UTC
From: Ira Weiny <ira.weiny@intel.com>

Because of the separation of FS_XFLAG_DAX from S_DAX and the delayed
setting of S_DAX, data invalidation no longer needs to happen when
FS_XFLAG_DAX is changed.

Change xfs_ioctl_setattr_dax_invalidate() to be
xfs_ioctl_dax_check_set_cache() and alter the code to reflect the new
functionality.

Furthermore, we no longer need the locking so we remove the join_flags
logic.

Signed-off-by: Ira Weiny <ira.weiny@intel.com>

---
Changes from V10:
	adjust for renamed d_mark_dontcache() function

Changes from V9:
	Change name of function to xfs_ioctl_setattr_prepare_dax()

Changes from V8:
	Change name of function to xfs_ioctl_dax_check_set_cache()
	Update commit message
	Fix bit manipulations

Changes from V7:
	Use new flag_inode_dontcache()
	Skip don't cache if mount over ride is active.

Changes from v6:
	Fix completely broken implementation and update commit message.
	Use the new VFS layer I_DONTCACHE to facilitate inode eviction
	and S_DAX changing on drop_caches

Changes from v5:
	New patch
---
 fs/xfs/xfs_ioctl.c | 108 +++++++++------------------------------------
 1 file changed, 20 insertions(+), 88 deletions(-)

Comments

Darrick J. Wong April 28, 2020, 8:11 p.m. UTC | #1
On Mon, Apr 27, 2020 at 05:21:42PM -0700, ira.weiny@intel.com wrote:
> From: Ira Weiny <ira.weiny@intel.com>
> 
> Because of the separation of FS_XFLAG_DAX from S_DAX and the delayed
> setting of S_DAX, data invalidation no longer needs to happen when
> FS_XFLAG_DAX is changed.
> 
> Change xfs_ioctl_setattr_dax_invalidate() to be
> xfs_ioctl_dax_check_set_cache() and alter the code to reflect the new
> functionality.
> 
> Furthermore, we no longer need the locking so we remove the join_flags
> logic.
> 
> Signed-off-by: Ira Weiny <ira.weiny@intel.com>

Looks ok,
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>

--D

> 
> ---
> Changes from V10:
> 	adjust for renamed d_mark_dontcache() function
> 
> Changes from V9:
> 	Change name of function to xfs_ioctl_setattr_prepare_dax()
> 
> Changes from V8:
> 	Change name of function to xfs_ioctl_dax_check_set_cache()
> 	Update commit message
> 	Fix bit manipulations
> 
> Changes from V7:
> 	Use new flag_inode_dontcache()
> 	Skip don't cache if mount over ride is active.
> 
> Changes from v6:
> 	Fix completely broken implementation and update commit message.
> 	Use the new VFS layer I_DONTCACHE to facilitate inode eviction
> 	and S_DAX changing on drop_caches
> 
> Changes from v5:
> 	New patch
> ---
>  fs/xfs/xfs_ioctl.c | 108 +++++++++------------------------------------
>  1 file changed, 20 insertions(+), 88 deletions(-)
> 
> diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
> index 104495ac187c..ff474f2c9acf 100644
> --- a/fs/xfs/xfs_ioctl.c
> +++ b/fs/xfs/xfs_ioctl.c
> @@ -1245,64 +1245,26 @@ xfs_ioctl_setattr_xflags(
>  	return 0;
>  }
>  
> -/*
> - * If we are changing DAX flags, we have to ensure the file is clean and any
> - * cached objects in the address space are invalidated and removed. This
> - * requires us to lock out other IO and page faults similar to a truncate
> - * operation. The locks need to be held until the transaction has been committed
> - * so that the cache invalidation is atomic with respect to the DAX flag
> - * manipulation.
> - */
> -static int
> -xfs_ioctl_setattr_dax_invalidate(
> +static void
> +xfs_ioctl_setattr_prepare_dax(
>  	struct xfs_inode	*ip,
> -	struct fsxattr		*fa,
> -	int			*join_flags)
> +	struct fsxattr		*fa)
>  {
> -	struct inode		*inode = VFS_I(ip);
> -	struct super_block	*sb = inode->i_sb;
> -	int			error;
> -
> -	*join_flags = 0;
> -
> -	/*
> -	 * It is only valid to set the DAX flag on regular files and
> -	 * directories on filesystems where the block size is equal to the page
> -	 * size. On directories it serves as an inherited hint so we don't
> -	 * have to check the device for dax support or flush pagecache.
> -	 */
> -	if (fa->fsx_xflags & FS_XFLAG_DAX) {
> -		struct xfs_buftarg	*target = xfs_inode_buftarg(ip);
> -
> -		if (!bdev_dax_supported(target->bt_bdev, sb->s_blocksize))
> -			return -EINVAL;
> -	}
> -
> -	/* If the DAX state is not changing, we have nothing to do here. */
> -	if ((fa->fsx_xflags & FS_XFLAG_DAX) && IS_DAX(inode))
> -		return 0;
> -	if (!(fa->fsx_xflags & FS_XFLAG_DAX) && !IS_DAX(inode))
> -		return 0;
> +	struct xfs_mount	*mp = ip->i_mount;
> +	struct inode            *inode = VFS_I(ip);
>  
>  	if (S_ISDIR(inode->i_mode))
> -		return 0;
> -
> -	/* lock, flush and invalidate mapping in preparation for flag change */
> -	xfs_ilock(ip, XFS_MMAPLOCK_EXCL | XFS_IOLOCK_EXCL);
> -	error = filemap_write_and_wait(inode->i_mapping);
> -	if (error)
> -		goto out_unlock;
> -	error = invalidate_inode_pages2(inode->i_mapping);
> -	if (error)
> -		goto out_unlock;
> -
> -	*join_flags = XFS_MMAPLOCK_EXCL | XFS_IOLOCK_EXCL;
> -	return 0;
> +		return;
>  
> -out_unlock:
> -	xfs_iunlock(ip, XFS_MMAPLOCK_EXCL | XFS_IOLOCK_EXCL);
> -	return error;
> +	if ((mp->m_flags & XFS_MOUNT_DAX_ALWAYS) ||
> +	    (mp->m_flags & XFS_MOUNT_DAX_NEVER))
> +		return;
>  
> +	if (((fa->fsx_xflags & FS_XFLAG_DAX) &&
> +	    !(ip->i_d.di_flags2 & XFS_DIFLAG2_DAX)) ||
> +	    (!(fa->fsx_xflags & FS_XFLAG_DAX) &&
> +	     (ip->i_d.di_flags2 & XFS_DIFLAG2_DAX)))
> +		d_mark_dontcache(inode);
>  }
>  
>  /*
> @@ -1310,17 +1272,10 @@ xfs_ioctl_setattr_dax_invalidate(
>   * have permission to do so. On success, return a clean transaction and the
>   * inode locked exclusively ready for further operation specific checks. On
>   * failure, return an error without modifying or locking the inode.
> - *
> - * The inode might already be IO locked on call. If this is the case, it is
> - * indicated in @join_flags and we take full responsibility for ensuring they
> - * are unlocked from now on. Hence if we have an error here, we still have to
> - * unlock them. Otherwise, once they are joined to the transaction, they will
> - * be unlocked on commit/cancel.
>   */
>  static struct xfs_trans *
>  xfs_ioctl_setattr_get_trans(
> -	struct xfs_inode	*ip,
> -	int			join_flags)
> +	struct xfs_inode	*ip)
>  {
>  	struct xfs_mount	*mp = ip->i_mount;
>  	struct xfs_trans	*tp;
> @@ -1337,8 +1292,7 @@ xfs_ioctl_setattr_get_trans(
>  		goto out_unlock;
>  
>  	xfs_ilock(ip, XFS_ILOCK_EXCL);
> -	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL | join_flags);
> -	join_flags = 0;
> +	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
>  
>  	/*
>  	 * CAP_FOWNER overrides the following restrictions:
> @@ -1359,8 +1313,6 @@ xfs_ioctl_setattr_get_trans(
>  out_cancel:
>  	xfs_trans_cancel(tp);
>  out_unlock:
> -	if (join_flags)
> -		xfs_iunlock(ip, join_flags);
>  	return ERR_PTR(error);
>  }
>  
> @@ -1486,7 +1438,6 @@ xfs_ioctl_setattr(
>  	struct xfs_dquot	*pdqp = NULL;
>  	struct xfs_dquot	*olddquot = NULL;
>  	int			code;
> -	int			join_flags = 0;
>  
>  	trace_xfs_ioctl_setattr(ip);
>  
> @@ -1510,18 +1461,9 @@ xfs_ioctl_setattr(
>  			return code;
>  	}
>  
> -	/*
> -	 * Changing DAX config may require inode locking for mapping
> -	 * invalidation. These need to be held all the way to transaction commit
> -	 * or cancel time, so need to be passed through to
> -	 * xfs_ioctl_setattr_get_trans() so it can apply them to the join call
> -	 * appropriately.
> -	 */
> -	code = xfs_ioctl_setattr_dax_invalidate(ip, fa, &join_flags);
> -	if (code)
> -		goto error_free_dquots;
> +	xfs_ioctl_setattr_prepare_dax(ip, fa);
>  
> -	tp = xfs_ioctl_setattr_get_trans(ip, join_flags);
> +	tp = xfs_ioctl_setattr_get_trans(ip);
>  	if (IS_ERR(tp)) {
>  		code = PTR_ERR(tp);
>  		goto error_free_dquots;
> @@ -1651,7 +1593,6 @@ xfs_ioc_setxflags(
>  	struct fsxattr		fa;
>  	struct fsxattr		old_fa;
>  	unsigned int		flags;
> -	int			join_flags = 0;
>  	int			error;
>  
>  	if (copy_from_user(&flags, arg, sizeof(flags)))
> @@ -1668,18 +1609,9 @@ xfs_ioc_setxflags(
>  	if (error)
>  		return error;
>  
> -	/*
> -	 * Changing DAX config may require inode locking for mapping
> -	 * invalidation. These need to be held all the way to transaction commit
> -	 * or cancel time, so need to be passed through to
> -	 * xfs_ioctl_setattr_get_trans() so it can apply them to the join call
> -	 * appropriately.
> -	 */
> -	error = xfs_ioctl_setattr_dax_invalidate(ip, &fa, &join_flags);
> -	if (error)
> -		goto out_drop_write;
> +	xfs_ioctl_setattr_prepare_dax(ip, &fa);
>  
> -	tp = xfs_ioctl_setattr_get_trans(ip, join_flags);
> +	tp = xfs_ioctl_setattr_get_trans(ip);
>  	if (IS_ERR(tp)) {
>  		error = PTR_ERR(tp);
>  		goto out_drop_write;
> -- 
> 2.25.1
>
Darrick J. Wong June 2, 2020, 5:23 p.m. UTC | #2
On Tue, Apr 28, 2020 at 01:11:38PM -0700, Darrick J. Wong wrote:
> On Mon, Apr 27, 2020 at 05:21:42PM -0700, ira.weiny@intel.com wrote:
> > From: Ira Weiny <ira.weiny@intel.com>
> > 
> > Because of the separation of FS_XFLAG_DAX from S_DAX and the delayed
> > setting of S_DAX, data invalidation no longer needs to happen when
> > FS_XFLAG_DAX is changed.
> > 
> > Change xfs_ioctl_setattr_dax_invalidate() to be
> > xfs_ioctl_dax_check_set_cache() and alter the code to reflect the new
> > functionality.
> > 
> > Furthermore, we no longer need the locking so we remove the join_flags
> > logic.
> > 
> > Signed-off-by: Ira Weiny <ira.weiny@intel.com>
> 
> Looks ok,
> Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
> 
> --D
> 
> > 
> > ---
> > Changes from V10:
> > 	adjust for renamed d_mark_dontcache() function
> > 
> > Changes from V9:
> > 	Change name of function to xfs_ioctl_setattr_prepare_dax()
> > 
> > Changes from V8:
> > 	Change name of function to xfs_ioctl_dax_check_set_cache()
> > 	Update commit message
> > 	Fix bit manipulations
> > 
> > Changes from V7:
> > 	Use new flag_inode_dontcache()
> > 	Skip don't cache if mount over ride is active.
> > 
> > Changes from v6:
> > 	Fix completely broken implementation and update commit message.
> > 	Use the new VFS layer I_DONTCACHE to facilitate inode eviction
> > 	and S_DAX changing on drop_caches
> > 
> > Changes from v5:
> > 	New patch
> > ---
> >  fs/xfs/xfs_ioctl.c | 108 +++++++++------------------------------------
> >  1 file changed, 20 insertions(+), 88 deletions(-)
> > 
> > diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
> > index 104495ac187c..ff474f2c9acf 100644
> > --- a/fs/xfs/xfs_ioctl.c
> > +++ b/fs/xfs/xfs_ioctl.c
> > @@ -1245,64 +1245,26 @@ xfs_ioctl_setattr_xflags(
> >  	return 0;
> >  }
> >  
> > -/*
> > - * If we are changing DAX flags, we have to ensure the file is clean and any
> > - * cached objects in the address space are invalidated and removed. This
> > - * requires us to lock out other IO and page faults similar to a truncate
> > - * operation. The locks need to be held until the transaction has been committed
> > - * so that the cache invalidation is atomic with respect to the DAX flag
> > - * manipulation.
> > - */
> > -static int
> > -xfs_ioctl_setattr_dax_invalidate(
> > +static void
> > +xfs_ioctl_setattr_prepare_dax(
> >  	struct xfs_inode	*ip,
> > -	struct fsxattr		*fa,
> > -	int			*join_flags)
> > +	struct fsxattr		*fa)
> >  {
> > -	struct inode		*inode = VFS_I(ip);
> > -	struct super_block	*sb = inode->i_sb;
> > -	int			error;
> > -
> > -	*join_flags = 0;
> > -
> > -	/*
> > -	 * It is only valid to set the DAX flag on regular files and
> > -	 * directories on filesystems where the block size is equal to the page
> > -	 * size. On directories it serves as an inherited hint so we don't
> > -	 * have to check the device for dax support or flush pagecache.
> > -	 */
> > -	if (fa->fsx_xflags & FS_XFLAG_DAX) {
> > -		struct xfs_buftarg	*target = xfs_inode_buftarg(ip);
> > -
> > -		if (!bdev_dax_supported(target->bt_bdev, sb->s_blocksize))
> > -			return -EINVAL;
> > -	}
> > -
> > -	/* If the DAX state is not changing, we have nothing to do here. */
> > -	if ((fa->fsx_xflags & FS_XFLAG_DAX) && IS_DAX(inode))
> > -		return 0;
> > -	if (!(fa->fsx_xflags & FS_XFLAG_DAX) && !IS_DAX(inode))
> > -		return 0;
> > +	struct xfs_mount	*mp = ip->i_mount;
> > +	struct inode            *inode = VFS_I(ip);
> >  
> >  	if (S_ISDIR(inode->i_mode))
> > -		return 0;
> > -
> > -	/* lock, flush and invalidate mapping in preparation for flag change */
> > -	xfs_ilock(ip, XFS_MMAPLOCK_EXCL | XFS_IOLOCK_EXCL);
> > -	error = filemap_write_and_wait(inode->i_mapping);
> > -	if (error)
> > -		goto out_unlock;
> > -	error = invalidate_inode_pages2(inode->i_mapping);
> > -	if (error)
> > -		goto out_unlock;
> > -
> > -	*join_flags = XFS_MMAPLOCK_EXCL | XFS_IOLOCK_EXCL;
> > -	return 0;
> > +		return;
> >  
> > -out_unlock:
> > -	xfs_iunlock(ip, XFS_MMAPLOCK_EXCL | XFS_IOLOCK_EXCL);
> > -	return error;
> > +	if ((mp->m_flags & XFS_MOUNT_DAX_ALWAYS) ||
> > +	    (mp->m_flags & XFS_MOUNT_DAX_NEVER))
> > +		return;
> >  
> > +	if (((fa->fsx_xflags & FS_XFLAG_DAX) &&
> > +	    !(ip->i_d.di_flags2 & XFS_DIFLAG2_DAX)) ||
> > +	    (!(fa->fsx_xflags & FS_XFLAG_DAX) &&
> > +	     (ip->i_d.di_flags2 & XFS_DIFLAG2_DAX)))
> > +		d_mark_dontcache(inode);

Now that I think about this further, are we /really/ sure that we want
to let unprivileged userspace cause inode evictions?

--D

> >  }
> >  
> >  /*
> > @@ -1310,17 +1272,10 @@ xfs_ioctl_setattr_dax_invalidate(
> >   * have permission to do so. On success, return a clean transaction and the
> >   * inode locked exclusively ready for further operation specific checks. On
> >   * failure, return an error without modifying or locking the inode.
> > - *
> > - * The inode might already be IO locked on call. If this is the case, it is
> > - * indicated in @join_flags and we take full responsibility for ensuring they
> > - * are unlocked from now on. Hence if we have an error here, we still have to
> > - * unlock them. Otherwise, once they are joined to the transaction, they will
> > - * be unlocked on commit/cancel.
> >   */
> >  static struct xfs_trans *
> >  xfs_ioctl_setattr_get_trans(
> > -	struct xfs_inode	*ip,
> > -	int			join_flags)
> > +	struct xfs_inode	*ip)
> >  {
> >  	struct xfs_mount	*mp = ip->i_mount;
> >  	struct xfs_trans	*tp;
> > @@ -1337,8 +1292,7 @@ xfs_ioctl_setattr_get_trans(
> >  		goto out_unlock;
> >  
> >  	xfs_ilock(ip, XFS_ILOCK_EXCL);
> > -	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL | join_flags);
> > -	join_flags = 0;
> > +	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
> >  
> >  	/*
> >  	 * CAP_FOWNER overrides the following restrictions:
> > @@ -1359,8 +1313,6 @@ xfs_ioctl_setattr_get_trans(
> >  out_cancel:
> >  	xfs_trans_cancel(tp);
> >  out_unlock:
> > -	if (join_flags)
> > -		xfs_iunlock(ip, join_flags);
> >  	return ERR_PTR(error);
> >  }
> >  
> > @@ -1486,7 +1438,6 @@ xfs_ioctl_setattr(
> >  	struct xfs_dquot	*pdqp = NULL;
> >  	struct xfs_dquot	*olddquot = NULL;
> >  	int			code;
> > -	int			join_flags = 0;
> >  
> >  	trace_xfs_ioctl_setattr(ip);
> >  
> > @@ -1510,18 +1461,9 @@ xfs_ioctl_setattr(
> >  			return code;
> >  	}
> >  
> > -	/*
> > -	 * Changing DAX config may require inode locking for mapping
> > -	 * invalidation. These need to be held all the way to transaction commit
> > -	 * or cancel time, so need to be passed through to
> > -	 * xfs_ioctl_setattr_get_trans() so it can apply them to the join call
> > -	 * appropriately.
> > -	 */
> > -	code = xfs_ioctl_setattr_dax_invalidate(ip, fa, &join_flags);
> > -	if (code)
> > -		goto error_free_dquots;
> > +	xfs_ioctl_setattr_prepare_dax(ip, fa);
> >  
> > -	tp = xfs_ioctl_setattr_get_trans(ip, join_flags);
> > +	tp = xfs_ioctl_setattr_get_trans(ip);
> >  	if (IS_ERR(tp)) {
> >  		code = PTR_ERR(tp);
> >  		goto error_free_dquots;
> > @@ -1651,7 +1593,6 @@ xfs_ioc_setxflags(
> >  	struct fsxattr		fa;
> >  	struct fsxattr		old_fa;
> >  	unsigned int		flags;
> > -	int			join_flags = 0;
> >  	int			error;
> >  
> >  	if (copy_from_user(&flags, arg, sizeof(flags)))
> > @@ -1668,18 +1609,9 @@ xfs_ioc_setxflags(
> >  	if (error)
> >  		return error;
> >  
> > -	/*
> > -	 * Changing DAX config may require inode locking for mapping
> > -	 * invalidation. These need to be held all the way to transaction commit
> > -	 * or cancel time, so need to be passed through to
> > -	 * xfs_ioctl_setattr_get_trans() so it can apply them to the join call
> > -	 * appropriately.
> > -	 */
> > -	error = xfs_ioctl_setattr_dax_invalidate(ip, &fa, &join_flags);
> > -	if (error)
> > -		goto out_drop_write;
> > +	xfs_ioctl_setattr_prepare_dax(ip, &fa);
> >  
> > -	tp = xfs_ioctl_setattr_get_trans(ip, join_flags);
> > +	tp = xfs_ioctl_setattr_get_trans(ip);
> >  	if (IS_ERR(tp)) {
> >  		error = PTR_ERR(tp);
> >  		goto out_drop_write;
> > -- 
> > 2.25.1
> >
Ira Weiny June 2, 2020, 11:15 p.m. UTC | #3
On Tue, Jun 02, 2020 at 10:23:53AM -0700, Darrick J. Wong wrote:
> On Tue, Apr 28, 2020 at 01:11:38PM -0700, Darrick J. Wong wrote:
> > On Mon, Apr 27, 2020 at 05:21:42PM -0700, ira.weiny@intel.com wrote:
> > > From: Ira Weiny <ira.weiny@intel.com>
> > > 

...

> > > -out_unlock:
> > > -	xfs_iunlock(ip, XFS_MMAPLOCK_EXCL | XFS_IOLOCK_EXCL);
> > > -	return error;
> > > +	if ((mp->m_flags & XFS_MOUNT_DAX_ALWAYS) ||
> > > +	    (mp->m_flags & XFS_MOUNT_DAX_NEVER))
> > > +		return;
> > >  
> > > +	if (((fa->fsx_xflags & FS_XFLAG_DAX) &&
> > > +	    !(ip->i_d.di_flags2 & XFS_DIFLAG2_DAX)) ||
> > > +	    (!(fa->fsx_xflags & FS_XFLAG_DAX) &&
> > > +	     (ip->i_d.di_flags2 & XFS_DIFLAG2_DAX)))
> > > +		d_mark_dontcache(inode);
> 
> Now that I think about this further, are we /really/ sure that we want
> to let unprivileged userspace cause inode evictions?

This code only applies to files they have access to.  And it does not directly
cause an eviction.  It only hints that those inodes (for which they have access
to) will not be cached thus causing them to be reloaded sooner than they might
otherwise be.

So I think we are fine here.

Ira

> 
> --D
>
Jan Kara June 3, 2020, 10:10 a.m. UTC | #4
On Tue 02-06-20 10:23:53, Darrick J. Wong wrote:
> On Tue, Apr 28, 2020 at 01:11:38PM -0700, Darrick J. Wong wrote:
> > > -out_unlock:
> > > -	xfs_iunlock(ip, XFS_MMAPLOCK_EXCL | XFS_IOLOCK_EXCL);
> > > -	return error;
> > > +	if ((mp->m_flags & XFS_MOUNT_DAX_ALWAYS) ||
> > > +	    (mp->m_flags & XFS_MOUNT_DAX_NEVER))
> > > +		return;
> > >  
> > > +	if (((fa->fsx_xflags & FS_XFLAG_DAX) &&
> > > +	    !(ip->i_d.di_flags2 & XFS_DIFLAG2_DAX)) ||
> > > +	    (!(fa->fsx_xflags & FS_XFLAG_DAX) &&
> > > +	     (ip->i_d.di_flags2 & XFS_DIFLAG2_DAX)))
> > > +		d_mark_dontcache(inode);
> 
> Now that I think about this further, are we /really/ sure that we want
> to let unprivileged userspace cause inode evictions?

You have to have an equivalent of write access to the file to be able to
trigger d_mark_dontcache(). So you can e.g. delete it.  Or you could
fadvise / madvise regarding its page cache. I don't see the ability to push
inode out of cache as stronger than the abilities you already have...

								Honza
Darrick J. Wong June 3, 2020, 5:03 p.m. UTC | #5
On Wed, Jun 03, 2020 at 12:10:24PM +0200, Jan Kara wrote:
> On Tue 02-06-20 10:23:53, Darrick J. Wong wrote:
> > On Tue, Apr 28, 2020 at 01:11:38PM -0700, Darrick J. Wong wrote:
> > > > -out_unlock:
> > > > -	xfs_iunlock(ip, XFS_MMAPLOCK_EXCL | XFS_IOLOCK_EXCL);
> > > > -	return error;
> > > > +	if ((mp->m_flags & XFS_MOUNT_DAX_ALWAYS) ||
> > > > +	    (mp->m_flags & XFS_MOUNT_DAX_NEVER))
> > > > +		return;
> > > >  
> > > > +	if (((fa->fsx_xflags & FS_XFLAG_DAX) &&
> > > > +	    !(ip->i_d.di_flags2 & XFS_DIFLAG2_DAX)) ||
> > > > +	    (!(fa->fsx_xflags & FS_XFLAG_DAX) &&
> > > > +	     (ip->i_d.di_flags2 & XFS_DIFLAG2_DAX)))
> > > > +		d_mark_dontcache(inode);
> > 
> > Now that I think about this further, are we /really/ sure that we want
> > to let unprivileged userspace cause inode evictions?
> 
> You have to have an equivalent of write access to the file to be able to
> trigger d_mark_dontcache(). So you can e.g. delete it.  Or you could
> fadvise / madvise regarding its page cache. I don't see the ability to push
> inode out of cache as stronger than the abilities you already have...

<nod> Ok.  I just had one last bout of paranoia, but I think it'll be
fine. :)

--D

> 								Honza
> -- 
> Jan Kara <jack@suse.com>
> SUSE Labs, CR

Patch
diff mbox series

diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index 104495ac187c..ff474f2c9acf 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -1245,64 +1245,26 @@  xfs_ioctl_setattr_xflags(
 	return 0;
 }
 
-/*
- * If we are changing DAX flags, we have to ensure the file is clean and any
- * cached objects in the address space are invalidated and removed. This
- * requires us to lock out other IO and page faults similar to a truncate
- * operation. The locks need to be held until the transaction has been committed
- * so that the cache invalidation is atomic with respect to the DAX flag
- * manipulation.
- */
-static int
-xfs_ioctl_setattr_dax_invalidate(
+static void
+xfs_ioctl_setattr_prepare_dax(
 	struct xfs_inode	*ip,
-	struct fsxattr		*fa,
-	int			*join_flags)
+	struct fsxattr		*fa)
 {
-	struct inode		*inode = VFS_I(ip);
-	struct super_block	*sb = inode->i_sb;
-	int			error;
-
-	*join_flags = 0;
-
-	/*
-	 * It is only valid to set the DAX flag on regular files and
-	 * directories on filesystems where the block size is equal to the page
-	 * size. On directories it serves as an inherited hint so we don't
-	 * have to check the device for dax support or flush pagecache.
-	 */
-	if (fa->fsx_xflags & FS_XFLAG_DAX) {
-		struct xfs_buftarg	*target = xfs_inode_buftarg(ip);
-
-		if (!bdev_dax_supported(target->bt_bdev, sb->s_blocksize))
-			return -EINVAL;
-	}
-
-	/* If the DAX state is not changing, we have nothing to do here. */
-	if ((fa->fsx_xflags & FS_XFLAG_DAX) && IS_DAX(inode))
-		return 0;
-	if (!(fa->fsx_xflags & FS_XFLAG_DAX) && !IS_DAX(inode))
-		return 0;
+	struct xfs_mount	*mp = ip->i_mount;
+	struct inode            *inode = VFS_I(ip);
 
 	if (S_ISDIR(inode->i_mode))
-		return 0;
-
-	/* lock, flush and invalidate mapping in preparation for flag change */
-	xfs_ilock(ip, XFS_MMAPLOCK_EXCL | XFS_IOLOCK_EXCL);
-	error = filemap_write_and_wait(inode->i_mapping);
-	if (error)
-		goto out_unlock;
-	error = invalidate_inode_pages2(inode->i_mapping);
-	if (error)
-		goto out_unlock;
-
-	*join_flags = XFS_MMAPLOCK_EXCL | XFS_IOLOCK_EXCL;
-	return 0;
+		return;
 
-out_unlock:
-	xfs_iunlock(ip, XFS_MMAPLOCK_EXCL | XFS_IOLOCK_EXCL);
-	return error;
+	if ((mp->m_flags & XFS_MOUNT_DAX_ALWAYS) ||
+	    (mp->m_flags & XFS_MOUNT_DAX_NEVER))
+		return;
 
+	if (((fa->fsx_xflags & FS_XFLAG_DAX) &&
+	    !(ip->i_d.di_flags2 & XFS_DIFLAG2_DAX)) ||
+	    (!(fa->fsx_xflags & FS_XFLAG_DAX) &&
+	     (ip->i_d.di_flags2 & XFS_DIFLAG2_DAX)))
+		d_mark_dontcache(inode);
 }
 
 /*
@@ -1310,17 +1272,10 @@  xfs_ioctl_setattr_dax_invalidate(
  * have permission to do so. On success, return a clean transaction and the
  * inode locked exclusively ready for further operation specific checks. On
  * failure, return an error without modifying or locking the inode.
- *
- * The inode might already be IO locked on call. If this is the case, it is
- * indicated in @join_flags and we take full responsibility for ensuring they
- * are unlocked from now on. Hence if we have an error here, we still have to
- * unlock them. Otherwise, once they are joined to the transaction, they will
- * be unlocked on commit/cancel.
  */
 static struct xfs_trans *
 xfs_ioctl_setattr_get_trans(
-	struct xfs_inode	*ip,
-	int			join_flags)
+	struct xfs_inode	*ip)
 {
 	struct xfs_mount	*mp = ip->i_mount;
 	struct xfs_trans	*tp;
@@ -1337,8 +1292,7 @@  xfs_ioctl_setattr_get_trans(
 		goto out_unlock;
 
 	xfs_ilock(ip, XFS_ILOCK_EXCL);
-	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL | join_flags);
-	join_flags = 0;
+	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
 
 	/*
 	 * CAP_FOWNER overrides the following restrictions:
@@ -1359,8 +1313,6 @@  xfs_ioctl_setattr_get_trans(
 out_cancel:
 	xfs_trans_cancel(tp);
 out_unlock:
-	if (join_flags)
-		xfs_iunlock(ip, join_flags);
 	return ERR_PTR(error);
 }
 
@@ -1486,7 +1438,6 @@  xfs_ioctl_setattr(
 	struct xfs_dquot	*pdqp = NULL;
 	struct xfs_dquot	*olddquot = NULL;
 	int			code;
-	int			join_flags = 0;
 
 	trace_xfs_ioctl_setattr(ip);
 
@@ -1510,18 +1461,9 @@  xfs_ioctl_setattr(
 			return code;
 	}
 
-	/*
-	 * Changing DAX config may require inode locking for mapping
-	 * invalidation. These need to be held all the way to transaction commit
-	 * or cancel time, so need to be passed through to
-	 * xfs_ioctl_setattr_get_trans() so it can apply them to the join call
-	 * appropriately.
-	 */
-	code = xfs_ioctl_setattr_dax_invalidate(ip, fa, &join_flags);
-	if (code)
-		goto error_free_dquots;
+	xfs_ioctl_setattr_prepare_dax(ip, fa);
 
-	tp = xfs_ioctl_setattr_get_trans(ip, join_flags);
+	tp = xfs_ioctl_setattr_get_trans(ip);
 	if (IS_ERR(tp)) {
 		code = PTR_ERR(tp);
 		goto error_free_dquots;
@@ -1651,7 +1593,6 @@  xfs_ioc_setxflags(
 	struct fsxattr		fa;
 	struct fsxattr		old_fa;
 	unsigned int		flags;
-	int			join_flags = 0;
 	int			error;
 
 	if (copy_from_user(&flags, arg, sizeof(flags)))
@@ -1668,18 +1609,9 @@  xfs_ioc_setxflags(
 	if (error)
 		return error;
 
-	/*
-	 * Changing DAX config may require inode locking for mapping
-	 * invalidation. These need to be held all the way to transaction commit
-	 * or cancel time, so need to be passed through to
-	 * xfs_ioctl_setattr_get_trans() so it can apply them to the join call
-	 * appropriately.
-	 */
-	error = xfs_ioctl_setattr_dax_invalidate(ip, &fa, &join_flags);
-	if (error)
-		goto out_drop_write;
+	xfs_ioctl_setattr_prepare_dax(ip, &fa);
 
-	tp = xfs_ioctl_setattr_get_trans(ip, join_flags);
+	tp = xfs_ioctl_setattr_get_trans(ip);
 	if (IS_ERR(tp)) {
 		error = PTR_ERR(tp);
 		goto out_drop_write;