[V8,10/11] fs/xfs: Change xfs_ioctl_setattr_dax_invalidate()
diff mbox series

Message ID 20200415064523.2244712-11-ira.weiny@intel.com
State New
Headers show
Series
  • Enable per-file/per-directory DAX operations V8
Related show

Commit Message

Ira Weiny April 15, 2020, 6:45 a.m. UTC
From: Ira Weiny <ira.weiny@intel.com>

We only support changing FS_XFLAG_DAX on directories.  Files get their
flag from the parent directory on creation only.  So no data
invalidation needs to happen.

Alter the xfs_ioctl_setattr_dax_invalidate() to be
xfs_ioctl_setattr_dax_validate().  xfs_ioctl_setattr_dax_validate() now
validates that any FS_XFLAG_DAX change is ok.

This also allows use to remove the join_flags logic.

Signed-off-by: Ira Weiny <ira.weiny@intel.com>

---
Changes from V7:
	Use new flag_inode_dontcache()
	Skip don't cache if mount over ride is active.

Changes from v6:
	Fix completely broken implementation and update commit message.
	Use the new VFS layer I_DONTCACHE to facilitate inode eviction
	and S_DAX changing on drop_caches

Changes from v5:
	New patch
---
 fs/xfs/xfs_ioctl.c | 105 +++++++++------------------------------------
 1 file changed, 20 insertions(+), 85 deletions(-)

Comments

Darrick J. Wong April 15, 2020, 3:21 p.m. UTC | #1
On Tue, Apr 14, 2020 at 11:45:22PM -0700, ira.weiny@intel.com wrote:
> From: Ira Weiny <ira.weiny@intel.com>
> 
> We only support changing FS_XFLAG_DAX on directories.  Files get their
> flag from the parent directory on creation only.  So no data
> invalidation needs to happen.
> 
> Alter the xfs_ioctl_setattr_dax_invalidate() to be
> xfs_ioctl_setattr_dax_validate().  xfs_ioctl_setattr_dax_validate() now
> validates that any FS_XFLAG_DAX change is ok.
> 
> This also allows use to remove the join_flags logic.
> 
> Signed-off-by: Ira Weiny <ira.weiny@intel.com>
> 
> ---
> Changes from V7:
> 	Use new flag_inode_dontcache()
> 	Skip don't cache if mount over ride is active.
> 
> Changes from v6:
> 	Fix completely broken implementation and update commit message.
> 	Use the new VFS layer I_DONTCACHE to facilitate inode eviction
> 	and S_DAX changing on drop_caches
> 
> Changes from v5:
> 	New patch
> ---
>  fs/xfs/xfs_ioctl.c | 105 +++++++++------------------------------------
>  1 file changed, 20 insertions(+), 85 deletions(-)
> 
> diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
> index c6cd92ef4a05..75d4a830ef38 100644
> --- a/fs/xfs/xfs_ioctl.c
> +++ b/fs/xfs/xfs_ioctl.c
> @@ -1145,63 +1145,28 @@ xfs_ioctl_setattr_xflags(
>  }
>  
>  /*
> - * If we are changing DAX flags, we have to ensure the file is clean and any
> - * cached objects in the address space are invalidated and removed. This
> - * requires us to lock out other IO and page faults similar to a truncate
> - * operation. The locks need to be held until the transaction has been committed
> - * so that the cache invalidation is atomic with respect to the DAX flag
> - * manipulation.
> + * Mark inodes with a changing FS_XFLAG_DAX, I_DONTCACHE
>   */
> -static int
> +static void
>  xfs_ioctl_setattr_dax_invalidate(
>  	struct xfs_inode	*ip,
> -	struct fsxattr		*fa,
> -	int			*join_flags)
> +	struct fsxattr		*fa)
>  {
> -	struct inode		*inode = VFS_I(ip);
> -	struct super_block	*sb = inode->i_sb;
> -	int			error;
> -
> -	*join_flags = 0;
> -
> -	/*
> -	 * It is only valid to set the DAX flag on regular files and
> -	 * directories on filesystems where the block size is equal to the page
> -	 * size. On directories it serves as an inherited hint so we don't
> -	 * have to check the device for dax support or flush pagecache.
> -	 */
> -	if (fa->fsx_xflags & FS_XFLAG_DAX) {
> -		struct xfs_buftarg	*target = xfs_inode_buftarg(ip);
> -
> -		if (!bdev_dax_supported(target->bt_bdev, sb->s_blocksize))
> -			return -EINVAL;
> -	}
> -
> -	/* If the DAX state is not changing, we have nothing to do here. */
> -	if ((fa->fsx_xflags & FS_XFLAG_DAX) && IS_DAX(inode))
> -		return 0;
> -	if (!(fa->fsx_xflags & FS_XFLAG_DAX) && !IS_DAX(inode))
> -		return 0;
> +	struct xfs_mount	*mp = ip->i_mount;
> +	struct inode            *inode = VFS_I(ip);
>  
>  	if (S_ISDIR(inode->i_mode))
> -		return 0;
> -
> -	/* lock, flush and invalidate mapping in preparation for flag change */
> -	xfs_ilock(ip, XFS_MMAPLOCK_EXCL | XFS_IOLOCK_EXCL);
> -	error = filemap_write_and_wait(inode->i_mapping);
> -	if (error)
> -		goto out_unlock;
> -	error = invalidate_inode_pages2(inode->i_mapping);
> -	if (error)
> -		goto out_unlock;
> +		return;
>  
> -	*join_flags = XFS_MMAPLOCK_EXCL | XFS_IOLOCK_EXCL;
> -	return 0;
> -
> -out_unlock:
> -	xfs_iunlock(ip, XFS_MMAPLOCK_EXCL | XFS_IOLOCK_EXCL);
> -	return error;
> +	if (mp->m_flags & XFS_MOUNT_DAX_ALWAYS ||
> +	    mp->m_flags & XFS_MOUNT_DAX_NEVER)

This ought to have parentheses around the two bit manipulation
operations to avoid complaints from picky/stupid compilers.

--D

> +		return;
>  
> +	if (((fa->fsx_xflags & FS_XFLAG_DAX) &&
> +	    !(ip->i_d.di_flags2 & XFS_DIFLAG2_DAX)) ||
> +	    (!(fa->fsx_xflags & FS_XFLAG_DAX) &&
> +	     (ip->i_d.di_flags2 & XFS_DIFLAG2_DAX)))
> +		flag_inode_dontcache(inode);
>  }
>  
>  /*
> @@ -1209,17 +1174,10 @@ xfs_ioctl_setattr_dax_invalidate(
>   * have permission to do so. On success, return a clean transaction and the
>   * inode locked exclusively ready for further operation specific checks. On
>   * failure, return an error without modifying or locking the inode.
> - *
> - * The inode might already be IO locked on call. If this is the case, it is
> - * indicated in @join_flags and we take full responsibility for ensuring they
> - * are unlocked from now on. Hence if we have an error here, we still have to
> - * unlock them. Otherwise, once they are joined to the transaction, they will
> - * be unlocked on commit/cancel.
>   */
>  static struct xfs_trans *
>  xfs_ioctl_setattr_get_trans(
> -	struct xfs_inode	*ip,
> -	int			join_flags)
> +	struct xfs_inode	*ip)
>  {
>  	struct xfs_mount	*mp = ip->i_mount;
>  	struct xfs_trans	*tp;
> @@ -1236,8 +1194,7 @@ xfs_ioctl_setattr_get_trans(
>  		goto out_unlock;
>  
>  	xfs_ilock(ip, XFS_ILOCK_EXCL);
> -	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL | join_flags);
> -	join_flags = 0;
> +	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
>  
>  	/*
>  	 * CAP_FOWNER overrides the following restrictions:
> @@ -1258,8 +1215,6 @@ xfs_ioctl_setattr_get_trans(
>  out_cancel:
>  	xfs_trans_cancel(tp);
>  out_unlock:
> -	if (join_flags)
> -		xfs_iunlock(ip, join_flags);
>  	return ERR_PTR(error);
>  }
>  
> @@ -1386,7 +1341,6 @@ xfs_ioctl_setattr(
>  	struct xfs_dquot	*pdqp = NULL;
>  	struct xfs_dquot	*olddquot = NULL;
>  	int			code;
> -	int			join_flags = 0;
>  
>  	trace_xfs_ioctl_setattr(ip);
>  
> @@ -1410,18 +1364,9 @@ xfs_ioctl_setattr(
>  			return code;
>  	}
>  
> -	/*
> -	 * Changing DAX config may require inode locking for mapping
> -	 * invalidation. These need to be held all the way to transaction commit
> -	 * or cancel time, so need to be passed through to
> -	 * xfs_ioctl_setattr_get_trans() so it can apply them to the join call
> -	 * appropriately.
> -	 */
> -	code = xfs_ioctl_setattr_dax_invalidate(ip, fa, &join_flags);
> -	if (code)
> -		goto error_free_dquots;
> +	xfs_ioctl_setattr_dax_invalidate(ip, fa);
>  
> -	tp = xfs_ioctl_setattr_get_trans(ip, join_flags);
> +	tp = xfs_ioctl_setattr_get_trans(ip);
>  	if (IS_ERR(tp)) {
>  		code = PTR_ERR(tp);
>  		goto error_free_dquots;
> @@ -1552,7 +1497,6 @@ xfs_ioc_setxflags(
>  	struct fsxattr		fa;
>  	struct fsxattr		old_fa;
>  	unsigned int		flags;
> -	int			join_flags = 0;
>  	int			error;
>  
>  	if (copy_from_user(&flags, arg, sizeof(flags)))
> @@ -1569,18 +1513,9 @@ xfs_ioc_setxflags(
>  	if (error)
>  		return error;
>  
> -	/*
> -	 * Changing DAX config may require inode locking for mapping
> -	 * invalidation. These need to be held all the way to transaction commit
> -	 * or cancel time, so need to be passed through to
> -	 * xfs_ioctl_setattr_get_trans() so it can apply them to the join call
> -	 * appropriately.
> -	 */
> -	error = xfs_ioctl_setattr_dax_invalidate(ip, &fa, &join_flags);
> -	if (error)
> -		goto out_drop_write;
> +	xfs_ioctl_setattr_dax_invalidate(ip, &fa);
>  
> -	tp = xfs_ioctl_setattr_get_trans(ip, join_flags);
> +	tp = xfs_ioctl_setattr_get_trans(ip);
>  	if (IS_ERR(tp)) {
>  		error = PTR_ERR(tp);
>  		goto out_drop_write;
> -- 
> 2.25.1
>
Ira Weiny April 16, 2020, 5 a.m. UTC | #2
On Wed, Apr 15, 2020 at 08:21:12AM -0700, Darrick J. Wong wrote:
> On Tue, Apr 14, 2020 at 11:45:22PM -0700, ira.weiny@intel.com wrote:
> > From: Ira Weiny <ira.weiny@intel.com>
> > 
> > We only support changing FS_XFLAG_DAX on directories.  Files get their
> > flag from the parent directory on creation only.  So no data
> > invalidation needs to happen.
> > 
> > Alter the xfs_ioctl_setattr_dax_invalidate() to be
> > xfs_ioctl_setattr_dax_validate().  xfs_ioctl_setattr_dax_validate() now
> > validates that any FS_XFLAG_DAX change is ok.
> > 
> > This also allows use to remove the join_flags logic.
> > 
> > Signed-off-by: Ira Weiny <ira.weiny@intel.com>
> > 
> > ---
> > Changes from V7:
> > 	Use new flag_inode_dontcache()
> > 	Skip don't cache if mount over ride is active.
> > 
> > Changes from v6:
> > 	Fix completely broken implementation and update commit message.
> > 	Use the new VFS layer I_DONTCACHE to facilitate inode eviction
> > 	and S_DAX changing on drop_caches
> > 
> > Changes from v5:
> > 	New patch
> > ---
> >  fs/xfs/xfs_ioctl.c | 105 +++++++++------------------------------------
> >  1 file changed, 20 insertions(+), 85 deletions(-)
> > 
> > diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
> > index c6cd92ef4a05..75d4a830ef38 100644
> > --- a/fs/xfs/xfs_ioctl.c
> > +++ b/fs/xfs/xfs_ioctl.c
> > @@ -1145,63 +1145,28 @@ xfs_ioctl_setattr_xflags(
> >  }
> >  
> >  /*
> > - * If we are changing DAX flags, we have to ensure the file is clean and any
> > - * cached objects in the address space are invalidated and removed. This
> > - * requires us to lock out other IO and page faults similar to a truncate
> > - * operation. The locks need to be held until the transaction has been committed
> > - * so that the cache invalidation is atomic with respect to the DAX flag
> > - * manipulation.
> > + * Mark inodes with a changing FS_XFLAG_DAX, I_DONTCACHE
> >   */
> > -static int
> > +static void
> >  xfs_ioctl_setattr_dax_invalidate(
> >  	struct xfs_inode	*ip,
> > -	struct fsxattr		*fa,
> > -	int			*join_flags)
> > +	struct fsxattr		*fa)
> >  {
> > -	struct inode		*inode = VFS_I(ip);
> > -	struct super_block	*sb = inode->i_sb;
> > -	int			error;
> > -
> > -	*join_flags = 0;
> > -
> > -	/*
> > -	 * It is only valid to set the DAX flag on regular files and
> > -	 * directories on filesystems where the block size is equal to the page
> > -	 * size. On directories it serves as an inherited hint so we don't
> > -	 * have to check the device for dax support or flush pagecache.
> > -	 */
> > -	if (fa->fsx_xflags & FS_XFLAG_DAX) {
> > -		struct xfs_buftarg	*target = xfs_inode_buftarg(ip);
> > -
> > -		if (!bdev_dax_supported(target->bt_bdev, sb->s_blocksize))
> > -			return -EINVAL;
> > -	}
> > -
> > -	/* If the DAX state is not changing, we have nothing to do here. */
> > -	if ((fa->fsx_xflags & FS_XFLAG_DAX) && IS_DAX(inode))
> > -		return 0;
> > -	if (!(fa->fsx_xflags & FS_XFLAG_DAX) && !IS_DAX(inode))
> > -		return 0;
> > +	struct xfs_mount	*mp = ip->i_mount;
> > +	struct inode            *inode = VFS_I(ip);
> >  
> >  	if (S_ISDIR(inode->i_mode))
> > -		return 0;
> > -
> > -	/* lock, flush and invalidate mapping in preparation for flag change */
> > -	xfs_ilock(ip, XFS_MMAPLOCK_EXCL | XFS_IOLOCK_EXCL);
> > -	error = filemap_write_and_wait(inode->i_mapping);
> > -	if (error)
> > -		goto out_unlock;
> > -	error = invalidate_inode_pages2(inode->i_mapping);
> > -	if (error)
> > -		goto out_unlock;
> > +		return;
> >  
> > -	*join_flags = XFS_MMAPLOCK_EXCL | XFS_IOLOCK_EXCL;
> > -	return 0;
> > -
> > -out_unlock:
> > -	xfs_iunlock(ip, XFS_MMAPLOCK_EXCL | XFS_IOLOCK_EXCL);
> > -	return error;
> > +	if (mp->m_flags & XFS_MOUNT_DAX_ALWAYS ||
> > +	    mp->m_flags & XFS_MOUNT_DAX_NEVER)
> 
> This ought to have parentheses around the two bit manipulation
> operations to avoid complaints from picky/stupid compilers.

Fixed,
Ira

> 
> --D
> 
> > +		return;
> >  
> > +	if (((fa->fsx_xflags & FS_XFLAG_DAX) &&
> > +	    !(ip->i_d.di_flags2 & XFS_DIFLAG2_DAX)) ||
> > +	    (!(fa->fsx_xflags & FS_XFLAG_DAX) &&
> > +	     (ip->i_d.di_flags2 & XFS_DIFLAG2_DAX)))
> > +		flag_inode_dontcache(inode);
> >  }
> >  
> >  /*
> > @@ -1209,17 +1174,10 @@ xfs_ioctl_setattr_dax_invalidate(
> >   * have permission to do so. On success, return a clean transaction and the
> >   * inode locked exclusively ready for further operation specific checks. On
> >   * failure, return an error without modifying or locking the inode.
> > - *
> > - * The inode might already be IO locked on call. If this is the case, it is
> > - * indicated in @join_flags and we take full responsibility for ensuring they
> > - * are unlocked from now on. Hence if we have an error here, we still have to
> > - * unlock them. Otherwise, once they are joined to the transaction, they will
> > - * be unlocked on commit/cancel.
> >   */
> >  static struct xfs_trans *
> >  xfs_ioctl_setattr_get_trans(
> > -	struct xfs_inode	*ip,
> > -	int			join_flags)
> > +	struct xfs_inode	*ip)
> >  {
> >  	struct xfs_mount	*mp = ip->i_mount;
> >  	struct xfs_trans	*tp;
> > @@ -1236,8 +1194,7 @@ xfs_ioctl_setattr_get_trans(
> >  		goto out_unlock;
> >  
> >  	xfs_ilock(ip, XFS_ILOCK_EXCL);
> > -	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL | join_flags);
> > -	join_flags = 0;
> > +	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
> >  
> >  	/*
> >  	 * CAP_FOWNER overrides the following restrictions:
> > @@ -1258,8 +1215,6 @@ xfs_ioctl_setattr_get_trans(
> >  out_cancel:
> >  	xfs_trans_cancel(tp);
> >  out_unlock:
> > -	if (join_flags)
> > -		xfs_iunlock(ip, join_flags);
> >  	return ERR_PTR(error);
> >  }
> >  
> > @@ -1386,7 +1341,6 @@ xfs_ioctl_setattr(
> >  	struct xfs_dquot	*pdqp = NULL;
> >  	struct xfs_dquot	*olddquot = NULL;
> >  	int			code;
> > -	int			join_flags = 0;
> >  
> >  	trace_xfs_ioctl_setattr(ip);
> >  
> > @@ -1410,18 +1364,9 @@ xfs_ioctl_setattr(
> >  			return code;
> >  	}
> >  
> > -	/*
> > -	 * Changing DAX config may require inode locking for mapping
> > -	 * invalidation. These need to be held all the way to transaction commit
> > -	 * or cancel time, so need to be passed through to
> > -	 * xfs_ioctl_setattr_get_trans() so it can apply them to the join call
> > -	 * appropriately.
> > -	 */
> > -	code = xfs_ioctl_setattr_dax_invalidate(ip, fa, &join_flags);
> > -	if (code)
> > -		goto error_free_dquots;
> > +	xfs_ioctl_setattr_dax_invalidate(ip, fa);
> >  
> > -	tp = xfs_ioctl_setattr_get_trans(ip, join_flags);
> > +	tp = xfs_ioctl_setattr_get_trans(ip);
> >  	if (IS_ERR(tp)) {
> >  		code = PTR_ERR(tp);
> >  		goto error_free_dquots;
> > @@ -1552,7 +1497,6 @@ xfs_ioc_setxflags(
> >  	struct fsxattr		fa;
> >  	struct fsxattr		old_fa;
> >  	unsigned int		flags;
> > -	int			join_flags = 0;
> >  	int			error;
> >  
> >  	if (copy_from_user(&flags, arg, sizeof(flags)))
> > @@ -1569,18 +1513,9 @@ xfs_ioc_setxflags(
> >  	if (error)
> >  		return error;
> >  
> > -	/*
> > -	 * Changing DAX config may require inode locking for mapping
> > -	 * invalidation. These need to be held all the way to transaction commit
> > -	 * or cancel time, so need to be passed through to
> > -	 * xfs_ioctl_setattr_get_trans() so it can apply them to the join call
> > -	 * appropriately.
> > -	 */
> > -	error = xfs_ioctl_setattr_dax_invalidate(ip, &fa, &join_flags);
> > -	if (error)
> > -		goto out_drop_write;
> > +	xfs_ioctl_setattr_dax_invalidate(ip, &fa);
> >  
> > -	tp = xfs_ioctl_setattr_get_trans(ip, join_flags);
> > +	tp = xfs_ioctl_setattr_get_trans(ip);
> >  	if (IS_ERR(tp)) {
> >  		error = PTR_ERR(tp);
> >  		goto out_drop_write;
> > -- 
> > 2.25.1
> >
Dave Chinner April 20, 2020, 2:31 a.m. UTC | #3
On Tue, Apr 14, 2020 at 11:45:22PM -0700, ira.weiny@intel.com wrote:
> From: Ira Weiny <ira.weiny@intel.com>
> 
> We only support changing FS_XFLAG_DAX on directories.  Files get their
> flag from the parent directory on creation only.  So no data
> invalidation needs to happen.
> 
> Alter the xfs_ioctl_setattr_dax_invalidate() to be
> xfs_ioctl_setattr_dax_validate().  xfs_ioctl_setattr_dax_validate() now
> validates that any FS_XFLAG_DAX change is ok.
> 
> This also allows use to remove the join_flags logic.
> 
> Signed-off-by: Ira Weiny <ira.weiny@intel.com>
> 
> ---
> Changes from V7:
> 	Use new flag_inode_dontcache()
> 	Skip don't cache if mount over ride is active.
> 
> Changes from v6:
> 	Fix completely broken implementation and update commit message.
> 	Use the new VFS layer I_DONTCACHE to facilitate inode eviction
> 	and S_DAX changing on drop_caches
> 
> Changes from v5:
> 	New patch
> ---
>  fs/xfs/xfs_ioctl.c | 105 +++++++++------------------------------------
>  1 file changed, 20 insertions(+), 85 deletions(-)
> 
> diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
> index c6cd92ef4a05..75d4a830ef38 100644
> --- a/fs/xfs/xfs_ioctl.c
> +++ b/fs/xfs/xfs_ioctl.c
> @@ -1145,63 +1145,28 @@ xfs_ioctl_setattr_xflags(
>  }
>  
>  /*
> - * If we are changing DAX flags, we have to ensure the file is clean and any
> - * cached objects in the address space are invalidated and removed. This
> - * requires us to lock out other IO and page faults similar to a truncate
> - * operation. The locks need to be held until the transaction has been committed
> - * so that the cache invalidation is atomic with respect to the DAX flag
> - * manipulation.
> + * Mark inodes with a changing FS_XFLAG_DAX, I_DONTCACHE

That describes what the code is doing, not why.

>   */
> -static int
> +static void
>  xfs_ioctl_setattr_dax_invalidate(
>  	struct xfs_inode	*ip,
> -	struct fsxattr		*fa,
> -	int			*join_flags)
> +	struct fsxattr		*fa)

It's not an invalidation function anymore, so needs a name change.

> -	if (!(fa->fsx_xflags & FS_XFLAG_DAX) && !IS_DAX(inode))
> -		return 0;
> +	struct xfs_mount	*mp = ip->i_mount;
> +	struct inode            *inode = VFS_I(ip);
>  
>  	if (S_ISDIR(inode->i_mode))
> -		return 0;
> -
> -	/* lock, flush and invalidate mapping in preparation for flag change */
> -	xfs_ilock(ip, XFS_MMAPLOCK_EXCL | XFS_IOLOCK_EXCL);
> -	error = filemap_write_and_wait(inode->i_mapping);
> -	if (error)
> -		goto out_unlock;
> -	error = invalidate_inode_pages2(inode->i_mapping);
> -	if (error)
> -		goto out_unlock;
> +		return;
>  
> -	*join_flags = XFS_MMAPLOCK_EXCL | XFS_IOLOCK_EXCL;
> -	return 0;
> -
> -out_unlock:
> -	xfs_iunlock(ip, XFS_MMAPLOCK_EXCL | XFS_IOLOCK_EXCL);
> -	return error;
> +	if (mp->m_flags & XFS_MOUNT_DAX_ALWAYS ||
> +	    mp->m_flags & XFS_MOUNT_DAX_NEVER)
> +		return;

	if (mp->m_flags & (XFS_MOUNT_DAX_ALWAYS | XFS_MOUNT_DAX_NEVER))
		return;
> +	if (((fa->fsx_xflags & FS_XFLAG_DAX) &&
> +	    !(ip->i_d.di_flags2 & XFS_DIFLAG2_DAX)) ||
> +	    (!(fa->fsx_xflags & FS_XFLAG_DAX) &&
> +	     (ip->i_d.di_flags2 & XFS_DIFLAG2_DAX)))
> +		flag_inode_dontcache(inode);

This doesn't set the XFS inode's "don't cache" flag, despite it
having one that serves exactly the same purpose. IOWs, if the
XFS_IDONTCACHE flag is now redundant, please replace it's current
usage with this new flag and get rid of the XFS inode flag. i.e.
the only place we set XFS_IDONTCACHE can be replaced with a call to
this mark_inode_dontcache() call...

Cheers,

Dave.
Ira Weiny April 20, 2020, 6:36 p.m. UTC | #4
On Mon, Apr 20, 2020 at 12:31:31PM +1000, Dave Chinner wrote:
> On Tue, Apr 14, 2020 at 11:45:22PM -0700, ira.weiny@intel.com wrote:
> > From: Ira Weiny <ira.weiny@intel.com>
> > 
> > We only support changing FS_XFLAG_DAX on directories.  Files get their
> > flag from the parent directory on creation only.  So no data
> > invalidation needs to happen.
> > 
> > Alter the xfs_ioctl_setattr_dax_invalidate() to be
> > xfs_ioctl_setattr_dax_validate().  xfs_ioctl_setattr_dax_validate() now
> > validates that any FS_XFLAG_DAX change is ok.
> > 
> > This also allows use to remove the join_flags logic.
> > 
> > Signed-off-by: Ira Weiny <ira.weiny@intel.com>
> > 
> > ---
> > Changes from V7:
> > 	Use new flag_inode_dontcache()
> > 	Skip don't cache if mount over ride is active.
> > 
> > Changes from v6:
> > 	Fix completely broken implementation and update commit message.
> > 	Use the new VFS layer I_DONTCACHE to facilitate inode eviction
> > 	and S_DAX changing on drop_caches
> > 
> > Changes from v5:
> > 	New patch
> > ---
> >  fs/xfs/xfs_ioctl.c | 105 +++++++++------------------------------------
> >  1 file changed, 20 insertions(+), 85 deletions(-)
> > 
> > diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
> > index c6cd92ef4a05..75d4a830ef38 100644
> > --- a/fs/xfs/xfs_ioctl.c
> > +++ b/fs/xfs/xfs_ioctl.c
> > @@ -1145,63 +1145,28 @@ xfs_ioctl_setattr_xflags(
> >  }
> >  
> >  /*
> > - * If we are changing DAX flags, we have to ensure the file is clean and any
> > - * cached objects in the address space are invalidated and removed. This
> > - * requires us to lock out other IO and page faults similar to a truncate
> > - * operation. The locks need to be held until the transaction has been committed
> > - * so that the cache invalidation is atomic with respect to the DAX flag
> > - * manipulation.
> > + * Mark inodes with a changing FS_XFLAG_DAX, I_DONTCACHE
> 
> That describes what the code is doing, not why.

I'll remove the comment.

> 
> >   */
> > -static int
> > +static void
> >  xfs_ioctl_setattr_dax_invalidate(
> >  	struct xfs_inode	*ip,
> > -	struct fsxattr		*fa,
> > -	int			*join_flags)
> > +	struct fsxattr		*fa)
> 
> It's not an invalidation function anymore, so needs a name change.

Done.

> 
> > -	if (!(fa->fsx_xflags & FS_XFLAG_DAX) && !IS_DAX(inode))
> > -		return 0;
> > +	struct xfs_mount	*mp = ip->i_mount;
> > +	struct inode            *inode = VFS_I(ip);
> >  
> >  	if (S_ISDIR(inode->i_mode))
> > -		return 0;
> > -
> > -	/* lock, flush and invalidate mapping in preparation for flag change */
> > -	xfs_ilock(ip, XFS_MMAPLOCK_EXCL | XFS_IOLOCK_EXCL);
> > -	error = filemap_write_and_wait(inode->i_mapping);
> > -	if (error)
> > -		goto out_unlock;
> > -	error = invalidate_inode_pages2(inode->i_mapping);
> > -	if (error)
> > -		goto out_unlock;
> > +		return;
> >  
> > -	*join_flags = XFS_MMAPLOCK_EXCL | XFS_IOLOCK_EXCL;
> > -	return 0;
> > -
> > -out_unlock:
> > -	xfs_iunlock(ip, XFS_MMAPLOCK_EXCL | XFS_IOLOCK_EXCL);
> > -	return error;
> > +	if (mp->m_flags & XFS_MOUNT_DAX_ALWAYS ||
> > +	    mp->m_flags & XFS_MOUNT_DAX_NEVER)
> > +		return;
> 
> 	if (mp->m_flags & (XFS_MOUNT_DAX_ALWAYS | XFS_MOUNT_DAX_NEVER))
> 		return;
> > +	if (((fa->fsx_xflags & FS_XFLAG_DAX) &&
> > +	    !(ip->i_d.di_flags2 & XFS_DIFLAG2_DAX)) ||
> > +	    (!(fa->fsx_xflags & FS_XFLAG_DAX) &&
> > +	     (ip->i_d.di_flags2 & XFS_DIFLAG2_DAX)))
> > +		flag_inode_dontcache(inode);
> 
> This doesn't set the XFS inode's "don't cache" flag, despite it
> having one that serves exactly the same purpose.  IOWs, if the XFS_IDONTCACHE
> flag is now redundant, please replace it's current usage with this new flag
> and get rid of the XFS inode flag. i.e.  the only place we set XFS_IDONTCACHE
> can be replaced with a call to this mark_inode_dontcache() call...

I agree, and I would have removed XFS_IDONTCACHE, except I was not convinced
that XFS_IDONTCACHE was redundant.

Currently XFS_IDONTCACHE can be cleared if the inode is found in the cache and
I was unable to convince myself that it would be ok to remove it.  I mentioned
this to Darrick in V7.

https://lore.kernel.org/lkml/20200413194432.GD1649878@iweiny-DESK2.sc.intel.com/

What am I missing with this code?

xfs_iget_cache_hit():
...
        if (!(flags & XFS_IGET_INCORE))
		xfs_iflags_clear(ip, XFS_ISTALE | XFS_IDONTCACHE);
...

Why is XFS_IDONTCACHE not 'sticky'?  And why does xfs_iget_cache_hit() clear it
rather than fail when XFS_IDONTCACHE is set?

Ira

> 
> Cheers,
> 
> Dave.
> -- 
> Dave Chinner
> david@fromorbit.com
Dave Chinner April 21, 2020, 12:19 a.m. UTC | #5
On Mon, Apr 20, 2020 at 11:36:17AM -0700, Ira Weiny wrote:
> On Mon, Apr 20, 2020 at 12:31:31PM +1000, Dave Chinner wrote:
> > On Tue, Apr 14, 2020 at 11:45:22PM -0700, ira.weiny@intel.com wrote:
> > > From: Ira Weiny <ira.weiny@intel.com>
> > > -out_unlock:
> > > -	xfs_iunlock(ip, XFS_MMAPLOCK_EXCL | XFS_IOLOCK_EXCL);
> > > -	return error;
> > > +	if (mp->m_flags & XFS_MOUNT_DAX_ALWAYS ||
> > > +	    mp->m_flags & XFS_MOUNT_DAX_NEVER)
> > > +		return;
> > 
> > 	if (mp->m_flags & (XFS_MOUNT_DAX_ALWAYS | XFS_MOUNT_DAX_NEVER))
> > 		return;
> > > +	if (((fa->fsx_xflags & FS_XFLAG_DAX) &&
> > > +	    !(ip->i_d.di_flags2 & XFS_DIFLAG2_DAX)) ||
> > > +	    (!(fa->fsx_xflags & FS_XFLAG_DAX) &&
> > > +	     (ip->i_d.di_flags2 & XFS_DIFLAG2_DAX)))
> > > +		flag_inode_dontcache(inode);
> > 
> > This doesn't set the XFS inode's "don't cache" flag, despite it
> > having one that serves exactly the same purpose.  IOWs, if the XFS_IDONTCACHE
> > flag is now redundant, please replace it's current usage with this new flag
> > and get rid of the XFS inode flag. i.e.  the only place we set XFS_IDONTCACHE
> > can be replaced with a call to this mark_inode_dontcache() call...
> 
> I agree, and I would have removed XFS_IDONTCACHE, except I was not convinced
> that XFS_IDONTCACHE was redundant.
> 
> Currently XFS_IDONTCACHE can be cleared if the inode is found in the cache and
> I was unable to convince myself that it would be ok to remove it.  I mentioned
> this to Darrick in V7.
> 
> https://lore.kernel.org/lkml/20200413194432.GD1649878@iweiny-DESK2.sc.intel.com/
> 
> What am I missing with this code?
> 
> xfs_iget_cache_hit():
> ...
>         if (!(flags & XFS_IGET_INCORE))
> 		xfs_iflags_clear(ip, XFS_ISTALE | XFS_IDONTCACHE);
> ...
> 
> Why is XFS_IDONTCACHE not 'sticky'?
> And why does xfs_iget_cache_hit() clear it

Because it was designed to do exactly what bulkstat required, and
nothing else.  xfs_iget() is an internal filesystem interface, not a
VFS level interface. Hence we can make up whatever semantics we
want. And if we get a cache hit, we have multiple references to the
inode so we probably should cache it regardless of whether the
original lookup said "I'm a one-shot wonder, so don't cache me".

IOWs, it's a classic "don't cache unless a second reference comes
along during the current life cycle" algorithm.

This isn't actually a frequently travelled path - bulkstat is a
pretty rare thing to be doing - so the behaviour is "be nice to the
cache because we can do it easily", not a hard requirement.

> rather than fail when XFS_IDONTCACHE is set?

Because then it would be impossible to access an inode that has
IDONTCACHE set on it. e.g. bulkstat an inode, now you can't open()
it because it has XFS_IDONTCACHE set and VFS pathwalk lookups fail
trying to resolve the inode number to a struct inode....

Same goes for I_DONTCACHE - this does not prevent new lookups from
taking references to the inode while it is still referenced. i.e.
the reference count can still go up after the flag is set. The flag
only takes effect when the reference count goes to zero.

Hence the only difference between XFS_IDONTCACHE and I_DONTCACHE is
the behaviour when cache hits on existing XFS_IDONTCACHE inodes
occur. It's not going to make a significant difference to cache
residency if we leave the I_DONTCACHE flag in place, because the
vast majority of inodes with that flag (from bulkstat) are still
one-shot wonders and hence the reclaim decision is still the
overwhelmingly correct decision to be making...

And, realistically, we have a second layer of inode caching in XFS
(the cluster buffers) and so it's likely if we evict and reclaim an
inode just before it gets re-used, then we'll hit the buffer cache
anyway. i.e. we still avoid the IO to read the inode back into
memory, we just burn a little more CPU re-instantiating it from the
buffer....

Cheers,

Dave.

Patch
diff mbox series

diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index c6cd92ef4a05..75d4a830ef38 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -1145,63 +1145,28 @@  xfs_ioctl_setattr_xflags(
 }
 
 /*
- * If we are changing DAX flags, we have to ensure the file is clean and any
- * cached objects in the address space are invalidated and removed. This
- * requires us to lock out other IO and page faults similar to a truncate
- * operation. The locks need to be held until the transaction has been committed
- * so that the cache invalidation is atomic with respect to the DAX flag
- * manipulation.
+ * Mark inodes with a changing FS_XFLAG_DAX, I_DONTCACHE
  */
-static int
+static void
 xfs_ioctl_setattr_dax_invalidate(
 	struct xfs_inode	*ip,
-	struct fsxattr		*fa,
-	int			*join_flags)
+	struct fsxattr		*fa)
 {
-	struct inode		*inode = VFS_I(ip);
-	struct super_block	*sb = inode->i_sb;
-	int			error;
-
-	*join_flags = 0;
-
-	/*
-	 * It is only valid to set the DAX flag on regular files and
-	 * directories on filesystems where the block size is equal to the page
-	 * size. On directories it serves as an inherited hint so we don't
-	 * have to check the device for dax support or flush pagecache.
-	 */
-	if (fa->fsx_xflags & FS_XFLAG_DAX) {
-		struct xfs_buftarg	*target = xfs_inode_buftarg(ip);
-
-		if (!bdev_dax_supported(target->bt_bdev, sb->s_blocksize))
-			return -EINVAL;
-	}
-
-	/* If the DAX state is not changing, we have nothing to do here. */
-	if ((fa->fsx_xflags & FS_XFLAG_DAX) && IS_DAX(inode))
-		return 0;
-	if (!(fa->fsx_xflags & FS_XFLAG_DAX) && !IS_DAX(inode))
-		return 0;
+	struct xfs_mount	*mp = ip->i_mount;
+	struct inode            *inode = VFS_I(ip);
 
 	if (S_ISDIR(inode->i_mode))
-		return 0;
-
-	/* lock, flush and invalidate mapping in preparation for flag change */
-	xfs_ilock(ip, XFS_MMAPLOCK_EXCL | XFS_IOLOCK_EXCL);
-	error = filemap_write_and_wait(inode->i_mapping);
-	if (error)
-		goto out_unlock;
-	error = invalidate_inode_pages2(inode->i_mapping);
-	if (error)
-		goto out_unlock;
+		return;
 
-	*join_flags = XFS_MMAPLOCK_EXCL | XFS_IOLOCK_EXCL;
-	return 0;
-
-out_unlock:
-	xfs_iunlock(ip, XFS_MMAPLOCK_EXCL | XFS_IOLOCK_EXCL);
-	return error;
+	if (mp->m_flags & XFS_MOUNT_DAX_ALWAYS ||
+	    mp->m_flags & XFS_MOUNT_DAX_NEVER)
+		return;
 
+	if (((fa->fsx_xflags & FS_XFLAG_DAX) &&
+	    !(ip->i_d.di_flags2 & XFS_DIFLAG2_DAX)) ||
+	    (!(fa->fsx_xflags & FS_XFLAG_DAX) &&
+	     (ip->i_d.di_flags2 & XFS_DIFLAG2_DAX)))
+		flag_inode_dontcache(inode);
 }
 
 /*
@@ -1209,17 +1174,10 @@  xfs_ioctl_setattr_dax_invalidate(
  * have permission to do so. On success, return a clean transaction and the
  * inode locked exclusively ready for further operation specific checks. On
  * failure, return an error without modifying or locking the inode.
- *
- * The inode might already be IO locked on call. If this is the case, it is
- * indicated in @join_flags and we take full responsibility for ensuring they
- * are unlocked from now on. Hence if we have an error here, we still have to
- * unlock them. Otherwise, once they are joined to the transaction, they will
- * be unlocked on commit/cancel.
  */
 static struct xfs_trans *
 xfs_ioctl_setattr_get_trans(
-	struct xfs_inode	*ip,
-	int			join_flags)
+	struct xfs_inode	*ip)
 {
 	struct xfs_mount	*mp = ip->i_mount;
 	struct xfs_trans	*tp;
@@ -1236,8 +1194,7 @@  xfs_ioctl_setattr_get_trans(
 		goto out_unlock;
 
 	xfs_ilock(ip, XFS_ILOCK_EXCL);
-	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL | join_flags);
-	join_flags = 0;
+	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
 
 	/*
 	 * CAP_FOWNER overrides the following restrictions:
@@ -1258,8 +1215,6 @@  xfs_ioctl_setattr_get_trans(
 out_cancel:
 	xfs_trans_cancel(tp);
 out_unlock:
-	if (join_flags)
-		xfs_iunlock(ip, join_flags);
 	return ERR_PTR(error);
 }
 
@@ -1386,7 +1341,6 @@  xfs_ioctl_setattr(
 	struct xfs_dquot	*pdqp = NULL;
 	struct xfs_dquot	*olddquot = NULL;
 	int			code;
-	int			join_flags = 0;
 
 	trace_xfs_ioctl_setattr(ip);
 
@@ -1410,18 +1364,9 @@  xfs_ioctl_setattr(
 			return code;
 	}
 
-	/*
-	 * Changing DAX config may require inode locking for mapping
-	 * invalidation. These need to be held all the way to transaction commit
-	 * or cancel time, so need to be passed through to
-	 * xfs_ioctl_setattr_get_trans() so it can apply them to the join call
-	 * appropriately.
-	 */
-	code = xfs_ioctl_setattr_dax_invalidate(ip, fa, &join_flags);
-	if (code)
-		goto error_free_dquots;
+	xfs_ioctl_setattr_dax_invalidate(ip, fa);
 
-	tp = xfs_ioctl_setattr_get_trans(ip, join_flags);
+	tp = xfs_ioctl_setattr_get_trans(ip);
 	if (IS_ERR(tp)) {
 		code = PTR_ERR(tp);
 		goto error_free_dquots;
@@ -1552,7 +1497,6 @@  xfs_ioc_setxflags(
 	struct fsxattr		fa;
 	struct fsxattr		old_fa;
 	unsigned int		flags;
-	int			join_flags = 0;
 	int			error;
 
 	if (copy_from_user(&flags, arg, sizeof(flags)))
@@ -1569,18 +1513,9 @@  xfs_ioc_setxflags(
 	if (error)
 		return error;
 
-	/*
-	 * Changing DAX config may require inode locking for mapping
-	 * invalidation. These need to be held all the way to transaction commit
-	 * or cancel time, so need to be passed through to
-	 * xfs_ioctl_setattr_get_trans() so it can apply them to the join call
-	 * appropriately.
-	 */
-	error = xfs_ioctl_setattr_dax_invalidate(ip, &fa, &join_flags);
-	if (error)
-		goto out_drop_write;
+	xfs_ioctl_setattr_dax_invalidate(ip, &fa);
 
-	tp = xfs_ioctl_setattr_get_trans(ip, join_flags);
+	tp = xfs_ioctl_setattr_get_trans(ip);
 	if (IS_ERR(tp)) {
 		error = PTR_ERR(tp);
 		goto out_drop_write;