All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Darrick J. Wong" <darrick.wong-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
To: Christoph Hellwig <hch-jcswGhMUV9g@public.gmane.org>
Cc: linux-fsdevel-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
	linux-nvdimm-y27Ovi1pjclAfugRpC6u6w@public.gmane.org,
	xfs-VZNHf3L845pBDgjK7y7TUQ@public.gmane.org
Subject: Re: [PATCH 7/8] xfs: split direct I/O and DAX path
Date: Wed, 28 Sep 2016 19:53:52 -0700	[thread overview]
Message-ID: <20160929025351.GB4901@birch.djwong.org> (raw)
In-Reply-To: <1466609236-23801-8-git-send-email-hch-jcswGhMUV9g@public.gmane.org>

On Wed, Jun 22, 2016 at 05:27:15PM +0200, Christoph Hellwig wrote:
> So far the DAX code overloaded the direct I/O code path.  There is very little
> in common between the two, and untangling them allows to clean up both variants.
> 
> As a ѕide effect we also get separate trace points for both I/O types.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
>  fs/xfs/xfs_file.c  | 139 ++++++++++++++++++++++++++++++++++++++++++-----------
>  fs/xfs/xfs_trace.h |   2 +
>  2 files changed, 112 insertions(+), 29 deletions(-)
> 
> diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
> index 24b267d..0e74325 100644
> --- a/fs/xfs/xfs_file.c
> +++ b/fs/xfs/xfs_file.c
> @@ -262,13 +262,11 @@ xfs_file_dio_aio_read(
>  	else
>  		target = ip->i_mount->m_ddev_targp;
>  
> -	if (!IS_DAX(inode)) {
> -		/* DIO must be aligned to device logical sector size */
> -		if ((iocb->ki_pos | count) & target->bt_logical_sectormask) {
> -			if (iocb->ki_pos == isize)
> -				return 0;
> -			return -EINVAL;
> -		}
> +	/* DIO must be aligned to device logical sector size */
> +	if ((iocb->ki_pos | count) & target->bt_logical_sectormask) {
> +		if (iocb->ki_pos == isize)
> +			return 0;
> +		return -EINVAL;
>  	}
>  
>  	/*
> @@ -317,13 +315,37 @@ xfs_file_dio_aio_read(
>  	}
>  
>  	data = *to;
> -	if (IS_DAX(inode)) {
> -		ret = dax_do_io(iocb, inode, &data, xfs_get_blocks_direct,
> -				NULL, 0);
> -	} else {
> -		ret = __blockdev_direct_IO(iocb, inode, target->bt_bdev, &data,
> -				xfs_get_blocks_direct, NULL, NULL, 0);
> +	ret = __blockdev_direct_IO(iocb, inode, target->bt_bdev, &data,
> +			xfs_get_blocks_direct, NULL, NULL, 0);
> +	if (ret > 0) {
> +		iocb->ki_pos += ret;
> +		iov_iter_advance(to, ret);
>  	}
> +	xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
> +
> +	file_accessed(iocb->ki_filp);

So I noticed that generic/323 starts crashing in file_accessed -> touch_atime
because iocb->ki_filp->f_path.dentry == NULL.  For a while I thought it was
some weird reflink bug, but I finally had time to go build a vanilla 4.8-rc8
kernel and that blew up here too.  I'm not sure why this line got inserted
here, since it wasn't there prior to this patch, AFAICT.

<shrug>

--D

> +	return ret;
> +}
> +
> +STATIC ssize_t
> +xfs_file_dax_read(
> +	struct kiocb		*iocb,
> +	struct iov_iter		*to)
> +{
> +	struct address_space	*mapping = iocb->ki_filp->f_mapping;
> +	struct inode		*inode = mapping->host;
> +	struct xfs_inode	*ip = XFS_I(inode);
> +	struct iov_iter		data = *to;
> +	size_t			count = iov_iter_count(to);
> +	ssize_t			ret = 0;
> +
> +	trace_xfs_file_dax_read(ip, count, iocb->ki_pos);
> +
> +	if (!count)
> +		return 0; /* skip atime */
> +
> +	xfs_rw_ilock(ip, XFS_IOLOCK_SHARED);
> +	ret = dax_do_io(iocb, inode, &data, xfs_get_blocks_direct, NULL, 0);
>  	if (ret > 0) {
>  		iocb->ki_pos += ret;
>  		iov_iter_advance(to, ret);
> @@ -356,7 +378,8 @@ xfs_file_read_iter(
>  	struct kiocb		*iocb,
>  	struct iov_iter		*to)
>  {
> -	struct xfs_mount	*mp = XFS_I(file_inode(iocb->ki_filp))->i_mount;
> +	struct inode		*inode = file_inode(iocb->ki_filp);
> +	struct xfs_mount	*mp = XFS_I(inode)->i_mount;
>  	ssize_t			ret = 0;
>  
>  	XFS_STATS_INC(mp, xs_read_calls);
> @@ -364,7 +387,9 @@ xfs_file_read_iter(
>  	if (XFS_FORCED_SHUTDOWN(mp))
>  		return -EIO;
>  
> -	if (iocb->ki_flags & IOCB_DIRECT)
> +	if (IS_DAX(inode))
> +		ret = xfs_file_dax_read(iocb, to);
> +	else if (iocb->ki_flags & IOCB_DIRECT)
>  		ret = xfs_file_dio_aio_read(iocb, to);
>  	else
>  		ret = xfs_file_buffered_aio_read(iocb, to);
> @@ -586,8 +611,7 @@ xfs_file_dio_aio_write(
>  					mp->m_rtdev_targp : mp->m_ddev_targp;
>  
>  	/* DIO must be aligned to device logical sector size */
> -	if (!IS_DAX(inode) &&
> -	    ((iocb->ki_pos | count) & target->bt_logical_sectormask))
> +	if ((iocb->ki_pos | count) & target->bt_logical_sectormask)
>  		return -EINVAL;
>  
>  	/* "unaligned" here means not aligned to a filesystem block */
> @@ -656,14 +680,9 @@ xfs_file_dio_aio_write(
>  	trace_xfs_file_direct_write(ip, count, iocb->ki_pos);
>  
>  	data = *from;
> -	if (IS_DAX(inode)) {
> -		ret = dax_do_io(iocb, inode, &data, xfs_get_blocks_direct,
> -				xfs_end_io_direct_write, 0);
> -	} else {
> -		ret = __blockdev_direct_IO(iocb, inode, target->bt_bdev, &data,
> -				xfs_get_blocks_direct, xfs_end_io_direct_write,
> -				NULL, DIO_ASYNC_EXTEND);
> -	}
> +	ret = __blockdev_direct_IO(iocb, inode, target->bt_bdev, &data,
> +			xfs_get_blocks_direct, xfs_end_io_direct_write,
> +			NULL, DIO_ASYNC_EXTEND);
>  
>  	/* see generic_file_direct_write() for why this is necessary */
>  	if (mapping->nrpages) {
> @@ -680,10 +699,70 @@ out:
>  	xfs_rw_iunlock(ip, iolock);
>  
>  	/*
> -	 * No fallback to buffered IO on errors for XFS. DAX can result in
> -	 * partial writes, but direct IO will either complete fully or fail.
> +	 * No fallback to buffered IO on errors for XFS, direct IO will either
> +	 * complete fully or fail.
> +	 */
> +	ASSERT(ret < 0 || ret == count);
> +	return ret;
> +}
> +
> +STATIC ssize_t
> +xfs_file_dax_write(
> +	struct kiocb		*iocb,
> +	struct iov_iter		*from)
> +{
> +	struct address_space	*mapping = iocb->ki_filp->f_mapping;
> +	struct inode		*inode = mapping->host;
> +	struct xfs_inode	*ip = XFS_I(inode);
> +	struct xfs_mount	*mp = ip->i_mount;
> +	ssize_t			ret = 0;
> +	int			unaligned_io = 0;
> +	int			iolock;
> +	struct iov_iter		data;
> +
> +	/* "unaligned" here means not aligned to a filesystem block */
> +	if ((iocb->ki_pos & mp->m_blockmask) ||
> +	    ((iocb->ki_pos + iov_iter_count(from)) & mp->m_blockmask)) {
> +		unaligned_io = 1;
> +		iolock = XFS_IOLOCK_EXCL;
> +	} else if (mapping->nrpages) {
> +		iolock = XFS_IOLOCK_EXCL;
> +	} else {
> +		iolock = XFS_IOLOCK_SHARED;
> +	}
> +	xfs_rw_ilock(ip, iolock);
> +
> +	ret = xfs_file_aio_write_checks(iocb, from, &iolock);
> +	if (ret)
> +		goto out;
> +
> +	/*
> +	 * Yes, even DAX files can have page cache attached to them:  A zeroed
> +	 * page is inserted into the pagecache when we have to serve a write
> +	 * fault on a hole.  It should never be dirtied and can simply be
> +	 * dropped from the pagecache once we get real data for the page.
>  	 */
> -	ASSERT(ret < 0 || ret == count || IS_DAX(VFS_I(ip)));
> +	if (mapping->nrpages) {
> +		ret = invalidate_inode_pages2(mapping);
> +		WARN_ON_ONCE(ret);
> +	}
> +
> +	if (iolock == XFS_IOLOCK_EXCL && !unaligned_io) {
> +		xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL);
> +		iolock = XFS_IOLOCK_SHARED;
> +	}
> +
> +	trace_xfs_file_dax_write(ip, iov_iter_count(from), iocb->ki_pos);
> +
> +	data = *from;
> +	ret = dax_do_io(iocb, inode, &data, xfs_get_blocks_direct,
> +			xfs_end_io_direct_write, 0);
> +	if (ret > 0) {
> +		iocb->ki_pos += ret;
> +		iov_iter_advance(from, ret);
> +	}
> +out:
> +	xfs_rw_iunlock(ip, iolock);
>  	return ret;
>  }
>  
> @@ -765,7 +844,9 @@ xfs_file_write_iter(
>  	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
>  		return -EIO;
>  
> -	if ((iocb->ki_flags & IOCB_DIRECT) || IS_DAX(inode))
> +	if (IS_DAX(inode))
> +		ret = xfs_file_dax_write(iocb, from);
> +	else if (iocb->ki_flags & IOCB_DIRECT)
>  		ret = xfs_file_dio_aio_write(iocb, from);
>  	else
>  		ret = xfs_file_buffered_aio_write(iocb, from);
> diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
> index 2504f94..1451690 100644
> --- a/fs/xfs/xfs_trace.h
> +++ b/fs/xfs/xfs_trace.h
> @@ -1165,8 +1165,10 @@ DEFINE_EVENT(xfs_file_class, name,	\
>  	TP_ARGS(ip, count, offset))
>  DEFINE_RW_EVENT(xfs_file_buffered_read);
>  DEFINE_RW_EVENT(xfs_file_direct_read);
> +DEFINE_RW_EVENT(xfs_file_dax_read);
>  DEFINE_RW_EVENT(xfs_file_buffered_write);
>  DEFINE_RW_EVENT(xfs_file_direct_write);
> +DEFINE_RW_EVENT(xfs_file_dax_write);
>  DEFINE_RW_EVENT(xfs_file_splice_read);
>  
>  DECLARE_EVENT_CLASS(xfs_page_class,
> -- 
> 2.1.4
> 
> _______________________________________________
> xfs mailing list
> xfs@oss.sgi.com
> http://oss.sgi.com/mailman/listinfo/xfs
_______________________________________________
Linux-nvdimm mailing list
Linux-nvdimm@lists.01.org
https://lists.01.org/mailman/listinfo/linux-nvdimm

WARNING: multiple messages have this Message-ID (diff)
From: "Darrick J. Wong" <darrick.wong@oracle.com>
To: Christoph Hellwig <hch@lst.de>
Cc: xfs@oss.sgi.com, linux-fsdevel@vger.kernel.org, linux-nvdimm@ml01.01.org
Subject: Re: [PATCH 7/8] xfs: split direct I/O and DAX path
Date: Wed, 28 Sep 2016 19:53:52 -0700	[thread overview]
Message-ID: <20160929025351.GB4901@birch.djwong.org> (raw)
In-Reply-To: <1466609236-23801-8-git-send-email-hch@lst.de>

On Wed, Jun 22, 2016 at 05:27:15PM +0200, Christoph Hellwig wrote:
> So far the DAX code overloaded the direct I/O code path.  There is very little
> in common between the two, and untangling them allows to clean up both variants.
> 
> As a ѕide effect we also get separate trace points for both I/O types.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
>  fs/xfs/xfs_file.c  | 139 ++++++++++++++++++++++++++++++++++++++++++-----------
>  fs/xfs/xfs_trace.h |   2 +
>  2 files changed, 112 insertions(+), 29 deletions(-)
> 
> diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
> index 24b267d..0e74325 100644
> --- a/fs/xfs/xfs_file.c
> +++ b/fs/xfs/xfs_file.c
> @@ -262,13 +262,11 @@ xfs_file_dio_aio_read(
>  	else
>  		target = ip->i_mount->m_ddev_targp;
>  
> -	if (!IS_DAX(inode)) {
> -		/* DIO must be aligned to device logical sector size */
> -		if ((iocb->ki_pos | count) & target->bt_logical_sectormask) {
> -			if (iocb->ki_pos == isize)
> -				return 0;
> -			return -EINVAL;
> -		}
> +	/* DIO must be aligned to device logical sector size */
> +	if ((iocb->ki_pos | count) & target->bt_logical_sectormask) {
> +		if (iocb->ki_pos == isize)
> +			return 0;
> +		return -EINVAL;
>  	}
>  
>  	/*
> @@ -317,13 +315,37 @@ xfs_file_dio_aio_read(
>  	}
>  
>  	data = *to;
> -	if (IS_DAX(inode)) {
> -		ret = dax_do_io(iocb, inode, &data, xfs_get_blocks_direct,
> -				NULL, 0);
> -	} else {
> -		ret = __blockdev_direct_IO(iocb, inode, target->bt_bdev, &data,
> -				xfs_get_blocks_direct, NULL, NULL, 0);
> +	ret = __blockdev_direct_IO(iocb, inode, target->bt_bdev, &data,
> +			xfs_get_blocks_direct, NULL, NULL, 0);
> +	if (ret > 0) {
> +		iocb->ki_pos += ret;
> +		iov_iter_advance(to, ret);
>  	}
> +	xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
> +
> +	file_accessed(iocb->ki_filp);

So I noticed that generic/323 starts crashing in file_accessed -> touch_atime
because iocb->ki_filp->f_path.dentry == NULL.  For a while I thought it was
some weird reflink bug, but I finally had time to go build a vanilla 4.8-rc8
kernel and that blew up here too.  I'm not sure why this line got inserted
here, since it wasn't there prior to this patch, AFAICT.

<shrug>

--D

> +	return ret;
> +}
> +
> +STATIC ssize_t
> +xfs_file_dax_read(
> +	struct kiocb		*iocb,
> +	struct iov_iter		*to)
> +{
> +	struct address_space	*mapping = iocb->ki_filp->f_mapping;
> +	struct inode		*inode = mapping->host;
> +	struct xfs_inode	*ip = XFS_I(inode);
> +	struct iov_iter		data = *to;
> +	size_t			count = iov_iter_count(to);
> +	ssize_t			ret = 0;
> +
> +	trace_xfs_file_dax_read(ip, count, iocb->ki_pos);
> +
> +	if (!count)
> +		return 0; /* skip atime */
> +
> +	xfs_rw_ilock(ip, XFS_IOLOCK_SHARED);
> +	ret = dax_do_io(iocb, inode, &data, xfs_get_blocks_direct, NULL, 0);
>  	if (ret > 0) {
>  		iocb->ki_pos += ret;
>  		iov_iter_advance(to, ret);
> @@ -356,7 +378,8 @@ xfs_file_read_iter(
>  	struct kiocb		*iocb,
>  	struct iov_iter		*to)
>  {
> -	struct xfs_mount	*mp = XFS_I(file_inode(iocb->ki_filp))->i_mount;
> +	struct inode		*inode = file_inode(iocb->ki_filp);
> +	struct xfs_mount	*mp = XFS_I(inode)->i_mount;
>  	ssize_t			ret = 0;
>  
>  	XFS_STATS_INC(mp, xs_read_calls);
> @@ -364,7 +387,9 @@ xfs_file_read_iter(
>  	if (XFS_FORCED_SHUTDOWN(mp))
>  		return -EIO;
>  
> -	if (iocb->ki_flags & IOCB_DIRECT)
> +	if (IS_DAX(inode))
> +		ret = xfs_file_dax_read(iocb, to);
> +	else if (iocb->ki_flags & IOCB_DIRECT)
>  		ret = xfs_file_dio_aio_read(iocb, to);
>  	else
>  		ret = xfs_file_buffered_aio_read(iocb, to);
> @@ -586,8 +611,7 @@ xfs_file_dio_aio_write(
>  					mp->m_rtdev_targp : mp->m_ddev_targp;
>  
>  	/* DIO must be aligned to device logical sector size */
> -	if (!IS_DAX(inode) &&
> -	    ((iocb->ki_pos | count) & target->bt_logical_sectormask))
> +	if ((iocb->ki_pos | count) & target->bt_logical_sectormask)
>  		return -EINVAL;
>  
>  	/* "unaligned" here means not aligned to a filesystem block */
> @@ -656,14 +680,9 @@ xfs_file_dio_aio_write(
>  	trace_xfs_file_direct_write(ip, count, iocb->ki_pos);
>  
>  	data = *from;
> -	if (IS_DAX(inode)) {
> -		ret = dax_do_io(iocb, inode, &data, xfs_get_blocks_direct,
> -				xfs_end_io_direct_write, 0);
> -	} else {
> -		ret = __blockdev_direct_IO(iocb, inode, target->bt_bdev, &data,
> -				xfs_get_blocks_direct, xfs_end_io_direct_write,
> -				NULL, DIO_ASYNC_EXTEND);
> -	}
> +	ret = __blockdev_direct_IO(iocb, inode, target->bt_bdev, &data,
> +			xfs_get_blocks_direct, xfs_end_io_direct_write,
> +			NULL, DIO_ASYNC_EXTEND);
>  
>  	/* see generic_file_direct_write() for why this is necessary */
>  	if (mapping->nrpages) {
> @@ -680,10 +699,70 @@ out:
>  	xfs_rw_iunlock(ip, iolock);
>  
>  	/*
> -	 * No fallback to buffered IO on errors for XFS. DAX can result in
> -	 * partial writes, but direct IO will either complete fully or fail.
> +	 * No fallback to buffered IO on errors for XFS, direct IO will either
> +	 * complete fully or fail.
> +	 */
> +	ASSERT(ret < 0 || ret == count);
> +	return ret;
> +}
> +
> +STATIC ssize_t
> +xfs_file_dax_write(
> +	struct kiocb		*iocb,
> +	struct iov_iter		*from)
> +{
> +	struct address_space	*mapping = iocb->ki_filp->f_mapping;
> +	struct inode		*inode = mapping->host;
> +	struct xfs_inode	*ip = XFS_I(inode);
> +	struct xfs_mount	*mp = ip->i_mount;
> +	ssize_t			ret = 0;
> +	int			unaligned_io = 0;
> +	int			iolock;
> +	struct iov_iter		data;
> +
> +	/* "unaligned" here means not aligned to a filesystem block */
> +	if ((iocb->ki_pos & mp->m_blockmask) ||
> +	    ((iocb->ki_pos + iov_iter_count(from)) & mp->m_blockmask)) {
> +		unaligned_io = 1;
> +		iolock = XFS_IOLOCK_EXCL;
> +	} else if (mapping->nrpages) {
> +		iolock = XFS_IOLOCK_EXCL;
> +	} else {
> +		iolock = XFS_IOLOCK_SHARED;
> +	}
> +	xfs_rw_ilock(ip, iolock);
> +
> +	ret = xfs_file_aio_write_checks(iocb, from, &iolock);
> +	if (ret)
> +		goto out;
> +
> +	/*
> +	 * Yes, even DAX files can have page cache attached to them:  A zeroed
> +	 * page is inserted into the pagecache when we have to serve a write
> +	 * fault on a hole.  It should never be dirtied and can simply be
> +	 * dropped from the pagecache once we get real data for the page.
>  	 */
> -	ASSERT(ret < 0 || ret == count || IS_DAX(VFS_I(ip)));
> +	if (mapping->nrpages) {
> +		ret = invalidate_inode_pages2(mapping);
> +		WARN_ON_ONCE(ret);
> +	}
> +
> +	if (iolock == XFS_IOLOCK_EXCL && !unaligned_io) {
> +		xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL);
> +		iolock = XFS_IOLOCK_SHARED;
> +	}
> +
> +	trace_xfs_file_dax_write(ip, iov_iter_count(from), iocb->ki_pos);
> +
> +	data = *from;
> +	ret = dax_do_io(iocb, inode, &data, xfs_get_blocks_direct,
> +			xfs_end_io_direct_write, 0);
> +	if (ret > 0) {
> +		iocb->ki_pos += ret;
> +		iov_iter_advance(from, ret);
> +	}
> +out:
> +	xfs_rw_iunlock(ip, iolock);
>  	return ret;
>  }
>  
> @@ -765,7 +844,9 @@ xfs_file_write_iter(
>  	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
>  		return -EIO;
>  
> -	if ((iocb->ki_flags & IOCB_DIRECT) || IS_DAX(inode))
> +	if (IS_DAX(inode))
> +		ret = xfs_file_dax_write(iocb, from);
> +	else if (iocb->ki_flags & IOCB_DIRECT)
>  		ret = xfs_file_dio_aio_write(iocb, from);
>  	else
>  		ret = xfs_file_buffered_aio_write(iocb, from);
> diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
> index 2504f94..1451690 100644
> --- a/fs/xfs/xfs_trace.h
> +++ b/fs/xfs/xfs_trace.h
> @@ -1165,8 +1165,10 @@ DEFINE_EVENT(xfs_file_class, name,	\
>  	TP_ARGS(ip, count, offset))
>  DEFINE_RW_EVENT(xfs_file_buffered_read);
>  DEFINE_RW_EVENT(xfs_file_direct_read);
> +DEFINE_RW_EVENT(xfs_file_dax_read);
>  DEFINE_RW_EVENT(xfs_file_buffered_write);
>  DEFINE_RW_EVENT(xfs_file_direct_write);
> +DEFINE_RW_EVENT(xfs_file_dax_write);
>  DEFINE_RW_EVENT(xfs_file_splice_read);
>  
>  DECLARE_EVENT_CLASS(xfs_page_class,
> -- 
> 2.1.4
> 
> _______________________________________________
> xfs mailing list
> xfs@oss.sgi.com
> http://oss.sgi.com/mailman/listinfo/xfs

WARNING: multiple messages have this Message-ID (diff)
From: "Darrick J. Wong" <darrick.wong@oracle.com>
To: Christoph Hellwig <hch@lst.de>
Cc: linux-fsdevel@vger.kernel.org, linux-nvdimm@ml01.01.org, xfs@oss.sgi.com
Subject: Re: [PATCH 7/8] xfs: split direct I/O and DAX path
Date: Wed, 28 Sep 2016 19:53:52 -0700	[thread overview]
Message-ID: <20160929025351.GB4901@birch.djwong.org> (raw)
In-Reply-To: <1466609236-23801-8-git-send-email-hch@lst.de>

On Wed, Jun 22, 2016 at 05:27:15PM +0200, Christoph Hellwig wrote:
> So far the DAX code overloaded the direct I/O code path.  There is very little
> in common between the two, and untangling them allows to clean up both variants.
> 
> As a ѕide effect we also get separate trace points for both I/O types.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
>  fs/xfs/xfs_file.c  | 139 ++++++++++++++++++++++++++++++++++++++++++-----------
>  fs/xfs/xfs_trace.h |   2 +
>  2 files changed, 112 insertions(+), 29 deletions(-)
> 
> diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
> index 24b267d..0e74325 100644
> --- a/fs/xfs/xfs_file.c
> +++ b/fs/xfs/xfs_file.c
> @@ -262,13 +262,11 @@ xfs_file_dio_aio_read(
>  	else
>  		target = ip->i_mount->m_ddev_targp;
>  
> -	if (!IS_DAX(inode)) {
> -		/* DIO must be aligned to device logical sector size */
> -		if ((iocb->ki_pos | count) & target->bt_logical_sectormask) {
> -			if (iocb->ki_pos == isize)
> -				return 0;
> -			return -EINVAL;
> -		}
> +	/* DIO must be aligned to device logical sector size */
> +	if ((iocb->ki_pos | count) & target->bt_logical_sectormask) {
> +		if (iocb->ki_pos == isize)
> +			return 0;
> +		return -EINVAL;
>  	}
>  
>  	/*
> @@ -317,13 +315,37 @@ xfs_file_dio_aio_read(
>  	}
>  
>  	data = *to;
> -	if (IS_DAX(inode)) {
> -		ret = dax_do_io(iocb, inode, &data, xfs_get_blocks_direct,
> -				NULL, 0);
> -	} else {
> -		ret = __blockdev_direct_IO(iocb, inode, target->bt_bdev, &data,
> -				xfs_get_blocks_direct, NULL, NULL, 0);
> +	ret = __blockdev_direct_IO(iocb, inode, target->bt_bdev, &data,
> +			xfs_get_blocks_direct, NULL, NULL, 0);
> +	if (ret > 0) {
> +		iocb->ki_pos += ret;
> +		iov_iter_advance(to, ret);
>  	}
> +	xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
> +
> +	file_accessed(iocb->ki_filp);

So I noticed that generic/323 starts crashing in file_accessed -> touch_atime
because iocb->ki_filp->f_path.dentry == NULL.  For a while I thought it was
some weird reflink bug, but I finally had time to go build a vanilla 4.8-rc8
kernel and that blew up here too.  I'm not sure why this line got inserted
here, since it wasn't there prior to this patch, AFAICT.

<shrug>

--D

> +	return ret;
> +}
> +
> +STATIC ssize_t
> +xfs_file_dax_read(
> +	struct kiocb		*iocb,
> +	struct iov_iter		*to)
> +{
> +	struct address_space	*mapping = iocb->ki_filp->f_mapping;
> +	struct inode		*inode = mapping->host;
> +	struct xfs_inode	*ip = XFS_I(inode);
> +	struct iov_iter		data = *to;
> +	size_t			count = iov_iter_count(to);
> +	ssize_t			ret = 0;
> +
> +	trace_xfs_file_dax_read(ip, count, iocb->ki_pos);
> +
> +	if (!count)
> +		return 0; /* skip atime */
> +
> +	xfs_rw_ilock(ip, XFS_IOLOCK_SHARED);
> +	ret = dax_do_io(iocb, inode, &data, xfs_get_blocks_direct, NULL, 0);
>  	if (ret > 0) {
>  		iocb->ki_pos += ret;
>  		iov_iter_advance(to, ret);
> @@ -356,7 +378,8 @@ xfs_file_read_iter(
>  	struct kiocb		*iocb,
>  	struct iov_iter		*to)
>  {
> -	struct xfs_mount	*mp = XFS_I(file_inode(iocb->ki_filp))->i_mount;
> +	struct inode		*inode = file_inode(iocb->ki_filp);
> +	struct xfs_mount	*mp = XFS_I(inode)->i_mount;
>  	ssize_t			ret = 0;
>  
>  	XFS_STATS_INC(mp, xs_read_calls);
> @@ -364,7 +387,9 @@ xfs_file_read_iter(
>  	if (XFS_FORCED_SHUTDOWN(mp))
>  		return -EIO;
>  
> -	if (iocb->ki_flags & IOCB_DIRECT)
> +	if (IS_DAX(inode))
> +		ret = xfs_file_dax_read(iocb, to);
> +	else if (iocb->ki_flags & IOCB_DIRECT)
>  		ret = xfs_file_dio_aio_read(iocb, to);
>  	else
>  		ret = xfs_file_buffered_aio_read(iocb, to);
> @@ -586,8 +611,7 @@ xfs_file_dio_aio_write(
>  					mp->m_rtdev_targp : mp->m_ddev_targp;
>  
>  	/* DIO must be aligned to device logical sector size */
> -	if (!IS_DAX(inode) &&
> -	    ((iocb->ki_pos | count) & target->bt_logical_sectormask))
> +	if ((iocb->ki_pos | count) & target->bt_logical_sectormask)
>  		return -EINVAL;
>  
>  	/* "unaligned" here means not aligned to a filesystem block */
> @@ -656,14 +680,9 @@ xfs_file_dio_aio_write(
>  	trace_xfs_file_direct_write(ip, count, iocb->ki_pos);
>  
>  	data = *from;
> -	if (IS_DAX(inode)) {
> -		ret = dax_do_io(iocb, inode, &data, xfs_get_blocks_direct,
> -				xfs_end_io_direct_write, 0);
> -	} else {
> -		ret = __blockdev_direct_IO(iocb, inode, target->bt_bdev, &data,
> -				xfs_get_blocks_direct, xfs_end_io_direct_write,
> -				NULL, DIO_ASYNC_EXTEND);
> -	}
> +	ret = __blockdev_direct_IO(iocb, inode, target->bt_bdev, &data,
> +			xfs_get_blocks_direct, xfs_end_io_direct_write,
> +			NULL, DIO_ASYNC_EXTEND);
>  
>  	/* see generic_file_direct_write() for why this is necessary */
>  	if (mapping->nrpages) {
> @@ -680,10 +699,70 @@ out:
>  	xfs_rw_iunlock(ip, iolock);
>  
>  	/*
> -	 * No fallback to buffered IO on errors for XFS. DAX can result in
> -	 * partial writes, but direct IO will either complete fully or fail.
> +	 * No fallback to buffered IO on errors for XFS, direct IO will either
> +	 * complete fully or fail.
> +	 */
> +	ASSERT(ret < 0 || ret == count);
> +	return ret;
> +}
> +
> +STATIC ssize_t
> +xfs_file_dax_write(
> +	struct kiocb		*iocb,
> +	struct iov_iter		*from)
> +{
> +	struct address_space	*mapping = iocb->ki_filp->f_mapping;
> +	struct inode		*inode = mapping->host;
> +	struct xfs_inode	*ip = XFS_I(inode);
> +	struct xfs_mount	*mp = ip->i_mount;
> +	ssize_t			ret = 0;
> +	int			unaligned_io = 0;
> +	int			iolock;
> +	struct iov_iter		data;
> +
> +	/* "unaligned" here means not aligned to a filesystem block */
> +	if ((iocb->ki_pos & mp->m_blockmask) ||
> +	    ((iocb->ki_pos + iov_iter_count(from)) & mp->m_blockmask)) {
> +		unaligned_io = 1;
> +		iolock = XFS_IOLOCK_EXCL;
> +	} else if (mapping->nrpages) {
> +		iolock = XFS_IOLOCK_EXCL;
> +	} else {
> +		iolock = XFS_IOLOCK_SHARED;
> +	}
> +	xfs_rw_ilock(ip, iolock);
> +
> +	ret = xfs_file_aio_write_checks(iocb, from, &iolock);
> +	if (ret)
> +		goto out;
> +
> +	/*
> +	 * Yes, even DAX files can have page cache attached to them:  A zeroed
> +	 * page is inserted into the pagecache when we have to serve a write
> +	 * fault on a hole.  It should never be dirtied and can simply be
> +	 * dropped from the pagecache once we get real data for the page.
>  	 */
> -	ASSERT(ret < 0 || ret == count || IS_DAX(VFS_I(ip)));
> +	if (mapping->nrpages) {
> +		ret = invalidate_inode_pages2(mapping);
> +		WARN_ON_ONCE(ret);
> +	}
> +
> +	if (iolock == XFS_IOLOCK_EXCL && !unaligned_io) {
> +		xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL);
> +		iolock = XFS_IOLOCK_SHARED;
> +	}
> +
> +	trace_xfs_file_dax_write(ip, iov_iter_count(from), iocb->ki_pos);
> +
> +	data = *from;
> +	ret = dax_do_io(iocb, inode, &data, xfs_get_blocks_direct,
> +			xfs_end_io_direct_write, 0);
> +	if (ret > 0) {
> +		iocb->ki_pos += ret;
> +		iov_iter_advance(from, ret);
> +	}
> +out:
> +	xfs_rw_iunlock(ip, iolock);
>  	return ret;
>  }
>  
> @@ -765,7 +844,9 @@ xfs_file_write_iter(
>  	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
>  		return -EIO;
>  
> -	if ((iocb->ki_flags & IOCB_DIRECT) || IS_DAX(inode))
> +	if (IS_DAX(inode))
> +		ret = xfs_file_dax_write(iocb, from);
> +	else if (iocb->ki_flags & IOCB_DIRECT)
>  		ret = xfs_file_dio_aio_write(iocb, from);
>  	else
>  		ret = xfs_file_buffered_aio_write(iocb, from);
> diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
> index 2504f94..1451690 100644
> --- a/fs/xfs/xfs_trace.h
> +++ b/fs/xfs/xfs_trace.h
> @@ -1165,8 +1165,10 @@ DEFINE_EVENT(xfs_file_class, name,	\
>  	TP_ARGS(ip, count, offset))
>  DEFINE_RW_EVENT(xfs_file_buffered_read);
>  DEFINE_RW_EVENT(xfs_file_direct_read);
> +DEFINE_RW_EVENT(xfs_file_dax_read);
>  DEFINE_RW_EVENT(xfs_file_buffered_write);
>  DEFINE_RW_EVENT(xfs_file_direct_write);
> +DEFINE_RW_EVENT(xfs_file_dax_write);
>  DEFINE_RW_EVENT(xfs_file_splice_read);
>  
>  DECLARE_EVENT_CLASS(xfs_page_class,
> -- 
> 2.1.4
> 
> _______________________________________________
> xfs mailing list
> xfs@oss.sgi.com
> http://oss.sgi.com/mailman/listinfo/xfs

_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs

  parent reply	other threads:[~2016-09-29  2:53 UTC|newest]

Thread overview: 72+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-06-22 15:27 xfs: untangle the direct I/O and DAX path, fix DAX locking Christoph Hellwig
2016-06-22 15:27 ` Christoph Hellwig
2016-06-22 15:27 ` Christoph Hellwig
     [not found] ` <1466609236-23801-1-git-send-email-hch-jcswGhMUV9g@public.gmane.org>
2016-06-22 15:27   ` [PATCH 1/8] xfs: don't pass ioflags around in the ioctl path Christoph Hellwig
2016-06-22 15:27     ` Christoph Hellwig
2016-06-22 15:27     ` Christoph Hellwig
2016-06-22 15:27   ` [PATCH 2/8] xfs: kill ioflags Christoph Hellwig
2016-06-22 15:27     ` Christoph Hellwig
2016-06-22 15:27     ` Christoph Hellwig
2016-06-22 15:27   ` [PATCH 3/8] xfs: remove s_maxbytes enforcement in xfs_file_read_iter Christoph Hellwig
2016-06-22 15:27     ` Christoph Hellwig
2016-06-22 15:27     ` Christoph Hellwig
2016-06-22 15:27   ` [PATCH 4/8] xfs: split xfs_file_read_iter into buffered and direct I/O helpers Christoph Hellwig
2016-06-22 15:27     ` Christoph Hellwig
2016-06-22 15:27     ` Christoph Hellwig
2016-06-22 15:27   ` [PATCH 5/8] xfs: stop using generic_file_read_iter for direct I/O Christoph Hellwig
2016-06-22 15:27     ` Christoph Hellwig
2016-06-22 15:27     ` Christoph Hellwig
2016-06-22 15:27   ` [PATCH 6/8] xfs: direct calls in the direct I/O path Christoph Hellwig
2016-06-22 15:27     ` Christoph Hellwig
2016-06-22 15:27     ` Christoph Hellwig
2016-06-22 15:27   ` [PATCH 7/8] xfs: split direct I/O and DAX path Christoph Hellwig
2016-06-22 15:27     ` Christoph Hellwig
2016-06-22 15:27     ` Christoph Hellwig
     [not found]     ` <1466609236-23801-8-git-send-email-hch-jcswGhMUV9g@public.gmane.org>
2016-09-29  2:53       ` Darrick J. Wong [this message]
2016-09-29  2:53         ` Darrick J. Wong
2016-09-29  2:53         ` Darrick J. Wong
2016-09-29  8:38         ` aio completions vs file_accessed race, was: " Christoph Hellwig
2016-09-29  8:38           ` Christoph Hellwig
2016-09-29 20:18           ` Christoph Hellwig
2016-09-29 20:18             ` Christoph Hellwig
2016-09-29 20:18             ` Christoph Hellwig
2016-09-29 20:18               ` Christoph Hellwig
2016-09-29 20:33               ` Darrick J. Wong
2016-09-29 20:33                 ` Darrick J. Wong
2016-06-22 15:27   ` [PATCH 8/8] xfs: fix locking for DAX writes Christoph Hellwig
2016-06-22 15:27     ` Christoph Hellwig
2016-06-22 15:27     ` Christoph Hellwig
2016-06-23 14:22     ` Boaz Harrosh
2016-06-23 14:22       ` Boaz Harrosh
2016-06-23 23:24   ` xfs: untangle the direct I/O and DAX path, fix DAX locking Dave Chinner
2016-06-23 23:24     ` Dave Chinner
2016-06-23 23:24     ` Dave Chinner
2016-06-24  1:14     ` Dan Williams
2016-06-24  1:14       ` Dan Williams
2016-06-24  7:13       ` Dave Chinner
2016-06-24  7:13         ` Dave Chinner
2016-06-24  7:31         ` Christoph Hellwig
2016-06-24  7:31           ` Christoph Hellwig
2016-06-24  7:26     ` Christoph Hellwig
2016-06-24  7:26       ` Christoph Hellwig
2016-06-24  7:26       ` Christoph Hellwig
2016-06-24 23:00       ` Dave Chinner
2016-06-24 23:00         ` Dave Chinner
2016-06-28 13:10         ` Christoph Hellwig
2016-06-28 13:10           ` Christoph Hellwig
2016-06-28 13:10           ` Christoph Hellwig
     [not found]           ` <20160628131059.GA30475-jcswGhMUV9g@public.gmane.org>
2016-06-28 13:27             ` Boaz Harrosh
2016-06-28 13:27               ` Boaz Harrosh
2016-06-28 13:27               ` Boaz Harrosh
     [not found]               ` <57727B27.7060104-/8YdC2HfS5554TAoqtyWWQ@public.gmane.org>
2016-06-28 13:39                 ` Christoph Hellwig
2016-06-28 13:39                   ` Christoph Hellwig
2016-06-28 13:39                   ` Christoph Hellwig
     [not found]                   ` <20160628133928.GB31283-jcswGhMUV9g@public.gmane.org>
2016-06-28 13:56                     ` Boaz Harrosh
2016-06-28 13:56                       ` Boaz Harrosh
2016-06-28 13:56                       ` Boaz Harrosh
     [not found]                       ` <5772820E.2080403-/8YdC2HfS5554TAoqtyWWQ@public.gmane.org>
2016-06-28 15:39                         ` Christoph Hellwig
2016-06-28 15:39                           ` Christoph Hellwig
2016-06-28 15:39                           ` Christoph Hellwig
     [not found]                           ` <20160628153925.GA2643-jcswGhMUV9g@public.gmane.org>
2016-06-29 12:23                             ` Boaz Harrosh
2016-06-29 12:23                               ` Boaz Harrosh
2016-06-29 12:23                               ` Boaz Harrosh

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20160929025351.GB4901@birch.djwong.org \
    --to=darrick.wong-qhclzuegtsvqt0dzr+alfa@public.gmane.org \
    --cc=hch-jcswGhMUV9g@public.gmane.org \
    --cc=linux-fsdevel-u79uwXL29TY76Z2rM5mHXA@public.gmane.org \
    --cc=linux-nvdimm-y27Ovi1pjclAfugRpC6u6w@public.gmane.org \
    --cc=xfs-VZNHf3L845pBDgjK7y7TUQ@public.gmane.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.