All of lore.kernel.org
 help / color / mirror / Atom feed
From: Ross Zwisler <ross.zwisler-VuQAYsv1563Yd54FQh9/CA@public.gmane.org>
To: Christoph Hellwig <hch-jcswGhMUV9g@public.gmane.org>
Cc: linux-xfs-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
	linux-fsdevel-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
	linux-nvdimm-y27Ovi1pjclAfugRpC6u6w@public.gmane.org
Subject: Re: [PATCH 05/10] dax: provide an iomap based dax read/write path
Date: Tue, 13 Sep 2016 17:00:56 -0600	[thread overview]
Message-ID: <20160913230056.GE26002@linux.intel.com> (raw)
In-Reply-To: <1473438884-674-6-git-send-email-hch-jcswGhMUV9g@public.gmane.org>

On Fri, Sep 09, 2016 at 06:34:39PM +0200, Christoph Hellwig wrote:
> This is a much simpler implementation of the DAX read/write path that makes
> use of the iomap infrastructure.  It does not try to mirror the direct I/O
> calling conventions and thus doesn't have to deal with i_dio_count or the
> end_io handler, but instead leaves locking and filesystem-specific I/O
> completion to the caller.
> 
> Signed-off-by: Christoph Hellwig <hch-jcswGhMUV9g@public.gmane.org>
> ---
>  fs/dax.c              | 103 ++++++++++++++++++++++++++++++++++++++++++++++++++
>  include/linux/iomap.h |   2 +
>  2 files changed, 105 insertions(+)
> 
> diff --git a/fs/dax.c b/fs/dax.c
> index 84343ce..57ad456 100644
> --- a/fs/dax.c
> +++ b/fs/dax.c
> @@ -31,6 +31,8 @@
>  #include <linux/vmstat.h>
>  #include <linux/pfn_t.h>
>  #include <linux/sizes.h>
> +#include <linux/iomap.h>
> +#include "internal.h"
>  
>  /*
>   * We use lowest available bit in exceptional entry for locking, other two
> @@ -1241,3 +1243,104 @@ int dax_truncate_page(struct inode *inode, loff_t from, get_block_t get_block)
>  	return dax_zero_page_range(inode, from, length, get_block);
>  }
>  EXPORT_SYMBOL_GPL(dax_truncate_page);
> +
> +#ifdef CONFIG_FS_IOMAP
> +static loff_t
> +iomap_dax_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
> +		struct iomap *iomap)
> +{
> +	struct iov_iter *iter = data;
> +	loff_t end = pos + length, done = 0;
> +	ssize_t ret = 0;
> +
> +	if (iov_iter_rw(iter) == READ) {
> +		end = min(end, i_size_read(inode));
> +		if (pos >= end)
> +			return 0;
> +
> +		if (iomap->type == IOMAP_HOLE || iomap->type == IOMAP_UNWRITTEN)
> +			return iov_iter_zero(min(length, end - pos), iter);
> +	}
> +
> +	if (WARN_ON_ONCE(iomap->type != IOMAP_MAPPED))
> +		return -EIO;
> +
> +	while (pos < end) {
> +		unsigned offset = pos & (PAGE_SIZE - 1);
> +		struct blk_dax_ctl dax = { 0 };
> +		ssize_t map_len;
> +
> +		dax.sector = iomap->blkno +
> +			(((pos & PAGE_MASK) - iomap->offset) >> 9);
> +		dax.size = (length + offset + PAGE_SIZE - 1) & PAGE_MASK;
> +		map_len = dax_map_atomic(iomap->bdev, &dax);
> +		if (map_len < 0) {
> +			ret = map_len;
> +			break;
> +		}
> +
> +		dax.addr += offset;
> +		map_len -= offset;
> +		if (map_len > end - pos)
> +			map_len = end - pos;
> +
> +		if (iov_iter_rw(iter) == WRITE)
> +			map_len = copy_from_iter_pmem(dax.addr, map_len, iter);
> +		else
> +			map_len = copy_to_iter(dax.addr, map_len, iter);
> +		dax_unmap_atomic(iomap->bdev, &dax);
> +		if (map_len <= 0) {
> +			ret = map_len ? map_len : -EFAULT;
> +			break;
> +		}
> +
> +		pos += map_len;
> +		length -= map_len;
> +		done += map_len;
> +	}
> +
> +	return done ? done : ret;
> +}
> +
> +/**
> + * iomap_dax_rw - Perform I/O to a DAX file
> + * @iocb: The control block for this I/O
> + * @iter: The addresses to do I/O from or to
> + * @ops: iomap ops passed from the file system
> + *
> + * This funtions performs read and write operations to directly mapped

	   function

> + * persistent memory.  The callers needs to take care of read/write exclusion
> + * and evicting any page cache pages in the region under I/O.
> + */
> +ssize_t
> +iomap_dax_rw(struct kiocb *iocb, struct iov_iter *iter,
> +		struct iomap_ops *ops)
> +{
> +	struct inode *inode = iocb->ki_filp->f_mapping->host;
> +	loff_t pos = iocb->ki_pos, ret = 0, done = 0;

Just a note that 'ret' is loff_t about half the time in the iomap code and
ssize_t the other half.  I guess it doesn't really matter since they should
both be big signed values (64 bits on x86_64), but it's a bit inconsistent.

> +	size_t count = iov_iter_count(iter);
> +	unsigned flags = 0;
> +
> +	if (!count)
> +		return 0;
> +
> +	if (iov_iter_rw(iter) == WRITE)
> +		flags |= IOMAP_WRITE;
> +
> +	do {
> +		ret = iomap_apply(inode, pos, count, flags, ops, iter,
> +				  iomap_dax_actor);
> +		if (ret <= 0)
> +			break;
> +		pos += ret;
> +		done += ret;
> +	} while ((count = iov_iter_count(iter)));
> +
> +	if (!done)
> +		return ret;
> +
> +	iocb->ki_pos += done;
> +	return done;
> +}

I think you can remove the special casing around 'done' and 'count' and make
this a bit simpler:

ssize_t
iomap_dax_rw(struct kiocb *iocb, struct iov_iter *iter,
		struct iomap_ops *ops)
{
	struct inode *inode = iocb->ki_filp->f_mapping->host;
	loff_t pos = iocb->ki_pos, ret = 0, done = 0;
	unsigned flags = 0;
	size_t count;

	if (iov_iter_rw(iter) == WRITE)
		flags |= IOMAP_WRITE;

	 while ((count = iov_iter_count(iter))) {
		ret = iomap_apply(inode, pos, count, flags, ops, iter,
				  iomap_dax_actor);
		if (ret <= 0)
			break;
		pos += ret;
		done += ret;
	}

	iocb->ki_pos += done;
	return done ? done : ret;
}

This is now very similar to iomap_file_buffered_write().

> +EXPORT_SYMBOL_GPL(iomap_dax_rw);
> +#endif /* CONFIG_FS_IOMAP */
> diff --git a/include/linux/iomap.h b/include/linux/iomap.h
> index 14d7067..3d5f785 100644
> --- a/include/linux/iomap.h
> +++ b/include/linux/iomap.h
> @@ -65,6 +65,8 @@ struct iomap_ops {
>  
>  ssize_t iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *from,
>  		struct iomap_ops *ops);
> +ssize_t iomap_dax_rw(struct kiocb *iocb, struct iov_iter *iter,
> +		struct iomap_ops *ops);
>  int iomap_zero_range(struct inode *inode, loff_t pos, loff_t len,
>  		bool *did_zero, struct iomap_ops *ops);
>  int iomap_truncate_page(struct inode *inode, loff_t pos, bool *did_zero,
> -- 
> 2.1.4
> 
> _______________________________________________
> Linux-nvdimm mailing list
> Linux-nvdimm-hn68Rpc1hR1g9hUCZPvPmw@public.gmane.org
> https://lists.01.org/mailman/listinfo/linux-nvdimm

WARNING: multiple messages have this Message-ID (diff)
From: Ross Zwisler <ross.zwisler@linux.intel.com>
To: Christoph Hellwig <hch@lst.de>
Cc: linux-xfs@vger.kernel.org, linux-fsdevel@vger.kernel.org,
	linux-nvdimm@ml01.01.org
Subject: Re: [PATCH 05/10] dax: provide an iomap based dax read/write path
Date: Tue, 13 Sep 2016 17:00:56 -0600	[thread overview]
Message-ID: <20160913230056.GE26002@linux.intel.com> (raw)
In-Reply-To: <1473438884-674-6-git-send-email-hch@lst.de>

On Fri, Sep 09, 2016 at 06:34:39PM +0200, Christoph Hellwig wrote:
> This is a much simpler implementation of the DAX read/write path that makes
> use of the iomap infrastructure.  It does not try to mirror the direct I/O
> calling conventions and thus doesn't have to deal with i_dio_count or the
> end_io handler, but instead leaves locking and filesystem-specific I/O
> completion to the caller.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
>  fs/dax.c              | 103 ++++++++++++++++++++++++++++++++++++++++++++++++++
>  include/linux/iomap.h |   2 +
>  2 files changed, 105 insertions(+)
> 
> diff --git a/fs/dax.c b/fs/dax.c
> index 84343ce..57ad456 100644
> --- a/fs/dax.c
> +++ b/fs/dax.c
> @@ -31,6 +31,8 @@
>  #include <linux/vmstat.h>
>  #include <linux/pfn_t.h>
>  #include <linux/sizes.h>
> +#include <linux/iomap.h>
> +#include "internal.h"
>  
>  /*
>   * We use lowest available bit in exceptional entry for locking, other two
> @@ -1241,3 +1243,104 @@ int dax_truncate_page(struct inode *inode, loff_t from, get_block_t get_block)
>  	return dax_zero_page_range(inode, from, length, get_block);
>  }
>  EXPORT_SYMBOL_GPL(dax_truncate_page);
> +
> +#ifdef CONFIG_FS_IOMAP
> +static loff_t
> +iomap_dax_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
> +		struct iomap *iomap)
> +{
> +	struct iov_iter *iter = data;
> +	loff_t end = pos + length, done = 0;
> +	ssize_t ret = 0;
> +
> +	if (iov_iter_rw(iter) == READ) {
> +		end = min(end, i_size_read(inode));
> +		if (pos >= end)
> +			return 0;
> +
> +		if (iomap->type == IOMAP_HOLE || iomap->type == IOMAP_UNWRITTEN)
> +			return iov_iter_zero(min(length, end - pos), iter);
> +	}
> +
> +	if (WARN_ON_ONCE(iomap->type != IOMAP_MAPPED))
> +		return -EIO;
> +
> +	while (pos < end) {
> +		unsigned offset = pos & (PAGE_SIZE - 1);
> +		struct blk_dax_ctl dax = { 0 };
> +		ssize_t map_len;
> +
> +		dax.sector = iomap->blkno +
> +			(((pos & PAGE_MASK) - iomap->offset) >> 9);
> +		dax.size = (length + offset + PAGE_SIZE - 1) & PAGE_MASK;
> +		map_len = dax_map_atomic(iomap->bdev, &dax);
> +		if (map_len < 0) {
> +			ret = map_len;
> +			break;
> +		}
> +
> +		dax.addr += offset;
> +		map_len -= offset;
> +		if (map_len > end - pos)
> +			map_len = end - pos;
> +
> +		if (iov_iter_rw(iter) == WRITE)
> +			map_len = copy_from_iter_pmem(dax.addr, map_len, iter);
> +		else
> +			map_len = copy_to_iter(dax.addr, map_len, iter);
> +		dax_unmap_atomic(iomap->bdev, &dax);
> +		if (map_len <= 0) {
> +			ret = map_len ? map_len : -EFAULT;
> +			break;
> +		}
> +
> +		pos += map_len;
> +		length -= map_len;
> +		done += map_len;
> +	}
> +
> +	return done ? done : ret;
> +}
> +
> +/**
> + * iomap_dax_rw - Perform I/O to a DAX file
> + * @iocb: The control block for this I/O
> + * @iter: The addresses to do I/O from or to
> + * @ops: iomap ops passed from the file system
> + *
> + * This funtions performs read and write operations to directly mapped

	   function

> + * persistent memory.  The callers needs to take care of read/write exclusion
> + * and evicting any page cache pages in the region under I/O.
> + */
> +ssize_t
> +iomap_dax_rw(struct kiocb *iocb, struct iov_iter *iter,
> +		struct iomap_ops *ops)
> +{
> +	struct inode *inode = iocb->ki_filp->f_mapping->host;
> +	loff_t pos = iocb->ki_pos, ret = 0, done = 0;

Just a note that 'ret' is loff_t about half the time in the iomap code and
ssize_t the other half.  I guess it doesn't really matter since they should
both be big signed values (64 bits on x86_64), but it's a bit inconsistent.

> +	size_t count = iov_iter_count(iter);
> +	unsigned flags = 0;
> +
> +	if (!count)
> +		return 0;
> +
> +	if (iov_iter_rw(iter) == WRITE)
> +		flags |= IOMAP_WRITE;
> +
> +	do {
> +		ret = iomap_apply(inode, pos, count, flags, ops, iter,
> +				  iomap_dax_actor);
> +		if (ret <= 0)
> +			break;
> +		pos += ret;
> +		done += ret;
> +	} while ((count = iov_iter_count(iter)));
> +
> +	if (!done)
> +		return ret;
> +
> +	iocb->ki_pos += done;
> +	return done;
> +}

I think you can remove the special casing around 'done' and 'count' and make
this a bit simpler:

ssize_t
iomap_dax_rw(struct kiocb *iocb, struct iov_iter *iter,
		struct iomap_ops *ops)
{
	struct inode *inode = iocb->ki_filp->f_mapping->host;
	loff_t pos = iocb->ki_pos, ret = 0, done = 0;
	unsigned flags = 0;
	size_t count;

	if (iov_iter_rw(iter) == WRITE)
		flags |= IOMAP_WRITE;

	 while ((count = iov_iter_count(iter))) {
		ret = iomap_apply(inode, pos, count, flags, ops, iter,
				  iomap_dax_actor);
		if (ret <= 0)
			break;
		pos += ret;
		done += ret;
	}

	iocb->ki_pos += done;
	return done ? done : ret;
}

This is now very similar to iomap_file_buffered_write().

> +EXPORT_SYMBOL_GPL(iomap_dax_rw);
> +#endif /* CONFIG_FS_IOMAP */
> diff --git a/include/linux/iomap.h b/include/linux/iomap.h
> index 14d7067..3d5f785 100644
> --- a/include/linux/iomap.h
> +++ b/include/linux/iomap.h
> @@ -65,6 +65,8 @@ struct iomap_ops {
>  
>  ssize_t iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *from,
>  		struct iomap_ops *ops);
> +ssize_t iomap_dax_rw(struct kiocb *iocb, struct iov_iter *iter,
> +		struct iomap_ops *ops);
>  int iomap_zero_range(struct inode *inode, loff_t pos, loff_t len,
>  		bool *did_zero, struct iomap_ops *ops);
>  int iomap_truncate_page(struct inode *inode, loff_t pos, bool *did_zero,
> -- 
> 2.1.4
> 
> _______________________________________________
> Linux-nvdimm mailing list
> Linux-nvdimm@lists.01.org
> https://lists.01.org/mailman/listinfo/linux-nvdimm

  parent reply	other threads:[~2016-09-13 23:00 UTC|newest]

Thread overview: 58+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-09-09 16:34 iomap based DAX path Christoph Hellwig
     [not found] ` <1473438884-674-1-git-send-email-hch-jcswGhMUV9g@public.gmane.org>
2016-09-09 16:34   ` [PATCH 01/10] iomap: add IOMAP_F_NEW flag Christoph Hellwig
2016-09-09 16:34     ` Christoph Hellwig
     [not found]     ` <1473438884-674-2-git-send-email-hch-jcswGhMUV9g@public.gmane.org>
2016-09-13 22:43       ` Ross Zwisler
2016-09-13 22:43         ` Ross Zwisler
2016-09-14  7:08         ` Christoph Hellwig
2016-09-09 16:34   ` [PATCH 02/10] iomap: expose iomap_apply outside iomap.c Christoph Hellwig
2016-09-09 16:34     ` Christoph Hellwig
     [not found]     ` <1473438884-674-3-git-send-email-hch-jcswGhMUV9g@public.gmane.org>
2016-09-13 22:48       ` Ross Zwisler
2016-09-13 22:48         ` Ross Zwisler
2016-09-09 16:34   ` [PATCH 03/10] dax: don't pass buffer_head to dax_insert_mapping Christoph Hellwig
2016-09-09 16:34     ` Christoph Hellwig
     [not found]     ` <1473438884-674-4-git-send-email-hch-jcswGhMUV9g@public.gmane.org>
2016-09-13 22:53       ` Ross Zwisler
2016-09-13 22:53         ` Ross Zwisler
2016-09-09 16:34   ` [PATCH 04/10] dax: don't pass buffer_head to copy_user_dax Christoph Hellwig
2016-09-09 16:34     ` Christoph Hellwig
2016-09-13 22:54     ` Ross Zwisler
2016-09-09 16:34   ` [PATCH 05/10] dax: provide an iomap based dax read/write path Christoph Hellwig
2016-09-09 16:34     ` Christoph Hellwig
     [not found]     ` <1473438884-674-6-git-send-email-hch-jcswGhMUV9g@public.gmane.org>
2016-09-13 23:00       ` Ross Zwisler [this message]
2016-09-13 23:00         ` Ross Zwisler
2016-09-09 16:34   ` [PATCH 06/10] dax: provide an iomap based fault handler Christoph Hellwig
2016-09-09 16:34     ` Christoph Hellwig
     [not found]     ` <1473438884-674-7-git-send-email-hch-jcswGhMUV9g@public.gmane.org>
2016-09-09 22:55       ` Dave Chinner
2016-09-09 22:55         ` Dave Chinner
2016-09-10  7:36         ` Christoph Hellwig
2016-09-10  7:36           ` Christoph Hellwig
2016-09-13 15:51           ` Ross Zwisler
     [not found]             ` <20160913155126.GA10622-VuQAYsv1563Yd54FQh9/CA@public.gmane.org>
2016-09-14  7:06               ` Christoph Hellwig
2016-09-14  7:06                 ` Christoph Hellwig
     [not found]                 ` <20160914070633.GA17278-jcswGhMUV9g@public.gmane.org>
2016-09-14  9:53                   ` Christoph Hellwig
2016-09-14  9:53                     ` Christoph Hellwig
2016-09-23 21:02                   ` Ross Zwisler
2016-09-23 21:02                     ` Ross Zwisler
     [not found]                     ` <20160923210237.GA23346-VuQAYsv1563Yd54FQh9/CA@public.gmane.org>
2016-09-26  0:08                       ` Christoph Hellwig
2016-09-26  0:08                         ` Christoph Hellwig
     [not found]                         ` <20160926000805.GA32252-jcswGhMUV9g@public.gmane.org>
2016-09-26 14:28                           ` Jan Kara
2016-09-26 14:28                             ` Jan Kara
2016-09-10  1:38       ` Elliott, Robert (Persistent Memory)
2016-09-10  1:38         ` Elliott, Robert (Persistent Memory)
2016-09-13 23:10     ` Ross Zwisler
     [not found]       ` <20160913231039.GF26002-VuQAYsv1563Yd54FQh9/CA@public.gmane.org>
2016-09-14  7:19         ` Christoph Hellwig
2016-09-14  7:19           ` Christoph Hellwig
     [not found]           ` <20160914071910.GC17278-jcswGhMUV9g@public.gmane.org>
2016-09-14 17:07             ` Ross Zwisler
2016-09-14 17:07               ` Ross Zwisler
     [not found]               ` <20160914170759.GA14196-VuQAYsv1563Yd54FQh9/CA@public.gmane.org>
2016-09-15  5:12                 ` Christoph Hellwig
2016-09-15  5:12                   ` Christoph Hellwig
     [not found]                   ` <20160915051229.GD6188-jcswGhMUV9g@public.gmane.org>
2016-09-15  5:30                     ` Darrick J. Wong
2016-09-15  5:30                       ` Darrick J. Wong
2016-09-26  0:05               ` Christoph Hellwig
2016-09-09 16:34   ` [PATCH 07/10] xfs: fix locking for DAX writes Christoph Hellwig
2016-09-09 16:34     ` Christoph Hellwig
2016-09-09 16:34   ` [PATCH 08/10] xfs: take the ilock shared if possible in xfs_file_iomap_begin Christoph Hellwig
2016-09-09 16:34     ` Christoph Hellwig
2016-09-09 16:34 ` [PATCH 09/10] xfs: refactor xfs_setfilesize Christoph Hellwig
     [not found]   ` <1473438884-674-10-git-send-email-hch-jcswGhMUV9g@public.gmane.org>
2016-09-13 23:12     ` Ross Zwisler
2016-09-13 23:12       ` Ross Zwisler
2016-09-09 16:34 ` [PATCH 10/10] xfs: use iomap to implement DAX Christoph Hellwig

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20160913230056.GE26002@linux.intel.com \
    --to=ross.zwisler-vuqaysv1563yd54fqh9/ca@public.gmane.org \
    --cc=hch-jcswGhMUV9g@public.gmane.org \
    --cc=linux-fsdevel-u79uwXL29TY76Z2rM5mHXA@public.gmane.org \
    --cc=linux-nvdimm-y27Ovi1pjclAfugRpC6u6w@public.gmane.org \
    --cc=linux-xfs-u79uwXL29TY76Z2rM5mHXA@public.gmane.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.