All of lore.kernel.org
 help / color / mirror / Atom feed
* iomap based DAX path V2
@ 2016-09-14 10:01 ` Christoph Hellwig
  0 siblings, 0 replies; 36+ messages in thread
From: Christoph Hellwig @ 2016-09-14 10:01 UTC (permalink / raw)
  To: linux-xfs-u79uwXL29TY76Z2rM5mHXA,
	linux-fsdevel-u79uwXL29TY76Z2rM5mHXA,
	linux-nvdimm-y27Ovi1pjclAfugRpC6u6w

This series adds a DAX I/O path based on the iomap interface.  This
allows more efficient block mapping including defined hole semantics,
and is an important step toward getting rid of buffer_heads in XFS.

Changes since V1:
 - added a conversion of ext2 to the iomap interface
 - addresse feedback from Ross, Dave and Robert

^ permalink raw reply	[flat|nested] 36+ messages in thread

* iomap based DAX path V2
@ 2016-09-14 10:01 ` Christoph Hellwig
  0 siblings, 0 replies; 36+ messages in thread
From: Christoph Hellwig @ 2016-09-14 10:01 UTC (permalink / raw)
  To: linux-xfs, linux-fsdevel, linux-nvdimm; +Cc: ross.zwisler

This series adds a DAX I/O path based on the iomap interface.  This
allows more efficient block mapping including defined hole semantics,
and is an important step toward getting rid of buffer_heads in XFS.

Changes since V1:
 - added a conversion of ext2 to the iomap interface
 - addresse feedback from Ross, Dave and Robert


^ permalink raw reply	[flat|nested] 36+ messages in thread

* [PATCH 01/12] iomap: add IOMAP_F_NEW flag
  2016-09-14 10:01 ` Christoph Hellwig
@ 2016-09-14 10:01     ` Christoph Hellwig
  -1 siblings, 0 replies; 36+ messages in thread
From: Christoph Hellwig @ 2016-09-14 10:01 UTC (permalink / raw)
  To: linux-xfs-u79uwXL29TY76Z2rM5mHXA,
	linux-fsdevel-u79uwXL29TY76Z2rM5mHXA,
	linux-nvdimm-y27Ovi1pjclAfugRpC6u6w

Signed-off-by: Christoph Hellwig <hch-jcswGhMUV9g@public.gmane.org>
Reviewed-by: Ross Zwisler <ross.zwisler-VuQAYsv1563Yd54FQh9/CA@public.gmane.org>
---
 fs/xfs/xfs_iomap.c    | 2 ++
 include/linux/iomap.h | 1 +
 2 files changed, 3 insertions(+)

diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index f96c8ff..5d06a2d 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -635,6 +635,7 @@ retry:
 	if (end_fsb != orig_end_fsb)
 		xfs_inode_set_eofblocks_tag(ip);
 
+	iomap->flags = IOMAP_F_NEW;
 	trace_xfs_iomap_alloc(ip, offset, count, 0, &got);
 done:
 	if (isnullstartblock(got.br_startblock))
@@ -1000,6 +1001,7 @@ xfs_file_iomap_begin(
 		if (error)
 			return error;
 
+		iomap->flags = IOMAP_F_NEW;
 		trace_xfs_iomap_alloc(ip, offset, length, 0, &imap);
 	} else {
 		ASSERT(nimaps);
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index 3d70ece..14d7067 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -22,6 +22,7 @@ struct vm_fault;
  * Flags for iomap mappings:
  */
 #define IOMAP_F_MERGED	0x01	/* contains multiple blocks/extents */
+#define IOMAP_F_NEW	0x02	/* blocks have been newly allocated */
 
 /*
  * Magic value for blkno:
-- 
2.1.4

^ permalink raw reply related	[flat|nested] 36+ messages in thread

* [PATCH 01/12] iomap: add IOMAP_F_NEW flag
@ 2016-09-14 10:01     ` Christoph Hellwig
  0 siblings, 0 replies; 36+ messages in thread
From: Christoph Hellwig @ 2016-09-14 10:01 UTC (permalink / raw)
  To: linux-xfs, linux-fsdevel, linux-nvdimm; +Cc: ross.zwisler

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Ross Zwisler <ross.zwisler@linux.intel.com>
---
 fs/xfs/xfs_iomap.c    | 2 ++
 include/linux/iomap.h | 1 +
 2 files changed, 3 insertions(+)

diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index f96c8ff..5d06a2d 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -635,6 +635,7 @@ retry:
 	if (end_fsb != orig_end_fsb)
 		xfs_inode_set_eofblocks_tag(ip);
 
+	iomap->flags = IOMAP_F_NEW;
 	trace_xfs_iomap_alloc(ip, offset, count, 0, &got);
 done:
 	if (isnullstartblock(got.br_startblock))
@@ -1000,6 +1001,7 @@ xfs_file_iomap_begin(
 		if (error)
 			return error;
 
+		iomap->flags = IOMAP_F_NEW;
 		trace_xfs_iomap_alloc(ip, offset, length, 0, &imap);
 	} else {
 		ASSERT(nimaps);
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index 3d70ece..14d7067 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -22,6 +22,7 @@ struct vm_fault;
  * Flags for iomap mappings:
  */
 #define IOMAP_F_MERGED	0x01	/* contains multiple blocks/extents */
+#define IOMAP_F_NEW	0x02	/* blocks have been newly allocated */
 
 /*
  * Magic value for blkno:
-- 
2.1.4


^ permalink raw reply related	[flat|nested] 36+ messages in thread

* [PATCH 02/12] iomap: expose iomap_apply outside iomap.c
  2016-09-14 10:01 ` Christoph Hellwig
@ 2016-09-14 10:01     ` Christoph Hellwig
  -1 siblings, 0 replies; 36+ messages in thread
From: Christoph Hellwig @ 2016-09-14 10:01 UTC (permalink / raw)
  To: linux-xfs-u79uwXL29TY76Z2rM5mHXA,
	linux-fsdevel-u79uwXL29TY76Z2rM5mHXA,
	linux-nvdimm-y27Ovi1pjclAfugRpC6u6w

This allows the DAX code to use it.

Signed-off-by: Christoph Hellwig <hch-jcswGhMUV9g@public.gmane.org>
Reviewed-by: Ross Zwisler <ross.zwisler-VuQAYsv1563Yd54FQh9/CA@public.gmane.org>
---
 fs/internal.h | 11 +++++++++++
 fs/iomap.c    |  5 +----
 2 files changed, 12 insertions(+), 4 deletions(-)

diff --git a/fs/internal.h b/fs/internal.h
index ba07376..8591786 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -12,6 +12,7 @@
 struct super_block;
 struct file_system_type;
 struct iomap;
+struct iomap_ops;
 struct linux_binprm;
 struct path;
 struct mount;
@@ -164,3 +165,13 @@ extern struct dentry_operations ns_dentry_operations;
 extern int do_vfs_ioctl(struct file *file, unsigned int fd, unsigned int cmd,
 		    unsigned long arg);
 extern long vfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
+
+/*
+ * iomap support:
+ */
+typedef loff_t (*iomap_actor_t)(struct inode *inode, loff_t pos, loff_t len,
+		void *data, struct iomap *iomap);
+
+loff_t iomap_apply(struct inode *inode, loff_t pos, loff_t length,
+		unsigned flags, struct iomap_ops *ops, void *data,
+		iomap_actor_t actor);
diff --git a/fs/iomap.c b/fs/iomap.c
index 706270f..f4df9c6 100644
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -27,9 +27,6 @@
 #include <linux/dax.h>
 #include "internal.h"
 
-typedef loff_t (*iomap_actor_t)(struct inode *inode, loff_t pos, loff_t len,
-		void *data, struct iomap *iomap);
-
 /*
  * Execute a iomap write on a segment of the mapping that spans a
  * contiguous range of pages that have identical block mapping state.
@@ -41,7 +38,7 @@ typedef loff_t (*iomap_actor_t)(struct inode *inode, loff_t pos, loff_t len,
  * resources they require in the iomap_begin call, and release them in the
  * iomap_end call.
  */
-static loff_t
+loff_t
 iomap_apply(struct inode *inode, loff_t pos, loff_t length, unsigned flags,
 		struct iomap_ops *ops, void *data, iomap_actor_t actor)
 {
-- 
2.1.4

^ permalink raw reply related	[flat|nested] 36+ messages in thread

* [PATCH 02/12] iomap: expose iomap_apply outside iomap.c
@ 2016-09-14 10:01     ` Christoph Hellwig
  0 siblings, 0 replies; 36+ messages in thread
From: Christoph Hellwig @ 2016-09-14 10:01 UTC (permalink / raw)
  To: linux-xfs, linux-fsdevel, linux-nvdimm; +Cc: ross.zwisler

This allows the DAX code to use it.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Ross Zwisler <ross.zwisler@linux.intel.com>
---
 fs/internal.h | 11 +++++++++++
 fs/iomap.c    |  5 +----
 2 files changed, 12 insertions(+), 4 deletions(-)

diff --git a/fs/internal.h b/fs/internal.h
index ba07376..8591786 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -12,6 +12,7 @@
 struct super_block;
 struct file_system_type;
 struct iomap;
+struct iomap_ops;
 struct linux_binprm;
 struct path;
 struct mount;
@@ -164,3 +165,13 @@ extern struct dentry_operations ns_dentry_operations;
 extern int do_vfs_ioctl(struct file *file, unsigned int fd, unsigned int cmd,
 		    unsigned long arg);
 extern long vfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
+
+/*
+ * iomap support:
+ */
+typedef loff_t (*iomap_actor_t)(struct inode *inode, loff_t pos, loff_t len,
+		void *data, struct iomap *iomap);
+
+loff_t iomap_apply(struct inode *inode, loff_t pos, loff_t length,
+		unsigned flags, struct iomap_ops *ops, void *data,
+		iomap_actor_t actor);
diff --git a/fs/iomap.c b/fs/iomap.c
index 706270f..f4df9c6 100644
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -27,9 +27,6 @@
 #include <linux/dax.h>
 #include "internal.h"
 
-typedef loff_t (*iomap_actor_t)(struct inode *inode, loff_t pos, loff_t len,
-		void *data, struct iomap *iomap);
-
 /*
  * Execute a iomap write on a segment of the mapping that spans a
  * contiguous range of pages that have identical block mapping state.
@@ -41,7 +38,7 @@ typedef loff_t (*iomap_actor_t)(struct inode *inode, loff_t pos, loff_t len,
  * resources they require in the iomap_begin call, and release them in the
  * iomap_end call.
  */
-static loff_t
+loff_t
 iomap_apply(struct inode *inode, loff_t pos, loff_t length, unsigned flags,
 		struct iomap_ops *ops, void *data, iomap_actor_t actor)
 {
-- 
2.1.4


^ permalink raw reply related	[flat|nested] 36+ messages in thread

* [PATCH 03/12] dax: don't pass buffer_head to dax_insert_mapping
  2016-09-14 10:01 ` Christoph Hellwig
  (?)
  (?)
@ 2016-09-14 10:01 ` Christoph Hellwig
  -1 siblings, 0 replies; 36+ messages in thread
From: Christoph Hellwig @ 2016-09-14 10:01 UTC (permalink / raw)
  To: linux-xfs, linux-fsdevel, linux-nvdimm; +Cc: ross.zwisler

This way we can use this helper for the iomap based DAX implementation
as well.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Ross Zwisler <ross.zwisler@linux.intel.com>
---
 fs/dax.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/fs/dax.c b/fs/dax.c
index 993dc6f..98463bb 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -790,14 +790,13 @@ int dax_writeback_mapping_range(struct address_space *mapping,
 EXPORT_SYMBOL_GPL(dax_writeback_mapping_range);
 
 static int dax_insert_mapping(struct address_space *mapping,
-			struct buffer_head *bh, void **entryp,
-			struct vm_area_struct *vma, struct vm_fault *vmf)
+		struct block_device *bdev, sector_t sector, size_t size,
+		void **entryp, struct vm_area_struct *vma, struct vm_fault *vmf)
 {
 	unsigned long vaddr = (unsigned long)vmf->virtual_address;
-	struct block_device *bdev = bh->b_bdev;
 	struct blk_dax_ctl dax = {
-		.sector = to_sector(bh, mapping->host),
-		.size = bh->b_size,
+		.sector = sector,
+		.size = size,
 	};
 	void *ret;
 	void *entry = *entryp;
@@ -898,7 +897,8 @@ int dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
 
 	/* Filesystem should not return unwritten buffers to us! */
 	WARN_ON_ONCE(buffer_unwritten(&bh) || buffer_new(&bh));
-	error = dax_insert_mapping(mapping, &bh, &entry, vma, vmf);
+	error = dax_insert_mapping(mapping, bh.b_bdev, to_sector(&bh, inode),
+			bh.b_size, &entry, vma, vmf);
  unlock_entry:
 	put_locked_mapping_entry(mapping, vmf->pgoff, entry);
  out:
-- 
2.1.4


^ permalink raw reply related	[flat|nested] 36+ messages in thread

* [PATCH 04/12] dax: don't pass buffer_head to copy_user_dax
  2016-09-14 10:01 ` Christoph Hellwig
                   ` (2 preceding siblings ...)
  (?)
@ 2016-09-14 10:01 ` Christoph Hellwig
  -1 siblings, 0 replies; 36+ messages in thread
From: Christoph Hellwig @ 2016-09-14 10:01 UTC (permalink / raw)
  To: linux-xfs, linux-fsdevel, linux-nvdimm; +Cc: ross.zwisler

This way we can use this helper for the iomap based DAX implementation
as well.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Ross Zwisler <ross.zwisler@linux.intel.com>
---
 fs/dax.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/fs/dax.c b/fs/dax.c
index 98463bb..84343ce 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -580,14 +580,13 @@ static int dax_load_hole(struct address_space *mapping, void *entry,
 	return VM_FAULT_LOCKED;
 }
 
-static int copy_user_bh(struct page *to, struct inode *inode,
-		struct buffer_head *bh, unsigned long vaddr)
+static int copy_user_dax(struct block_device *bdev, sector_t sector, size_t size,
+		struct page *to, unsigned long vaddr)
 {
 	struct blk_dax_ctl dax = {
-		.sector = to_sector(bh, inode),
-		.size = bh->b_size,
+		.sector = sector,
+		.size = size,
 	};
-	struct block_device *bdev = bh->b_bdev;
 	void *vto;
 
 	if (dax_map_atomic(bdev, &dax) < 0)
@@ -867,7 +866,8 @@ int dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
 	if (vmf->cow_page) {
 		struct page *new_page = vmf->cow_page;
 		if (buffer_written(&bh))
-			error = copy_user_bh(new_page, inode, &bh, vaddr);
+			error = copy_user_dax(bh.b_bdev, to_sector(&bh, inode),
+					bh.b_size, new_page, vaddr);
 		else
 			clear_user_highpage(new_page, vaddr);
 		if (error)
-- 
2.1.4


^ permalink raw reply related	[flat|nested] 36+ messages in thread

* [PATCH 05/12] dax: provide an iomap based dax read/write path
  2016-09-14 10:01 ` Christoph Hellwig
                   ` (3 preceding siblings ...)
  (?)
@ 2016-09-14 10:01 ` Christoph Hellwig
       [not found]   ` <1473847291-18913-6-git-send-email-hch-jcswGhMUV9g@public.gmane.org>
  -1 siblings, 1 reply; 36+ messages in thread
From: Christoph Hellwig @ 2016-09-14 10:01 UTC (permalink / raw)
  To: linux-xfs, linux-fsdevel, linux-nvdimm; +Cc: ross.zwisler

This is a much simpler implementation of the DAX read/write path that makes
use of the iomap infrastructure.  It does not try to mirror the direct I/O
calling conventions and thus doesn't have to deal with i_dio_count or the
end_io handler, but instead leaves locking and filesystem-specific I/O
completion to the caller.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/dax.c            | 114 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/dax.h |   4 ++
 2 files changed, 118 insertions(+)

diff --git a/fs/dax.c b/fs/dax.c
index 84343ce..1f9f2d4 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -31,6 +31,8 @@
 #include <linux/vmstat.h>
 #include <linux/pfn_t.h>
 #include <linux/sizes.h>
+#include <linux/iomap.h>
+#include "internal.h"
 
 /*
  * We use lowest available bit in exceptional entry for locking, other two
@@ -1241,3 +1243,115 @@ int dax_truncate_page(struct inode *inode, loff_t from, get_block_t get_block)
 	return dax_zero_page_range(inode, from, length, get_block);
 }
 EXPORT_SYMBOL_GPL(dax_truncate_page);
+
+#ifdef CONFIG_FS_IOMAP
+static loff_t
+iomap_dax_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
+		struct iomap *iomap)
+{
+	struct iov_iter *iter = data;
+	loff_t end = pos + length, done = 0;
+	ssize_t ret = 0;
+
+	if (iov_iter_rw(iter) == READ) {
+		end = min(end, i_size_read(inode));
+		if (pos >= end)
+			return 0;
+
+		if (iomap->type == IOMAP_HOLE || iomap->type == IOMAP_UNWRITTEN)
+			return iov_iter_zero(min(length, end - pos), iter);
+	}
+
+	if (WARN_ON_ONCE(iomap->type != IOMAP_MAPPED))
+		return -EIO;
+
+	while (pos < end) {
+		unsigned offset = pos & (PAGE_SIZE - 1);
+		struct blk_dax_ctl dax = { 0 };
+		ssize_t map_len;
+
+		dax.sector = iomap->blkno +
+			(((pos & PAGE_MASK) - iomap->offset) >> 9);
+		dax.size = (length + offset + PAGE_SIZE - 1) & PAGE_MASK;
+		map_len = dax_map_atomic(iomap->bdev, &dax);
+		if (map_len < 0) {
+			ret = map_len;
+			break;
+		}
+
+		dax.addr += offset;
+		map_len -= offset;
+		if (map_len > end - pos)
+			map_len = end - pos;
+
+		if (iov_iter_rw(iter) == WRITE)
+			map_len = copy_from_iter_pmem(dax.addr, map_len, iter);
+		else
+			map_len = copy_to_iter(dax.addr, map_len, iter);
+		dax_unmap_atomic(iomap->bdev, &dax);
+		if (map_len <= 0) {
+			ret = map_len ? map_len : -EFAULT;
+			break;
+		}
+
+		pos += map_len;
+		length -= map_len;
+		done += map_len;
+	}
+
+	return done ? done : ret;
+}
+
+/**
+ * iomap_dax_rw - Perform I/O to a DAX file
+ * @iocb:	The control block for this I/O
+ * @iter:	The addresses to do I/O from or to
+ * @ops:	iomap ops passed from the file system
+ *
+ * This function performs read and write operations to directly mapped
+ * persistent memory.  The callers needs to take care of read/write exclusion
+ * and evicting any page cache pages in the region under I/O.
+ */
+ssize_t
+iomap_dax_rw(struct kiocb *iocb, struct iov_iter *iter,
+		struct iomap_ops *ops)
+{
+	struct address_space *mapping = iocb->ki_filp->f_mapping;
+	struct inode *inode = mapping->host;
+	loff_t pos = iocb->ki_pos, ret = 0, done = 0;
+	unsigned flags = 0;
+
+	if (iov_iter_rw(iter) == WRITE)
+		flags |= IOMAP_WRITE;
+
+	/*
+	 * Yes, even DAX files can have page cache attached to them:  A zeroed
+	 * page is inserted into the pagecache when we have to serve a write
+	 * fault on a hole.  It should never be dirtied and can simply be
+	 * dropped from the pagecache once we get real data for the page.
+	 *
+	 * XXX: This is racy against mmap, and there's nothing we can do about
+	 * it. We'll eventually need to shift this down even further so that
+	 * we can check if we allocated blocks over a hole first.
+	 */
+	if (mapping->nrpages) {
+		ret = invalidate_inode_pages2_range(mapping,
+				pos >> PAGE_SHIFT,
+				(pos + iov_iter_count(iter) - 1) >> PAGE_SHIFT);
+		WARN_ON_ONCE(ret);
+	}
+
+	while (iov_iter_count(iter)) {
+		ret = iomap_apply(inode, pos, iov_iter_count(iter), flags, ops,
+				iter, iomap_dax_actor);
+		if (ret <= 0)
+			break;
+		pos += ret;
+		done += ret;
+	}
+
+	iocb->ki_pos += done;
+	return done ? done : ret;
+}
+EXPORT_SYMBOL_GPL(iomap_dax_rw);
+#endif /* CONFIG_FS_IOMAP */
diff --git a/include/linux/dax.h b/include/linux/dax.h
index 9c6dc77..a0595b4 100644
--- a/include/linux/dax.h
+++ b/include/linux/dax.h
@@ -6,9 +6,13 @@
 #include <linux/radix-tree.h>
 #include <asm/pgtable.h>
 
+struct iomap_ops;
+
 /* We use lowest available exceptional entry bit for locking */
 #define RADIX_DAX_ENTRY_LOCK (1 << RADIX_TREE_EXCEPTIONAL_SHIFT)
 
+ssize_t iomap_dax_rw(struct kiocb *iocb, struct iov_iter *iter,
+		struct iomap_ops *ops);
 ssize_t dax_do_io(struct kiocb *, struct inode *, struct iov_iter *,
 		  get_block_t, dio_iodone_t, int flags);
 int dax_zero_page_range(struct inode *, loff_t from, unsigned len, get_block_t);
-- 
2.1.4


^ permalink raw reply related	[flat|nested] 36+ messages in thread

* [PATCH 06/12] dax: provide an iomap based fault handler
  2016-09-14 10:01 ` Christoph Hellwig
                   ` (4 preceding siblings ...)
  (?)
@ 2016-09-14 10:01 ` Christoph Hellwig
  2016-09-14 17:27   ` Ross Zwisler
  -1 siblings, 1 reply; 36+ messages in thread
From: Christoph Hellwig @ 2016-09-14 10:01 UTC (permalink / raw)
  To: linux-xfs, linux-fsdevel, linux-nvdimm; +Cc: ross.zwisler

Very similar to the existing dax_fault function, but instead of using
the get_block callback we rely on the iomap_ops vector from iomap.c.
That also avoids having to do two calls into the file system for write
faults.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/dax.c            | 114 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/dax.h |   2 +
 2 files changed, 116 insertions(+)

diff --git a/fs/dax.c b/fs/dax.c
index 1f9f2d4..d67147d 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -1354,4 +1354,118 @@ iomap_dax_rw(struct kiocb *iocb, struct iov_iter *iter,
 	return done ? done : ret;
 }
 EXPORT_SYMBOL_GPL(iomap_dax_rw);
+
+/**
+ * iomap_dax_fault - handle a page fault on a DAX file
+ * @vma: The virtual memory area where the fault occurred
+ * @vmf: The description of the fault
+ * @ops: iomap ops passed from the file system
+ *
+ * When a page fault occurs, filesystems may call this helper in their fault
+ * or mkwrite handler for DAX files. Assumes the caller has done all the
+ * necessary locking for the page fault to proceed successfully.
+ */
+int iomap_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
+			struct iomap_ops *ops)
+{
+	struct address_space *mapping = vma->vm_file->f_mapping;
+	struct inode *inode = mapping->host;
+	unsigned long vaddr = (unsigned long)vmf->virtual_address;
+	loff_t pos = (loff_t)vmf->pgoff << PAGE_SHIFT;
+	sector_t sector;
+	struct iomap iomap = { 0 };
+	unsigned flags = 0;
+	int error, major = 0;
+	void *entry;
+
+	/*
+	 * Check whether offset isn't beyond end of file now. Caller is supposed
+	 * to hold locks serializing us with truncate / punch hole so this is
+	 * a reliable test.
+	 */
+	if (pos >= i_size_read(inode))
+		return VM_FAULT_SIGBUS;
+
+	entry = grab_mapping_entry(mapping, vmf->pgoff);
+	if (IS_ERR(entry)) {
+		error = PTR_ERR(entry);
+		goto out;
+	}
+
+	if ((vmf->flags & FAULT_FLAG_WRITE) && !vmf->cow_page)
+		flags |= IOMAP_WRITE;
+
+	/*
+	 * Note that we don't bother to use iomap_apply here: DAX required
+	 * the file system block size to be equal the page size, which means
+	 * that we never have to deal with more than a single extent here.
+	 */
+	error = ops->iomap_begin(inode, pos, PAGE_SIZE, flags, &iomap);
+	if (error)
+		goto unlock_entry;
+	if (WARN_ON_ONCE(iomap.offset + iomap.length < pos + PAGE_SIZE)) {
+		error = -EIO;		/* fs corruption? */
+		goto unlock_entry;
+	}
+
+	sector = iomap.blkno + (((pos & PAGE_MASK) - iomap.offset) >> 9);
+
+	if (vmf->cow_page) {
+		switch (iomap.type) {
+		case IOMAP_HOLE:
+		case IOMAP_UNWRITTEN:
+			clear_user_highpage(vmf->cow_page, vaddr);
+			break;
+		case IOMAP_MAPPED:
+			error = copy_user_dax(iomap.bdev, sector, PAGE_SIZE,
+					vmf->cow_page, vaddr);
+			break;
+		default:
+			WARN_ON_ONCE(1);
+			error = -EIO;
+			break;
+		}
+
+		if (error)
+			goto unlock_entry;
+		if (!radix_tree_exceptional_entry(entry)) {
+			vmf->page = entry;
+			return VM_FAULT_LOCKED;
+		}
+		vmf->entry = entry;
+		return VM_FAULT_DAX_LOCKED;
+	}
+
+	switch (iomap.type) {
+	case IOMAP_MAPPED:
+		if (iomap.flags & IOMAP_F_NEW) {
+			count_vm_event(PGMAJFAULT);
+			mem_cgroup_count_vm_event(vma->vm_mm, PGMAJFAULT);
+			major = VM_FAULT_MAJOR;
+		}
+		error = dax_insert_mapping(mapping, iomap.bdev, sector,
+				PAGE_SIZE, &entry, vma, vmf);
+		break;
+	case IOMAP_UNWRITTEN:
+	case IOMAP_HOLE:
+		if (!(vmf->flags & FAULT_FLAG_WRITE))
+			return dax_load_hole(mapping, entry, vmf);
+		/*FALLTHU*/
+	default:
+		WARN_ON_ONCE(1);
+		error = -EIO;
+		break;
+	}
+
+ unlock_entry:
+	put_locked_mapping_entry(mapping, vmf->pgoff, entry);
+ out:
+	if (error == -ENOMEM)
+		return VM_FAULT_OOM | major;
+	/* -EBUSY is fine, somebody else faulted on the same PTE */
+	if (error < 0 && error != -EBUSY)
+		return VM_FAULT_SIGBUS | major;
+	return VM_FAULT_NOPAGE | major;
+}
+EXPORT_SYMBOL_GPL(iomap_dax_fault);
 #endif /* CONFIG_FS_IOMAP */
diff --git a/include/linux/dax.h b/include/linux/dax.h
index a0595b4..add6c4b 100644
--- a/include/linux/dax.h
+++ b/include/linux/dax.h
@@ -17,6 +17,8 @@ ssize_t dax_do_io(struct kiocb *, struct inode *, struct iov_iter *,
 		  get_block_t, dio_iodone_t, int flags);
 int dax_zero_page_range(struct inode *, loff_t from, unsigned len, get_block_t);
 int dax_truncate_page(struct inode *, loff_t from, get_block_t);
+int iomap_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
+			struct iomap_ops *ops);
 int dax_fault(struct vm_area_struct *, struct vm_fault *, get_block_t);
 int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index);
 void dax_wake_mapping_entry_waiter(struct address_space *mapping,
-- 
2.1.4


^ permalink raw reply related	[flat|nested] 36+ messages in thread

* [PATCH 07/12] xfs: fix locking for DAX writes
  2016-09-14 10:01 ` Christoph Hellwig
                   ` (5 preceding siblings ...)
  (?)
@ 2016-09-14 10:01 ` Christoph Hellwig
  -1 siblings, 0 replies; 36+ messages in thread
From: Christoph Hellwig @ 2016-09-14 10:01 UTC (permalink / raw)
  To: linux-xfs, linux-fsdevel, linux-nvdimm; +Cc: ross.zwisler

So far DAX writes inherited the locking from direct I/O writes, but the direct
I/O model of using shared locks for writes is actually wrong for DAX.  For
direct I/O we're out of any standards and don't have to provide the Posix
required exclusion between writers, but for DAX which gets transparently
enable on applications without any knowledge of it we can't simply drop the
requirement.  Even worse this only happens for aligned writes and thus
doesn't show up for many typical use cases.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/xfs/xfs_file.c | 20 +-------------------
 1 file changed, 1 insertion(+), 19 deletions(-)

diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index e612a02..62649cc 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -714,24 +714,11 @@ xfs_file_dax_write(
 	struct address_space	*mapping = iocb->ki_filp->f_mapping;
 	struct inode		*inode = mapping->host;
 	struct xfs_inode	*ip = XFS_I(inode);
-	struct xfs_mount	*mp = ip->i_mount;
 	ssize_t			ret = 0;
-	int			unaligned_io = 0;
-	int			iolock;
+	int			iolock = XFS_IOLOCK_EXCL;
 	struct iov_iter		data;
 
-	/* "unaligned" here means not aligned to a filesystem block */
-	if ((iocb->ki_pos & mp->m_blockmask) ||
-	    ((iocb->ki_pos + iov_iter_count(from)) & mp->m_blockmask)) {
-		unaligned_io = 1;
-		iolock = XFS_IOLOCK_EXCL;
-	} else if (mapping->nrpages) {
-		iolock = XFS_IOLOCK_EXCL;
-	} else {
-		iolock = XFS_IOLOCK_SHARED;
-	}
 	xfs_rw_ilock(ip, iolock);
-
 	ret = xfs_file_aio_write_checks(iocb, from, &iolock);
 	if (ret)
 		goto out;
@@ -758,11 +745,6 @@ xfs_file_dax_write(
 		WARN_ON_ONCE(ret);
 	}
 
-	if (iolock == XFS_IOLOCK_EXCL && !unaligned_io) {
-		xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL);
-		iolock = XFS_IOLOCK_SHARED;
-	}
-
 	trace_xfs_file_dax_write(ip, iov_iter_count(from), iocb->ki_pos);
 
 	data = *from;
-- 
2.1.4


^ permalink raw reply related	[flat|nested] 36+ messages in thread

* [PATCH 08/12] xfs: take the ilock shared if possible in xfs_file_iomap_begin
  2016-09-14 10:01 ` Christoph Hellwig
                   ` (6 preceding siblings ...)
  (?)
@ 2016-09-14 10:01 ` Christoph Hellwig
  -1 siblings, 0 replies; 36+ messages in thread
From: Christoph Hellwig @ 2016-09-14 10:01 UTC (permalink / raw)
  To: linux-xfs, linux-fsdevel, linux-nvdimm; +Cc: ross.zwisler

We always just read the extent first, and will later lock exlusively
after first dropping the lock in case we actually allocate blocks.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/xfs/xfs_iomap.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 5d06a2d..c3cc175 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -955,6 +955,7 @@ xfs_file_iomap_begin(
 	struct xfs_bmbt_irec	imap;
 	xfs_fileoff_t		offset_fsb, end_fsb;
 	int			nimaps = 1, error = 0;
+	unsigned		lockmode;
 
 	if (XFS_FORCED_SHUTDOWN(mp))
 		return -EIO;
@@ -964,7 +965,7 @@ xfs_file_iomap_begin(
 				iomap);
 	}
 
-	xfs_ilock(ip, XFS_ILOCK_EXCL);
+	lockmode = xfs_ilock_data_map_shared(ip);
 
 	ASSERT(offset <= mp->m_super->s_maxbytes);
 	if ((xfs_fsize_t)offset + length > mp->m_super->s_maxbytes)
@@ -975,7 +976,7 @@ xfs_file_iomap_begin(
 	error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, &imap,
 			       &nimaps, XFS_BMAPI_ENTIRE);
 	if (error) {
-		xfs_iunlock(ip, XFS_ILOCK_EXCL);
+		xfs_iunlock(ip, lockmode);
 		return error;
 	}
 
@@ -995,7 +996,8 @@ xfs_file_iomap_begin(
 		 * xfs_iomap_write_direct() expects the shared lock. It
 		 * is unlocked on return.
 		 */
-		xfs_ilock_demote(ip, XFS_ILOCK_EXCL);
+		if (lockmode == XFS_ILOCK_EXCL)
+			xfs_ilock_demote(ip, lockmode);
 		error = xfs_iomap_write_direct(ip, offset, length, &imap,
 				nimaps);
 		if (error)
@@ -1006,7 +1008,7 @@ xfs_file_iomap_begin(
 	} else {
 		ASSERT(nimaps);
 
-		xfs_iunlock(ip, XFS_ILOCK_EXCL);
+		xfs_iunlock(ip, lockmode);
 		trace_xfs_iomap_found(ip, offset, length, 0, &imap);
 	}
 
-- 
2.1.4


^ permalink raw reply related	[flat|nested] 36+ messages in thread

* [PATCH 09/12] xfs: refactor xfs_setfilesize
  2016-09-14 10:01 ` Christoph Hellwig
                   ` (7 preceding siblings ...)
  (?)
@ 2016-09-14 10:01 ` Christoph Hellwig
  -1 siblings, 0 replies; 36+ messages in thread
From: Christoph Hellwig @ 2016-09-14 10:01 UTC (permalink / raw)
  To: linux-xfs, linux-fsdevel, linux-nvdimm; +Cc: ross.zwisler

Rename the current function to __xfs_setfilesize and add a non-static
wrapper that also takes care of creating the transaction.  This new
helper will be used by the new iomap-based DAX path.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/xfs/xfs_aops.c | 31 +++++++++++++++++++++----------
 fs/xfs/xfs_aops.h |  1 +
 2 files changed, 22 insertions(+), 10 deletions(-)

diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 7575cfc..4a28fa9 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -200,7 +200,7 @@ xfs_setfilesize_trans_alloc(
  * Update on-disk file size now that data has been written to disk.
  */
 STATIC int
-xfs_setfilesize(
+__xfs_setfilesize(
 	struct xfs_inode	*ip,
 	struct xfs_trans	*tp,
 	xfs_off_t		offset,
@@ -225,6 +225,23 @@ xfs_setfilesize(
 	return xfs_trans_commit(tp);
 }
 
+int
+xfs_setfilesize(
+	struct xfs_inode	*ip,
+	xfs_off_t		offset,
+	size_t			size)
+{
+	struct xfs_mount	*mp = ip->i_mount;
+	struct xfs_trans	*tp;
+	int			error;
+
+	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_fsyncts, 0, 0, 0, &tp);
+	if (error)
+		return error;
+
+	return __xfs_setfilesize(ip, tp, offset, size);
+}
+
 STATIC int
 xfs_setfilesize_ioend(
 	struct xfs_ioend	*ioend,
@@ -247,7 +264,7 @@ xfs_setfilesize_ioend(
 		return error;
 	}
 
-	return xfs_setfilesize(ip, tp, ioend->io_offset, ioend->io_size);
+	return __xfs_setfilesize(ip, tp, ioend->io_offset, ioend->io_size);
 }
 
 /*
@@ -1336,13 +1353,12 @@ xfs_end_io_direct_write(
 {
 	struct inode		*inode = file_inode(iocb->ki_filp);
 	struct xfs_inode	*ip = XFS_I(inode);
-	struct xfs_mount	*mp = ip->i_mount;
 	uintptr_t		flags = (uintptr_t)private;
 	int			error = 0;
 
 	trace_xfs_end_io_direct_write(ip, offset, size);
 
-	if (XFS_FORCED_SHUTDOWN(mp))
+	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
 		return -EIO;
 
 	if (size <= 0)
@@ -1380,14 +1396,9 @@ xfs_end_io_direct_write(
 
 		error = xfs_iomap_write_unwritten(ip, offset, size);
 	} else if (flags & XFS_DIO_FLAG_APPEND) {
-		struct xfs_trans *tp;
-
 		trace_xfs_end_io_direct_write_append(ip, offset, size);
 
-		error = xfs_trans_alloc(mp, &M_RES(mp)->tr_fsyncts, 0, 0, 0,
-				&tp);
-		if (!error)
-			error = xfs_setfilesize(ip, tp, offset, size);
+		error = xfs_setfilesize(ip, offset, size);
 	}
 
 	return error;
diff --git a/fs/xfs/xfs_aops.h b/fs/xfs/xfs_aops.h
index bf2d9a1..1950e3b 100644
--- a/fs/xfs/xfs_aops.h
+++ b/fs/xfs/xfs_aops.h
@@ -62,6 +62,7 @@ int	xfs_get_blocks_dax_fault(struct inode *inode, sector_t offset,
 
 int	xfs_end_io_direct_write(struct kiocb *iocb, loff_t offset,
 		ssize_t size, void *private);
+int	xfs_setfilesize(struct xfs_inode *ip, xfs_off_t offset, size_t size);
 
 extern void xfs_count_page_state(struct page *, int *, int *);
 extern struct block_device *xfs_find_bdev_for_inode(struct inode *);
-- 
2.1.4


^ permalink raw reply related	[flat|nested] 36+ messages in thread

* [PATCH 10/12] xfs: use iomap to implement DAX
  2016-09-14 10:01 ` Christoph Hellwig
                   ` (8 preceding siblings ...)
  (?)
@ 2016-09-14 10:01 ` Christoph Hellwig
       [not found]   ` <1473847291-18913-11-git-send-email-hch-jcswGhMUV9g@public.gmane.org>
  2016-09-15  5:29   ` Darrick J. Wong
  -1 siblings, 2 replies; 36+ messages in thread
From: Christoph Hellwig @ 2016-09-14 10:01 UTC (permalink / raw)
  To: linux-xfs, linux-fsdevel, linux-nvdimm; +Cc: ross.zwisler

Another users of buffer_heads bytes the dust.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/xfs/xfs_file.c  | 61 +++++++++++++++---------------------------------------
 fs/xfs/xfs_iomap.c | 11 ++++++----
 2 files changed, 24 insertions(+), 48 deletions(-)

diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 62649cc..663634f 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -332,10 +332,7 @@ xfs_file_dax_read(
 	struct kiocb		*iocb,
 	struct iov_iter		*to)
 {
-	struct address_space	*mapping = iocb->ki_filp->f_mapping;
-	struct inode		*inode = mapping->host;
-	struct xfs_inode	*ip = XFS_I(inode);
-	struct iov_iter		data = *to;
+	struct xfs_inode	*ip = XFS_I(iocb->ki_filp->f_mapping->host);
 	size_t			count = iov_iter_count(to);
 	ssize_t			ret = 0;
 
@@ -345,11 +342,7 @@ xfs_file_dax_read(
 		return 0; /* skip atime */
 
 	xfs_rw_ilock(ip, XFS_IOLOCK_SHARED);
-	ret = dax_do_io(iocb, inode, &data, xfs_get_blocks_direct, NULL, 0);
-	if (ret > 0) {
-		iocb->ki_pos += ret;
-		iov_iter_advance(to, ret);
-	}
+	ret = iomap_dax_rw(iocb, to, &xfs_iomap_ops);
 	xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
 
 	file_accessed(iocb->ki_filp);
@@ -711,52 +704,32 @@ xfs_file_dax_write(
 	struct kiocb		*iocb,
 	struct iov_iter		*from)
 {
-	struct address_space	*mapping = iocb->ki_filp->f_mapping;
-	struct inode		*inode = mapping->host;
+	struct inode		*inode = iocb->ki_filp->f_mapping->host;
 	struct xfs_inode	*ip = XFS_I(inode);
-	ssize_t			ret = 0;
 	int			iolock = XFS_IOLOCK_EXCL;
-	struct iov_iter		data;
+	ssize_t			ret, error = 0;
+	size_t			count;
+	loff_t			pos;
 
 	xfs_rw_ilock(ip, iolock);
 	ret = xfs_file_aio_write_checks(iocb, from, &iolock);
 	if (ret)
 		goto out;
 
-	/*
-	 * Yes, even DAX files can have page cache attached to them:  A zeroed
-	 * page is inserted into the pagecache when we have to serve a write
-	 * fault on a hole.  It should never be dirtied and can simply be
-	 * dropped from the pagecache once we get real data for the page.
-	 *
-	 * XXX: This is racy against mmap, and there's nothing we can do about
-	 * it. dax_do_io() should really do this invalidation internally as
-	 * it will know if we've allocated over a holei for this specific IO and
-	 * if so it needs to update the mapping tree and invalidate existing
-	 * PTEs over the newly allocated range. Remove this invalidation when
-	 * dax_do_io() is fixed up.
-	 */
-	if (mapping->nrpages) {
-		loff_t end = iocb->ki_pos + iov_iter_count(from) - 1;
-
-		ret = invalidate_inode_pages2_range(mapping,
-						    iocb->ki_pos >> PAGE_SHIFT,
-						    end >> PAGE_SHIFT);
-		WARN_ON_ONCE(ret);
-	}
+	pos = iocb->ki_pos;
+	count = iov_iter_count(from);
 
-	trace_xfs_file_dax_write(ip, iov_iter_count(from), iocb->ki_pos);
+	trace_xfs_file_dax_write(ip, count, pos);
 
-	data = *from;
-	ret = dax_do_io(iocb, inode, &data, xfs_get_blocks_direct,
-			xfs_end_io_direct_write, 0);
-	if (ret > 0) {
-		iocb->ki_pos += ret;
-		iov_iter_advance(from, ret);
+	ret = iomap_dax_rw(iocb, from, &xfs_iomap_ops);
+	if (ret > 0 && iocb->ki_pos > i_size_read(inode)) {
+		i_size_write(inode, iocb->ki_pos);
+		error = xfs_setfilesize(ip, pos, count);
 	}
+
 out:
 	xfs_rw_iunlock(ip, iolock);
-	return ret;
+	return error ? error : ret;
 }
 
 STATIC ssize_t
@@ -1495,7 +1468,7 @@ xfs_filemap_page_mkwrite(
 	xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
 
 	if (IS_DAX(inode)) {
-		ret = dax_mkwrite(vma, vmf, xfs_get_blocks_dax_fault);
+		ret = iomap_dax_fault(vma, vmf, &xfs_iomap_ops);
 	} else {
 		ret = iomap_page_mkwrite(vma, vmf, &xfs_iomap_ops);
 		ret = block_page_mkwrite_return(ret);
@@ -1529,7 +1502,7 @@ xfs_filemap_fault(
 		 * changes to xfs_get_blocks_direct() to map unwritten extent
 		 * ioend for conversion on read-only mappings.
 		 */
-		ret = dax_fault(vma, vmf, xfs_get_blocks_dax_fault);
+		ret = iomap_dax_fault(vma, vmf, &xfs_iomap_ops);
 	} else
 		ret = filemap_fault(vma, vmf);
 	xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index c3cc175..23f7811 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -935,11 +935,13 @@ error_on_bmapi_transaction:
 	return error;
 }
 
-static inline bool imap_needs_alloc(struct xfs_bmbt_irec *imap, int nimaps)
+static inline bool imap_needs_alloc(struct inode *inode,
+		struct xfs_bmbt_irec *imap, int nimaps)
 {
 	return !nimaps ||
 		imap->br_startblock == HOLESTARTBLOCK ||
-		imap->br_startblock == DELAYSTARTBLOCK;
+		imap->br_startblock == DELAYSTARTBLOCK ||
+		(IS_DAX(inode) && ISUNWRITTEN(imap));
 }
 
 static int
@@ -960,7 +962,8 @@ xfs_file_iomap_begin(
 	if (XFS_FORCED_SHUTDOWN(mp))
 		return -EIO;
 
-	if ((flags & IOMAP_WRITE) && !xfs_get_extsz_hint(ip)) {
+	if ((flags & IOMAP_WRITE) &&
+	    !IS_DAX(inode) && !xfs_get_extsz_hint(ip)) {
 		return xfs_file_iomap_begin_delay(inode, offset, length, flags,
 				iomap);
 	}
@@ -980,7 +983,7 @@ xfs_file_iomap_begin(
 		return error;
 	}
 
-	if ((flags & IOMAP_WRITE) && imap_needs_alloc(&imap, nimaps)) {
+	if ((flags & IOMAP_WRITE) && imap_needs_alloc(inode, &imap, nimaps)) {
 		/*
 		 * We cap the maximum length we map here to MAX_WRITEBACK_PAGES
 		 * pages to keep the chunks of work done where somewhat symmetric
-- 
2.1.4


^ permalink raw reply related	[flat|nested] 36+ messages in thread

* [PATCH 11/12] ext2: stop passing buffer_head to ext2_get_blocks
  2016-09-14 10:01 ` Christoph Hellwig
                   ` (9 preceding siblings ...)
  (?)
@ 2016-09-14 10:01 ` Christoph Hellwig
       [not found]   ` <1473847291-18913-12-git-send-email-hch-jcswGhMUV9g@public.gmane.org>
  -1 siblings, 1 reply; 36+ messages in thread
From: Christoph Hellwig @ 2016-09-14 10:01 UTC (permalink / raw)
  To: linux-xfs, linux-fsdevel, linux-nvdimm; +Cc: ross.zwisler

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/ext2/inode.c | 39 ++++++++++++++++++++++++---------------
 1 file changed, 24 insertions(+), 15 deletions(-)

diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index d5c7d09..2a69ab2 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -618,7 +618,7 @@ static void ext2_splice_branch(struct inode *inode,
  */
 static int ext2_get_blocks(struct inode *inode,
 			   sector_t iblock, unsigned long maxblocks,
-			   struct buffer_head *bh_result,
+			   u32 *bno, bool *new, bool *boundary,
 			   int create)
 {
 	int err = -EIO;
@@ -644,7 +644,6 @@ static int ext2_get_blocks(struct inode *inode,
 	/* Simplest case - block found, no allocation needed */
 	if (!partial) {
 		first_block = le32_to_cpu(chain[depth - 1].key);
-		clear_buffer_new(bh_result); /* What's this do? */
 		count++;
 		/*map more blocks*/
 		while (count < maxblocks && count <= blocks_to_boundary) {
@@ -699,7 +698,6 @@ static int ext2_get_blocks(struct inode *inode,
 			mutex_unlock(&ei->truncate_mutex);
 			if (err)
 				goto cleanup;
-			clear_buffer_new(bh_result);
 			goto got_it;
 		}
 	}
@@ -745,15 +743,16 @@ static int ext2_get_blocks(struct inode *inode,
 			mutex_unlock(&ei->truncate_mutex);
 			goto cleanup;
 		}
-	} else
-		set_buffer_new(bh_result);
+	} else {
+		*new = true;
+	}
 
 	ext2_splice_branch(inode, iblock, partial, indirect_blks, count);
 	mutex_unlock(&ei->truncate_mutex);
 got_it:
-	map_bh(bh_result, inode->i_sb, le32_to_cpu(chain[depth-1].key));
+	*bno = le32_to_cpu(chain[depth-1].key);
 	if (count > blocks_to_boundary)
-		set_buffer_boundary(bh_result);
+		*boundary = true;
 	err = count;
 	/* Clean up and exit */
 	partial = chain + depth - 1;	/* the whole chain */
@@ -765,16 +764,26 @@ cleanup:
 	return err;
 }
 
-int ext2_get_block(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create)
+int ext2_get_block(struct inode *inode, sector_t iblock,
+		struct buffer_head *bh_result, int create)
 {
 	unsigned max_blocks = bh_result->b_size >> inode->i_blkbits;
-	int ret = ext2_get_blocks(inode, iblock, max_blocks,
-			      bh_result, create);
-	if (ret > 0) {
-		bh_result->b_size = (ret << inode->i_blkbits);
-		ret = 0;
-	}
-	return ret;
+	bool new = false, boundary = false;
+	u32 bno;
+	int ret;
+
+	ret = ext2_get_blocks(inode, iblock, max_blocks, &bno, &new, &boundary,
+			create);
+	if (ret <= 0)
+		return ret;
+
+	map_bh(bh_result, inode->i_sb, bno);
+	bh_result->b_size = (ret << inode->i_blkbits);
+	if (new)
+		set_buffer_new(bh_result);
+	if (boundary)
+		set_buffer_boundary(bh_result);
+	return 0;
 
 }
 
-- 
2.1.4


^ permalink raw reply related	[flat|nested] 36+ messages in thread

* [PATCH 12/12] ext2: use iomap to implement DAX
  2016-09-14 10:01 ` Christoph Hellwig
@ 2016-09-14 10:01     ` Christoph Hellwig
  -1 siblings, 0 replies; 36+ messages in thread
From: Christoph Hellwig @ 2016-09-14 10:01 UTC (permalink / raw)
  To: linux-xfs-u79uwXL29TY76Z2rM5mHXA,
	linux-fsdevel-u79uwXL29TY76Z2rM5mHXA,
	linux-nvdimm-y27Ovi1pjclAfugRpC6u6w

Signed-off-by: Christoph Hellwig <hch-jcswGhMUV9g@public.gmane.org>
---
 fs/ext2/ext2.h  |  1 +
 fs/ext2/file.c  | 72 +++++++++++++++++++++++++++++++++++++++++++++++++++------
 fs/ext2/inode.c | 63 ++++++++++++++++++++++++++++++++++++++++++++++----
 3 files changed, 124 insertions(+), 12 deletions(-)

diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h
index 06af2f9..37e2be7 100644
--- a/fs/ext2/ext2.h
+++ b/fs/ext2/ext2.h
@@ -814,6 +814,7 @@ extern const struct file_operations ext2_file_operations;
 /* inode.c */
 extern const struct address_space_operations ext2_aops;
 extern const struct address_space_operations ext2_nobh_aops;
+extern struct iomap_ops ext2_iomap_ops;
 
 /* namei.c */
 extern const struct inode_operations ext2_dir_inode_operations;
diff --git a/fs/ext2/file.c b/fs/ext2/file.c
index 5efeefe..e089e1c 100644
--- a/fs/ext2/file.c
+++ b/fs/ext2/file.c
@@ -22,11 +22,59 @@
 #include <linux/pagemap.h>
 #include <linux/dax.h>
 #include <linux/quotaops.h>
+#include <linux/iomap.h>
+#include <linux/uio.h>
 #include "ext2.h"
 #include "xattr.h"
 #include "acl.h"
 
 #ifdef CONFIG_FS_DAX
+static ssize_t ext2_dax_read_iter(struct kiocb *iocb, struct iov_iter *to)
+{
+	struct inode *inode = iocb->ki_filp->f_mapping->host;
+	ssize_t ret;
+
+	if (!iov_iter_count(to))
+		return 0; /* skip atime */
+
+	inode_lock_shared(inode);
+	ret = iomap_dax_rw(iocb, to, &ext2_iomap_ops);
+	inode_unlock_shared(inode);
+
+	file_accessed(iocb->ki_filp);
+	return ret;
+}
+
+static ssize_t ext2_dax_write_iter(struct kiocb *iocb, struct iov_iter *from)
+{
+	struct file *file = iocb->ki_filp;
+	struct inode *inode = file->f_mapping->host;
+	ssize_t ret;
+
+	inode_lock(inode);
+	ret = generic_write_checks(iocb, from);
+	if (ret <= 0)
+		goto out_unlock;
+	ret = file_remove_privs(file);
+	if (ret)
+		goto out_unlock;
+	ret = file_update_time(file);
+	if (ret)
+		goto out_unlock;
+
+	ret = iomap_dax_rw(iocb, from, &ext2_iomap_ops);
+	if (ret > 0 && iocb->ki_pos > i_size_read(inode)) {
+		i_size_write(inode, iocb->ki_pos);
+		mark_inode_dirty(inode);
+	}
+
+out_unlock:
+	inode_unlock(inode);
+	if (ret > 0)
+		ret = generic_write_sync(iocb, ret);
+	return ret;
+}
+
 /*
  * The lock ordering for ext2 DAX fault paths is:
  *
@@ -51,7 +99,7 @@ static int ext2_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 	}
 	down_read(&ei->dax_sem);
 
-	ret = dax_fault(vma, vmf, ext2_get_block);
+	ret = iomap_dax_fault(vma, vmf, &ext2_iomap_ops);
 
 	up_read(&ei->dax_sem);
 	if (vmf->flags & FAULT_FLAG_WRITE)
@@ -156,14 +204,24 @@ int ext2_fsync(struct file *file, loff_t start, loff_t end, int datasync)
 	return ret;
 }
 
-/*
- * We have mostly NULL's here: the current defaults are ok for
- * the ext2 filesystem.
- */
+static ssize_t ext2_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
+{
+	if (IS_DAX(iocb->ki_filp->f_mapping->host))
+		return ext2_dax_read_iter(iocb, to);
+	return generic_file_read_iter(iocb, to);
+}
+
+static ssize_t ext2_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
+{
+	if (IS_DAX(iocb->ki_filp->f_mapping->host))
+		return ext2_dax_write_iter(iocb, from);
+	return generic_file_write_iter(iocb, from);
+}
+
 const struct file_operations ext2_file_operations = {
 	.llseek		= generic_file_llseek,
-	.read_iter	= generic_file_read_iter,
-	.write_iter	= generic_file_write_iter,
+	.read_iter	= ext2_file_read_iter,
+	.write_iter	= ext2_file_write_iter,
 	.unlocked_ioctl = ext2_ioctl,
 #ifdef CONFIG_COMPAT
 	.compat_ioctl	= ext2_compat_ioctl,
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index 2a69ab2..aae5f61 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -32,6 +32,7 @@
 #include <linux/buffer_head.h>
 #include <linux/mpage.h>
 #include <linux/fiemap.h>
+#include <linux/iomap.h>
 #include <linux/namei.h>
 #include <linux/uio.h>
 #include "ext2.h"
@@ -787,6 +788,59 @@ int ext2_get_block(struct inode *inode, sector_t iblock,
 
 }
 
+#ifdef CONFIG_FS_DAX
+static int ext2_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
+		unsigned flags, struct iomap *iomap)
+{
+	unsigned int blkbits = inode->i_blkbits;
+	unsigned long first_block = offset >> blkbits;
+	unsigned long max_blocks = (length + (1 << blkbits) - 1) >> blkbits;
+	bool new = false, boundary = false;
+	u32 bno;
+	int ret;
+
+	ret = ext2_get_blocks(inode, first_block, max_blocks,
+			&bno, &new, &boundary, flags & IOMAP_WRITE);
+	if (ret < 0)
+		return ret;
+
+	iomap->flags = 0;
+	iomap->bdev = inode->i_sb->s_bdev;
+	iomap->offset = first_block << blkbits;
+
+	if (ret == 0) {
+		iomap->type = IOMAP_HOLE;
+		iomap->blkno = IOMAP_NULL_BLOCK;
+		iomap->length = 1 << blkbits;
+	} else {
+		iomap->type = IOMAP_MAPPED;
+		iomap->blkno = (sector_t)bno << (blkbits - 9);
+		iomap->length = (u64)ret << blkbits;
+		iomap->flags |= IOMAP_F_MERGED;
+	}
+
+	if (new)
+		iomap->flags |= IOMAP_F_NEW;
+	return 0;
+}
+
+static int
+ext2_iomap_end(struct inode *inode, loff_t offset, loff_t length,
+		ssize_t written, unsigned flags, struct iomap *iomap)
+{
+	if (iomap->type == IOMAP_MAPPED &&
+	    written < length &&
+	    (flags & IOMAP_WRITE))
+		ext2_write_failed(inode->i_mapping, offset + length);
+	return 0;
+}
+
+struct iomap_ops ext2_iomap_ops = {
+	.iomap_begin		= ext2_iomap_begin,
+	.iomap_end		= ext2_iomap_end,
+};
+#endif /* CONFIG_FS_DAX */
+
 int ext2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
 		u64 start, u64 len)
 {
@@ -872,11 +926,10 @@ ext2_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
 	loff_t offset = iocb->ki_pos;
 	ssize_t ret;
 
-	if (IS_DAX(inode))
-		ret = dax_do_io(iocb, inode, iter, ext2_get_block, NULL,
-				DIO_LOCKING);
-	else
-		ret = blockdev_direct_IO(iocb, inode, iter, ext2_get_block);
+	if (WARN_ON_ONCE(IS_DAX(inode)))
+		return -EIO;
+
+	ret = blockdev_direct_IO(iocb, inode, iter, ext2_get_block);
 	if (ret < 0 && iov_iter_rw(iter) == WRITE)
 		ext2_write_failed(mapping, offset + count);
 	return ret;
-- 
2.1.4

^ permalink raw reply related	[flat|nested] 36+ messages in thread

* [PATCH 12/12] ext2: use iomap to implement DAX
@ 2016-09-14 10:01     ` Christoph Hellwig
  0 siblings, 0 replies; 36+ messages in thread
From: Christoph Hellwig @ 2016-09-14 10:01 UTC (permalink / raw)
  To: linux-xfs, linux-fsdevel, linux-nvdimm; +Cc: ross.zwisler

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/ext2/ext2.h  |  1 +
 fs/ext2/file.c  | 72 +++++++++++++++++++++++++++++++++++++++++++++++++++------
 fs/ext2/inode.c | 63 ++++++++++++++++++++++++++++++++++++++++++++++----
 3 files changed, 124 insertions(+), 12 deletions(-)

diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h
index 06af2f9..37e2be7 100644
--- a/fs/ext2/ext2.h
+++ b/fs/ext2/ext2.h
@@ -814,6 +814,7 @@ extern const struct file_operations ext2_file_operations;
 /* inode.c */
 extern const struct address_space_operations ext2_aops;
 extern const struct address_space_operations ext2_nobh_aops;
+extern struct iomap_ops ext2_iomap_ops;
 
 /* namei.c */
 extern const struct inode_operations ext2_dir_inode_operations;
diff --git a/fs/ext2/file.c b/fs/ext2/file.c
index 5efeefe..e089e1c 100644
--- a/fs/ext2/file.c
+++ b/fs/ext2/file.c
@@ -22,11 +22,59 @@
 #include <linux/pagemap.h>
 #include <linux/dax.h>
 #include <linux/quotaops.h>
+#include <linux/iomap.h>
+#include <linux/uio.h>
 #include "ext2.h"
 #include "xattr.h"
 #include "acl.h"
 
 #ifdef CONFIG_FS_DAX
+static ssize_t ext2_dax_read_iter(struct kiocb *iocb, struct iov_iter *to)
+{
+	struct inode *inode = iocb->ki_filp->f_mapping->host;
+	ssize_t ret;
+
+	if (!iov_iter_count(to))
+		return 0; /* skip atime */
+
+	inode_lock_shared(inode);
+	ret = iomap_dax_rw(iocb, to, &ext2_iomap_ops);
+	inode_unlock_shared(inode);
+
+	file_accessed(iocb->ki_filp);
+	return ret;
+}
+
+static ssize_t ext2_dax_write_iter(struct kiocb *iocb, struct iov_iter *from)
+{
+	struct file *file = iocb->ki_filp;
+	struct inode *inode = file->f_mapping->host;
+	ssize_t ret;
+
+	inode_lock(inode);
+	ret = generic_write_checks(iocb, from);
+	if (ret <= 0)
+		goto out_unlock;
+	ret = file_remove_privs(file);
+	if (ret)
+		goto out_unlock;
+	ret = file_update_time(file);
+	if (ret)
+		goto out_unlock;
+
+	ret = iomap_dax_rw(iocb, from, &ext2_iomap_ops);
+	if (ret > 0 && iocb->ki_pos > i_size_read(inode)) {
+		i_size_write(inode, iocb->ki_pos);
+		mark_inode_dirty(inode);
+	}
+
+out_unlock:
+	inode_unlock(inode);
+	if (ret > 0)
+		ret = generic_write_sync(iocb, ret);
+	return ret;
+}
+
 /*
  * The lock ordering for ext2 DAX fault paths is:
  *
@@ -51,7 +99,7 @@ static int ext2_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 	}
 	down_read(&ei->dax_sem);
 
-	ret = dax_fault(vma, vmf, ext2_get_block);
+	ret = iomap_dax_fault(vma, vmf, &ext2_iomap_ops);
 
 	up_read(&ei->dax_sem);
 	if (vmf->flags & FAULT_FLAG_WRITE)
@@ -156,14 +204,24 @@ int ext2_fsync(struct file *file, loff_t start, loff_t end, int datasync)
 	return ret;
 }
 
-/*
- * We have mostly NULL's here: the current defaults are ok for
- * the ext2 filesystem.
- */
+static ssize_t ext2_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
+{
+	if (IS_DAX(iocb->ki_filp->f_mapping->host))
+		return ext2_dax_read_iter(iocb, to);
+	return generic_file_read_iter(iocb, to);
+}
+
+static ssize_t ext2_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
+{
+	if (IS_DAX(iocb->ki_filp->f_mapping->host))
+		return ext2_dax_write_iter(iocb, from);
+	return generic_file_write_iter(iocb, from);
+}
+
 const struct file_operations ext2_file_operations = {
 	.llseek		= generic_file_llseek,
-	.read_iter	= generic_file_read_iter,
-	.write_iter	= generic_file_write_iter,
+	.read_iter	= ext2_file_read_iter,
+	.write_iter	= ext2_file_write_iter,
 	.unlocked_ioctl = ext2_ioctl,
 #ifdef CONFIG_COMPAT
 	.compat_ioctl	= ext2_compat_ioctl,
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index 2a69ab2..aae5f61 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -32,6 +32,7 @@
 #include <linux/buffer_head.h>
 #include <linux/mpage.h>
 #include <linux/fiemap.h>
+#include <linux/iomap.h>
 #include <linux/namei.h>
 #include <linux/uio.h>
 #include "ext2.h"
@@ -787,6 +788,59 @@ int ext2_get_block(struct inode *inode, sector_t iblock,
 
 }
 
+#ifdef CONFIG_FS_DAX
+static int ext2_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
+		unsigned flags, struct iomap *iomap)
+{
+	unsigned int blkbits = inode->i_blkbits;
+	unsigned long first_block = offset >> blkbits;
+	unsigned long max_blocks = (length + (1 << blkbits) - 1) >> blkbits;
+	bool new = false, boundary = false;
+	u32 bno;
+	int ret;
+
+	ret = ext2_get_blocks(inode, first_block, max_blocks,
+			&bno, &new, &boundary, flags & IOMAP_WRITE);
+	if (ret < 0)
+		return ret;
+
+	iomap->flags = 0;
+	iomap->bdev = inode->i_sb->s_bdev;
+	iomap->offset = first_block << blkbits;
+
+	if (ret == 0) {
+		iomap->type = IOMAP_HOLE;
+		iomap->blkno = IOMAP_NULL_BLOCK;
+		iomap->length = 1 << blkbits;
+	} else {
+		iomap->type = IOMAP_MAPPED;
+		iomap->blkno = (sector_t)bno << (blkbits - 9);
+		iomap->length = (u64)ret << blkbits;
+		iomap->flags |= IOMAP_F_MERGED;
+	}
+
+	if (new)
+		iomap->flags |= IOMAP_F_NEW;
+	return 0;
+}
+
+static int
+ext2_iomap_end(struct inode *inode, loff_t offset, loff_t length,
+		ssize_t written, unsigned flags, struct iomap *iomap)
+{
+	if (iomap->type == IOMAP_MAPPED &&
+	    written < length &&
+	    (flags & IOMAP_WRITE))
+		ext2_write_failed(inode->i_mapping, offset + length);
+	return 0;
+}
+
+struct iomap_ops ext2_iomap_ops = {
+	.iomap_begin		= ext2_iomap_begin,
+	.iomap_end		= ext2_iomap_end,
+};
+#endif /* CONFIG_FS_DAX */
+
 int ext2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
 		u64 start, u64 len)
 {
@@ -872,11 +926,10 @@ ext2_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
 	loff_t offset = iocb->ki_pos;
 	ssize_t ret;
 
-	if (IS_DAX(inode))
-		ret = dax_do_io(iocb, inode, iter, ext2_get_block, NULL,
-				DIO_LOCKING);
-	else
-		ret = blockdev_direct_IO(iocb, inode, iter, ext2_get_block);
+	if (WARN_ON_ONCE(IS_DAX(inode)))
+		return -EIO;
+
+	ret = blockdev_direct_IO(iocb, inode, iter, ext2_get_block);
 	if (ret < 0 && iov_iter_rw(iter) == WRITE)
 		ext2_write_failed(mapping, offset + count);
 	return ret;
-- 
2.1.4


^ permalink raw reply related	[flat|nested] 36+ messages in thread

* Re: [PATCH 05/12] dax: provide an iomap based dax read/write path
  2016-09-14 10:01 ` [PATCH 05/12] dax: provide an iomap based dax read/write path Christoph Hellwig
@ 2016-09-14 17:26       ` Ross Zwisler
  0 siblings, 0 replies; 36+ messages in thread
From: Ross Zwisler @ 2016-09-14 17:26 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: linux-xfs-u79uwXL29TY76Z2rM5mHXA,
	linux-fsdevel-u79uwXL29TY76Z2rM5mHXA,
	linux-nvdimm-y27Ovi1pjclAfugRpC6u6w

On Wed, Sep 14, 2016 at 12:01:24PM +0200, Christoph Hellwig wrote:
> This is a much simpler implementation of the DAX read/write path that makes
> use of the iomap infrastructure.  It does not try to mirror the direct I/O
> calling conventions and thus doesn't have to deal with i_dio_count or the
> end_io handler, but instead leaves locking and filesystem-specific I/O
> completion to the caller.
> 
> Signed-off-by: Christoph Hellwig <hch-jcswGhMUV9g@public.gmane.org>

Reviewed-by: Ross Zwisler <ross.zwisler-VuQAYsv1563Yd54FQh9/CA@public.gmane.org>

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 05/12] dax: provide an iomap based dax read/write path
@ 2016-09-14 17:26       ` Ross Zwisler
  0 siblings, 0 replies; 36+ messages in thread
From: Ross Zwisler @ 2016-09-14 17:26 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: linux-xfs, linux-fsdevel, linux-nvdimm, ross.zwisler

On Wed, Sep 14, 2016 at 12:01:24PM +0200, Christoph Hellwig wrote:
> This is a much simpler implementation of the DAX read/write path that makes
> use of the iomap infrastructure.  It does not try to mirror the direct I/O
> calling conventions and thus doesn't have to deal with i_dio_count or the
> end_io handler, but instead leaves locking and filesystem-specific I/O
> completion to the caller.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>

Reviewed-by: Ross Zwisler <ross.zwisler@linux.intel.com>

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 06/12] dax: provide an iomap based fault handler
  2016-09-14 10:01 ` [PATCH 06/12] dax: provide an iomap based fault handler Christoph Hellwig
@ 2016-09-14 17:27   ` Ross Zwisler
       [not found]     ` <20160914172717.GB30852-VuQAYsv1563Yd54FQh9/CA@public.gmane.org>
  0 siblings, 1 reply; 36+ messages in thread
From: Ross Zwisler @ 2016-09-14 17:27 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: linux-xfs, linux-fsdevel, linux-nvdimm, ross.zwisler

On Wed, Sep 14, 2016 at 12:01:25PM +0200, Christoph Hellwig wrote:
> Very similar to the existing dax_fault function, but instead of using
> the get_block callback we rely on the iomap_ops vector from iomap.c.
> That also avoids having to do two calls into the file system for write
> faults.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
>  fs/dax.c            | 114 ++++++++++++++++++++++++++++++++++++++++++++++++++++
>  include/linux/dax.h |   2 +
>  2 files changed, 116 insertions(+)
> 
> diff --git a/fs/dax.c b/fs/dax.c
> index 1f9f2d4..d67147d 100644
> --- a/fs/dax.c
> +++ b/fs/dax.c
> @@ -1354,4 +1354,118 @@ iomap_dax_rw(struct kiocb *iocb, struct iov_iter *iter,
>  	return done ? done : ret;
>  }
>  EXPORT_SYMBOL_GPL(iomap_dax_rw);
> +
> +/**
> + * iomap_dax_fault - handle a page fault on a DAX file
> + * @vma: The virtual memory area where the fault occurred
> + * @vmf: The description of the fault
> + * @ops: iomap ops passed from the file system
> + *
> + * When a page fault occurs, filesystems may call this helper in their fault
> + * or mkwrite handler for DAX files. Assumes the caller has done all the
> + * necessary locking for the page fault to proceed successfully.
> + */
> +int iomap_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
> +			struct iomap_ops *ops)
> +{
> +	struct address_space *mapping = vma->vm_file->f_mapping;
> +	struct inode *inode = mapping->host;
> +	unsigned long vaddr = (unsigned long)vmf->virtual_address;
> +	loff_t pos = (loff_t)vmf->pgoff << PAGE_SHIFT;
> +	sector_t sector;
> +	struct iomap iomap = { 0 };
> +	unsigned flags = 0;
> +	int error, major = 0;
> +	void *entry;
> +
> +	/*
> +	 * Check whether offset isn't beyond end of file now. Caller is supposed
> +	 * to hold locks serializing us with truncate / punch hole so this is
> +	 * a reliable test.
> +	 */
> +	if (pos >= i_size_read(inode))
> +		return VM_FAULT_SIGBUS;
> +
> +	entry = grab_mapping_entry(mapping, vmf->pgoff);
> +	if (IS_ERR(entry)) {
> +		error = PTR_ERR(entry);
> +		goto out;
> +	}
> +
> +	if ((vmf->flags & FAULT_FLAG_WRITE) && !vmf->cow_page)
> +		flags |= IOMAP_WRITE;
> +
> +	/*
> +	 * Note that we don't bother to use iomap_apply here: DAX required
> +	 * the file system block size to be equal the page size, which means
> +	 * that we never have to deal with more than a single extent here.
> +	 */
> +	error = ops->iomap_begin(inode, pos, PAGE_SIZE, flags, &iomap);
> +	if (error)
> +		goto unlock_entry;
> +	if (WARN_ON_ONCE(iomap.offset + iomap.length < pos + PAGE_SIZE)) {
> +		error = -EIO;		/* fs corruption? */
> +		goto unlock_entry;
> +	}
> +
> +	sector = iomap.blkno + (((pos & PAGE_MASK) - iomap.offset) >> 9);
> +
> +	if (vmf->cow_page) {
> +		switch (iomap.type) {
> +		case IOMAP_HOLE:
> +		case IOMAP_UNWRITTEN:
> +			clear_user_highpage(vmf->cow_page, vaddr);
> +			break;
> +		case IOMAP_MAPPED:
> +			error = copy_user_dax(iomap.bdev, sector, PAGE_SIZE,
> +					vmf->cow_page, vaddr);
> +			break;
> +		default:
> +			WARN_ON_ONCE(1);
> +			error = -EIO;
> +			break;
> +		}
> +
> +		if (error)
> +			goto unlock_entry;
> +		if (!radix_tree_exceptional_entry(entry)) {
> +			vmf->page = entry;
> +			return VM_FAULT_LOCKED;
> +		}
> +		vmf->entry = entry;
> +		return VM_FAULT_DAX_LOCKED;
> +	}
> +
> +	switch (iomap.type) {
> +	case IOMAP_MAPPED:
> +		if (iomap.flags & IOMAP_F_NEW) {
> +			count_vm_event(PGMAJFAULT);
> +			mem_cgroup_count_vm_event(vma->vm_mm, PGMAJFAULT);
> +			major = VM_FAULT_MAJOR;
> +		}
> +		error = dax_insert_mapping(mapping, iomap.bdev, sector,
> +				PAGE_SIZE, &entry, vma, vmf);
> +		break;
> +	case IOMAP_UNWRITTEN:
> +	case IOMAP_HOLE:
> +		if (!(vmf->flags & FAULT_FLAG_WRITE))
> +			return dax_load_hole(mapping, entry, vmf);
> +		/*FALLTHU*/

		  FALLTHRU

> +	default:
> +		WARN_ON_ONCE(1);
> +		error = -EIO;
> +		break;

No need to break here.

Aside from those two nits:

Reviewed-by: Ross Zwisler <ross.zwisler@linux.intel.com>

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 10/12] xfs: use iomap to implement DAX
  2016-09-14 10:01 ` [PATCH 10/12] xfs: use iomap to implement DAX Christoph Hellwig
@ 2016-09-14 17:32       ` Ross Zwisler
  2016-09-15  5:29   ` Darrick J. Wong
  1 sibling, 0 replies; 36+ messages in thread
From: Ross Zwisler @ 2016-09-14 17:32 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: linux-xfs-u79uwXL29TY76Z2rM5mHXA,
	linux-fsdevel-u79uwXL29TY76Z2rM5mHXA,
	linux-nvdimm-y27Ovi1pjclAfugRpC6u6w

On Wed, Sep 14, 2016 at 12:01:29PM +0200, Christoph Hellwig wrote:
> Another users of buffer_heads bytes the dust.
> 
> Signed-off-by: Christoph Hellwig <hch-jcswGhMUV9g@public.gmane.org>
> ---
>  fs/xfs/xfs_file.c  | 61 +++++++++++++++---------------------------------------
>  fs/xfs/xfs_iomap.c | 11 ++++++----
>  2 files changed, 24 insertions(+), 48 deletions(-)
> 
> diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
> index 62649cc..663634f 100644
> --- a/fs/xfs/xfs_file.c
> +++ b/fs/xfs/xfs_file.c
<>
> @@ -711,52 +704,32 @@ xfs_file_dax_write(
>  	struct kiocb		*iocb,
>  	struct iov_iter		*from)
>  {
> -	struct address_space	*mapping = iocb->ki_filp->f_mapping;
> -	struct inode		*inode = mapping->host;
> +	struct inode		*inode = iocb->ki_filp->f_mapping->host;
>  	struct xfs_inode	*ip = XFS_I(inode);
> -	ssize_t			ret = 0;
>  	int			iolock = XFS_IOLOCK_EXCL;
> -	struct iov_iter		data;
> +	ssize_t			ret, error = 0;
> +	size_t			count;
> +	loff_t			pos;
>  
>  	xfs_rw_ilock(ip, iolock);
>  	ret = xfs_file_aio_write_checks(iocb, from, &iolock);
>  	if (ret)
>  		goto out;
>  
> -	/*
> -	 * Yes, even DAX files can have page cache attached to them:  A zeroed
> -	 * page is inserted into the pagecache when we have to serve a write
> -	 * fault on a hole.  It should never be dirtied and can simply be
> -	 * dropped from the pagecache once we get real data for the page.
> -	 *
> -	 * XXX: This is racy against mmap, and there's nothing we can do about
> -	 * it. dax_do_io() should really do this invalidation internally as
> -	 * it will know if we've allocated over a holei for this specific IO and
> -	 * if so it needs to update the mapping tree and invalidate existing
> -	 * PTEs over the newly allocated range. Remove this invalidation when
> -	 * dax_do_io() is fixed up.
> -	 */
> -	if (mapping->nrpages) {
> -		loff_t end = iocb->ki_pos + iov_iter_count(from) - 1;
> -
> -		ret = invalidate_inode_pages2_range(mapping,
> -						    iocb->ki_pos >> PAGE_SHIFT,
> -						    end >> PAGE_SHIFT);
> -		WARN_ON_ONCE(ret);
> -	}
> +	pos = iocb->ki_pos;
> +	count = iov_iter_count(from);
>  
> -	trace_xfs_file_dax_write(ip, iov_iter_count(from), iocb->ki_pos);
> +	trace_xfs_file_dax_write(ip, count, pos);
>  
> -	data = *from;
> -	ret = dax_do_io(iocb, inode, &data, xfs_get_blocks_direct,
> -			xfs_end_io_direct_write, 0);
> -	if (ret > 0) {
> -		iocb->ki_pos += ret;
> -		iov_iter_advance(from, ret);
> +	ret = iomap_dax_rw(iocb, from, &xfs_iomap_ops);
> +	if (ret > 0 && iocb->ki_pos > i_size_read(inode)) {
> +		i_size_write(inode, iocb->ki_pos);
> +		error = xfs_setfilesize(ip, pos, count);

I think this should be xfs_setfilesize(ip, pos, ret)?  'count' and 'ret' are
the same in non-error cases, but in error cases where iomap_dax_rw() does some
work and then encounters an error, 'ret' could be smaller.  In error cases
like this using 'ret' instead of 'count' will also keep the value we use in
i_size_write() equal to what we write via xfs_setfilesize() because
iocb->ki_pos == pos+ret, not pos+count.

Aside from that, even though I'm sure you'll want a review from an XFS
developer:

Reviewed-by: Ross Zwisler <ross.zwisler-VuQAYsv1563Yd54FQh9/CA@public.gmane.org>

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 10/12] xfs: use iomap to implement DAX
@ 2016-09-14 17:32       ` Ross Zwisler
  0 siblings, 0 replies; 36+ messages in thread
From: Ross Zwisler @ 2016-09-14 17:32 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: linux-xfs, linux-fsdevel, linux-nvdimm, ross.zwisler

On Wed, Sep 14, 2016 at 12:01:29PM +0200, Christoph Hellwig wrote:
> Another users of buffer_heads bytes the dust.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
>  fs/xfs/xfs_file.c  | 61 +++++++++++++++---------------------------------------
>  fs/xfs/xfs_iomap.c | 11 ++++++----
>  2 files changed, 24 insertions(+), 48 deletions(-)
> 
> diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
> index 62649cc..663634f 100644
> --- a/fs/xfs/xfs_file.c
> +++ b/fs/xfs/xfs_file.c
<>
> @@ -711,52 +704,32 @@ xfs_file_dax_write(
>  	struct kiocb		*iocb,
>  	struct iov_iter		*from)
>  {
> -	struct address_space	*mapping = iocb->ki_filp->f_mapping;
> -	struct inode		*inode = mapping->host;
> +	struct inode		*inode = iocb->ki_filp->f_mapping->host;
>  	struct xfs_inode	*ip = XFS_I(inode);
> -	ssize_t			ret = 0;
>  	int			iolock = XFS_IOLOCK_EXCL;
> -	struct iov_iter		data;
> +	ssize_t			ret, error = 0;
> +	size_t			count;
> +	loff_t			pos;
>  
>  	xfs_rw_ilock(ip, iolock);
>  	ret = xfs_file_aio_write_checks(iocb, from, &iolock);
>  	if (ret)
>  		goto out;
>  
> -	/*
> -	 * Yes, even DAX files can have page cache attached to them:  A zeroed
> -	 * page is inserted into the pagecache when we have to serve a write
> -	 * fault on a hole.  It should never be dirtied and can simply be
> -	 * dropped from the pagecache once we get real data for the page.
> -	 *
> -	 * XXX: This is racy against mmap, and there's nothing we can do about
> -	 * it. dax_do_io() should really do this invalidation internally as
> -	 * it will know if we've allocated over a holei for this specific IO and
> -	 * if so it needs to update the mapping tree and invalidate existing
> -	 * PTEs over the newly allocated range. Remove this invalidation when
> -	 * dax_do_io() is fixed up.
> -	 */
> -	if (mapping->nrpages) {
> -		loff_t end = iocb->ki_pos + iov_iter_count(from) - 1;
> -
> -		ret = invalidate_inode_pages2_range(mapping,
> -						    iocb->ki_pos >> PAGE_SHIFT,
> -						    end >> PAGE_SHIFT);
> -		WARN_ON_ONCE(ret);
> -	}
> +	pos = iocb->ki_pos;
> +	count = iov_iter_count(from);
>  
> -	trace_xfs_file_dax_write(ip, iov_iter_count(from), iocb->ki_pos);
> +	trace_xfs_file_dax_write(ip, count, pos);
>  
> -	data = *from;
> -	ret = dax_do_io(iocb, inode, &data, xfs_get_blocks_direct,
> -			xfs_end_io_direct_write, 0);
> -	if (ret > 0) {
> -		iocb->ki_pos += ret;
> -		iov_iter_advance(from, ret);
> +	ret = iomap_dax_rw(iocb, from, &xfs_iomap_ops);
> +	if (ret > 0 && iocb->ki_pos > i_size_read(inode)) {
> +		i_size_write(inode, iocb->ki_pos);
> +		error = xfs_setfilesize(ip, pos, count);

I think this should be xfs_setfilesize(ip, pos, ret)?  'count' and 'ret' are
the same in non-error cases, but in error cases where iomap_dax_rw() does some
work and then encounters an error, 'ret' could be smaller.  In error cases
like this using 'ret' instead of 'count' will also keep the value we use in
i_size_write() equal to what we write via xfs_setfilesize() because
iocb->ki_pos == pos+ret, not pos+count.

Aside from that, even though I'm sure you'll want a review from an XFS
developer:

Reviewed-by: Ross Zwisler <ross.zwisler@linux.intel.com>

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 11/12] ext2: stop passing buffer_head to ext2_get_blocks
  2016-09-14 10:01 ` [PATCH 11/12] ext2: stop passing buffer_head to ext2_get_blocks Christoph Hellwig
@ 2016-09-14 22:42       ` Ross Zwisler
  0 siblings, 0 replies; 36+ messages in thread
From: Ross Zwisler @ 2016-09-14 22:42 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: linux-xfs-u79uwXL29TY76Z2rM5mHXA,
	linux-fsdevel-u79uwXL29TY76Z2rM5mHXA,
	linux-nvdimm-y27Ovi1pjclAfugRpC6u6w

On Wed, Sep 14, 2016 at 12:01:30PM +0200, Christoph Hellwig wrote:
> Signed-off-by: Christoph Hellwig <hch-jcswGhMUV9g@public.gmane.org>

Reviewed-by: Ross Zwisler <ross.zwisler-VuQAYsv1563Yd54FQh9/CA@public.gmane.org>

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 11/12] ext2: stop passing buffer_head to ext2_get_blocks
@ 2016-09-14 22:42       ` Ross Zwisler
  0 siblings, 0 replies; 36+ messages in thread
From: Ross Zwisler @ 2016-09-14 22:42 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: linux-xfs, linux-fsdevel, linux-nvdimm, ross.zwisler

On Wed, Sep 14, 2016 at 12:01:30PM +0200, Christoph Hellwig wrote:
> Signed-off-by: Christoph Hellwig <hch@lst.de>

Reviewed-by: Ross Zwisler <ross.zwisler@linux.intel.com>

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 12/12] ext2: use iomap to implement DAX
  2016-09-14 10:01     ` Christoph Hellwig
@ 2016-09-14 22:51         ` Ross Zwisler
  -1 siblings, 0 replies; 36+ messages in thread
From: Ross Zwisler @ 2016-09-14 22:51 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: linux-xfs-u79uwXL29TY76Z2rM5mHXA,
	linux-fsdevel-u79uwXL29TY76Z2rM5mHXA,
	linux-nvdimm-y27Ovi1pjclAfugRpC6u6w

On Wed, Sep 14, 2016 at 12:01:31PM +0200, Christoph Hellwig wrote:
> Signed-off-by: Christoph Hellwig <hch-jcswGhMUV9g@public.gmane.org>

Now that ext2 is using IOMAP, we need to select FS_IOMAP when EXT2_FS is
selected else we get a build error:

fs/built-in.o: In function `ext2_dax_fault':
/home/rzwisler/project/review/fs/ext2/file.c:112: undefined reference to `iomap_dax_fault'
fs/built-in.o: In function `ext2_dax_write_iter':
/home/rzwisler/project/review/fs/ext2/file.c:75: undefined reference to `iomap_dax_rw'
fs/built-in.o: In function `ext2_dax_read_iter':
/home/rzwisler/project/review/fs/ext2/file.c:45: undefined reference to `iomap_dax_rw'
Makefile:949: recipe for target 'vmlinux' failed
make: *** [vmlinux] Error 1

With the following patch squashed into this commit:

Reviewed-by: Ross Zwisler <ross.zwisler-VuQAYsv1563Yd54FQh9/CA@public.gmane.org>

---
>From 9c084641049990339b40bfd52a0aebf300d896ea Mon Sep 17 00:00:00 2001
From: Ross Zwisler <ross.zwisler-VuQAYsv1563Yd54FQh9/CA@public.gmane.org>
Date: Wed, 14 Sep 2016 16:48:36 -0600
Subject: [PATCH] ext2: build in FS_IOMAP support needed by ext2

Signed-off-by: Ross Zwisler <ross.zwisler-VuQAYsv1563Yd54FQh9/CA@public.gmane.org>
---
 fs/ext2/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/fs/ext2/Kconfig b/fs/ext2/Kconfig
index c634874e..3391f48 100644
--- a/fs/ext2/Kconfig
+++ b/fs/ext2/Kconfig
@@ -1,5 +1,6 @@
 config EXT2_FS
 	tristate "Second extended fs support"
+	select FS_IOMAP
 	help
 	  Ext2 is a standard Linux file system for hard disks.
 
-- 
2.9.0

^ permalink raw reply related	[flat|nested] 36+ messages in thread

* Re: [PATCH 12/12] ext2: use iomap to implement DAX
@ 2016-09-14 22:51         ` Ross Zwisler
  0 siblings, 0 replies; 36+ messages in thread
From: Ross Zwisler @ 2016-09-14 22:51 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: linux-xfs, linux-fsdevel, linux-nvdimm, ross.zwisler

On Wed, Sep 14, 2016 at 12:01:31PM +0200, Christoph Hellwig wrote:
> Signed-off-by: Christoph Hellwig <hch@lst.de>

Now that ext2 is using IOMAP, we need to select FS_IOMAP when EXT2_FS is
selected else we get a build error:

fs/built-in.o: In function `ext2_dax_fault':
/home/rzwisler/project/review/fs/ext2/file.c:112: undefined reference to `iomap_dax_fault'
fs/built-in.o: In function `ext2_dax_write_iter':
/home/rzwisler/project/review/fs/ext2/file.c:75: undefined reference to `iomap_dax_rw'
fs/built-in.o: In function `ext2_dax_read_iter':
/home/rzwisler/project/review/fs/ext2/file.c:45: undefined reference to `iomap_dax_rw'
Makefile:949: recipe for target 'vmlinux' failed
make: *** [vmlinux] Error 1

With the following patch squashed into this commit:

Reviewed-by: Ross Zwisler <ross.zwisler@linux.intel.com>

---
>From 9c084641049990339b40bfd52a0aebf300d896ea Mon Sep 17 00:00:00 2001
From: Ross Zwisler <ross.zwisler@linux.intel.com>
Date: Wed, 14 Sep 2016 16:48:36 -0600
Subject: [PATCH] ext2: build in FS_IOMAP support needed by ext2

Signed-off-by: Ross Zwisler <ross.zwisler@linux.intel.com>
---
 fs/ext2/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/fs/ext2/Kconfig b/fs/ext2/Kconfig
index c634874e..3391f48 100644
--- a/fs/ext2/Kconfig
+++ b/fs/ext2/Kconfig
@@ -1,5 +1,6 @@
 config EXT2_FS
 	tristate "Second extended fs support"
+	select FS_IOMAP
 	help
 	  Ext2 is a standard Linux file system for hard disks.
 
-- 
2.9.0


^ permalink raw reply related	[flat|nested] 36+ messages in thread

* Re: [PATCH 06/12] dax: provide an iomap based fault handler
  2016-09-14 17:27   ` Ross Zwisler
@ 2016-09-15  5:13         ` Christoph Hellwig
  0 siblings, 0 replies; 36+ messages in thread
From: Christoph Hellwig @ 2016-09-15  5:13 UTC (permalink / raw)
  To: Ross Zwisler, Christoph Hellwig,
	linux-xfs-u79uwXL29TY76Z2rM5mHXA,
	linux-fsdevel-u79uwXL29TY76Z2rM5mHXA,
	linux-nvdimm-y27Ovi1pjclAfugRpC6u6w

On Wed, Sep 14, 2016 at 11:27:17AM -0600, Ross Zwisler wrote:
> > +	case IOMAP_UNWRITTEN:
> > +	case IOMAP_HOLE:
> > +		if (!(vmf->flags & FAULT_FLAG_WRITE))
> > +			return dax_load_hole(mapping, entry, vmf);
> > +		/*FALLTHU*/
> 
> 		  FALLTHRU

Ooops, yes.

> 
> > +	default:
> > +		WARN_ON_ONCE(1);
> > +		error = -EIO;
> > +		break;
> 
> No need to break here.

Not really needed, but definitively more future-proof..

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 06/12] dax: provide an iomap based fault handler
@ 2016-09-15  5:13         ` Christoph Hellwig
  0 siblings, 0 replies; 36+ messages in thread
From: Christoph Hellwig @ 2016-09-15  5:13 UTC (permalink / raw)
  To: Ross Zwisler, Christoph Hellwig, linux-xfs, linux-fsdevel, linux-nvdimm

On Wed, Sep 14, 2016 at 11:27:17AM -0600, Ross Zwisler wrote:
> > +	case IOMAP_UNWRITTEN:
> > +	case IOMAP_HOLE:
> > +		if (!(vmf->flags & FAULT_FLAG_WRITE))
> > +			return dax_load_hole(mapping, entry, vmf);
> > +		/*FALLTHU*/
> 
> 		  FALLTHRU

Ooops, yes.

> 
> > +	default:
> > +		WARN_ON_ONCE(1);
> > +		error = -EIO;
> > +		break;
> 
> No need to break here.

Not really needed, but definitively more future-proof..

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 10/12] xfs: use iomap to implement DAX
  2016-09-14 17:32       ` Ross Zwisler
@ 2016-09-15  5:14           ` Christoph Hellwig
  -1 siblings, 0 replies; 36+ messages in thread
From: Christoph Hellwig @ 2016-09-15  5:14 UTC (permalink / raw)
  To: Ross Zwisler, Christoph Hellwig,
	linux-xfs-u79uwXL29TY76Z2rM5mHXA,
	linux-fsdevel-u79uwXL29TY76Z2rM5mHXA,
	linux-nvdimm-y27Ovi1pjclAfugRpC6u6w

On Wed, Sep 14, 2016 at 11:32:47AM -0600, Ross Zwisler wrote:
> I think this should be xfs_setfilesize(ip, pos, ret)?  'count' and 'ret' are
> the same in non-error cases, but in error cases where iomap_dax_rw() does some
> work and then encounters an error, 'ret' could be smaller.  In error cases
> like this using 'ret' instead of 'count' will also keep the value we use in
> i_size_write() equal to what we write via xfs_setfilesize() because
> iocb->ki_pos == pos+ret, not pos+count.

True.  Now with DAX where we can actuall get short writes that
should be fixed.  I spent too much time with the direct I/O code where
this would not happen.

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 10/12] xfs: use iomap to implement DAX
@ 2016-09-15  5:14           ` Christoph Hellwig
  0 siblings, 0 replies; 36+ messages in thread
From: Christoph Hellwig @ 2016-09-15  5:14 UTC (permalink / raw)
  To: Ross Zwisler, Christoph Hellwig, linux-xfs, linux-fsdevel, linux-nvdimm

On Wed, Sep 14, 2016 at 11:32:47AM -0600, Ross Zwisler wrote:
> I think this should be xfs_setfilesize(ip, pos, ret)?  'count' and 'ret' are
> the same in non-error cases, but in error cases where iomap_dax_rw() does some
> work and then encounters an error, 'ret' could be smaller.  In error cases
> like this using 'ret' instead of 'count' will also keep the value we use in
> i_size_write() equal to what we write via xfs_setfilesize() because
> iocb->ki_pos == pos+ret, not pos+count.

True.  Now with DAX where we can actuall get short writes that
should be fixed.  I spent too much time with the direct I/O code where
this would not happen.

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 12/12] ext2: use iomap to implement DAX
  2016-09-14 22:51         ` Ross Zwisler
  (?)
@ 2016-09-15  5:14         ` Christoph Hellwig
  -1 siblings, 0 replies; 36+ messages in thread
From: Christoph Hellwig @ 2016-09-15  5:14 UTC (permalink / raw)
  To: Ross Zwisler, Christoph Hellwig, linux-xfs, linux-fsdevel, linux-nvdimm

On Wed, Sep 14, 2016 at 04:51:05PM -0600, Ross Zwisler wrote:
> diff --git a/fs/ext2/Kconfig b/fs/ext2/Kconfig
> index c634874e..3391f48 100644
> --- a/fs/ext2/Kconfig
> +++ b/fs/ext2/Kconfig
> @@ -1,5 +1,6 @@
>  config EXT2_FS
>  	tristate "Second extended fs support"
> +	select FS_IOMAP

This should probably grow and ... if FS_DAX

so that we only pull it in when needed.

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 10/12] xfs: use iomap to implement DAX
  2016-09-14 10:01 ` [PATCH 10/12] xfs: use iomap to implement DAX Christoph Hellwig
       [not found]   ` <1473847291-18913-11-git-send-email-hch-jcswGhMUV9g@public.gmane.org>
@ 2016-09-15  5:29   ` Darrick J. Wong
       [not found]     ` <20160915052933.GH9314-PTl6brltDGh4DFYR7WNSRA@public.gmane.org>
  1 sibling, 1 reply; 36+ messages in thread
From: Darrick J. Wong @ 2016-09-15  5:29 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: linux-xfs, linux-fsdevel, linux-nvdimm, ross.zwisler, Dave Chinner

On Wed, Sep 14, 2016 at 12:01:29PM +0200, Christoph Hellwig wrote:
> Another users of buffer_heads bytes the dust.

Yay!

Babbling below. :)

> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
>  fs/xfs/xfs_file.c  | 61 +++++++++++++++---------------------------------------
>  fs/xfs/xfs_iomap.c | 11 ++++++----
>  2 files changed, 24 insertions(+), 48 deletions(-)
> 
> diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
> index 62649cc..663634f 100644
> --- a/fs/xfs/xfs_file.c
> +++ b/fs/xfs/xfs_file.c
> @@ -332,10 +332,7 @@ xfs_file_dax_read(
>  	struct kiocb		*iocb,
>  	struct iov_iter		*to)
>  {
> -	struct address_space	*mapping = iocb->ki_filp->f_mapping;
> -	struct inode		*inode = mapping->host;
> -	struct xfs_inode	*ip = XFS_I(inode);
> -	struct iov_iter		data = *to;
> +	struct xfs_inode	*ip = XFS_I(iocb->ki_filp->f_mapping->host);
>  	size_t			count = iov_iter_count(to);
>  	ssize_t			ret = 0;
>  
> @@ -345,11 +342,7 @@ xfs_file_dax_read(
>  		return 0; /* skip atime */
>  
>  	xfs_rw_ilock(ip, XFS_IOLOCK_SHARED);
> -	ret = dax_do_io(iocb, inode, &data, xfs_get_blocks_direct, NULL, 0);
> -	if (ret > 0) {
> -		iocb->ki_pos += ret;
> -		iov_iter_advance(to, ret);
> -	}
> +	ret = iomap_dax_rw(iocb, to, &xfs_iomap_ops);
>  	xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
>  
>  	file_accessed(iocb->ki_filp);
> @@ -711,52 +704,32 @@ xfs_file_dax_write(
>  	struct kiocb		*iocb,
>  	struct iov_iter		*from)
>  {
> -	struct address_space	*mapping = iocb->ki_filp->f_mapping;
> -	struct inode		*inode = mapping->host;
> +	struct inode		*inode = iocb->ki_filp->f_mapping->host;
>  	struct xfs_inode	*ip = XFS_I(inode);
> -	ssize_t			ret = 0;
>  	int			iolock = XFS_IOLOCK_EXCL;
> -	struct iov_iter		data;
> +	ssize_t			ret, error = 0;
> +	size_t			count;
> +	loff_t			pos;
>  
>  	xfs_rw_ilock(ip, iolock);
>  	ret = xfs_file_aio_write_checks(iocb, from, &iolock);
>  	if (ret)
>  		goto out;
>  
> -	/*
> -	 * Yes, even DAX files can have page cache attached to them:  A zeroed
> -	 * page is inserted into the pagecache when we have to serve a write
> -	 * fault on a hole.  It should never be dirtied and can simply be
> -	 * dropped from the pagecache once we get real data for the page.
> -	 *
> -	 * XXX: This is racy against mmap, and there's nothing we can do about
> -	 * it. dax_do_io() should really do this invalidation internally as
> -	 * it will know if we've allocated over a holei for this specific IO and
> -	 * if so it needs to update the mapping tree and invalidate existing
> -	 * PTEs over the newly allocated range. Remove this invalidation when
> -	 * dax_do_io() is fixed up.
> -	 */
> -	if (mapping->nrpages) {
> -		loff_t end = iocb->ki_pos + iov_iter_count(from) - 1;
> -
> -		ret = invalidate_inode_pages2_range(mapping,
> -						    iocb->ki_pos >> PAGE_SHIFT,
> -						    end >> PAGE_SHIFT);
> -		WARN_ON_ONCE(ret);
> -	}
> +	pos = iocb->ki_pos;
> +	count = iov_iter_count(from);
>  
> -	trace_xfs_file_dax_write(ip, iov_iter_count(from), iocb->ki_pos);
> +	trace_xfs_file_dax_write(ip, count, pos);
>  
> -	data = *from;
> -	ret = dax_do_io(iocb, inode, &data, xfs_get_blocks_direct,
> -			xfs_end_io_direct_write, 0);
> -	if (ret > 0) {
> -		iocb->ki_pos += ret;
> -		iov_iter_advance(from, ret);
> +	ret = iomap_dax_rw(iocb, from, &xfs_iomap_ops);
> +	if (ret > 0 && iocb->ki_pos > i_size_read(inode)) {
> +		i_size_write(inode, iocb->ki_pos);
> +		error = xfs_setfilesize(ip, pos, count);
>  	}
> +
>  out:
>  	xfs_rw_iunlock(ip, iolock);
> -	return ret;
> +	return error ? error : ret;
>  }
>  
>  STATIC ssize_t
> @@ -1495,7 +1468,7 @@ xfs_filemap_page_mkwrite(
>  	xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
>  
>  	if (IS_DAX(inode)) {
> -		ret = dax_mkwrite(vma, vmf, xfs_get_blocks_dax_fault);
> +		ret = iomap_dax_fault(vma, vmf, &xfs_iomap_ops);
>  	} else {
>  		ret = iomap_page_mkwrite(vma, vmf, &xfs_iomap_ops);
>  		ret = block_page_mkwrite_return(ret);
> @@ -1529,7 +1502,7 @@ xfs_filemap_fault(
>  		 * changes to xfs_get_blocks_direct() to map unwritten extent
>  		 * ioend for conversion on read-only mappings.
>  		 */
> -		ret = dax_fault(vma, vmf, xfs_get_blocks_dax_fault);
> +		ret = iomap_dax_fault(vma, vmf, &xfs_iomap_ops);
>  	} else
>  		ret = filemap_fault(vma, vmf);
>  	xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
> diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
> index c3cc175..23f7811 100644
> --- a/fs/xfs/xfs_iomap.c
> +++ b/fs/xfs/xfs_iomap.c
> @@ -935,11 +935,13 @@ error_on_bmapi_transaction:
>  	return error;
>  }
>  
> -static inline bool imap_needs_alloc(struct xfs_bmbt_irec *imap, int nimaps)
> +static inline bool imap_needs_alloc(struct inode *inode,
> +		struct xfs_bmbt_irec *imap, int nimaps)
>  {
>  	return !nimaps ||
>  		imap->br_startblock == HOLESTARTBLOCK ||
> -		imap->br_startblock == DELAYSTARTBLOCK;
> +		imap->br_startblock == DELAYSTARTBLOCK ||
> +		(IS_DAX(inode) && ISUNWRITTEN(imap));
>  }
>  
>  static int
> @@ -960,7 +962,8 @@ xfs_file_iomap_begin(
>  	if (XFS_FORCED_SHUTDOWN(mp))
>  		return -EIO;
>  
> -	if ((flags & IOMAP_WRITE) && !xfs_get_extsz_hint(ip)) {
> +	if ((flags & IOMAP_WRITE) &&
> +	    !IS_DAX(inode) && !xfs_get_extsz_hint(ip)) {
>  		return xfs_file_iomap_begin_delay(inode, offset, length, flags,
>  				iomap);
>  	}

(Thinking ahead to copy on write with dax, dio, and iomap; I don't have
any objections to the patch itself at this time.)

I'm a little confused about xfs_file_iomap_begin() and IOMAP_WRITE --
prior to this patchset, it was only called via page_mkwrite and what
used to be write_begin, and all it did was create a delalloc reservation
to back the write (or actually allocate blocks if extsize was set).

Now I see that iomap_dax_rw() will also call iomap_begin with
IOMAP_WRITE set in flags.  But in this case we're about to send some
dirty data to pmem, so we really need the extent to map physical
storage, which is taken care of above.

Thinking ahead to integrating reflink with DAX (and presumably
directio) in both cases _file_iomap_begin has to return a real extent.
If the range we're writing is shared, then that means that I'd have to
ensure the range is reserved in the COW fork (the reflink patches already do
this).  Next, if the caller requires a firm mapping (i.e. dax or dio)
I'd have to allocate the range in the COW fork and have that mapping
returned to the caller.

So I guess it would look something like this:

/* reserve reflink cow range like the reflink patches already do */
if (flags & (IOMAP_WRITE | IOMAP_ZERO) && xfs_is_reflink_inode(ip))
	xfs_reflink_reserve_cow_range(ip, offset, length);

/* do the cow thing if need be */
if ((flags & IOMAP_WRITE) &&
    xfs_is_reflink_inode(ip) &&
    (IS_DAX(inode) || (flags & IOMAP_DIO)) {
	xfs_reflink_allocate_cow_range(ip, offset, length);

	if (xfs_reflink_is_cow_pending(ip, offset))
		xfs_reflink_find_cow_mapping(ip, offset, &imap, &need_alloc);
	else
		xfs_bmapi_read(offset, &imap)
} else
	xfs_bmapi_read(offset, &imap) /* non-cow io */

Does that look plausible?  If _iomap_begin will have one type of caller
that only needs a delalloc reservation and another type that actually
needs a firm mapping, should we make a new IOMAP flag to indicate that
the caller really needs a firm mapping?  IS_DAX is fine for detecting
the DAX case as this patchset shows, but what about directio?

(At this point Dave suggests on IRC that we just make a dio specific
iomap_begin.  That eliminates the flag, though it'd be a fairly similar
function.)

DAX has another twist in there -- prior to making a writable mapping,
we'd have to allocate the cow extent, copy the contents (if unaligned),
and invalidate all the old mappings so that everyone can fault in the
new storage location.  I think.  Sleep now.

--D

> @@ -980,7 +983,7 @@ xfs_file_iomap_begin(
>  		return error;
>  	}
>  
> -	if ((flags & IOMAP_WRITE) && imap_needs_alloc(&imap, nimaps)) {
> +	if ((flags & IOMAP_WRITE) && imap_needs_alloc(inode, &imap, nimaps)) {
>  		/*
>  		 * We cap the maximum length we map here to MAX_WRITEBACK_PAGES
>  		 * pages to keep the chunks of work done where somewhat symmetric
> -- 
> 2.1.4
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 10/12] xfs: use iomap to implement DAX
  2016-09-15  5:29   ` Darrick J. Wong
@ 2016-09-15  6:43         ` Christoph Hellwig
  0 siblings, 0 replies; 36+ messages in thread
From: Christoph Hellwig @ 2016-09-15  6:43 UTC (permalink / raw)
  To: Darrick J. Wong
  Cc: linux-nvdimm-y27Ovi1pjclAfugRpC6u6w, Dave Chinner,
	linux-xfs-u79uwXL29TY76Z2rM5mHXA,
	linux-fsdevel-u79uwXL29TY76Z2rM5mHXA, Christoph Hellwig

On Wed, Sep 14, 2016 at 10:29:33PM -0700, Darrick J. Wong wrote:
> I'm a little confused about xfs_file_iomap_begin() and IOMAP_WRITE --
> prior to this patchset, it was only called via page_mkwrite and what
> used to be write_begin, and all it did was create a delalloc reservation
> to back the write (or actually allocate blocks if extsize was set).

No, it was called from write itself in addition, of course.

> Thinking ahead to integrating reflink with DAX (and presumably
> directio) in both cases _file_iomap_begin has to return a real extent.
> If the range we're writing is shared, then that means that I'd have to
> ensure the range is reserved in the COW fork (the reflink patches already do
> this).  Next, if the caller requires a firm mapping (i.e. dax or dio)
> I'd have to allocate the range in the COW fork and have that mapping
> returned to the caller.

Yes.  No different than the existing buffered write case with an
extent size hint, really.

> So I guess it would look something like this:
> 
> /* reserve reflink cow range like the reflink patches already do */
> if (flags & (IOMAP_WRITE | IOMAP_ZERO) && xfs_is_reflink_inode(ip))
> 	xfs_reflink_reserve_cow_range(ip, offset, length);

For zeroing we don't need to reserve anything IFF we are finding a hole,
so this is too early.

> /* do the cow thing if need be */
> if ((flags & IOMAP_WRITE) &&
>     xfs_is_reflink_inode(ip) &&
>     (IS_DAX(inode) || (flags & IOMAP_DIO)) {
> 	xfs_reflink_allocate_cow_range(ip, offset, length);
> 
> 	if (xfs_reflink_is_cow_pending(ip, offset))
> 		xfs_reflink_find_cow_mapping(ip, offset, &imap, &need_alloc);
> 	else
> 		xfs_bmapi_read(offset, &imap)
> } else
> 	xfs_bmapi_read(offset, &imap) /* non-cow io */


We really have three write block allocation variants, of which the first
two look basically the same except for I/O completion handling:

 1) For DAX we need to allocate real extents and zero them before use.
 2) For buffered I/O with an extent size hint or direct I/O we need to
    allocate unwritten extents (and convert after the I/O succeeded for
    the range that succeeded)
 3) for buffered I/O without and extent size hint we need to created
    a delayed allocation

That logic exists both in the old __xfs_get_blocks and and the new
iomap code (although we don't check for the dio case yet).

So I don't think we should need to change anything in terms of COW
in relation to DAX or direct I/O here.  I do however need to fully
audit the usage of the reflink calls for the cases 1 and 2.  For one
it doesn't really seem nessecary to split the reserve/allocate case
ehre, and I hope that a lot of the bmap btree looks could be saved,
similar to my rewrite of the delalloc case.
    

> Does that look plausible?  If _iomap_begin will have one type of caller
> that only needs a delalloc reservation and another type that actually
> needs a firm mapping, should we make a new IOMAP flag to indicate that
> the caller really needs a firm mapping?  IS_DAX is fine for detecting
> the DAX case as this patchset shows, but what about directio?
> 
> (At this point Dave suggests on IRC that we just make a dio specific
> iomap_begin.  That eliminates the flag, though it'd be a fairly similar
> function.)

As explained above, DIO is exaxtly the same as buffered I/O with
an extent size hint.  We already handle it fine.  DAX is almost
the same, keyed off a DAX check in lower level code to convert unwritten
extents and zero the blocks.

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 10/12] xfs: use iomap to implement DAX
@ 2016-09-15  6:43         ` Christoph Hellwig
  0 siblings, 0 replies; 36+ messages in thread
From: Christoph Hellwig @ 2016-09-15  6:43 UTC (permalink / raw)
  To: Darrick J. Wong
  Cc: Christoph Hellwig, linux-xfs, linux-fsdevel, linux-nvdimm,
	ross.zwisler, Dave Chinner

On Wed, Sep 14, 2016 at 10:29:33PM -0700, Darrick J. Wong wrote:
> I'm a little confused about xfs_file_iomap_begin() and IOMAP_WRITE --
> prior to this patchset, it was only called via page_mkwrite and what
> used to be write_begin, and all it did was create a delalloc reservation
> to back the write (or actually allocate blocks if extsize was set).

No, it was called from write itself in addition, of course.

> Thinking ahead to integrating reflink with DAX (and presumably
> directio) in both cases _file_iomap_begin has to return a real extent.
> If the range we're writing is shared, then that means that I'd have to
> ensure the range is reserved in the COW fork (the reflink patches already do
> this).  Next, if the caller requires a firm mapping (i.e. dax or dio)
> I'd have to allocate the range in the COW fork and have that mapping
> returned to the caller.

Yes.  No different than the existing buffered write case with an
extent size hint, really.

> So I guess it would look something like this:
> 
> /* reserve reflink cow range like the reflink patches already do */
> if (flags & (IOMAP_WRITE | IOMAP_ZERO) && xfs_is_reflink_inode(ip))
> 	xfs_reflink_reserve_cow_range(ip, offset, length);

For zeroing we don't need to reserve anything IFF we are finding a hole,
so this is too early.

> /* do the cow thing if need be */
> if ((flags & IOMAP_WRITE) &&
>     xfs_is_reflink_inode(ip) &&
>     (IS_DAX(inode) || (flags & IOMAP_DIO)) {
> 	xfs_reflink_allocate_cow_range(ip, offset, length);
> 
> 	if (xfs_reflink_is_cow_pending(ip, offset))
> 		xfs_reflink_find_cow_mapping(ip, offset, &imap, &need_alloc);
> 	else
> 		xfs_bmapi_read(offset, &imap)
> } else
> 	xfs_bmapi_read(offset, &imap) /* non-cow io */


We really have three write block allocation variants, of which the first
two look basically the same except for I/O completion handling:

 1) For DAX we need to allocate real extents and zero them before use.
 2) For buffered I/O with an extent size hint or direct I/O we need to
    allocate unwritten extents (and convert after the I/O succeeded for
    the range that succeeded)
 3) for buffered I/O without and extent size hint we need to created
    a delayed allocation

That logic exists both in the old __xfs_get_blocks and and the new
iomap code (although we don't check for the dio case yet).

So I don't think we should need to change anything in terms of COW
in relation to DAX or direct I/O here.  I do however need to fully
audit the usage of the reflink calls for the cases 1 and 2.  For one
it doesn't really seem nessecary to split the reserve/allocate case
ehre, and I hope that a lot of the bmap btree looks could be saved,
similar to my rewrite of the delalloc case.
    

> Does that look plausible?  If _iomap_begin will have one type of caller
> that only needs a delalloc reservation and another type that actually
> needs a firm mapping, should we make a new IOMAP flag to indicate that
> the caller really needs a firm mapping?  IS_DAX is fine for detecting
> the DAX case as this patchset shows, but what about directio?
> 
> (At this point Dave suggests on IRC that we just make a dio specific
> iomap_begin.  That eliminates the flag, though it'd be a fairly similar
> function.)

As explained above, DIO is exaxtly the same as buffered I/O with
an extent size hint.  We already handle it fine.  DAX is almost
the same, keyed off a DAX check in lower level code to convert unwritten
extents and zero the blocks.

^ permalink raw reply	[flat|nested] 36+ messages in thread

* [PATCH 01/12] iomap: add IOMAP_F_NEW flag
  2016-09-16 11:27 iomap based DAX path V3 Christoph Hellwig
@ 2016-09-16 11:27     ` Christoph Hellwig
  0 siblings, 0 replies; 36+ messages in thread
From: Christoph Hellwig @ 2016-09-16 11:27 UTC (permalink / raw)
  To: linux-xfs-u79uwXL29TY76Z2rM5mHXA,
	linux-fsdevel-u79uwXL29TY76Z2rM5mHXA,
	linux-nvdimm-y27Ovi1pjclAfugRpC6u6w

Signed-off-by: Christoph Hellwig <hch-jcswGhMUV9g@public.gmane.org>
Reviewed-by: Ross Zwisler <ross.zwisler-VuQAYsv1563Yd54FQh9/CA@public.gmane.org>
---
 fs/xfs/xfs_iomap.c    | 2 ++
 include/linux/iomap.h | 1 +
 2 files changed, 3 insertions(+)

diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index f96c8ff..5d06a2d 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -635,6 +635,7 @@ retry:
 	if (end_fsb != orig_end_fsb)
 		xfs_inode_set_eofblocks_tag(ip);
 
+	iomap->flags = IOMAP_F_NEW;
 	trace_xfs_iomap_alloc(ip, offset, count, 0, &got);
 done:
 	if (isnullstartblock(got.br_startblock))
@@ -1000,6 +1001,7 @@ xfs_file_iomap_begin(
 		if (error)
 			return error;
 
+		iomap->flags = IOMAP_F_NEW;
 		trace_xfs_iomap_alloc(ip, offset, length, 0, &imap);
 	} else {
 		ASSERT(nimaps);
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index 3d70ece..14d7067 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -22,6 +22,7 @@ struct vm_fault;
  * Flags for iomap mappings:
  */
 #define IOMAP_F_MERGED	0x01	/* contains multiple blocks/extents */
+#define IOMAP_F_NEW	0x02	/* blocks have been newly allocated */
 
 /*
  * Magic value for blkno:
-- 
2.1.4

^ permalink raw reply related	[flat|nested] 36+ messages in thread

* [PATCH 01/12] iomap: add IOMAP_F_NEW flag
@ 2016-09-16 11:27     ` Christoph Hellwig
  0 siblings, 0 replies; 36+ messages in thread
From: Christoph Hellwig @ 2016-09-16 11:27 UTC (permalink / raw)
  To: linux-xfs, linux-fsdevel, linux-nvdimm; +Cc: ross.zwisler

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Ross Zwisler <ross.zwisler@linux.intel.com>
---
 fs/xfs/xfs_iomap.c    | 2 ++
 include/linux/iomap.h | 1 +
 2 files changed, 3 insertions(+)

diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index f96c8ff..5d06a2d 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -635,6 +635,7 @@ retry:
 	if (end_fsb != orig_end_fsb)
 		xfs_inode_set_eofblocks_tag(ip);
 
+	iomap->flags = IOMAP_F_NEW;
 	trace_xfs_iomap_alloc(ip, offset, count, 0, &got);
 done:
 	if (isnullstartblock(got.br_startblock))
@@ -1000,6 +1001,7 @@ xfs_file_iomap_begin(
 		if (error)
 			return error;
 
+		iomap->flags = IOMAP_F_NEW;
 		trace_xfs_iomap_alloc(ip, offset, length, 0, &imap);
 	} else {
 		ASSERT(nimaps);
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index 3d70ece..14d7067 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -22,6 +22,7 @@ struct vm_fault;
  * Flags for iomap mappings:
  */
 #define IOMAP_F_MERGED	0x01	/* contains multiple blocks/extents */
+#define IOMAP_F_NEW	0x02	/* blocks have been newly allocated */
 
 /*
  * Magic value for blkno:
-- 
2.1.4


^ permalink raw reply related	[flat|nested] 36+ messages in thread

end of thread, other threads:[~2016-09-16 11:27 UTC | newest]

Thread overview: 36+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-09-14 10:01 iomap based DAX path V2 Christoph Hellwig
2016-09-14 10:01 ` Christoph Hellwig
     [not found] ` <1473847291-18913-1-git-send-email-hch-jcswGhMUV9g@public.gmane.org>
2016-09-14 10:01   ` [PATCH 01/12] iomap: add IOMAP_F_NEW flag Christoph Hellwig
2016-09-14 10:01     ` Christoph Hellwig
2016-09-14 10:01   ` [PATCH 02/12] iomap: expose iomap_apply outside iomap.c Christoph Hellwig
2016-09-14 10:01     ` Christoph Hellwig
2016-09-14 10:01   ` [PATCH 12/12] ext2: use iomap to implement DAX Christoph Hellwig
2016-09-14 10:01     ` Christoph Hellwig
     [not found]     ` <1473847291-18913-13-git-send-email-hch-jcswGhMUV9g@public.gmane.org>
2016-09-14 22:51       ` Ross Zwisler
2016-09-14 22:51         ` Ross Zwisler
2016-09-15  5:14         ` Christoph Hellwig
2016-09-14 10:01 ` [PATCH 03/12] dax: don't pass buffer_head to dax_insert_mapping Christoph Hellwig
2016-09-14 10:01 ` [PATCH 04/12] dax: don't pass buffer_head to copy_user_dax Christoph Hellwig
2016-09-14 10:01 ` [PATCH 05/12] dax: provide an iomap based dax read/write path Christoph Hellwig
     [not found]   ` <1473847291-18913-6-git-send-email-hch-jcswGhMUV9g@public.gmane.org>
2016-09-14 17:26     ` Ross Zwisler
2016-09-14 17:26       ` Ross Zwisler
2016-09-14 10:01 ` [PATCH 06/12] dax: provide an iomap based fault handler Christoph Hellwig
2016-09-14 17:27   ` Ross Zwisler
     [not found]     ` <20160914172717.GB30852-VuQAYsv1563Yd54FQh9/CA@public.gmane.org>
2016-09-15  5:13       ` Christoph Hellwig
2016-09-15  5:13         ` Christoph Hellwig
2016-09-14 10:01 ` [PATCH 07/12] xfs: fix locking for DAX writes Christoph Hellwig
2016-09-14 10:01 ` [PATCH 08/12] xfs: take the ilock shared if possible in xfs_file_iomap_begin Christoph Hellwig
2016-09-14 10:01 ` [PATCH 09/12] xfs: refactor xfs_setfilesize Christoph Hellwig
2016-09-14 10:01 ` [PATCH 10/12] xfs: use iomap to implement DAX Christoph Hellwig
     [not found]   ` <1473847291-18913-11-git-send-email-hch-jcswGhMUV9g@public.gmane.org>
2016-09-14 17:32     ` Ross Zwisler
2016-09-14 17:32       ` Ross Zwisler
     [not found]       ` <20160914173247.GC30852-VuQAYsv1563Yd54FQh9/CA@public.gmane.org>
2016-09-15  5:14         ` Christoph Hellwig
2016-09-15  5:14           ` Christoph Hellwig
2016-09-15  5:29   ` Darrick J. Wong
     [not found]     ` <20160915052933.GH9314-PTl6brltDGh4DFYR7WNSRA@public.gmane.org>
2016-09-15  6:43       ` Christoph Hellwig
2016-09-15  6:43         ` Christoph Hellwig
2016-09-14 10:01 ` [PATCH 11/12] ext2: stop passing buffer_head to ext2_get_blocks Christoph Hellwig
     [not found]   ` <1473847291-18913-12-git-send-email-hch-jcswGhMUV9g@public.gmane.org>
2016-09-14 22:42     ` Ross Zwisler
2016-09-14 22:42       ` Ross Zwisler
2016-09-16 11:27 iomap based DAX path V3 Christoph Hellwig
     [not found] ` <1474025234-13804-1-git-send-email-hch-jcswGhMUV9g@public.gmane.org>
2016-09-16 11:27   ` [PATCH 01/12] iomap: add IOMAP_F_NEW flag Christoph Hellwig
2016-09-16 11:27     ` Christoph Hellwig

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.