All of lore.kernel.org
 help / color / mirror / Atom feed
* iomap based DAX path
@ 2016-09-09 16:34 Christoph Hellwig
       [not found] ` <1473438884-674-1-git-send-email-hch-jcswGhMUV9g@public.gmane.org>
                   ` (2 more replies)
  0 siblings, 3 replies; 58+ messages in thread
From: Christoph Hellwig @ 2016-09-09 16:34 UTC (permalink / raw)
  To: linux-xfs, linux-fsdevel, linux-nvdimm

This series adds a DAX I/O path based on the iomap interface.  This
allows more efficient block mapping including defined hole semantics,
and is an important step toward getting rid of buffer_heads in XFS.


^ permalink raw reply	[flat|nested] 58+ messages in thread

* [PATCH 01/10] iomap: add IOMAP_F_NEW flag
  2016-09-09 16:34 iomap based DAX path Christoph Hellwig
@ 2016-09-09 16:34     ` Christoph Hellwig
  2016-09-09 16:34 ` [PATCH 09/10] xfs: refactor xfs_setfilesize Christoph Hellwig
  2016-09-09 16:34 ` [PATCH 10/10] xfs: use iomap to implement DAX Christoph Hellwig
  2 siblings, 0 replies; 58+ messages in thread
From: Christoph Hellwig @ 2016-09-09 16:34 UTC (permalink / raw)
  To: linux-xfs-u79uwXL29TY76Z2rM5mHXA,
	linux-fsdevel-u79uwXL29TY76Z2rM5mHXA,
	linux-nvdimm-y27Ovi1pjclAfugRpC6u6w

Signed-off-by: Christoph Hellwig <hch-jcswGhMUV9g@public.gmane.org>
---
 fs/xfs/xfs_iomap.c    | 2 ++
 include/linux/iomap.h | 1 +
 2 files changed, 3 insertions(+)

diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index f96c8ff..5d06a2d 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -635,6 +635,7 @@ retry:
 	if (end_fsb != orig_end_fsb)
 		xfs_inode_set_eofblocks_tag(ip);
 
+	iomap->flags = IOMAP_F_NEW;
 	trace_xfs_iomap_alloc(ip, offset, count, 0, &got);
 done:
 	if (isnullstartblock(got.br_startblock))
@@ -1000,6 +1001,7 @@ xfs_file_iomap_begin(
 		if (error)
 			return error;
 
+		iomap->flags = IOMAP_F_NEW;
 		trace_xfs_iomap_alloc(ip, offset, length, 0, &imap);
 	} else {
 		ASSERT(nimaps);
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index 3d70ece..14d7067 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -22,6 +22,7 @@ struct vm_fault;
  * Flags for iomap mappings:
  */
 #define IOMAP_F_MERGED	0x01	/* contains multiple blocks/extents */
+#define IOMAP_F_NEW	0x02	/* blocks have been newly allocated */
 
 /*
  * Magic value for blkno:
-- 
2.1.4

^ permalink raw reply related	[flat|nested] 58+ messages in thread

* [PATCH 01/10] iomap: add IOMAP_F_NEW flag
@ 2016-09-09 16:34     ` Christoph Hellwig
  0 siblings, 0 replies; 58+ messages in thread
From: Christoph Hellwig @ 2016-09-09 16:34 UTC (permalink / raw)
  To: linux-xfs, linux-fsdevel, linux-nvdimm

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/xfs/xfs_iomap.c    | 2 ++
 include/linux/iomap.h | 1 +
 2 files changed, 3 insertions(+)

diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index f96c8ff..5d06a2d 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -635,6 +635,7 @@ retry:
 	if (end_fsb != orig_end_fsb)
 		xfs_inode_set_eofblocks_tag(ip);
 
+	iomap->flags = IOMAP_F_NEW;
 	trace_xfs_iomap_alloc(ip, offset, count, 0, &got);
 done:
 	if (isnullstartblock(got.br_startblock))
@@ -1000,6 +1001,7 @@ xfs_file_iomap_begin(
 		if (error)
 			return error;
 
+		iomap->flags = IOMAP_F_NEW;
 		trace_xfs_iomap_alloc(ip, offset, length, 0, &imap);
 	} else {
 		ASSERT(nimaps);
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index 3d70ece..14d7067 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -22,6 +22,7 @@ struct vm_fault;
  * Flags for iomap mappings:
  */
 #define IOMAP_F_MERGED	0x01	/* contains multiple blocks/extents */
+#define IOMAP_F_NEW	0x02	/* blocks have been newly allocated */
 
 /*
  * Magic value for blkno:
-- 
2.1.4


^ permalink raw reply related	[flat|nested] 58+ messages in thread

* [PATCH 02/10] iomap: expose iomap_apply outside iomap.c
  2016-09-09 16:34 iomap based DAX path Christoph Hellwig
@ 2016-09-09 16:34     ` Christoph Hellwig
  2016-09-09 16:34 ` [PATCH 09/10] xfs: refactor xfs_setfilesize Christoph Hellwig
  2016-09-09 16:34 ` [PATCH 10/10] xfs: use iomap to implement DAX Christoph Hellwig
  2 siblings, 0 replies; 58+ messages in thread
From: Christoph Hellwig @ 2016-09-09 16:34 UTC (permalink / raw)
  To: linux-xfs-u79uwXL29TY76Z2rM5mHXA,
	linux-fsdevel-u79uwXL29TY76Z2rM5mHXA,
	linux-nvdimm-y27Ovi1pjclAfugRpC6u6w

This allows the DAX code to use it.

Signed-off-by: Christoph Hellwig <hch-jcswGhMUV9g@public.gmane.org>
---
 fs/internal.h | 11 +++++++++++
 fs/iomap.c    |  5 +----
 2 files changed, 12 insertions(+), 4 deletions(-)

diff --git a/fs/internal.h b/fs/internal.h
index ba07376..8591786 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -12,6 +12,7 @@
 struct super_block;
 struct file_system_type;
 struct iomap;
+struct iomap_ops;
 struct linux_binprm;
 struct path;
 struct mount;
@@ -164,3 +165,13 @@ extern struct dentry_operations ns_dentry_operations;
 extern int do_vfs_ioctl(struct file *file, unsigned int fd, unsigned int cmd,
 		    unsigned long arg);
 extern long vfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
+
+/*
+ * iomap support:
+ */
+typedef loff_t (*iomap_actor_t)(struct inode *inode, loff_t pos, loff_t len,
+		void *data, struct iomap *iomap);
+
+loff_t iomap_apply(struct inode *inode, loff_t pos, loff_t length,
+		unsigned flags, struct iomap_ops *ops, void *data,
+		iomap_actor_t actor);
diff --git a/fs/iomap.c b/fs/iomap.c
index 706270f..f4df9c6 100644
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -27,9 +27,6 @@
 #include <linux/dax.h>
 #include "internal.h"
 
-typedef loff_t (*iomap_actor_t)(struct inode *inode, loff_t pos, loff_t len,
-		void *data, struct iomap *iomap);
-
 /*
  * Execute a iomap write on a segment of the mapping that spans a
  * contiguous range of pages that have identical block mapping state.
@@ -41,7 +38,7 @@ typedef loff_t (*iomap_actor_t)(struct inode *inode, loff_t pos, loff_t len,
  * resources they require in the iomap_begin call, and release them in the
  * iomap_end call.
  */
-static loff_t
+loff_t
 iomap_apply(struct inode *inode, loff_t pos, loff_t length, unsigned flags,
 		struct iomap_ops *ops, void *data, iomap_actor_t actor)
 {
-- 
2.1.4

^ permalink raw reply related	[flat|nested] 58+ messages in thread

* [PATCH 02/10] iomap: expose iomap_apply outside iomap.c
@ 2016-09-09 16:34     ` Christoph Hellwig
  0 siblings, 0 replies; 58+ messages in thread
From: Christoph Hellwig @ 2016-09-09 16:34 UTC (permalink / raw)
  To: linux-xfs, linux-fsdevel, linux-nvdimm

This allows the DAX code to use it.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/internal.h | 11 +++++++++++
 fs/iomap.c    |  5 +----
 2 files changed, 12 insertions(+), 4 deletions(-)

diff --git a/fs/internal.h b/fs/internal.h
index ba07376..8591786 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -12,6 +12,7 @@
 struct super_block;
 struct file_system_type;
 struct iomap;
+struct iomap_ops;
 struct linux_binprm;
 struct path;
 struct mount;
@@ -164,3 +165,13 @@ extern struct dentry_operations ns_dentry_operations;
 extern int do_vfs_ioctl(struct file *file, unsigned int fd, unsigned int cmd,
 		    unsigned long arg);
 extern long vfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
+
+/*
+ * iomap support:
+ */
+typedef loff_t (*iomap_actor_t)(struct inode *inode, loff_t pos, loff_t len,
+		void *data, struct iomap *iomap);
+
+loff_t iomap_apply(struct inode *inode, loff_t pos, loff_t length,
+		unsigned flags, struct iomap_ops *ops, void *data,
+		iomap_actor_t actor);
diff --git a/fs/iomap.c b/fs/iomap.c
index 706270f..f4df9c6 100644
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -27,9 +27,6 @@
 #include <linux/dax.h>
 #include "internal.h"
 
-typedef loff_t (*iomap_actor_t)(struct inode *inode, loff_t pos, loff_t len,
-		void *data, struct iomap *iomap);
-
 /*
  * Execute a iomap write on a segment of the mapping that spans a
  * contiguous range of pages that have identical block mapping state.
@@ -41,7 +38,7 @@ typedef loff_t (*iomap_actor_t)(struct inode *inode, loff_t pos, loff_t len,
  * resources they require in the iomap_begin call, and release them in the
  * iomap_end call.
  */
-static loff_t
+loff_t
 iomap_apply(struct inode *inode, loff_t pos, loff_t length, unsigned flags,
 		struct iomap_ops *ops, void *data, iomap_actor_t actor)
 {
-- 
2.1.4


^ permalink raw reply related	[flat|nested] 58+ messages in thread

* [PATCH 03/10] dax: don't pass buffer_head to dax_insert_mapping
  2016-09-09 16:34 iomap based DAX path Christoph Hellwig
@ 2016-09-09 16:34     ` Christoph Hellwig
  2016-09-09 16:34 ` [PATCH 09/10] xfs: refactor xfs_setfilesize Christoph Hellwig
  2016-09-09 16:34 ` [PATCH 10/10] xfs: use iomap to implement DAX Christoph Hellwig
  2 siblings, 0 replies; 58+ messages in thread
From: Christoph Hellwig @ 2016-09-09 16:34 UTC (permalink / raw)
  To: linux-xfs-u79uwXL29TY76Z2rM5mHXA,
	linux-fsdevel-u79uwXL29TY76Z2rM5mHXA,
	linux-nvdimm-y27Ovi1pjclAfugRpC6u6w

This way we can use this helper for the iomap based DAX implementation
as well.

Signed-off-by: Christoph Hellwig <hch-jcswGhMUV9g@public.gmane.org>
---
 fs/dax.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/fs/dax.c b/fs/dax.c
index 993dc6f..98463bb 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -790,14 +790,13 @@ int dax_writeback_mapping_range(struct address_space *mapping,
 EXPORT_SYMBOL_GPL(dax_writeback_mapping_range);
 
 static int dax_insert_mapping(struct address_space *mapping,
-			struct buffer_head *bh, void **entryp,
-			struct vm_area_struct *vma, struct vm_fault *vmf)
+		struct block_device *bdev, sector_t sector, size_t size,
+		void **entryp, struct vm_area_struct *vma, struct vm_fault *vmf)
 {
 	unsigned long vaddr = (unsigned long)vmf->virtual_address;
-	struct block_device *bdev = bh->b_bdev;
 	struct blk_dax_ctl dax = {
-		.sector = to_sector(bh, mapping->host),
-		.size = bh->b_size,
+		.sector = sector,
+		.size = size,
 	};
 	void *ret;
 	void *entry = *entryp;
@@ -898,7 +897,8 @@ int dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
 
 	/* Filesystem should not return unwritten buffers to us! */
 	WARN_ON_ONCE(buffer_unwritten(&bh) || buffer_new(&bh));
-	error = dax_insert_mapping(mapping, &bh, &entry, vma, vmf);
+	error = dax_insert_mapping(mapping, bh.b_bdev, to_sector(&bh, inode),
+			bh.b_size, &entry, vma, vmf);
  unlock_entry:
 	put_locked_mapping_entry(mapping, vmf->pgoff, entry);
  out:
-- 
2.1.4

^ permalink raw reply related	[flat|nested] 58+ messages in thread

* [PATCH 03/10] dax: don't pass buffer_head to dax_insert_mapping
@ 2016-09-09 16:34     ` Christoph Hellwig
  0 siblings, 0 replies; 58+ messages in thread
From: Christoph Hellwig @ 2016-09-09 16:34 UTC (permalink / raw)
  To: linux-xfs, linux-fsdevel, linux-nvdimm

This way we can use this helper for the iomap based DAX implementation
as well.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/dax.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/fs/dax.c b/fs/dax.c
index 993dc6f..98463bb 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -790,14 +790,13 @@ int dax_writeback_mapping_range(struct address_space *mapping,
 EXPORT_SYMBOL_GPL(dax_writeback_mapping_range);
 
 static int dax_insert_mapping(struct address_space *mapping,
-			struct buffer_head *bh, void **entryp,
-			struct vm_area_struct *vma, struct vm_fault *vmf)
+		struct block_device *bdev, sector_t sector, size_t size,
+		void **entryp, struct vm_area_struct *vma, struct vm_fault *vmf)
 {
 	unsigned long vaddr = (unsigned long)vmf->virtual_address;
-	struct block_device *bdev = bh->b_bdev;
 	struct blk_dax_ctl dax = {
-		.sector = to_sector(bh, mapping->host),
-		.size = bh->b_size,
+		.sector = sector,
+		.size = size,
 	};
 	void *ret;
 	void *entry = *entryp;
@@ -898,7 +897,8 @@ int dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
 
 	/* Filesystem should not return unwritten buffers to us! */
 	WARN_ON_ONCE(buffer_unwritten(&bh) || buffer_new(&bh));
-	error = dax_insert_mapping(mapping, &bh, &entry, vma, vmf);
+	error = dax_insert_mapping(mapping, bh.b_bdev, to_sector(&bh, inode),
+			bh.b_size, &entry, vma, vmf);
  unlock_entry:
 	put_locked_mapping_entry(mapping, vmf->pgoff, entry);
  out:
-- 
2.1.4


^ permalink raw reply related	[flat|nested] 58+ messages in thread

* [PATCH 04/10] dax: don't pass buffer_head to copy_user_dax
  2016-09-09 16:34 iomap based DAX path Christoph Hellwig
@ 2016-09-09 16:34     ` Christoph Hellwig
  2016-09-09 16:34 ` [PATCH 09/10] xfs: refactor xfs_setfilesize Christoph Hellwig
  2016-09-09 16:34 ` [PATCH 10/10] xfs: use iomap to implement DAX Christoph Hellwig
  2 siblings, 0 replies; 58+ messages in thread
From: Christoph Hellwig @ 2016-09-09 16:34 UTC (permalink / raw)
  To: linux-xfs-u79uwXL29TY76Z2rM5mHXA,
	linux-fsdevel-u79uwXL29TY76Z2rM5mHXA,
	linux-nvdimm-y27Ovi1pjclAfugRpC6u6w

This way we can use this helper for the iomap based DAX implementation
as well.

Signed-off-by: Christoph Hellwig <hch-jcswGhMUV9g@public.gmane.org>
---
 fs/dax.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/fs/dax.c b/fs/dax.c
index 98463bb..84343ce 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -580,14 +580,13 @@ static int dax_load_hole(struct address_space *mapping, void *entry,
 	return VM_FAULT_LOCKED;
 }
 
-static int copy_user_bh(struct page *to, struct inode *inode,
-		struct buffer_head *bh, unsigned long vaddr)
+static int copy_user_dax(struct block_device *bdev, sector_t sector, size_t size,
+		struct page *to, unsigned long vaddr)
 {
 	struct blk_dax_ctl dax = {
-		.sector = to_sector(bh, inode),
-		.size = bh->b_size,
+		.sector = sector,
+		.size = size,
 	};
-	struct block_device *bdev = bh->b_bdev;
 	void *vto;
 
 	if (dax_map_atomic(bdev, &dax) < 0)
@@ -867,7 +866,8 @@ int dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
 	if (vmf->cow_page) {
 		struct page *new_page = vmf->cow_page;
 		if (buffer_written(&bh))
-			error = copy_user_bh(new_page, inode, &bh, vaddr);
+			error = copy_user_dax(bh.b_bdev, to_sector(&bh, inode),
+					bh.b_size, new_page, vaddr);
 		else
 			clear_user_highpage(new_page, vaddr);
 		if (error)
-- 
2.1.4

^ permalink raw reply related	[flat|nested] 58+ messages in thread

* [PATCH 04/10] dax: don't pass buffer_head to copy_user_dax
@ 2016-09-09 16:34     ` Christoph Hellwig
  0 siblings, 0 replies; 58+ messages in thread
From: Christoph Hellwig @ 2016-09-09 16:34 UTC (permalink / raw)
  To: linux-xfs, linux-fsdevel, linux-nvdimm

This way we can use this helper for the iomap based DAX implementation
as well.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/dax.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/fs/dax.c b/fs/dax.c
index 98463bb..84343ce 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -580,14 +580,13 @@ static int dax_load_hole(struct address_space *mapping, void *entry,
 	return VM_FAULT_LOCKED;
 }
 
-static int copy_user_bh(struct page *to, struct inode *inode,
-		struct buffer_head *bh, unsigned long vaddr)
+static int copy_user_dax(struct block_device *bdev, sector_t sector, size_t size,
+		struct page *to, unsigned long vaddr)
 {
 	struct blk_dax_ctl dax = {
-		.sector = to_sector(bh, inode),
-		.size = bh->b_size,
+		.sector = sector,
+		.size = size,
 	};
-	struct block_device *bdev = bh->b_bdev;
 	void *vto;
 
 	if (dax_map_atomic(bdev, &dax) < 0)
@@ -867,7 +866,8 @@ int dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
 	if (vmf->cow_page) {
 		struct page *new_page = vmf->cow_page;
 		if (buffer_written(&bh))
-			error = copy_user_bh(new_page, inode, &bh, vaddr);
+			error = copy_user_dax(bh.b_bdev, to_sector(&bh, inode),
+					bh.b_size, new_page, vaddr);
 		else
 			clear_user_highpage(new_page, vaddr);
 		if (error)
-- 
2.1.4


^ permalink raw reply related	[flat|nested] 58+ messages in thread

* [PATCH 05/10] dax: provide an iomap based dax read/write path
  2016-09-09 16:34 iomap based DAX path Christoph Hellwig
@ 2016-09-09 16:34     ` Christoph Hellwig
  2016-09-09 16:34 ` [PATCH 09/10] xfs: refactor xfs_setfilesize Christoph Hellwig
  2016-09-09 16:34 ` [PATCH 10/10] xfs: use iomap to implement DAX Christoph Hellwig
  2 siblings, 0 replies; 58+ messages in thread
From: Christoph Hellwig @ 2016-09-09 16:34 UTC (permalink / raw)
  To: linux-xfs-u79uwXL29TY76Z2rM5mHXA,
	linux-fsdevel-u79uwXL29TY76Z2rM5mHXA,
	linux-nvdimm-y27Ovi1pjclAfugRpC6u6w

This is a much simpler implementation of the DAX read/write path that makes
use of the iomap infrastructure.  It does not try to mirror the direct I/O
calling conventions and thus doesn't have to deal with i_dio_count or the
end_io handler, but instead leaves locking and filesystem-specific I/O
completion to the caller.

Signed-off-by: Christoph Hellwig <hch-jcswGhMUV9g@public.gmane.org>
---
 fs/dax.c              | 103 ++++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/iomap.h |   2 +
 2 files changed, 105 insertions(+)

diff --git a/fs/dax.c b/fs/dax.c
index 84343ce..57ad456 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -31,6 +31,8 @@
 #include <linux/vmstat.h>
 #include <linux/pfn_t.h>
 #include <linux/sizes.h>
+#include <linux/iomap.h>
+#include "internal.h"
 
 /*
  * We use lowest available bit in exceptional entry for locking, other two
@@ -1241,3 +1243,104 @@ int dax_truncate_page(struct inode *inode, loff_t from, get_block_t get_block)
 	return dax_zero_page_range(inode, from, length, get_block);
 }
 EXPORT_SYMBOL_GPL(dax_truncate_page);
+
+#ifdef CONFIG_FS_IOMAP
+static loff_t
+iomap_dax_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
+		struct iomap *iomap)
+{
+	struct iov_iter *iter = data;
+	loff_t end = pos + length, done = 0;
+	ssize_t ret = 0;
+
+	if (iov_iter_rw(iter) == READ) {
+		end = min(end, i_size_read(inode));
+		if (pos >= end)
+			return 0;
+
+		if (iomap->type == IOMAP_HOLE || iomap->type == IOMAP_UNWRITTEN)
+			return iov_iter_zero(min(length, end - pos), iter);
+	}
+
+	if (WARN_ON_ONCE(iomap->type != IOMAP_MAPPED))
+		return -EIO;
+
+	while (pos < end) {
+		unsigned offset = pos & (PAGE_SIZE - 1);
+		struct blk_dax_ctl dax = { 0 };
+		ssize_t map_len;
+
+		dax.sector = iomap->blkno +
+			(((pos & PAGE_MASK) - iomap->offset) >> 9);
+		dax.size = (length + offset + PAGE_SIZE - 1) & PAGE_MASK;
+		map_len = dax_map_atomic(iomap->bdev, &dax);
+		if (map_len < 0) {
+			ret = map_len;
+			break;
+		}
+
+		dax.addr += offset;
+		map_len -= offset;
+		if (map_len > end - pos)
+			map_len = end - pos;
+
+		if (iov_iter_rw(iter) == WRITE)
+			map_len = copy_from_iter_pmem(dax.addr, map_len, iter);
+		else
+			map_len = copy_to_iter(dax.addr, map_len, iter);
+		dax_unmap_atomic(iomap->bdev, &dax);
+		if (map_len <= 0) {
+			ret = map_len ? map_len : -EFAULT;
+			break;
+		}
+
+		pos += map_len;
+		length -= map_len;
+		done += map_len;
+	}
+
+	return done ? done : ret;
+}
+
+/**
+ * iomap_dax_rw - Perform I/O to a DAX file
+ * @iocb: The control block for this I/O
+ * @iter: The addresses to do I/O from or to
+ * @ops: iomap ops passed from the file system
+ *
+ * This funtions performs read and write operations to directly mapped
+ * persistent memory.  The callers needs to take care of read/write exclusion
+ * and evicting any page cache pages in the region under I/O.
+ */
+ssize_t
+iomap_dax_rw(struct kiocb *iocb, struct iov_iter *iter,
+		struct iomap_ops *ops)
+{
+	struct inode *inode = iocb->ki_filp->f_mapping->host;
+	loff_t pos = iocb->ki_pos, ret = 0, done = 0;
+	size_t count = iov_iter_count(iter);
+	unsigned flags = 0;
+
+	if (!count)
+		return 0;
+
+	if (iov_iter_rw(iter) == WRITE)
+		flags |= IOMAP_WRITE;
+
+	do {
+		ret = iomap_apply(inode, pos, count, flags, ops, iter,
+				  iomap_dax_actor);
+		if (ret <= 0)
+			break;
+		pos += ret;
+		done += ret;
+	} while ((count = iov_iter_count(iter)));
+
+	if (!done)
+		return ret;
+
+	iocb->ki_pos += done;
+	return done;
+}
+EXPORT_SYMBOL_GPL(iomap_dax_rw);
+#endif /* CONFIG_FS_IOMAP */
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index 14d7067..3d5f785 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -65,6 +65,8 @@ struct iomap_ops {
 
 ssize_t iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *from,
 		struct iomap_ops *ops);
+ssize_t iomap_dax_rw(struct kiocb *iocb, struct iov_iter *iter,
+		struct iomap_ops *ops);
 int iomap_zero_range(struct inode *inode, loff_t pos, loff_t len,
 		bool *did_zero, struct iomap_ops *ops);
 int iomap_truncate_page(struct inode *inode, loff_t pos, bool *did_zero,
-- 
2.1.4

^ permalink raw reply related	[flat|nested] 58+ messages in thread

* [PATCH 05/10] dax: provide an iomap based dax read/write path
@ 2016-09-09 16:34     ` Christoph Hellwig
  0 siblings, 0 replies; 58+ messages in thread
From: Christoph Hellwig @ 2016-09-09 16:34 UTC (permalink / raw)
  To: linux-xfs, linux-fsdevel, linux-nvdimm

This is a much simpler implementation of the DAX read/write path that makes
use of the iomap infrastructure.  It does not try to mirror the direct I/O
calling conventions and thus doesn't have to deal with i_dio_count or the
end_io handler, but instead leaves locking and filesystem-specific I/O
completion to the caller.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/dax.c              | 103 ++++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/iomap.h |   2 +
 2 files changed, 105 insertions(+)

diff --git a/fs/dax.c b/fs/dax.c
index 84343ce..57ad456 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -31,6 +31,8 @@
 #include <linux/vmstat.h>
 #include <linux/pfn_t.h>
 #include <linux/sizes.h>
+#include <linux/iomap.h>
+#include "internal.h"
 
 /*
  * We use lowest available bit in exceptional entry for locking, other two
@@ -1241,3 +1243,104 @@ int dax_truncate_page(struct inode *inode, loff_t from, get_block_t get_block)
 	return dax_zero_page_range(inode, from, length, get_block);
 }
 EXPORT_SYMBOL_GPL(dax_truncate_page);
+
+#ifdef CONFIG_FS_IOMAP
+static loff_t
+iomap_dax_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
+		struct iomap *iomap)
+{
+	struct iov_iter *iter = data;
+	loff_t end = pos + length, done = 0;
+	ssize_t ret = 0;
+
+	if (iov_iter_rw(iter) == READ) {
+		end = min(end, i_size_read(inode));
+		if (pos >= end)
+			return 0;
+
+		if (iomap->type == IOMAP_HOLE || iomap->type == IOMAP_UNWRITTEN)
+			return iov_iter_zero(min(length, end - pos), iter);
+	}
+
+	if (WARN_ON_ONCE(iomap->type != IOMAP_MAPPED))
+		return -EIO;
+
+	while (pos < end) {
+		unsigned offset = pos & (PAGE_SIZE - 1);
+		struct blk_dax_ctl dax = { 0 };
+		ssize_t map_len;
+
+		dax.sector = iomap->blkno +
+			(((pos & PAGE_MASK) - iomap->offset) >> 9);
+		dax.size = (length + offset + PAGE_SIZE - 1) & PAGE_MASK;
+		map_len = dax_map_atomic(iomap->bdev, &dax);
+		if (map_len < 0) {
+			ret = map_len;
+			break;
+		}
+
+		dax.addr += offset;
+		map_len -= offset;
+		if (map_len > end - pos)
+			map_len = end - pos;
+
+		if (iov_iter_rw(iter) == WRITE)
+			map_len = copy_from_iter_pmem(dax.addr, map_len, iter);
+		else
+			map_len = copy_to_iter(dax.addr, map_len, iter);
+		dax_unmap_atomic(iomap->bdev, &dax);
+		if (map_len <= 0) {
+			ret = map_len ? map_len : -EFAULT;
+			break;
+		}
+
+		pos += map_len;
+		length -= map_len;
+		done += map_len;
+	}
+
+	return done ? done : ret;
+}
+
+/**
+ * iomap_dax_rw - Perform I/O to a DAX file
+ * @iocb: The control block for this I/O
+ * @iter: The addresses to do I/O from or to
+ * @ops: iomap ops passed from the file system
+ *
+ * This funtions performs read and write operations to directly mapped
+ * persistent memory.  The callers needs to take care of read/write exclusion
+ * and evicting any page cache pages in the region under I/O.
+ */
+ssize_t
+iomap_dax_rw(struct kiocb *iocb, struct iov_iter *iter,
+		struct iomap_ops *ops)
+{
+	struct inode *inode = iocb->ki_filp->f_mapping->host;
+	loff_t pos = iocb->ki_pos, ret = 0, done = 0;
+	size_t count = iov_iter_count(iter);
+	unsigned flags = 0;
+
+	if (!count)
+		return 0;
+
+	if (iov_iter_rw(iter) == WRITE)
+		flags |= IOMAP_WRITE;
+
+	do {
+		ret = iomap_apply(inode, pos, count, flags, ops, iter,
+				  iomap_dax_actor);
+		if (ret <= 0)
+			break;
+		pos += ret;
+		done += ret;
+	} while ((count = iov_iter_count(iter)));
+
+	if (!done)
+		return ret;
+
+	iocb->ki_pos += done;
+	return done;
+}
+EXPORT_SYMBOL_GPL(iomap_dax_rw);
+#endif /* CONFIG_FS_IOMAP */
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index 14d7067..3d5f785 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -65,6 +65,8 @@ struct iomap_ops {
 
 ssize_t iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *from,
 		struct iomap_ops *ops);
+ssize_t iomap_dax_rw(struct kiocb *iocb, struct iov_iter *iter,
+		struct iomap_ops *ops);
 int iomap_zero_range(struct inode *inode, loff_t pos, loff_t len,
 		bool *did_zero, struct iomap_ops *ops);
 int iomap_truncate_page(struct inode *inode, loff_t pos, bool *did_zero,
-- 
2.1.4


^ permalink raw reply related	[flat|nested] 58+ messages in thread

* [PATCH 06/10] dax: provide an iomap based fault handler
  2016-09-09 16:34 iomap based DAX path Christoph Hellwig
@ 2016-09-09 16:34     ` Christoph Hellwig
  2016-09-09 16:34 ` [PATCH 09/10] xfs: refactor xfs_setfilesize Christoph Hellwig
  2016-09-09 16:34 ` [PATCH 10/10] xfs: use iomap to implement DAX Christoph Hellwig
  2 siblings, 0 replies; 58+ messages in thread
From: Christoph Hellwig @ 2016-09-09 16:34 UTC (permalink / raw)
  To: linux-xfs-u79uwXL29TY76Z2rM5mHXA,
	linux-fsdevel-u79uwXL29TY76Z2rM5mHXA,
	linux-nvdimm-y27Ovi1pjclAfugRpC6u6w

Very similar to the existing dax_fault function, but instead of using
the get_block callback we rely on the iomap_ops vector from iomap.c.
That also avoids having to do two calls into the file system for write
faults.

Signed-off-by: Christoph Hellwig <hch-jcswGhMUV9g@public.gmane.org>
---
 fs/dax.c              | 113 ++++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/iomap.h |   2 +
 2 files changed, 115 insertions(+)

diff --git a/fs/dax.c b/fs/dax.c
index 57ad456..a170a94 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -1343,4 +1343,117 @@ iomap_dax_rw(struct kiocb *iocb, struct iov_iter *iter,
 	return done;
 }
 EXPORT_SYMBOL_GPL(iomap_dax_rw);
+
+/**
+ * iomap_dax_fault - handle a page fault on a DAX file
+ * @vma: The virtual memory area where the fault occurred
+ * @vmf: The description of the fault
+ * @ops: iomap ops passed from the file system
+ *
+ * When a page fault occurs, filesystems may call this helper in their fault
+ * or mkwrite handler for DAX files. Sssumes the caller has done all the
+ * necessary locking for the page fault to proceed successfully.
+ */
+int iomap_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
+			struct iomap_ops *ops)
+{
+	struct address_space *mapping = vma->vm_file->f_mapping;
+	struct inode *inode = mapping->host;
+	unsigned long vaddr = (unsigned long)vmf->virtual_address;
+	loff_t pos = (loff_t)vmf->pgoff << PAGE_SHIFT;
+	sector_t sector;
+	struct iomap iomap = { 0 };
+	unsigned flags = 0;
+	int error, major = 0;
+	void *entry;
+
+	/*
+	 * Check whether offset isn't beyond end of file now. Caller is supposed
+	 * to hold locks serializing us with truncate / punch hole so this is
+	 * a reliable test.
+	 */
+	if (pos >= i_size_read(inode))
+		return VM_FAULT_SIGBUS;
+
+	entry = grab_mapping_entry(mapping, vmf->pgoff);
+	if (IS_ERR(entry)) {
+		error = PTR_ERR(entry);
+		goto out;
+	}
+
+	if ((vmf->flags & FAULT_FLAG_WRITE) && !vmf->cow_page)
+		flags |= IOMAP_WRITE;
+
+	/*
+	 * Note that we don't bother to use iomap_apply here: DAX required
+	 * the file system block size to be equal the page size, which means
+	 * that we never have to deal with more than a single extent here.
+	 */
+	error = ops->iomap_begin(inode, pos, PAGE_SIZE, flags, &iomap);
+	if (error)
+		goto unlock_entry;
+	if (WARN_ON_ONCE(iomap.offset + iomap.length < pos + PAGE_SIZE)) {
+		error = -EIO;		/* fs corruption? */
+		goto unlock_entry;
+	}
+
+	sector = iomap.blkno + (((pos & PAGE_MASK) - iomap.offset) >> 9);
+
+	if (vmf->cow_page) {
+		switch (iomap.type) {
+		case IOMAP_HOLE:
+		case IOMAP_UNWRITTEN:
+			clear_user_highpage(vmf->cow_page, vaddr);
+			break;
+		case IOMAP_MAPPED:
+			error = copy_user_dax(iomap.bdev, sector, PAGE_SIZE,
+					vmf->cow_page, vaddr);
+			break;
+		default:
+			WARN_ON_ONCE(1);
+			error = -EIO;
+			break;
+		}
+
+		if (error)
+			goto unlock_entry;
+		if (!radix_tree_exceptional_entry(entry)) {
+			vmf->page = entry;
+			return VM_FAULT_LOCKED;
+		}
+		vmf->entry = entry;
+		return VM_FAULT_DAX_LOCKED;
+	}
+
+	switch (iomap.type) {
+	case IOMAP_MAPPED:
+		if (iomap.flags & IOMAP_F_NEW) {
+			count_vm_event(PGMAJFAULT);
+			mem_cgroup_count_vm_event(vma->vm_mm, PGMAJFAULT);
+			major = VM_FAULT_MAJOR;
+		}
+		break;
+	case IOMAP_UNWRITTEN:
+	case IOMAP_HOLE:
+		if (!(vmf->flags & FAULT_FLAG_WRITE))
+			return dax_load_hole(mapping, entry, vmf);
+	default:
+		WARN_ON_ONCE(1);
+		error = -EIO;
+	}
+
+	/* Filesystem should not return unwritten buffers to us! */
+	error = dax_insert_mapping(mapping, iomap.bdev, sector, PAGE_SIZE,
+			&entry, vma, vmf);
+unlock_entry:
+	put_locked_mapping_entry(mapping, vmf->pgoff, entry);
+out:
+	if (error == -ENOMEM)
+		return VM_FAULT_OOM | major;
+	/* -EBUSY is fine, somebody else faulted on the same PTE */
+	if (error < 0 && error != -EBUSY)
+		return VM_FAULT_SIGBUS | major;
+	return VM_FAULT_NOPAGE | major;
+}
+EXPORT_SYMBOL_GPL(iomap_dax_fault);
 #endif /* CONFIG_FS_IOMAP */
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index 3d5f785..a4ef953 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -73,6 +73,8 @@ int iomap_truncate_page(struct inode *inode, loff_t pos, bool *did_zero,
 		struct iomap_ops *ops);
 int iomap_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
 		struct iomap_ops *ops);
+int iomap_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
+			struct iomap_ops *ops);
 int iomap_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
 		loff_t start, loff_t len, struct iomap_ops *ops);
 
-- 
2.1.4

^ permalink raw reply related	[flat|nested] 58+ messages in thread

* [PATCH 06/10] dax: provide an iomap based fault handler
@ 2016-09-09 16:34     ` Christoph Hellwig
  0 siblings, 0 replies; 58+ messages in thread
From: Christoph Hellwig @ 2016-09-09 16:34 UTC (permalink / raw)
  To: linux-xfs, linux-fsdevel, linux-nvdimm

Very similar to the existing dax_fault function, but instead of using
the get_block callback we rely on the iomap_ops vector from iomap.c.
That also avoids having to do two calls into the file system for write
faults.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/dax.c              | 113 ++++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/iomap.h |   2 +
 2 files changed, 115 insertions(+)

diff --git a/fs/dax.c b/fs/dax.c
index 57ad456..a170a94 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -1343,4 +1343,117 @@ iomap_dax_rw(struct kiocb *iocb, struct iov_iter *iter,
 	return done;
 }
 EXPORT_SYMBOL_GPL(iomap_dax_rw);
+
+/**
+ * iomap_dax_fault - handle a page fault on a DAX file
+ * @vma: The virtual memory area where the fault occurred
+ * @vmf: The description of the fault
+ * @ops: iomap ops passed from the file system
+ *
+ * When a page fault occurs, filesystems may call this helper in their fault
+ * or mkwrite handler for DAX files. Sssumes the caller has done all the
+ * necessary locking for the page fault to proceed successfully.
+ */
+int iomap_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
+			struct iomap_ops *ops)
+{
+	struct address_space *mapping = vma->vm_file->f_mapping;
+	struct inode *inode = mapping->host;
+	unsigned long vaddr = (unsigned long)vmf->virtual_address;
+	loff_t pos = (loff_t)vmf->pgoff << PAGE_SHIFT;
+	sector_t sector;
+	struct iomap iomap = { 0 };
+	unsigned flags = 0;
+	int error, major = 0;
+	void *entry;
+
+	/*
+	 * Check whether offset isn't beyond end of file now. Caller is supposed
+	 * to hold locks serializing us with truncate / punch hole so this is
+	 * a reliable test.
+	 */
+	if (pos >= i_size_read(inode))
+		return VM_FAULT_SIGBUS;
+
+	entry = grab_mapping_entry(mapping, vmf->pgoff);
+	if (IS_ERR(entry)) {
+		error = PTR_ERR(entry);
+		goto out;
+	}
+
+	if ((vmf->flags & FAULT_FLAG_WRITE) && !vmf->cow_page)
+		flags |= IOMAP_WRITE;
+
+	/*
+	 * Note that we don't bother to use iomap_apply here: DAX required
+	 * the file system block size to be equal the page size, which means
+	 * that we never have to deal with more than a single extent here.
+	 */
+	error = ops->iomap_begin(inode, pos, PAGE_SIZE, flags, &iomap);
+	if (error)
+		goto unlock_entry;
+	if (WARN_ON_ONCE(iomap.offset + iomap.length < pos + PAGE_SIZE)) {
+		error = -EIO;		/* fs corruption? */
+		goto unlock_entry;
+	}
+
+	sector = iomap.blkno + (((pos & PAGE_MASK) - iomap.offset) >> 9);
+
+	if (vmf->cow_page) {
+		switch (iomap.type) {
+		case IOMAP_HOLE:
+		case IOMAP_UNWRITTEN:
+			clear_user_highpage(vmf->cow_page, vaddr);
+			break;
+		case IOMAP_MAPPED:
+			error = copy_user_dax(iomap.bdev, sector, PAGE_SIZE,
+					vmf->cow_page, vaddr);
+			break;
+		default:
+			WARN_ON_ONCE(1);
+			error = -EIO;
+			break;
+		}
+
+		if (error)
+			goto unlock_entry;
+		if (!radix_tree_exceptional_entry(entry)) {
+			vmf->page = entry;
+			return VM_FAULT_LOCKED;
+		}
+		vmf->entry = entry;
+		return VM_FAULT_DAX_LOCKED;
+	}
+
+	switch (iomap.type) {
+	case IOMAP_MAPPED:
+		if (iomap.flags & IOMAP_F_NEW) {
+			count_vm_event(PGMAJFAULT);
+			mem_cgroup_count_vm_event(vma->vm_mm, PGMAJFAULT);
+			major = VM_FAULT_MAJOR;
+		}
+		break;
+	case IOMAP_UNWRITTEN:
+	case IOMAP_HOLE:
+		if (!(vmf->flags & FAULT_FLAG_WRITE))
+			return dax_load_hole(mapping, entry, vmf);
+	default:
+		WARN_ON_ONCE(1);
+		error = -EIO;
+	}
+
+	/* Filesystem should not return unwritten buffers to us! */
+	error = dax_insert_mapping(mapping, iomap.bdev, sector, PAGE_SIZE,
+			&entry, vma, vmf);
+unlock_entry:
+	put_locked_mapping_entry(mapping, vmf->pgoff, entry);
+out:
+	if (error == -ENOMEM)
+		return VM_FAULT_OOM | major;
+	/* -EBUSY is fine, somebody else faulted on the same PTE */
+	if (error < 0 && error != -EBUSY)
+		return VM_FAULT_SIGBUS | major;
+	return VM_FAULT_NOPAGE | major;
+}
+EXPORT_SYMBOL_GPL(iomap_dax_fault);
 #endif /* CONFIG_FS_IOMAP */
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index 3d5f785..a4ef953 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -73,6 +73,8 @@ int iomap_truncate_page(struct inode *inode, loff_t pos, bool *did_zero,
 		struct iomap_ops *ops);
 int iomap_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
 		struct iomap_ops *ops);
+int iomap_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
+			struct iomap_ops *ops);
 int iomap_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
 		loff_t start, loff_t len, struct iomap_ops *ops);
 
-- 
2.1.4


^ permalink raw reply related	[flat|nested] 58+ messages in thread

* [PATCH 07/10] xfs: fix locking for DAX writes
  2016-09-09 16:34 iomap based DAX path Christoph Hellwig
@ 2016-09-09 16:34     ` Christoph Hellwig
  2016-09-09 16:34 ` [PATCH 09/10] xfs: refactor xfs_setfilesize Christoph Hellwig
  2016-09-09 16:34 ` [PATCH 10/10] xfs: use iomap to implement DAX Christoph Hellwig
  2 siblings, 0 replies; 58+ messages in thread
From: Christoph Hellwig @ 2016-09-09 16:34 UTC (permalink / raw)
  To: linux-xfs-u79uwXL29TY76Z2rM5mHXA,
	linux-fsdevel-u79uwXL29TY76Z2rM5mHXA,
	linux-nvdimm-y27Ovi1pjclAfugRpC6u6w

So far DAX writes inherited the locking from direct I/O writes, but the direct
I/O model of using shared locks for writes is actually wrong for DAX.  For
direct I/O we're out of any standards and don't have to provide the Posix
required exclusion between writers, but for DAX which gets transparently
enable on applications without any knowledge of it we can't simply drop the
requirement.  Even worse this only happens for aligned writes and thus
doesn't show up for many typical use cases.

Signed-off-by: Christoph Hellwig <hch-jcswGhMUV9g@public.gmane.org>
---
 fs/xfs/xfs_file.c | 20 +-------------------
 1 file changed, 1 insertion(+), 19 deletions(-)

diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index e612a02..62649cc 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -714,24 +714,11 @@ xfs_file_dax_write(
 	struct address_space	*mapping = iocb->ki_filp->f_mapping;
 	struct inode		*inode = mapping->host;
 	struct xfs_inode	*ip = XFS_I(inode);
-	struct xfs_mount	*mp = ip->i_mount;
 	ssize_t			ret = 0;
-	int			unaligned_io = 0;
-	int			iolock;
+	int			iolock = XFS_IOLOCK_EXCL;
 	struct iov_iter		data;
 
-	/* "unaligned" here means not aligned to a filesystem block */
-	if ((iocb->ki_pos & mp->m_blockmask) ||
-	    ((iocb->ki_pos + iov_iter_count(from)) & mp->m_blockmask)) {
-		unaligned_io = 1;
-		iolock = XFS_IOLOCK_EXCL;
-	} else if (mapping->nrpages) {
-		iolock = XFS_IOLOCK_EXCL;
-	} else {
-		iolock = XFS_IOLOCK_SHARED;
-	}
 	xfs_rw_ilock(ip, iolock);
-
 	ret = xfs_file_aio_write_checks(iocb, from, &iolock);
 	if (ret)
 		goto out;
@@ -758,11 +745,6 @@ xfs_file_dax_write(
 		WARN_ON_ONCE(ret);
 	}
 
-	if (iolock == XFS_IOLOCK_EXCL && !unaligned_io) {
-		xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL);
-		iolock = XFS_IOLOCK_SHARED;
-	}
-
 	trace_xfs_file_dax_write(ip, iov_iter_count(from), iocb->ki_pos);
 
 	data = *from;
-- 
2.1.4

^ permalink raw reply related	[flat|nested] 58+ messages in thread

* [PATCH 07/10] xfs: fix locking for DAX writes
@ 2016-09-09 16:34     ` Christoph Hellwig
  0 siblings, 0 replies; 58+ messages in thread
From: Christoph Hellwig @ 2016-09-09 16:34 UTC (permalink / raw)
  To: linux-xfs, linux-fsdevel, linux-nvdimm

So far DAX writes inherited the locking from direct I/O writes, but the direct
I/O model of using shared locks for writes is actually wrong for DAX.  For
direct I/O we're out of any standards and don't have to provide the Posix
required exclusion between writers, but for DAX which gets transparently
enable on applications without any knowledge of it we can't simply drop the
requirement.  Even worse this only happens for aligned writes and thus
doesn't show up for many typical use cases.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/xfs/xfs_file.c | 20 +-------------------
 1 file changed, 1 insertion(+), 19 deletions(-)

diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index e612a02..62649cc 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -714,24 +714,11 @@ xfs_file_dax_write(
 	struct address_space	*mapping = iocb->ki_filp->f_mapping;
 	struct inode		*inode = mapping->host;
 	struct xfs_inode	*ip = XFS_I(inode);
-	struct xfs_mount	*mp = ip->i_mount;
 	ssize_t			ret = 0;
-	int			unaligned_io = 0;
-	int			iolock;
+	int			iolock = XFS_IOLOCK_EXCL;
 	struct iov_iter		data;
 
-	/* "unaligned" here means not aligned to a filesystem block */
-	if ((iocb->ki_pos & mp->m_blockmask) ||
-	    ((iocb->ki_pos + iov_iter_count(from)) & mp->m_blockmask)) {
-		unaligned_io = 1;
-		iolock = XFS_IOLOCK_EXCL;
-	} else if (mapping->nrpages) {
-		iolock = XFS_IOLOCK_EXCL;
-	} else {
-		iolock = XFS_IOLOCK_SHARED;
-	}
 	xfs_rw_ilock(ip, iolock);
-
 	ret = xfs_file_aio_write_checks(iocb, from, &iolock);
 	if (ret)
 		goto out;
@@ -758,11 +745,6 @@ xfs_file_dax_write(
 		WARN_ON_ONCE(ret);
 	}
 
-	if (iolock == XFS_IOLOCK_EXCL && !unaligned_io) {
-		xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL);
-		iolock = XFS_IOLOCK_SHARED;
-	}
-
 	trace_xfs_file_dax_write(ip, iov_iter_count(from), iocb->ki_pos);
 
 	data = *from;
-- 
2.1.4


^ permalink raw reply related	[flat|nested] 58+ messages in thread

* [PATCH 08/10] xfs: take the ilock shared if possible in xfs_file_iomap_begin
  2016-09-09 16:34 iomap based DAX path Christoph Hellwig
@ 2016-09-09 16:34     ` Christoph Hellwig
  2016-09-09 16:34 ` [PATCH 09/10] xfs: refactor xfs_setfilesize Christoph Hellwig
  2016-09-09 16:34 ` [PATCH 10/10] xfs: use iomap to implement DAX Christoph Hellwig
  2 siblings, 0 replies; 58+ messages in thread
From: Christoph Hellwig @ 2016-09-09 16:34 UTC (permalink / raw)
  To: linux-xfs-u79uwXL29TY76Z2rM5mHXA,
	linux-fsdevel-u79uwXL29TY76Z2rM5mHXA,
	linux-nvdimm-y27Ovi1pjclAfugRpC6u6w

We always just read the extent first, and will later lock exlusively
after first dropping the lock in case we actually allocate blocks.

Signed-off-by: Christoph Hellwig <hch-jcswGhMUV9g@public.gmane.org>
---
 fs/xfs/xfs_iomap.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 5d06a2d..c3cc175 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -955,6 +955,7 @@ xfs_file_iomap_begin(
 	struct xfs_bmbt_irec	imap;
 	xfs_fileoff_t		offset_fsb, end_fsb;
 	int			nimaps = 1, error = 0;
+	unsigned		lockmode;
 
 	if (XFS_FORCED_SHUTDOWN(mp))
 		return -EIO;
@@ -964,7 +965,7 @@ xfs_file_iomap_begin(
 				iomap);
 	}
 
-	xfs_ilock(ip, XFS_ILOCK_EXCL);
+	lockmode = xfs_ilock_data_map_shared(ip);
 
 	ASSERT(offset <= mp->m_super->s_maxbytes);
 	if ((xfs_fsize_t)offset + length > mp->m_super->s_maxbytes)
@@ -975,7 +976,7 @@ xfs_file_iomap_begin(
 	error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, &imap,
 			       &nimaps, XFS_BMAPI_ENTIRE);
 	if (error) {
-		xfs_iunlock(ip, XFS_ILOCK_EXCL);
+		xfs_iunlock(ip, lockmode);
 		return error;
 	}
 
@@ -995,7 +996,8 @@ xfs_file_iomap_begin(
 		 * xfs_iomap_write_direct() expects the shared lock. It
 		 * is unlocked on return.
 		 */
-		xfs_ilock_demote(ip, XFS_ILOCK_EXCL);
+		if (lockmode == XFS_ILOCK_EXCL)
+			xfs_ilock_demote(ip, lockmode);
 		error = xfs_iomap_write_direct(ip, offset, length, &imap,
 				nimaps);
 		if (error)
@@ -1006,7 +1008,7 @@ xfs_file_iomap_begin(
 	} else {
 		ASSERT(nimaps);
 
-		xfs_iunlock(ip, XFS_ILOCK_EXCL);
+		xfs_iunlock(ip, lockmode);
 		trace_xfs_iomap_found(ip, offset, length, 0, &imap);
 	}
 
-- 
2.1.4

^ permalink raw reply related	[flat|nested] 58+ messages in thread

* [PATCH 08/10] xfs: take the ilock shared if possible in xfs_file_iomap_begin
@ 2016-09-09 16:34     ` Christoph Hellwig
  0 siblings, 0 replies; 58+ messages in thread
From: Christoph Hellwig @ 2016-09-09 16:34 UTC (permalink / raw)
  To: linux-xfs, linux-fsdevel, linux-nvdimm

We always just read the extent first, and will later lock exlusively
after first dropping the lock in case we actually allocate blocks.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/xfs/xfs_iomap.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 5d06a2d..c3cc175 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -955,6 +955,7 @@ xfs_file_iomap_begin(
 	struct xfs_bmbt_irec	imap;
 	xfs_fileoff_t		offset_fsb, end_fsb;
 	int			nimaps = 1, error = 0;
+	unsigned		lockmode;
 
 	if (XFS_FORCED_SHUTDOWN(mp))
 		return -EIO;
@@ -964,7 +965,7 @@ xfs_file_iomap_begin(
 				iomap);
 	}
 
-	xfs_ilock(ip, XFS_ILOCK_EXCL);
+	lockmode = xfs_ilock_data_map_shared(ip);
 
 	ASSERT(offset <= mp->m_super->s_maxbytes);
 	if ((xfs_fsize_t)offset + length > mp->m_super->s_maxbytes)
@@ -975,7 +976,7 @@ xfs_file_iomap_begin(
 	error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, &imap,
 			       &nimaps, XFS_BMAPI_ENTIRE);
 	if (error) {
-		xfs_iunlock(ip, XFS_ILOCK_EXCL);
+		xfs_iunlock(ip, lockmode);
 		return error;
 	}
 
@@ -995,7 +996,8 @@ xfs_file_iomap_begin(
 		 * xfs_iomap_write_direct() expects the shared lock. It
 		 * is unlocked on return.
 		 */
-		xfs_ilock_demote(ip, XFS_ILOCK_EXCL);
+		if (lockmode == XFS_ILOCK_EXCL)
+			xfs_ilock_demote(ip, lockmode);
 		error = xfs_iomap_write_direct(ip, offset, length, &imap,
 				nimaps);
 		if (error)
@@ -1006,7 +1008,7 @@ xfs_file_iomap_begin(
 	} else {
 		ASSERT(nimaps);
 
-		xfs_iunlock(ip, XFS_ILOCK_EXCL);
+		xfs_iunlock(ip, lockmode);
 		trace_xfs_iomap_found(ip, offset, length, 0, &imap);
 	}
 
-- 
2.1.4


^ permalink raw reply related	[flat|nested] 58+ messages in thread

* [PATCH 09/10] xfs: refactor xfs_setfilesize
  2016-09-09 16:34 iomap based DAX path Christoph Hellwig
       [not found] ` <1473438884-674-1-git-send-email-hch-jcswGhMUV9g@public.gmane.org>
@ 2016-09-09 16:34 ` Christoph Hellwig
       [not found]   ` <1473438884-674-10-git-send-email-hch-jcswGhMUV9g@public.gmane.org>
  2016-09-09 16:34 ` [PATCH 10/10] xfs: use iomap to implement DAX Christoph Hellwig
  2 siblings, 1 reply; 58+ messages in thread
From: Christoph Hellwig @ 2016-09-09 16:34 UTC (permalink / raw)
  To: linux-xfs, linux-fsdevel, linux-nvdimm

Rename the current function to __xfs_setfilesize and add a non-static
wrapper that also takes care of creating the transaction.  This new
helper will be used by the new iomap-based DAX path.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/xfs/xfs_aops.c | 33 ++++++++++++++++++++++-----------
 fs/xfs/xfs_aops.h |  1 +
 2 files changed, 23 insertions(+), 11 deletions(-)

diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 7575cfc..0bd3e27 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -199,8 +199,8 @@ xfs_setfilesize_trans_alloc(
 /*
  * Update on-disk file size now that data has been written to disk.
  */
-STATIC int
-xfs_setfilesize(
+int
+__xfs_setfilesize(
 	struct xfs_inode	*ip,
 	struct xfs_trans	*tp,
 	xfs_off_t		offset,
@@ -225,6 +225,23 @@ xfs_setfilesize(
 	return xfs_trans_commit(tp);
 }
 
+int
+xfs_setfilesize(
+	struct xfs_inode	*ip,
+	xfs_off_t		offset,
+	size_t			size)
+{
+	struct xfs_mount	*mp = ip->i_mount;
+	struct xfs_trans	*tp;
+	int			error;
+
+	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_fsyncts, 0, 0, 0, &tp);
+	if (error)
+		return error;
+
+	return __xfs_setfilesize(ip, tp, offset, size);
+}
+
 STATIC int
 xfs_setfilesize_ioend(
 	struct xfs_ioend	*ioend,
@@ -247,7 +264,7 @@ xfs_setfilesize_ioend(
 		return error;
 	}
 
-	return xfs_setfilesize(ip, tp, ioend->io_offset, ioend->io_size);
+	return __xfs_setfilesize(ip, tp, ioend->io_offset, ioend->io_size);
 }
 
 /*
@@ -1336,13 +1353,12 @@ xfs_end_io_direct_write(
 {
 	struct inode		*inode = file_inode(iocb->ki_filp);
 	struct xfs_inode	*ip = XFS_I(inode);
-	struct xfs_mount	*mp = ip->i_mount;
 	uintptr_t		flags = (uintptr_t)private;
 	int			error = 0;
 
 	trace_xfs_end_io_direct_write(ip, offset, size);
 
-	if (XFS_FORCED_SHUTDOWN(mp))
+	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
 		return -EIO;
 
 	if (size <= 0)
@@ -1380,14 +1396,9 @@ xfs_end_io_direct_write(
 
 		error = xfs_iomap_write_unwritten(ip, offset, size);
 	} else if (flags & XFS_DIO_FLAG_APPEND) {
-		struct xfs_trans *tp;
-
 		trace_xfs_end_io_direct_write_append(ip, offset, size);
 
-		error = xfs_trans_alloc(mp, &M_RES(mp)->tr_fsyncts, 0, 0, 0,
-				&tp);
-		if (!error)
-			error = xfs_setfilesize(ip, tp, offset, size);
+		error = xfs_setfilesize(ip, offset, size);
 	}
 
 	return error;
diff --git a/fs/xfs/xfs_aops.h b/fs/xfs/xfs_aops.h
index bf2d9a1..1950e3b 100644
--- a/fs/xfs/xfs_aops.h
+++ b/fs/xfs/xfs_aops.h
@@ -62,6 +62,7 @@ int	xfs_get_blocks_dax_fault(struct inode *inode, sector_t offset,
 
 int	xfs_end_io_direct_write(struct kiocb *iocb, loff_t offset,
 		ssize_t size, void *private);
+int	xfs_setfilesize(struct xfs_inode *ip, xfs_off_t offset, size_t size);
 
 extern void xfs_count_page_state(struct page *, int *, int *);
 extern struct block_device *xfs_find_bdev_for_inode(struct inode *);
-- 
2.1.4


^ permalink raw reply related	[flat|nested] 58+ messages in thread

* [PATCH 10/10] xfs: use iomap to implement DAX
  2016-09-09 16:34 iomap based DAX path Christoph Hellwig
       [not found] ` <1473438884-674-1-git-send-email-hch-jcswGhMUV9g@public.gmane.org>
  2016-09-09 16:34 ` [PATCH 09/10] xfs: refactor xfs_setfilesize Christoph Hellwig
@ 2016-09-09 16:34 ` Christoph Hellwig
  2 siblings, 0 replies; 58+ messages in thread
From: Christoph Hellwig @ 2016-09-09 16:34 UTC (permalink / raw)
  To: linux-xfs, linux-fsdevel, linux-nvdimm

Another users of buffer_heads bytes the dust.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/xfs/xfs_file.c  | 44 +++++++++++++++++++-------------------------
 fs/xfs/xfs_iomap.c | 11 +++++++----
 2 files changed, 26 insertions(+), 29 deletions(-)

diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 62649cc..b5aef20 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -332,10 +332,7 @@ xfs_file_dax_read(
 	struct kiocb		*iocb,
 	struct iov_iter		*to)
 {
-	struct address_space	*mapping = iocb->ki_filp->f_mapping;
-	struct inode		*inode = mapping->host;
-	struct xfs_inode	*ip = XFS_I(inode);
-	struct iov_iter		data = *to;
+	struct xfs_inode	*ip = XFS_I(iocb->ki_filp->f_mapping->host);
 	size_t			count = iov_iter_count(to);
 	ssize_t			ret = 0;
 
@@ -345,11 +342,7 @@ xfs_file_dax_read(
 		return 0; /* skip atime */
 
 	xfs_rw_ilock(ip, XFS_IOLOCK_SHARED);
-	ret = dax_do_io(iocb, inode, &data, xfs_get_blocks_direct, NULL, 0);
-	if (ret > 0) {
-		iocb->ki_pos += ret;
-		iov_iter_advance(to, ret);
-	}
+	ret = iomap_dax_rw(iocb, to, &xfs_iomap_ops);
 	xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
 
 	file_accessed(iocb->ki_filp);
@@ -714,15 +707,19 @@ xfs_file_dax_write(
 	struct address_space	*mapping = iocb->ki_filp->f_mapping;
 	struct inode		*inode = mapping->host;
 	struct xfs_inode	*ip = XFS_I(inode);
-	ssize_t			ret = 0;
 	int			iolock = XFS_IOLOCK_EXCL;
-	struct iov_iter		data;
+	ssize_t			ret, error = 0;
+	size_t			count;
+	loff_t			pos;
 
 	xfs_rw_ilock(ip, iolock);
 	ret = xfs_file_aio_write_checks(iocb, from, &iolock);
 	if (ret)
 		goto out;
 
+	pos = iocb->ki_pos;
+	count = iov_iter_count(from);
+
 	/*
 	 * Yes, even DAX files can have page cache attached to them:  A zeroed
 	 * page is inserted into the pagecache when we have to serve a write
@@ -737,26 +734,23 @@ xfs_file_dax_write(
 	 * dax_do_io() is fixed up.
 	 */
 	if (mapping->nrpages) {
-		loff_t end = iocb->ki_pos + iov_iter_count(from) - 1;
-
 		ret = invalidate_inode_pages2_range(mapping,
-						    iocb->ki_pos >> PAGE_SHIFT,
-						    end >> PAGE_SHIFT);
+				pos >> PAGE_SHIFT,
+				(pos + count - 1) >> PAGE_SHIFT);
 		WARN_ON_ONCE(ret);
 	}
 
-	trace_xfs_file_dax_write(ip, iov_iter_count(from), iocb->ki_pos);
+	trace_xfs_file_dax_write(ip, count, pos);
 
-	data = *from;
-	ret = dax_do_io(iocb, inode, &data, xfs_get_blocks_direct,
-			xfs_end_io_direct_write, 0);
-	if (ret > 0) {
-		iocb->ki_pos += ret;
-		iov_iter_advance(from, ret);
+	ret = iomap_dax_rw(iocb, from, &xfs_iomap_ops);
+	if (ret > 0 && iocb->ki_pos > i_size_read(inode)) {
+		i_size_write(inode, iocb->ki_pos);
+		error = xfs_setfilesize(ip, pos, count);
 	}
+
 out:
 	xfs_rw_iunlock(ip, iolock);
-	return ret;
+	return error ? error : ret;
 }
 
 STATIC ssize_t
@@ -1495,7 +1489,7 @@ xfs_filemap_page_mkwrite(
 	xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
 
 	if (IS_DAX(inode)) {
-		ret = dax_mkwrite(vma, vmf, xfs_get_blocks_dax_fault);
+		ret = iomap_dax_fault(vma, vmf, &xfs_iomap_ops);
 	} else {
 		ret = iomap_page_mkwrite(vma, vmf, &xfs_iomap_ops);
 		ret = block_page_mkwrite_return(ret);
@@ -1529,7 +1523,7 @@ xfs_filemap_fault(
 		 * changes to xfs_get_blocks_direct() to map unwritten extent
 		 * ioend for conversion on read-only mappings.
 		 */
-		ret = dax_fault(vma, vmf, xfs_get_blocks_dax_fault);
+		ret = iomap_dax_fault(vma, vmf, &xfs_iomap_ops);
 	} else
 		ret = filemap_fault(vma, vmf);
 	xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index c3cc175..23f7811 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -935,11 +935,13 @@ error_on_bmapi_transaction:
 	return error;
 }
 
-static inline bool imap_needs_alloc(struct xfs_bmbt_irec *imap, int nimaps)
+static inline bool imap_needs_alloc(struct inode *inode,
+		struct xfs_bmbt_irec *imap, int nimaps)
 {
 	return !nimaps ||
 		imap->br_startblock == HOLESTARTBLOCK ||
-		imap->br_startblock == DELAYSTARTBLOCK;
+		imap->br_startblock == DELAYSTARTBLOCK ||
+		(IS_DAX(inode) && ISUNWRITTEN(imap));
 }
 
 static int
@@ -960,7 +962,8 @@ xfs_file_iomap_begin(
 	if (XFS_FORCED_SHUTDOWN(mp))
 		return -EIO;
 
-	if ((flags & IOMAP_WRITE) && !xfs_get_extsz_hint(ip)) {
+	if ((flags & IOMAP_WRITE) &&
+	    !IS_DAX(inode) && !xfs_get_extsz_hint(ip)) {
 		return xfs_file_iomap_begin_delay(inode, offset, length, flags,
 				iomap);
 	}
@@ -980,7 +983,7 @@ xfs_file_iomap_begin(
 		return error;
 	}
 
-	if ((flags & IOMAP_WRITE) && imap_needs_alloc(&imap, nimaps)) {
+	if ((flags & IOMAP_WRITE) && imap_needs_alloc(inode, &imap, nimaps)) {
 		/*
 		 * We cap the maximum length we map here to MAX_WRITEBACK_PAGES
 		 * pages to keep the chunks of work done where somewhat symmetric
-- 
2.1.4


^ permalink raw reply related	[flat|nested] 58+ messages in thread

* Re: [PATCH 06/10] dax: provide an iomap based fault handler
  2016-09-09 16:34     ` Christoph Hellwig
@ 2016-09-09 22:55         ` Dave Chinner
  -1 siblings, 0 replies; 58+ messages in thread
From: Dave Chinner @ 2016-09-09 22:55 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: linux-xfs-u79uwXL29TY76Z2rM5mHXA,
	linux-fsdevel-u79uwXL29TY76Z2rM5mHXA,
	linux-nvdimm-y27Ovi1pjclAfugRpC6u6w

On Fri, Sep 09, 2016 at 06:34:40PM +0200, Christoph Hellwig wrote:
> Very similar to the existing dax_fault function, but instead of using
> the get_block callback we rely on the iomap_ops vector from iomap.c.
> That also avoids having to do two calls into the file system for write
> faults.
> 
> Signed-off-by: Christoph Hellwig <hch-jcswGhMUV9g@public.gmane.org>

Just noticed this on a quick initial browse...

> +	if (vmf->cow_page) {
> +		switch (iomap.type) {
> +		case IOMAP_HOLE:
> +		case IOMAP_UNWRITTEN:
> +			clear_user_highpage(vmf->cow_page, vaddr);
> +			break;
> +		case IOMAP_MAPPED:
> +			error = copy_user_dax(iomap.bdev, sector, PAGE_SIZE,
> +					vmf->cow_page, vaddr);
> +			break;
> +		default:
> +			WARN_ON_ONCE(1);
> +			error = -EIO;
> +			break;
> +		}
> +
> +		if (error)
> +			goto unlock_entry;
> +		if (!radix_tree_exceptional_entry(entry)) {
> +			vmf->page = entry;
> +			return VM_FAULT_LOCKED;
> +		}
> +		vmf->entry = entry;
> +		return VM_FAULT_DAX_LOCKED;
> +	}
> +
> +	switch (iomap.type) {
> +	case IOMAP_MAPPED:
> +		if (iomap.flags & IOMAP_F_NEW) {
> +			count_vm_event(PGMAJFAULT);
> +			mem_cgroup_count_vm_event(vma->vm_mm, PGMAJFAULT);
> +			major = VM_FAULT_MAJOR;
> +		}
> +		break;
> +	case IOMAP_UNWRITTEN:
> +	case IOMAP_HOLE:
> +		if (!(vmf->flags & FAULT_FLAG_WRITE))
> +			return dax_load_hole(mapping, entry, vmf);
> +	default:
> +		WARN_ON_ONCE(1);
> +		error = -EIO;
> +	}

THe errors from the above two cases are not acted on. they are
immediately overwritten by:

> +
> +	/* Filesystem should not return unwritten buffers to us! */
> +	error = dax_insert_mapping(mapping, iomap.bdev, sector, PAGE_SIZE,
> +			&entry, vma, vmf);
> +unlock_entry:
> +	put_locked_mapping_entry(mapping, vmf->pgoff, entry);
> +out:
> +	if (error == -ENOMEM)
> +		return VM_FAULT_OOM | major;
> +	/* -EBUSY is fine, somebody else faulted on the same PTE */
> +	if (error < 0 && error != -EBUSY)
> +		return VM_FAULT_SIGBUS | major;
> +	return VM_FAULT_NOPAGE | major;
> +}

Is there a missing "if (error) goto out;" check somewhere here?

I'm also wondering if you've looked at supporting the PMD fault case
with iomap?

Cheers,

Dave.
-- 
Dave Chinner
david-FqsqvQoI3Ljby3iVrkZq2A@public.gmane.org

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH 06/10] dax: provide an iomap based fault handler
@ 2016-09-09 22:55         ` Dave Chinner
  0 siblings, 0 replies; 58+ messages in thread
From: Dave Chinner @ 2016-09-09 22:55 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: linux-xfs, linux-fsdevel, linux-nvdimm

On Fri, Sep 09, 2016 at 06:34:40PM +0200, Christoph Hellwig wrote:
> Very similar to the existing dax_fault function, but instead of using
> the get_block callback we rely on the iomap_ops vector from iomap.c.
> That also avoids having to do two calls into the file system for write
> faults.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>

Just noticed this on a quick initial browse...

> +	if (vmf->cow_page) {
> +		switch (iomap.type) {
> +		case IOMAP_HOLE:
> +		case IOMAP_UNWRITTEN:
> +			clear_user_highpage(vmf->cow_page, vaddr);
> +			break;
> +		case IOMAP_MAPPED:
> +			error = copy_user_dax(iomap.bdev, sector, PAGE_SIZE,
> +					vmf->cow_page, vaddr);
> +			break;
> +		default:
> +			WARN_ON_ONCE(1);
> +			error = -EIO;
> +			break;
> +		}
> +
> +		if (error)
> +			goto unlock_entry;
> +		if (!radix_tree_exceptional_entry(entry)) {
> +			vmf->page = entry;
> +			return VM_FAULT_LOCKED;
> +		}
> +		vmf->entry = entry;
> +		return VM_FAULT_DAX_LOCKED;
> +	}
> +
> +	switch (iomap.type) {
> +	case IOMAP_MAPPED:
> +		if (iomap.flags & IOMAP_F_NEW) {
> +			count_vm_event(PGMAJFAULT);
> +			mem_cgroup_count_vm_event(vma->vm_mm, PGMAJFAULT);
> +			major = VM_FAULT_MAJOR;
> +		}
> +		break;
> +	case IOMAP_UNWRITTEN:
> +	case IOMAP_HOLE:
> +		if (!(vmf->flags & FAULT_FLAG_WRITE))
> +			return dax_load_hole(mapping, entry, vmf);
> +	default:
> +		WARN_ON_ONCE(1);
> +		error = -EIO;
> +	}

THe errors from the above two cases are not acted on. they are
immediately overwritten by:

> +
> +	/* Filesystem should not return unwritten buffers to us! */
> +	error = dax_insert_mapping(mapping, iomap.bdev, sector, PAGE_SIZE,
> +			&entry, vma, vmf);
> +unlock_entry:
> +	put_locked_mapping_entry(mapping, vmf->pgoff, entry);
> +out:
> +	if (error == -ENOMEM)
> +		return VM_FAULT_OOM | major;
> +	/* -EBUSY is fine, somebody else faulted on the same PTE */
> +	if (error < 0 && error != -EBUSY)
> +		return VM_FAULT_SIGBUS | major;
> +	return VM_FAULT_NOPAGE | major;
> +}

Is there a missing "if (error) goto out;" check somewhere here?

I'm also wondering if you've looked at supporting the PMD fault case
with iomap?

Cheers,

Dave.
-- 
Dave Chinner
david@fromorbit.com

^ permalink raw reply	[flat|nested] 58+ messages in thread

* RE: [PATCH 06/10] dax: provide an iomap based fault handler
  2016-09-09 16:34     ` Christoph Hellwig
@ 2016-09-10  1:38         ` Elliott, Robert (Persistent Memory)
  -1 siblings, 0 replies; 58+ messages in thread
From: Elliott, Robert (Persistent Memory) @ 2016-09-10  1:38 UTC (permalink / raw)
  To: Christoph Hellwig, linux-xfs-u79uwXL29TY76Z2rM5mHXA,
	linux-fsdevel-u79uwXL29TY76Z2rM5mHXA,
	linux-nvdimm-y27Ovi1pjclAfugRpC6u6w

> -----Original Message-----
> From: Linux-nvdimm [mailto:linux-nvdimm-bounces-hn68Rpc1hR1g9hUCZPvPmw@public.gmane.org] On
> Behalf Of Christoph Hellwig
> Sent: Friday, September 09, 2016 11:35 AM
> Subject: [PATCH 06/10] dax: provide an iomap based fault handler
> 
...
> diff --git a/fs/dax.c b/fs/dax.c
> @@ -1343,4 +1343,117 @@ iomap_dax_rw(struct kiocb *iocb, struct
...
> +	default:
> +		WARN_ON_ONCE(1);
> +		error = -EIO;
> +	}
> +
> +	/* Filesystem should not return unwritten buffers to us! */
> +	error = dax_insert_mapping(mapping, iomap.bdev, sector, PAGE_SIZE,
> +			&entry, vma, vmf);

That -EIO value would be immediately overwritten.

^ permalink raw reply	[flat|nested] 58+ messages in thread

* RE: [PATCH 06/10] dax: provide an iomap based fault handler
@ 2016-09-10  1:38         ` Elliott, Robert (Persistent Memory)
  0 siblings, 0 replies; 58+ messages in thread
From: Elliott, Robert (Persistent Memory) @ 2016-09-10  1:38 UTC (permalink / raw)
  To: Christoph Hellwig, linux-xfs, linux-fsdevel, linux-nvdimm

> -----Original Message-----
> From: Linux-nvdimm [mailto:linux-nvdimm-bounces@lists.01.org] On
> Behalf Of Christoph Hellwig
> Sent: Friday, September 09, 2016 11:35 AM
> Subject: [PATCH 06/10] dax: provide an iomap based fault handler
> 
...
> diff --git a/fs/dax.c b/fs/dax.c
> @@ -1343,4 +1343,117 @@ iomap_dax_rw(struct kiocb *iocb, struct
...
> +	default:
> +		WARN_ON_ONCE(1);
> +		error = -EIO;
> +	}
> +
> +	/* Filesystem should not return unwritten buffers to us! */
> +	error = dax_insert_mapping(mapping, iomap.bdev, sector, PAGE_SIZE,
> +			&entry, vma, vmf);

That -EIO value would be immediately overwritten.



^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH 06/10] dax: provide an iomap based fault handler
  2016-09-09 22:55         ` Dave Chinner
@ 2016-09-10  7:36           ` Christoph Hellwig
  -1 siblings, 0 replies; 58+ messages in thread
From: Christoph Hellwig @ 2016-09-10  7:36 UTC (permalink / raw)
  To: Dave Chinner
  Cc: linux-xfs-u79uwXL29TY76Z2rM5mHXA,
	linux-fsdevel-u79uwXL29TY76Z2rM5mHXA, Christoph Hellwig,
	linux-nvdimm-y27Ovi1pjclAfugRpC6u6w

On Sat, Sep 10, 2016 at 08:55:57AM +1000, Dave Chinner wrote:
> THe errors from the above two cases are not acted on. they are
> immediately overwritten by:

Yes, Robert also pointed this out.  Fix below.

> Is there a missing "if (error) goto out;" check somewhere here?

Just the one above.

> I'm also wondering if you've looked at supporting the PMD fault case
> with iomap?

PMD faults currently don't work at all.  Ross has a series to resurrect
them, but we'll need to coordinate between the two series somehow.  My
preference would be to not resurrect them for the bh path and only do
it for the iomap version.


diff --git a/fs/dax.c b/fs/dax.c
index a170a94..5534594 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -1440,6 +1440,7 @@ int iomap_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
 	default:
 		WARN_ON_ONCE(1);
 		error = -EIO;
+		goto unlock_entry;
 	}
 
 	/* Filesystem should not return unwritten buffers to us! */

^ permalink raw reply related	[flat|nested] 58+ messages in thread

* Re: [PATCH 06/10] dax: provide an iomap based fault handler
@ 2016-09-10  7:36           ` Christoph Hellwig
  0 siblings, 0 replies; 58+ messages in thread
From: Christoph Hellwig @ 2016-09-10  7:36 UTC (permalink / raw)
  To: Dave Chinner
  Cc: Christoph Hellwig, linux-xfs, linux-fsdevel, linux-nvdimm, elliott

On Sat, Sep 10, 2016 at 08:55:57AM +1000, Dave Chinner wrote:
> THe errors from the above two cases are not acted on. they are
> immediately overwritten by:

Yes, Robert also pointed this out.  Fix below.

> Is there a missing "if (error) goto out;" check somewhere here?

Just the one above.

> I'm also wondering if you've looked at supporting the PMD fault case
> with iomap?

PMD faults currently don't work at all.  Ross has a series to resurrect
them, but we'll need to coordinate between the two series somehow.  My
preference would be to not resurrect them for the bh path and only do
it for the iomap version.


diff --git a/fs/dax.c b/fs/dax.c
index a170a94..5534594 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -1440,6 +1440,7 @@ int iomap_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
 	default:
 		WARN_ON_ONCE(1);
 		error = -EIO;
+		goto unlock_entry;
 	}
 
 	/* Filesystem should not return unwritten buffers to us! */

^ permalink raw reply related	[flat|nested] 58+ messages in thread

* Re: [PATCH 06/10] dax: provide an iomap based fault handler
  2016-09-10  7:36           ` Christoph Hellwig
  (?)
@ 2016-09-13 15:51           ` Ross Zwisler
       [not found]             ` <20160913155126.GA10622-VuQAYsv1563Yd54FQh9/CA@public.gmane.org>
  -1 siblings, 1 reply; 58+ messages in thread
From: Ross Zwisler @ 2016-09-13 15:51 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: Dave Chinner, linux-xfs, linux-fsdevel, linux-nvdimm

On Sat, Sep 10, 2016 at 09:36:46AM +0200, Christoph Hellwig wrote:
> On Sat, Sep 10, 2016 at 08:55:57AM +1000, Dave Chinner wrote:
> > THe errors from the above two cases are not acted on. they are
> > immediately overwritten by:
> 
> Yes, Robert also pointed this out.  Fix below.
> 
> > Is there a missing "if (error) goto out;" check somewhere here?
> 
> Just the one above.
> 
> > I'm also wondering if you've looked at supporting the PMD fault case
> > with iomap?
> 
> PMD faults currently don't work at all.  Ross has a series to resurrect
> them, but we'll need to coordinate between the two series somehow.  My
> preference would be to not resurrect them for the bh path and only do
> it for the iomap version.

I'm working on this right now.  I expect that most/all of the infrastructure
between the bh+get_block_t version and the iomap version to be shared, it'll
just be a matter of having a PMD version of the iomap fault handler.  This
should be pretty minor.

Let's see how it goes, but right now my plan is to have both - I'd like to
keep feature parity between ext2/ext4 and XFS, and that means having PMD
faults in ext4 via bh+get_block_t until they move over to iomap.

Regarding coordination, the PMD v2 series hasn't gotten much review so far, so
I'm not sure it'll go in for v4.9.  At this point I'm planning on just
rebasing on top of your iomap series, though if it gets taken sooner I
wouldn't object.

> diff --git a/fs/dax.c b/fs/dax.c
> index a170a94..5534594 100644
> --- a/fs/dax.c
> +++ b/fs/dax.c
> @@ -1440,6 +1440,7 @@ int iomap_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
>  	default:
>  		WARN_ON_ONCE(1);
>  		error = -EIO;
> +		goto unlock_entry;
>  	}
>  
>  	/* Filesystem should not return unwritten buffers to us! */
> _______________________________________________
> Linux-nvdimm mailing list
> Linux-nvdimm@lists.01.org
> https://lists.01.org/mailman/listinfo/linux-nvdimm

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH 01/10] iomap: add IOMAP_F_NEW flag
  2016-09-09 16:34     ` Christoph Hellwig
@ 2016-09-13 22:43         ` Ross Zwisler
  -1 siblings, 0 replies; 58+ messages in thread
From: Ross Zwisler @ 2016-09-13 22:43 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: linux-xfs-u79uwXL29TY76Z2rM5mHXA,
	linux-fsdevel-u79uwXL29TY76Z2rM5mHXA,
	linux-nvdimm-y27Ovi1pjclAfugRpC6u6w

On Fri, Sep 09, 2016 at 06:34:35PM +0200, Christoph Hellwig wrote:
> Signed-off-by: Christoph Hellwig <hch-jcswGhMUV9g@public.gmane.org>

Reviewed-by: Ross Zwisler <ross.zwisler-VuQAYsv1563Yd54FQh9/CA@public.gmane.org>

Though I guess I've never seen a patch with a completely empty changelog
before?  :)   Maybe it's okay for really easy patches?  I just thought I'd get
shouted down for doing it.

> ---
>  fs/xfs/xfs_iomap.c    | 2 ++
>  include/linux/iomap.h | 1 +
>  2 files changed, 3 insertions(+)
> 
> diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
> index f96c8ff..5d06a2d 100644
> --- a/fs/xfs/xfs_iomap.c
> +++ b/fs/xfs/xfs_iomap.c
> @@ -635,6 +635,7 @@ retry:
>  	if (end_fsb != orig_end_fsb)
>  		xfs_inode_set_eofblocks_tag(ip);
>  
> +	iomap->flags = IOMAP_F_NEW;
>  	trace_xfs_iomap_alloc(ip, offset, count, 0, &got);
>  done:
>  	if (isnullstartblock(got.br_startblock))
> @@ -1000,6 +1001,7 @@ xfs_file_iomap_begin(
>  		if (error)
>  			return error;
>  
> +		iomap->flags = IOMAP_F_NEW;
>  		trace_xfs_iomap_alloc(ip, offset, length, 0, &imap);
>  	} else {
>  		ASSERT(nimaps);
> diff --git a/include/linux/iomap.h b/include/linux/iomap.h
> index 3d70ece..14d7067 100644
> --- a/include/linux/iomap.h
> +++ b/include/linux/iomap.h
> @@ -22,6 +22,7 @@ struct vm_fault;
>   * Flags for iomap mappings:
>   */
>  #define IOMAP_F_MERGED	0x01	/* contains multiple blocks/extents */
> +#define IOMAP_F_NEW	0x02	/* blocks have been newly allocated */
>  
>  /*
>   * Magic value for blkno:
> -- 
> 2.1.4
> 
> _______________________________________________
> Linux-nvdimm mailing list
> Linux-nvdimm-hn68Rpc1hR1g9hUCZPvPmw@public.gmane.org
> https://lists.01.org/mailman/listinfo/linux-nvdimm

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH 01/10] iomap: add IOMAP_F_NEW flag
@ 2016-09-13 22:43         ` Ross Zwisler
  0 siblings, 0 replies; 58+ messages in thread
From: Ross Zwisler @ 2016-09-13 22:43 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: linux-xfs, linux-fsdevel, linux-nvdimm

On Fri, Sep 09, 2016 at 06:34:35PM +0200, Christoph Hellwig wrote:
> Signed-off-by: Christoph Hellwig <hch@lst.de>

Reviewed-by: Ross Zwisler <ross.zwisler@linux.intel.com>

Though I guess I've never seen a patch with a completely empty changelog
before?  :)   Maybe it's okay for really easy patches?  I just thought I'd get
shouted down for doing it.

> ---
>  fs/xfs/xfs_iomap.c    | 2 ++
>  include/linux/iomap.h | 1 +
>  2 files changed, 3 insertions(+)
> 
> diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
> index f96c8ff..5d06a2d 100644
> --- a/fs/xfs/xfs_iomap.c
> +++ b/fs/xfs/xfs_iomap.c
> @@ -635,6 +635,7 @@ retry:
>  	if (end_fsb != orig_end_fsb)
>  		xfs_inode_set_eofblocks_tag(ip);
>  
> +	iomap->flags = IOMAP_F_NEW;
>  	trace_xfs_iomap_alloc(ip, offset, count, 0, &got);
>  done:
>  	if (isnullstartblock(got.br_startblock))
> @@ -1000,6 +1001,7 @@ xfs_file_iomap_begin(
>  		if (error)
>  			return error;
>  
> +		iomap->flags = IOMAP_F_NEW;
>  		trace_xfs_iomap_alloc(ip, offset, length, 0, &imap);
>  	} else {
>  		ASSERT(nimaps);
> diff --git a/include/linux/iomap.h b/include/linux/iomap.h
> index 3d70ece..14d7067 100644
> --- a/include/linux/iomap.h
> +++ b/include/linux/iomap.h
> @@ -22,6 +22,7 @@ struct vm_fault;
>   * Flags for iomap mappings:
>   */
>  #define IOMAP_F_MERGED	0x01	/* contains multiple blocks/extents */
> +#define IOMAP_F_NEW	0x02	/* blocks have been newly allocated */
>  
>  /*
>   * Magic value for blkno:
> -- 
> 2.1.4
> 
> _______________________________________________
> Linux-nvdimm mailing list
> Linux-nvdimm@lists.01.org
> https://lists.01.org/mailman/listinfo/linux-nvdimm

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH 02/10] iomap: expose iomap_apply outside iomap.c
  2016-09-09 16:34     ` Christoph Hellwig
@ 2016-09-13 22:48         ` Ross Zwisler
  -1 siblings, 0 replies; 58+ messages in thread
From: Ross Zwisler @ 2016-09-13 22:48 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: linux-xfs-u79uwXL29TY76Z2rM5mHXA,
	linux-fsdevel-u79uwXL29TY76Z2rM5mHXA,
	linux-nvdimm-y27Ovi1pjclAfugRpC6u6w

On Fri, Sep 09, 2016 at 06:34:36PM +0200, Christoph Hellwig wrote:
> This allows the DAX code to use it.
> 
> Signed-off-by: Christoph Hellwig <hch-jcswGhMUV9g@public.gmane.org>

Reviewed-by: Ross Zwisler <ross.zwisler-VuQAYsv1563Yd54FQh9/CA@public.gmane.org>

> ---
>  fs/internal.h | 11 +++++++++++
>  fs/iomap.c    |  5 +----
>  2 files changed, 12 insertions(+), 4 deletions(-)
> 
> diff --git a/fs/internal.h b/fs/internal.h
> index ba07376..8591786 100644
> --- a/fs/internal.h
> +++ b/fs/internal.h
> @@ -12,6 +12,7 @@
>  struct super_block;
>  struct file_system_type;
>  struct iomap;
> +struct iomap_ops;
>  struct linux_binprm;
>  struct path;
>  struct mount;
> @@ -164,3 +165,13 @@ extern struct dentry_operations ns_dentry_operations;
>  extern int do_vfs_ioctl(struct file *file, unsigned int fd, unsigned int cmd,
>  		    unsigned long arg);
>  extern long vfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
> +
> +/*
> + * iomap support:
> + */
> +typedef loff_t (*iomap_actor_t)(struct inode *inode, loff_t pos, loff_t len,
> +		void *data, struct iomap *iomap);
> +
> +loff_t iomap_apply(struct inode *inode, loff_t pos, loff_t length,
> +		unsigned flags, struct iomap_ops *ops, void *data,
> +		iomap_actor_t actor);
> diff --git a/fs/iomap.c b/fs/iomap.c
> index 706270f..f4df9c6 100644
> --- a/fs/iomap.c
> +++ b/fs/iomap.c
> @@ -27,9 +27,6 @@
>  #include <linux/dax.h>
>  #include "internal.h"
>  
> -typedef loff_t (*iomap_actor_t)(struct inode *inode, loff_t pos, loff_t len,
> -		void *data, struct iomap *iomap);
> -
>  /*
>   * Execute a iomap write on a segment of the mapping that spans a
>   * contiguous range of pages that have identical block mapping state.
> @@ -41,7 +38,7 @@ typedef loff_t (*iomap_actor_t)(struct inode *inode, loff_t pos, loff_t len,
>   * resources they require in the iomap_begin call, and release them in the
>   * iomap_end call.
>   */
> -static loff_t
> +loff_t
>  iomap_apply(struct inode *inode, loff_t pos, loff_t length, unsigned flags,
>  		struct iomap_ops *ops, void *data, iomap_actor_t actor)
>  {
> -- 
> 2.1.4
> 
> _______________________________________________
> Linux-nvdimm mailing list
> Linux-nvdimm-hn68Rpc1hR1g9hUCZPvPmw@public.gmane.org
> https://lists.01.org/mailman/listinfo/linux-nvdimm

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH 02/10] iomap: expose iomap_apply outside iomap.c
@ 2016-09-13 22:48         ` Ross Zwisler
  0 siblings, 0 replies; 58+ messages in thread
From: Ross Zwisler @ 2016-09-13 22:48 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: linux-xfs, linux-fsdevel, linux-nvdimm

On Fri, Sep 09, 2016 at 06:34:36PM +0200, Christoph Hellwig wrote:
> This allows the DAX code to use it.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>

Reviewed-by: Ross Zwisler <ross.zwisler@linux.intel.com>

> ---
>  fs/internal.h | 11 +++++++++++
>  fs/iomap.c    |  5 +----
>  2 files changed, 12 insertions(+), 4 deletions(-)
> 
> diff --git a/fs/internal.h b/fs/internal.h
> index ba07376..8591786 100644
> --- a/fs/internal.h
> +++ b/fs/internal.h
> @@ -12,6 +12,7 @@
>  struct super_block;
>  struct file_system_type;
>  struct iomap;
> +struct iomap_ops;
>  struct linux_binprm;
>  struct path;
>  struct mount;
> @@ -164,3 +165,13 @@ extern struct dentry_operations ns_dentry_operations;
>  extern int do_vfs_ioctl(struct file *file, unsigned int fd, unsigned int cmd,
>  		    unsigned long arg);
>  extern long vfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
> +
> +/*
> + * iomap support:
> + */
> +typedef loff_t (*iomap_actor_t)(struct inode *inode, loff_t pos, loff_t len,
> +		void *data, struct iomap *iomap);
> +
> +loff_t iomap_apply(struct inode *inode, loff_t pos, loff_t length,
> +		unsigned flags, struct iomap_ops *ops, void *data,
> +		iomap_actor_t actor);
> diff --git a/fs/iomap.c b/fs/iomap.c
> index 706270f..f4df9c6 100644
> --- a/fs/iomap.c
> +++ b/fs/iomap.c
> @@ -27,9 +27,6 @@
>  #include <linux/dax.h>
>  #include "internal.h"
>  
> -typedef loff_t (*iomap_actor_t)(struct inode *inode, loff_t pos, loff_t len,
> -		void *data, struct iomap *iomap);
> -
>  /*
>   * Execute a iomap write on a segment of the mapping that spans a
>   * contiguous range of pages that have identical block mapping state.
> @@ -41,7 +38,7 @@ typedef loff_t (*iomap_actor_t)(struct inode *inode, loff_t pos, loff_t len,
>   * resources they require in the iomap_begin call, and release them in the
>   * iomap_end call.
>   */
> -static loff_t
> +loff_t
>  iomap_apply(struct inode *inode, loff_t pos, loff_t length, unsigned flags,
>  		struct iomap_ops *ops, void *data, iomap_actor_t actor)
>  {
> -- 
> 2.1.4
> 
> _______________________________________________
> Linux-nvdimm mailing list
> Linux-nvdimm@lists.01.org
> https://lists.01.org/mailman/listinfo/linux-nvdimm

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH 03/10] dax: don't pass buffer_head to dax_insert_mapping
  2016-09-09 16:34     ` Christoph Hellwig
@ 2016-09-13 22:53         ` Ross Zwisler
  -1 siblings, 0 replies; 58+ messages in thread
From: Ross Zwisler @ 2016-09-13 22:53 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: linux-xfs-u79uwXL29TY76Z2rM5mHXA,
	linux-fsdevel-u79uwXL29TY76Z2rM5mHXA,
	linux-nvdimm-y27Ovi1pjclAfugRpC6u6w

On Fri, Sep 09, 2016 at 06:34:37PM +0200, Christoph Hellwig wrote:
> This way we can use this helper for the iomap based DAX implementation
> as well.
> 
> Signed-off-by: Christoph Hellwig <hch-jcswGhMUV9g@public.gmane.org>

Reviewed-by: Ross Zwisler <ross.zwisler-VuQAYsv1563Yd54FQh9/CA@public.gmane.org>

> ---
>  fs/dax.c | 12 ++++++------
>  1 file changed, 6 insertions(+), 6 deletions(-)
> 
> diff --git a/fs/dax.c b/fs/dax.c
> index 993dc6f..98463bb 100644
> --- a/fs/dax.c
> +++ b/fs/dax.c
> @@ -790,14 +790,13 @@ int dax_writeback_mapping_range(struct address_space *mapping,
>  EXPORT_SYMBOL_GPL(dax_writeback_mapping_range);
>  
>  static int dax_insert_mapping(struct address_space *mapping,
> -			struct buffer_head *bh, void **entryp,
> -			struct vm_area_struct *vma, struct vm_fault *vmf)
> +		struct block_device *bdev, sector_t sector, size_t size,
> +		void **entryp, struct vm_area_struct *vma, struct vm_fault *vmf)
>  {
>  	unsigned long vaddr = (unsigned long)vmf->virtual_address;
> -	struct block_device *bdev = bh->b_bdev;
>  	struct blk_dax_ctl dax = {
> -		.sector = to_sector(bh, mapping->host),
> -		.size = bh->b_size,
> +		.sector = sector,
> +		.size = size,
>  	};
>  	void *ret;
>  	void *entry = *entryp;
> @@ -898,7 +897,8 @@ int dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
>  
>  	/* Filesystem should not return unwritten buffers to us! */
>  	WARN_ON_ONCE(buffer_unwritten(&bh) || buffer_new(&bh));
> -	error = dax_insert_mapping(mapping, &bh, &entry, vma, vmf);
> +	error = dax_insert_mapping(mapping, bh.b_bdev, to_sector(&bh, inode),
> +			bh.b_size, &entry, vma, vmf);
>   unlock_entry:
>  	put_locked_mapping_entry(mapping, vmf->pgoff, entry);
>   out:
> -- 
> 2.1.4
> 
> _______________________________________________
> Linux-nvdimm mailing list
> Linux-nvdimm-hn68Rpc1hR1g9hUCZPvPmw@public.gmane.org
> https://lists.01.org/mailman/listinfo/linux-nvdimm

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH 03/10] dax: don't pass buffer_head to dax_insert_mapping
@ 2016-09-13 22:53         ` Ross Zwisler
  0 siblings, 0 replies; 58+ messages in thread
From: Ross Zwisler @ 2016-09-13 22:53 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: linux-xfs, linux-fsdevel, linux-nvdimm

On Fri, Sep 09, 2016 at 06:34:37PM +0200, Christoph Hellwig wrote:
> This way we can use this helper for the iomap based DAX implementation
> as well.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>

Reviewed-by: Ross Zwisler <ross.zwisler@linux.intel.com>

> ---
>  fs/dax.c | 12 ++++++------
>  1 file changed, 6 insertions(+), 6 deletions(-)
> 
> diff --git a/fs/dax.c b/fs/dax.c
> index 993dc6f..98463bb 100644
> --- a/fs/dax.c
> +++ b/fs/dax.c
> @@ -790,14 +790,13 @@ int dax_writeback_mapping_range(struct address_space *mapping,
>  EXPORT_SYMBOL_GPL(dax_writeback_mapping_range);
>  
>  static int dax_insert_mapping(struct address_space *mapping,
> -			struct buffer_head *bh, void **entryp,
> -			struct vm_area_struct *vma, struct vm_fault *vmf)
> +		struct block_device *bdev, sector_t sector, size_t size,
> +		void **entryp, struct vm_area_struct *vma, struct vm_fault *vmf)
>  {
>  	unsigned long vaddr = (unsigned long)vmf->virtual_address;
> -	struct block_device *bdev = bh->b_bdev;
>  	struct blk_dax_ctl dax = {
> -		.sector = to_sector(bh, mapping->host),
> -		.size = bh->b_size,
> +		.sector = sector,
> +		.size = size,
>  	};
>  	void *ret;
>  	void *entry = *entryp;
> @@ -898,7 +897,8 @@ int dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
>  
>  	/* Filesystem should not return unwritten buffers to us! */
>  	WARN_ON_ONCE(buffer_unwritten(&bh) || buffer_new(&bh));
> -	error = dax_insert_mapping(mapping, &bh, &entry, vma, vmf);
> +	error = dax_insert_mapping(mapping, bh.b_bdev, to_sector(&bh, inode),
> +			bh.b_size, &entry, vma, vmf);
>   unlock_entry:
>  	put_locked_mapping_entry(mapping, vmf->pgoff, entry);
>   out:
> -- 
> 2.1.4
> 
> _______________________________________________
> Linux-nvdimm mailing list
> Linux-nvdimm@lists.01.org
> https://lists.01.org/mailman/listinfo/linux-nvdimm

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH 04/10] dax: don't pass buffer_head to copy_user_dax
  2016-09-09 16:34     ` Christoph Hellwig
  (?)
@ 2016-09-13 22:54     ` Ross Zwisler
  -1 siblings, 0 replies; 58+ messages in thread
From: Ross Zwisler @ 2016-09-13 22:54 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: linux-xfs, linux-fsdevel, linux-nvdimm

On Fri, Sep 09, 2016 at 06:34:38PM +0200, Christoph Hellwig wrote:
> This way we can use this helper for the iomap based DAX implementation
> as well.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>

Reviewed-by: Ross Zwisler <ross.zwisler@linux.intel.com>

> ---
>  fs/dax.c | 12 ++++++------
>  1 file changed, 6 insertions(+), 6 deletions(-)
> 
> diff --git a/fs/dax.c b/fs/dax.c
> index 98463bb..84343ce 100644
> --- a/fs/dax.c
> +++ b/fs/dax.c
> @@ -580,14 +580,13 @@ static int dax_load_hole(struct address_space *mapping, void *entry,
>  	return VM_FAULT_LOCKED;
>  }
>  
> -static int copy_user_bh(struct page *to, struct inode *inode,
> -		struct buffer_head *bh, unsigned long vaddr)
> +static int copy_user_dax(struct block_device *bdev, sector_t sector, size_t size,
> +		struct page *to, unsigned long vaddr)
>  {
>  	struct blk_dax_ctl dax = {
> -		.sector = to_sector(bh, inode),
> -		.size = bh->b_size,
> +		.sector = sector,
> +		.size = size,
>  	};
> -	struct block_device *bdev = bh->b_bdev;
>  	void *vto;
>  
>  	if (dax_map_atomic(bdev, &dax) < 0)
> @@ -867,7 +866,8 @@ int dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
>  	if (vmf->cow_page) {
>  		struct page *new_page = vmf->cow_page;
>  		if (buffer_written(&bh))
> -			error = copy_user_bh(new_page, inode, &bh, vaddr);
> +			error = copy_user_dax(bh.b_bdev, to_sector(&bh, inode),
> +					bh.b_size, new_page, vaddr);
>  		else
>  			clear_user_highpage(new_page, vaddr);
>  		if (error)
> -- 
> 2.1.4
> 
> _______________________________________________
> Linux-nvdimm mailing list
> Linux-nvdimm@lists.01.org
> https://lists.01.org/mailman/listinfo/linux-nvdimm

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH 05/10] dax: provide an iomap based dax read/write path
  2016-09-09 16:34     ` Christoph Hellwig
@ 2016-09-13 23:00         ` Ross Zwisler
  -1 siblings, 0 replies; 58+ messages in thread
From: Ross Zwisler @ 2016-09-13 23:00 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: linux-xfs-u79uwXL29TY76Z2rM5mHXA,
	linux-fsdevel-u79uwXL29TY76Z2rM5mHXA,
	linux-nvdimm-y27Ovi1pjclAfugRpC6u6w

On Fri, Sep 09, 2016 at 06:34:39PM +0200, Christoph Hellwig wrote:
> This is a much simpler implementation of the DAX read/write path that makes
> use of the iomap infrastructure.  It does not try to mirror the direct I/O
> calling conventions and thus doesn't have to deal with i_dio_count or the
> end_io handler, but instead leaves locking and filesystem-specific I/O
> completion to the caller.
> 
> Signed-off-by: Christoph Hellwig <hch-jcswGhMUV9g@public.gmane.org>
> ---
>  fs/dax.c              | 103 ++++++++++++++++++++++++++++++++++++++++++++++++++
>  include/linux/iomap.h |   2 +
>  2 files changed, 105 insertions(+)
> 
> diff --git a/fs/dax.c b/fs/dax.c
> index 84343ce..57ad456 100644
> --- a/fs/dax.c
> +++ b/fs/dax.c
> @@ -31,6 +31,8 @@
>  #include <linux/vmstat.h>
>  #include <linux/pfn_t.h>
>  #include <linux/sizes.h>
> +#include <linux/iomap.h>
> +#include "internal.h"
>  
>  /*
>   * We use lowest available bit in exceptional entry for locking, other two
> @@ -1241,3 +1243,104 @@ int dax_truncate_page(struct inode *inode, loff_t from, get_block_t get_block)
>  	return dax_zero_page_range(inode, from, length, get_block);
>  }
>  EXPORT_SYMBOL_GPL(dax_truncate_page);
> +
> +#ifdef CONFIG_FS_IOMAP
> +static loff_t
> +iomap_dax_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
> +		struct iomap *iomap)
> +{
> +	struct iov_iter *iter = data;
> +	loff_t end = pos + length, done = 0;
> +	ssize_t ret = 0;
> +
> +	if (iov_iter_rw(iter) == READ) {
> +		end = min(end, i_size_read(inode));
> +		if (pos >= end)
> +			return 0;
> +
> +		if (iomap->type == IOMAP_HOLE || iomap->type == IOMAP_UNWRITTEN)
> +			return iov_iter_zero(min(length, end - pos), iter);
> +	}
> +
> +	if (WARN_ON_ONCE(iomap->type != IOMAP_MAPPED))
> +		return -EIO;
> +
> +	while (pos < end) {
> +		unsigned offset = pos & (PAGE_SIZE - 1);
> +		struct blk_dax_ctl dax = { 0 };
> +		ssize_t map_len;
> +
> +		dax.sector = iomap->blkno +
> +			(((pos & PAGE_MASK) - iomap->offset) >> 9);
> +		dax.size = (length + offset + PAGE_SIZE - 1) & PAGE_MASK;
> +		map_len = dax_map_atomic(iomap->bdev, &dax);
> +		if (map_len < 0) {
> +			ret = map_len;
> +			break;
> +		}
> +
> +		dax.addr += offset;
> +		map_len -= offset;
> +		if (map_len > end - pos)
> +			map_len = end - pos;
> +
> +		if (iov_iter_rw(iter) == WRITE)
> +			map_len = copy_from_iter_pmem(dax.addr, map_len, iter);
> +		else
> +			map_len = copy_to_iter(dax.addr, map_len, iter);
> +		dax_unmap_atomic(iomap->bdev, &dax);
> +		if (map_len <= 0) {
> +			ret = map_len ? map_len : -EFAULT;
> +			break;
> +		}
> +
> +		pos += map_len;
> +		length -= map_len;
> +		done += map_len;
> +	}
> +
> +	return done ? done : ret;
> +}
> +
> +/**
> + * iomap_dax_rw - Perform I/O to a DAX file
> + * @iocb: The control block for this I/O
> + * @iter: The addresses to do I/O from or to
> + * @ops: iomap ops passed from the file system
> + *
> + * This funtions performs read and write operations to directly mapped

	   function

> + * persistent memory.  The callers needs to take care of read/write exclusion
> + * and evicting any page cache pages in the region under I/O.
> + */
> +ssize_t
> +iomap_dax_rw(struct kiocb *iocb, struct iov_iter *iter,
> +		struct iomap_ops *ops)
> +{
> +	struct inode *inode = iocb->ki_filp->f_mapping->host;
> +	loff_t pos = iocb->ki_pos, ret = 0, done = 0;

Just a note that 'ret' is loff_t about half the time in the iomap code and
ssize_t the other half.  I guess it doesn't really matter since they should
both be big unsigned values (64 bits on x96_64), but it's a bit inconsistent.

> +	size_t count = iov_iter_count(iter);
> +	unsigned flags = 0;
> +
> +	if (!count)
> +		return 0;
> +
> +	if (iov_iter_rw(iter) == WRITE)
> +		flags |= IOMAP_WRITE;
> +
> +	do {
> +		ret = iomap_apply(inode, pos, count, flags, ops, iter,
> +				  iomap_dax_actor);
> +		if (ret <= 0)
> +			break;
> +		pos += ret;
> +		done += ret;
> +	} while ((count = iov_iter_count(iter)));
> +
> +	if (!done)
> +		return ret;
> +
> +	iocb->ki_pos += done;
> +	return done;
> +}

I think you can remove the special casing around 'done' and 'count' and make
this a bit simpler:

ssize_t
iomap_dax_rw(struct kiocb *iocb, struct iov_iter *iter,
		struct iomap_ops *ops)
{
	struct inode *inode = iocb->ki_filp->f_mapping->host;
	loff_t pos = iocb->ki_pos, ret = 0, done = 0;
	unsigned flags = 0;
	size_t count;

	if (iov_iter_rw(iter) == WRITE)
		flags |= IOMAP_WRITE;

	 while ((count = iov_iter_count(iter))) {
		ret = iomap_apply(inode, pos, count, flags, ops, iter,
				  iomap_dax_actor);
		if (ret <= 0)
			break;
		pos += ret;
		done += ret;
	}

	iocb->ki_pos += done;
	return done ? done : ret;
}

This is now very similar to iomap_file_buffered_write().

> +EXPORT_SYMBOL_GPL(iomap_dax_rw);
> +#endif /* CONFIG_FS_IOMAP */
> diff --git a/include/linux/iomap.h b/include/linux/iomap.h
> index 14d7067..3d5f785 100644
> --- a/include/linux/iomap.h
> +++ b/include/linux/iomap.h
> @@ -65,6 +65,8 @@ struct iomap_ops {
>  
>  ssize_t iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *from,
>  		struct iomap_ops *ops);
> +ssize_t iomap_dax_rw(struct kiocb *iocb, struct iov_iter *iter,
> +		struct iomap_ops *ops);
>  int iomap_zero_range(struct inode *inode, loff_t pos, loff_t len,
>  		bool *did_zero, struct iomap_ops *ops);
>  int iomap_truncate_page(struct inode *inode, loff_t pos, bool *did_zero,
> -- 
> 2.1.4
> 
> _______________________________________________
> Linux-nvdimm mailing list
> Linux-nvdimm-hn68Rpc1hR1g9hUCZPvPmw@public.gmane.org
> https://lists.01.org/mailman/listinfo/linux-nvdimm

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH 05/10] dax: provide an iomap based dax read/write path
@ 2016-09-13 23:00         ` Ross Zwisler
  0 siblings, 0 replies; 58+ messages in thread
From: Ross Zwisler @ 2016-09-13 23:00 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: linux-xfs, linux-fsdevel, linux-nvdimm

On Fri, Sep 09, 2016 at 06:34:39PM +0200, Christoph Hellwig wrote:
> This is a much simpler implementation of the DAX read/write path that makes
> use of the iomap infrastructure.  It does not try to mirror the direct I/O
> calling conventions and thus doesn't have to deal with i_dio_count or the
> end_io handler, but instead leaves locking and filesystem-specific I/O
> completion to the caller.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
>  fs/dax.c              | 103 ++++++++++++++++++++++++++++++++++++++++++++++++++
>  include/linux/iomap.h |   2 +
>  2 files changed, 105 insertions(+)
> 
> diff --git a/fs/dax.c b/fs/dax.c
> index 84343ce..57ad456 100644
> --- a/fs/dax.c
> +++ b/fs/dax.c
> @@ -31,6 +31,8 @@
>  #include <linux/vmstat.h>
>  #include <linux/pfn_t.h>
>  #include <linux/sizes.h>
> +#include <linux/iomap.h>
> +#include "internal.h"
>  
>  /*
>   * We use lowest available bit in exceptional entry for locking, other two
> @@ -1241,3 +1243,104 @@ int dax_truncate_page(struct inode *inode, loff_t from, get_block_t get_block)
>  	return dax_zero_page_range(inode, from, length, get_block);
>  }
>  EXPORT_SYMBOL_GPL(dax_truncate_page);
> +
> +#ifdef CONFIG_FS_IOMAP
> +static loff_t
> +iomap_dax_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
> +		struct iomap *iomap)
> +{
> +	struct iov_iter *iter = data;
> +	loff_t end = pos + length, done = 0;
> +	ssize_t ret = 0;
> +
> +	if (iov_iter_rw(iter) == READ) {
> +		end = min(end, i_size_read(inode));
> +		if (pos >= end)
> +			return 0;
> +
> +		if (iomap->type == IOMAP_HOLE || iomap->type == IOMAP_UNWRITTEN)
> +			return iov_iter_zero(min(length, end - pos), iter);
> +	}
> +
> +	if (WARN_ON_ONCE(iomap->type != IOMAP_MAPPED))
> +		return -EIO;
> +
> +	while (pos < end) {
> +		unsigned offset = pos & (PAGE_SIZE - 1);
> +		struct blk_dax_ctl dax = { 0 };
> +		ssize_t map_len;
> +
> +		dax.sector = iomap->blkno +
> +			(((pos & PAGE_MASK) - iomap->offset) >> 9);
> +		dax.size = (length + offset + PAGE_SIZE - 1) & PAGE_MASK;
> +		map_len = dax_map_atomic(iomap->bdev, &dax);
> +		if (map_len < 0) {
> +			ret = map_len;
> +			break;
> +		}
> +
> +		dax.addr += offset;
> +		map_len -= offset;
> +		if (map_len > end - pos)
> +			map_len = end - pos;
> +
> +		if (iov_iter_rw(iter) == WRITE)
> +			map_len = copy_from_iter_pmem(dax.addr, map_len, iter);
> +		else
> +			map_len = copy_to_iter(dax.addr, map_len, iter);
> +		dax_unmap_atomic(iomap->bdev, &dax);
> +		if (map_len <= 0) {
> +			ret = map_len ? map_len : -EFAULT;
> +			break;
> +		}
> +
> +		pos += map_len;
> +		length -= map_len;
> +		done += map_len;
> +	}
> +
> +	return done ? done : ret;
> +}
> +
> +/**
> + * iomap_dax_rw - Perform I/O to a DAX file
> + * @iocb: The control block for this I/O
> + * @iter: The addresses to do I/O from or to
> + * @ops: iomap ops passed from the file system
> + *
> + * This funtions performs read and write operations to directly mapped

	   function

> + * persistent memory.  The callers needs to take care of read/write exclusion
> + * and evicting any page cache pages in the region under I/O.
> + */
> +ssize_t
> +iomap_dax_rw(struct kiocb *iocb, struct iov_iter *iter,
> +		struct iomap_ops *ops)
> +{
> +	struct inode *inode = iocb->ki_filp->f_mapping->host;
> +	loff_t pos = iocb->ki_pos, ret = 0, done = 0;

Just a note that 'ret' is loff_t about half the time in the iomap code and
ssize_t the other half.  I guess it doesn't really matter since they should
both be big unsigned values (64 bits on x96_64), but it's a bit inconsistent.

> +	size_t count = iov_iter_count(iter);
> +	unsigned flags = 0;
> +
> +	if (!count)
> +		return 0;
> +
> +	if (iov_iter_rw(iter) == WRITE)
> +		flags |= IOMAP_WRITE;
> +
> +	do {
> +		ret = iomap_apply(inode, pos, count, flags, ops, iter,
> +				  iomap_dax_actor);
> +		if (ret <= 0)
> +			break;
> +		pos += ret;
> +		done += ret;
> +	} while ((count = iov_iter_count(iter)));
> +
> +	if (!done)
> +		return ret;
> +
> +	iocb->ki_pos += done;
> +	return done;
> +}

I think you can remove the special casing around 'done' and 'count' and make
this a bit simpler:

ssize_t
iomap_dax_rw(struct kiocb *iocb, struct iov_iter *iter,
		struct iomap_ops *ops)
{
	struct inode *inode = iocb->ki_filp->f_mapping->host;
	loff_t pos = iocb->ki_pos, ret = 0, done = 0;
	unsigned flags = 0;
	size_t count;

	if (iov_iter_rw(iter) == WRITE)
		flags |= IOMAP_WRITE;

	 while ((count = iov_iter_count(iter))) {
		ret = iomap_apply(inode, pos, count, flags, ops, iter,
				  iomap_dax_actor);
		if (ret <= 0)
			break;
		pos += ret;
		done += ret;
	}

	iocb->ki_pos += done;
	return done ? done : ret;
}

This is now very similar to iomap_file_buffered_write().

> +EXPORT_SYMBOL_GPL(iomap_dax_rw);
> +#endif /* CONFIG_FS_IOMAP */
> diff --git a/include/linux/iomap.h b/include/linux/iomap.h
> index 14d7067..3d5f785 100644
> --- a/include/linux/iomap.h
> +++ b/include/linux/iomap.h
> @@ -65,6 +65,8 @@ struct iomap_ops {
>  
>  ssize_t iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *from,
>  		struct iomap_ops *ops);
> +ssize_t iomap_dax_rw(struct kiocb *iocb, struct iov_iter *iter,
> +		struct iomap_ops *ops);
>  int iomap_zero_range(struct inode *inode, loff_t pos, loff_t len,
>  		bool *did_zero, struct iomap_ops *ops);
>  int iomap_truncate_page(struct inode *inode, loff_t pos, bool *did_zero,
> -- 
> 2.1.4
> 
> _______________________________________________
> Linux-nvdimm mailing list
> Linux-nvdimm@lists.01.org
> https://lists.01.org/mailman/listinfo/linux-nvdimm

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH 06/10] dax: provide an iomap based fault handler
  2016-09-09 16:34     ` Christoph Hellwig
  (?)
  (?)
@ 2016-09-13 23:10     ` Ross Zwisler
       [not found]       ` <20160913231039.GF26002-VuQAYsv1563Yd54FQh9/CA@public.gmane.org>
  -1 siblings, 1 reply; 58+ messages in thread
From: Ross Zwisler @ 2016-09-13 23:10 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: linux-xfs, linux-fsdevel, linux-nvdimm

On Fri, Sep 09, 2016 at 06:34:40PM +0200, Christoph Hellwig wrote:
> Very similar to the existing dax_fault function, but instead of using
> the get_block callback we rely on the iomap_ops vector from iomap.c.
> That also avoids having to do two calls into the file system for write
> faults.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
>  fs/dax.c              | 113 ++++++++++++++++++++++++++++++++++++++++++++++++++
>  include/linux/iomap.h |   2 +
>  2 files changed, 115 insertions(+)
> 
> diff --git a/fs/dax.c b/fs/dax.c
> index 57ad456..a170a94 100644
> --- a/fs/dax.c
> +++ b/fs/dax.c
> @@ -1343,4 +1343,117 @@ iomap_dax_rw(struct kiocb *iocb, struct iov_iter *iter,
>  	return done;
>  }
>  EXPORT_SYMBOL_GPL(iomap_dax_rw);
> +
> +/**
> + * iomap_dax_fault - handle a page fault on a DAX file
> + * @vma: The virtual memory area where the fault occurred
> + * @vmf: The description of the fault
> + * @ops: iomap ops passed from the file system
> + *
> + * When a page fault occurs, filesystems may call this helper in their fault
> + * or mkwrite handler for DAX files. Sssumes the caller has done all the

					Assumes

> + * necessary locking for the page fault to proceed successfully.
> + */
> +int iomap_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
> +			struct iomap_ops *ops)
> +{
> +	struct address_space *mapping = vma->vm_file->f_mapping;
> +	struct inode *inode = mapping->host;
> +	unsigned long vaddr = (unsigned long)vmf->virtual_address;
> +	loff_t pos = (loff_t)vmf->pgoff << PAGE_SHIFT;
> +	sector_t sector;
> +	struct iomap iomap = { 0 };
> +	unsigned flags = 0;
> +	int error, major = 0;
> +	void *entry;
> +
> +	/*
> +	 * Check whether offset isn't beyond end of file now. Caller is supposed
> +	 * to hold locks serializing us with truncate / punch hole so this is
> +	 * a reliable test.
> +	 */
> +	if (pos >= i_size_read(inode))
> +		return VM_FAULT_SIGBUS;
> +
> +	entry = grab_mapping_entry(mapping, vmf->pgoff);
> +	if (IS_ERR(entry)) {
> +		error = PTR_ERR(entry);
> +		goto out;
> +	}
> +
> +	if ((vmf->flags & FAULT_FLAG_WRITE) && !vmf->cow_page)
> +		flags |= IOMAP_WRITE;
> +
> +	/*
> +	 * Note that we don't bother to use iomap_apply here: DAX required
> +	 * the file system block size to be equal the page size, which means
> +	 * that we never have to deal with more than a single extent here.
> +	 */
> +	error = ops->iomap_begin(inode, pos, PAGE_SIZE, flags, &iomap);
> +	if (error)
> +		goto unlock_entry;
> +	if (WARN_ON_ONCE(iomap.offset + iomap.length < pos + PAGE_SIZE)) {
> +		error = -EIO;		/* fs corruption? */
> +		goto unlock_entry;
> +	}
> +
> +	sector = iomap.blkno + (((pos & PAGE_MASK) - iomap.offset) >> 9);
> +
> +	if (vmf->cow_page) {
> +		switch (iomap.type) {
> +		case IOMAP_HOLE:
> +		case IOMAP_UNWRITTEN:
> +			clear_user_highpage(vmf->cow_page, vaddr);
> +			break;
> +		case IOMAP_MAPPED:
> +			error = copy_user_dax(iomap.bdev, sector, PAGE_SIZE,
> +					vmf->cow_page, vaddr);
> +			break;
> +		default:
> +			WARN_ON_ONCE(1);
> +			error = -EIO;
> +			break;
> +		}
> +
> +		if (error)
> +			goto unlock_entry;
> +		if (!radix_tree_exceptional_entry(entry)) {
> +			vmf->page = entry;
> +			return VM_FAULT_LOCKED;
> +		}
> +		vmf->entry = entry;
> +		return VM_FAULT_DAX_LOCKED;
> +	}
> +
> +	switch (iomap.type) {
> +	case IOMAP_MAPPED:
> +		if (iomap.flags & IOMAP_F_NEW) {
> +			count_vm_event(PGMAJFAULT);
> +			mem_cgroup_count_vm_event(vma->vm_mm, PGMAJFAULT);
> +			major = VM_FAULT_MAJOR;
> +		}
> +		break;
> +	case IOMAP_UNWRITTEN:
> +	case IOMAP_HOLE:
> +		if (!(vmf->flags & FAULT_FLAG_WRITE))
> +			return dax_load_hole(mapping, entry, vmf);
> +	default:
> +		WARN_ON_ONCE(1);
> +		error = -EIO;
> +	}
> +
> +	/* Filesystem should not return unwritten buffers to us! */

This comment is above a WARN_ON_ONCE() that checks for unwritten buffers in
dax_fault(), but that test isn't present in this code nor do we disallow
unwritten buffers (apparently, based on the IOMAP_UNWRITTEN handling above).
This comment should probably be removed.

> +	error = dax_insert_mapping(mapping, iomap.bdev, sector, PAGE_SIZE,
> +			&entry, vma, vmf);
> +unlock_entry:

If you stick a space in front of the labels (as is done in the rest of dax.c)
it prevents future patches from using them at the beginning of hunks.  Here's a
patch adding a random space as an example:

@@ -908,6 +908,7 @@ out:
                return VM_FAULT_OOM | major;
        /* -EBUSY is fine, somebody else faulted on the same PTE */
        if ((error < 0) && (error != -EBUSY))
                return VM_FAULT_SIGBUS | major;
+
        return VM_FAULT_NOPAGE | major;
 }

vs

@@ -908,6 +908,7 @@ int dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
                return VM_FAULT_OOM | major;
        /* -EBUSY is fine, somebody else faulted on the same PTE */
        if ((error < 0) && (error != -EBUSY))
                return VM_FAULT_SIGBUS | major;
+
        return VM_FAULT_NOPAGE | major;
 }

where 'out' is a label without a leading space in the first case and with a
leading space in the second.

With these few nits addressed:

Reviewed-by: Ross Zwisler <ross.zwisler@linux.intel.com>

> +	put_locked_mapping_entry(mapping, vmf->pgoff, entry);
> +out:
> +	if (error == -ENOMEM)
> +		return VM_FAULT_OOM | major;
> +	/* -EBUSY is fine, somebody else faulted on the same PTE */
> +	if (error < 0 && error != -EBUSY)
> +		return VM_FAULT_SIGBUS | major;
> +	return VM_FAULT_NOPAGE | major;
> +}
> +EXPORT_SYMBOL_GPL(iomap_dax_fault);
>  #endif /* CONFIG_FS_IOMAP */
> diff --git a/include/linux/iomap.h b/include/linux/iomap.h
> index 3d5f785..a4ef953 100644
> --- a/include/linux/iomap.h
> +++ b/include/linux/iomap.h
> @@ -73,6 +73,8 @@ int iomap_truncate_page(struct inode *inode, loff_t pos, bool *did_zero,
>  		struct iomap_ops *ops);
>  int iomap_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
>  		struct iomap_ops *ops);
> +int iomap_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
> +			struct iomap_ops *ops);
>  int iomap_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
>  		loff_t start, loff_t len, struct iomap_ops *ops);
>  
> -- 
> 2.1.4
> 
> _______________________________________________
> Linux-nvdimm mailing list
> Linux-nvdimm@lists.01.org
> https://lists.01.org/mailman/listinfo/linux-nvdimm

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH 09/10] xfs: refactor xfs_setfilesize
  2016-09-09 16:34 ` [PATCH 09/10] xfs: refactor xfs_setfilesize Christoph Hellwig
@ 2016-09-13 23:12       ` Ross Zwisler
  0 siblings, 0 replies; 58+ messages in thread
From: Ross Zwisler @ 2016-09-13 23:12 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: linux-xfs-u79uwXL29TY76Z2rM5mHXA,
	linux-fsdevel-u79uwXL29TY76Z2rM5mHXA,
	linux-nvdimm-y27Ovi1pjclAfugRpC6u6w

On Fri, Sep 09, 2016 at 06:34:43PM +0200, Christoph Hellwig wrote:
> Rename the current function to __xfs_setfilesize and add a non-static
> wrapper that also takes care of creating the transaction.  This new
> helper will be used by the new iomap-based DAX path.
> 
> Signed-off-by: Christoph Hellwig <hch-jcswGhMUV9g@public.gmane.org>
> ---
>  fs/xfs/xfs_aops.c | 33 ++++++++++++++++++++++-----------
>  fs/xfs/xfs_aops.h |  1 +
>  2 files changed, 23 insertions(+), 11 deletions(-)
> 
> diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
> index 7575cfc..0bd3e27 100644
> --- a/fs/xfs/xfs_aops.c
> +++ b/fs/xfs/xfs_aops.c
> @@ -199,8 +199,8 @@ xfs_setfilesize_trans_alloc(
>  /*
>   * Update on-disk file size now that data has been written to disk.
>   */
> -STATIC int

This can remain STATIC, as it is only used in this file.

> -xfs_setfilesize(
> +int
> +__xfs_setfilesize(
>  	struct xfs_inode	*ip,
>  	struct xfs_trans	*tp,
>  	xfs_off_t		offset,

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH 09/10] xfs: refactor xfs_setfilesize
@ 2016-09-13 23:12       ` Ross Zwisler
  0 siblings, 0 replies; 58+ messages in thread
From: Ross Zwisler @ 2016-09-13 23:12 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: linux-xfs, linux-fsdevel, linux-nvdimm

On Fri, Sep 09, 2016 at 06:34:43PM +0200, Christoph Hellwig wrote:
> Rename the current function to __xfs_setfilesize and add a non-static
> wrapper that also takes care of creating the transaction.  This new
> helper will be used by the new iomap-based DAX path.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
>  fs/xfs/xfs_aops.c | 33 ++++++++++++++++++++++-----------
>  fs/xfs/xfs_aops.h |  1 +
>  2 files changed, 23 insertions(+), 11 deletions(-)
> 
> diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
> index 7575cfc..0bd3e27 100644
> --- a/fs/xfs/xfs_aops.c
> +++ b/fs/xfs/xfs_aops.c
> @@ -199,8 +199,8 @@ xfs_setfilesize_trans_alloc(
>  /*
>   * Update on-disk file size now that data has been written to disk.
>   */
> -STATIC int

This can remain STATIC, as it is only used in this file.

> -xfs_setfilesize(
> +int
> +__xfs_setfilesize(
>  	struct xfs_inode	*ip,
>  	struct xfs_trans	*tp,
>  	xfs_off_t		offset,

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH 06/10] dax: provide an iomap based fault handler
  2016-09-13 15:51           ` Ross Zwisler
@ 2016-09-14  7:06                 ` Christoph Hellwig
  0 siblings, 0 replies; 58+ messages in thread
From: Christoph Hellwig @ 2016-09-14  7:06 UTC (permalink / raw)
  To: Ross Zwisler, Christoph Hellwig, Dave Chinner,
	linux-xfs-u79uwXL29TY76Z2rM5mHXA,
	linux-fsdevel-u79uwXL29TY76Z2rM5mHXA,
	linux-nvdimm-y27Ovi1pjclAfugRpC6u6w

On Tue, Sep 13, 2016 at 09:51:26AM -0600, Ross Zwisler wrote:
> I'm working on this right now.  I expect that most/all of the infrastructure
> between the bh+get_block_t version and the iomap version to be shared, it'll
> just be a matter of having a PMD version of the iomap fault handler.  This
> should be pretty minor.

Yes, I looked at it (although I didn't do any work yet), and the work
should be fairly easy.

> Let's see how it goes, but right now my plan is to have both - I'd like to
> keep feature parity between ext2/ext4 and XFS, and that means having PMD
> faults in ext4 via bh+get_block_t until they move over to iomap.
> 
> Regarding coordination, the PMD v2 series hasn't gotten much review so far, so
> I'm not sure it'll go in for v4.9.  At this point I'm planning on just
> rebasing on top of your iomap series, though if it gets taken sooner I
> wouldn't object.

So let's do iomap first.  I've got stable ext2 support, as well as support
for the block device, although I'm not sure what the proper testing
protocol for that is.  I've started ext4 and read / zero was easy, but
now I'm stuck in the convoluted mess that is the ext4 direct I/O and
DAX path.

Maybe we should get the iomap work into 4.9 and then convert over ext4
as well as adding PMD fault support in the next release.

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH 06/10] dax: provide an iomap based fault handler
@ 2016-09-14  7:06                 ` Christoph Hellwig
  0 siblings, 0 replies; 58+ messages in thread
From: Christoph Hellwig @ 2016-09-14  7:06 UTC (permalink / raw)
  To: Ross Zwisler, Christoph Hellwig, Dave Chinner, linux-xfs,
	linux-fsdevel, linux-nvdimm

On Tue, Sep 13, 2016 at 09:51:26AM -0600, Ross Zwisler wrote:
> I'm working on this right now.  I expect that most/all of the infrastructure
> between the bh+get_block_t version and the iomap version to be shared, it'll
> just be a matter of having a PMD version of the iomap fault handler.  This
> should be pretty minor.

Yes, I looked at it (although I didn't do any work yet), and the work
should be fairly easy.

> Let's see how it goes, but right now my plan is to have both - I'd like to
> keep feature parity between ext2/ext4 and XFS, and that means having PMD
> faults in ext4 via bh+get_block_t until they move over to iomap.
> 
> Regarding coordination, the PMD v2 series hasn't gotten much review so far, so
> I'm not sure it'll go in for v4.9.  At this point I'm planning on just
> rebasing on top of your iomap series, though if it gets taken sooner I
> wouldn't object.

So let's do iomap first.  I've got stable ext2 support, as well as support
for the block device, although I'm not sure what the proper testing
protocol for that is.  I've started ext4 and read / zero was easy, but
now I'm stuck in the convoluted mess that is the ext4 direct I/O and
DAX path.

Maybe we should get the iomap work into 4.9 and then convert over ext4
as well as adding PMD fault support in the next release.

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH 01/10] iomap: add IOMAP_F_NEW flag
  2016-09-13 22:43         ` Ross Zwisler
  (?)
@ 2016-09-14  7:08         ` Christoph Hellwig
  -1 siblings, 0 replies; 58+ messages in thread
From: Christoph Hellwig @ 2016-09-14  7:08 UTC (permalink / raw)
  To: Ross Zwisler, Christoph Hellwig, linux-xfs, linux-fsdevel, linux-nvdimm

On Tue, Sep 13, 2016 at 04:43:57PM -0600, Ross Zwisler wrote:
> On Fri, Sep 09, 2016 at 06:34:35PM +0200, Christoph Hellwig wrote:
> > Signed-off-by: Christoph Hellwig <hch@lst.de>
> 
> Reviewed-by: Ross Zwisler <ross.zwisler@linux.intel.com>
> 
> Though I guess I've never seen a patch with a completely empty changelog
> before?  :)   Maybe it's okay for really easy patches?  I just thought I'd get
> shouted down for doing it.

I think it's ok and others do it a lot too (e.g. Al Viro).  But I've been
shouted at for it from others nevertheless..

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH 06/10] dax: provide an iomap based fault handler
  2016-09-13 23:10     ` Ross Zwisler
@ 2016-09-14  7:19           ` Christoph Hellwig
  0 siblings, 0 replies; 58+ messages in thread
From: Christoph Hellwig @ 2016-09-14  7:19 UTC (permalink / raw)
  To: Ross Zwisler, Christoph Hellwig,
	linux-xfs-u79uwXL29TY76Z2rM5mHXA,
	linux-fsdevel-u79uwXL29TY76Z2rM5mHXA,
	linux-nvdimm-y27Ovi1pjclAfugRpC6u6w

On Tue, Sep 13, 2016 at 05:10:39PM -0600, Ross Zwisler wrote:
> If you stick a space in front of the labels (as is done in the rest of dax.c)
> it prevents future patches from using them at the beginning of hunks.  Here's a
> patch adding a random space as an example:

The spaces in front of labels are a fairly horrible style.  But given
that the rest of the file uses them I'll add them back.

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH 06/10] dax: provide an iomap based fault handler
@ 2016-09-14  7:19           ` Christoph Hellwig
  0 siblings, 0 replies; 58+ messages in thread
From: Christoph Hellwig @ 2016-09-14  7:19 UTC (permalink / raw)
  To: Ross Zwisler, Christoph Hellwig, linux-xfs, linux-fsdevel, linux-nvdimm

On Tue, Sep 13, 2016 at 05:10:39PM -0600, Ross Zwisler wrote:
> If you stick a space in front of the labels (as is done in the rest of dax.c)
> it prevents future patches from using them at the beginning of hunks.  Here's a
> patch adding a random space as an example:

The spaces in front of labels are a fairly horrible style.  But given
that the rest of the file uses them I'll add them back.

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH 06/10] dax: provide an iomap based fault handler
  2016-09-14  7:06                 ` Christoph Hellwig
@ 2016-09-14  9:53                     ` Christoph Hellwig
  -1 siblings, 0 replies; 58+ messages in thread
From: Christoph Hellwig @ 2016-09-14  9:53 UTC (permalink / raw)
  To: Ross Zwisler, Christoph Hellwig, Dave Chinner,
	linux-xfs-u79uwXL29TY76Z2rM5mHXA,
	linux-fsdevel-u79uwXL29TY76Z2rM5mHXA,
	linux-nvdimm-y27Ovi1pjclAfugRpC6u6w

On Wed, Sep 14, 2016 at 09:06:33AM +0200, Christoph Hellwig wrote:
> So let's do iomap first.  I've got stable ext2 support, as well as support
> for the block device, although I'm not sure what the proper testing
> protocol for that is.

Well, turns out DAX on block devices actually is dead despite a few
leftovers and there is no way to actually test it.  That was easy :)

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH 06/10] dax: provide an iomap based fault handler
@ 2016-09-14  9:53                     ` Christoph Hellwig
  0 siblings, 0 replies; 58+ messages in thread
From: Christoph Hellwig @ 2016-09-14  9:53 UTC (permalink / raw)
  To: Ross Zwisler, Christoph Hellwig, Dave Chinner, linux-xfs,
	linux-fsdevel, linux-nvdimm

On Wed, Sep 14, 2016 at 09:06:33AM +0200, Christoph Hellwig wrote:
> So let's do iomap first.  I've got stable ext2 support, as well as support
> for the block device, although I'm not sure what the proper testing
> protocol for that is.

Well, turns out DAX on block devices actually is dead despite a few
leftovers and there is no way to actually test it.  That was easy :)

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH 06/10] dax: provide an iomap based fault handler
  2016-09-14  7:19           ` Christoph Hellwig
@ 2016-09-14 17:07               ` Ross Zwisler
  -1 siblings, 0 replies; 58+ messages in thread
From: Ross Zwisler @ 2016-09-14 17:07 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: linux-xfs-u79uwXL29TY76Z2rM5mHXA,
	linux-fsdevel-u79uwXL29TY76Z2rM5mHXA,
	linux-nvdimm-y27Ovi1pjclAfugRpC6u6w

On Wed, Sep 14, 2016 at 09:19:10AM +0200, Christoph Hellwig wrote:
> On Tue, Sep 13, 2016 at 05:10:39PM -0600, Ross Zwisler wrote:
> > If you stick a space in front of the labels (as is done in the rest of dax.c)
> > it prevents future patches from using them at the beginning of hunks.  Here's a
> > patch adding a random space as an example:
> 
> The spaces in front of labels are a fairly horrible style.  But given
> that the rest of the file uses them I'll add them back.

I'll bite - why do you think adding a space before labels is a "fairly
horrible style"?  Adding them gives a tangible benefit for unified diffs and
patches because it's much more useful to know that a change is in a given
function than that it follows a label called "out", which could be defined
many times in a given file.  Again, the example:

@@ -908,6 +908,7 @@ out:
                return VM_FAULT_OOM | major;
        /* -EBUSY is fine, somebody else faulted on the same PTE */
        if ((error < 0) && (error != -EBUSY))
                return VM_FAULT_SIGBUS | major;
+
        return VM_FAULT_NOPAGE | major;
 }

vs

@@ -908,6 +908,7 @@ int dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
                return VM_FAULT_OOM | major;
        /* -EBUSY is fine, somebody else faulted on the same PTE */
        if ((error < 0) && (error != -EBUSY))
                return VM_FAULT_SIGBUS | major;
+
        return VM_FAULT_NOPAGE | major;
 }

where 'out' is a label without a leading space in the first case and with a
leading space in the second.

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH 06/10] dax: provide an iomap based fault handler
@ 2016-09-14 17:07               ` Ross Zwisler
  0 siblings, 0 replies; 58+ messages in thread
From: Ross Zwisler @ 2016-09-14 17:07 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: Ross Zwisler, linux-xfs, linux-fsdevel, linux-nvdimm

On Wed, Sep 14, 2016 at 09:19:10AM +0200, Christoph Hellwig wrote:
> On Tue, Sep 13, 2016 at 05:10:39PM -0600, Ross Zwisler wrote:
> > If you stick a space in front of the labels (as is done in the rest of dax.c)
> > it prevents future patches from using them at the beginning of hunks.  Here's a
> > patch adding a random space as an example:
> 
> The spaces in front of labels are a fairly horrible style.  But given
> that the rest of the file uses them I'll add them back.

I'll bite - why do you think adding a space before labels is a "fairly
horrible style"?  Adding them gives a tangible benefit for unified diffs and
patches because it's much more useful to know that a change is in a given
function than that it follows a label called "out", which could be defined
many times in a given file.  Again, the example:

@@ -908,6 +908,7 @@ out:
                return VM_FAULT_OOM | major;
        /* -EBUSY is fine, somebody else faulted on the same PTE */
        if ((error < 0) && (error != -EBUSY))
                return VM_FAULT_SIGBUS | major;
+
        return VM_FAULT_NOPAGE | major;
 }

vs

@@ -908,6 +908,7 @@ int dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
                return VM_FAULT_OOM | major;
        /* -EBUSY is fine, somebody else faulted on the same PTE */
        if ((error < 0) && (error != -EBUSY))
                return VM_FAULT_SIGBUS | major;
+
        return VM_FAULT_NOPAGE | major;
 }

where 'out' is a label without a leading space in the first case and with a
leading space in the second.

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH 06/10] dax: provide an iomap based fault handler
  2016-09-14 17:07               ` Ross Zwisler
@ 2016-09-15  5:12                   ` Christoph Hellwig
  -1 siblings, 0 replies; 58+ messages in thread
From: Christoph Hellwig @ 2016-09-15  5:12 UTC (permalink / raw)
  To: Ross Zwisler, Christoph Hellwig,
	linux-xfs-u79uwXL29TY76Z2rM5mHXA,
	linux-fsdevel-u79uwXL29TY76Z2rM5mHXA,
	linux-nvdimm-y27Ovi1pjclAfugRpC6u6w

On Wed, Sep 14, 2016 at 11:07:59AM -0600, Ross Zwisler wrote:
> I'll bite - why do you think adding a space before labels is a "fairly
> horrible style"?  Adding them gives a tangible benefit for unified diffs and
> patches because it's much more useful to know that a change is in a given
> function than that it follows a label called "out", which could be defined
> many times in a given file.  Again, the example:

It's a space based indent in a tab based world.  And with git you can
get your enhanced diff output with just a small tweak to the git setting.
Which we're incidentally about to finally get for the kernel tree.

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH 06/10] dax: provide an iomap based fault handler
@ 2016-09-15  5:12                   ` Christoph Hellwig
  0 siblings, 0 replies; 58+ messages in thread
From: Christoph Hellwig @ 2016-09-15  5:12 UTC (permalink / raw)
  To: Ross Zwisler, Christoph Hellwig, linux-xfs, linux-fsdevel, linux-nvdimm

On Wed, Sep 14, 2016 at 11:07:59AM -0600, Ross Zwisler wrote:
> I'll bite - why do you think adding a space before labels is a "fairly
> horrible style"?  Adding them gives a tangible benefit for unified diffs and
> patches because it's much more useful to know that a change is in a given
> function than that it follows a label called "out", which could be defined
> many times in a given file.  Again, the example:

It's a space based indent in a tab based world.  And with git you can
get your enhanced diff output with just a small tweak to the git setting.
Which we're incidentally about to finally get for the kernel tree.

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH 06/10] dax: provide an iomap based fault handler
  2016-09-15  5:12                   ` Christoph Hellwig
@ 2016-09-15  5:30                       ` Darrick J. Wong
  -1 siblings, 0 replies; 58+ messages in thread
From: Darrick J. Wong @ 2016-09-15  5:30 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: linux-xfs-u79uwXL29TY76Z2rM5mHXA,
	linux-fsdevel-u79uwXL29TY76Z2rM5mHXA,
	linux-nvdimm-y27Ovi1pjclAfugRpC6u6w

On Thu, Sep 15, 2016 at 07:12:29AM +0200, Christoph Hellwig wrote:
> On Wed, Sep 14, 2016 at 11:07:59AM -0600, Ross Zwisler wrote:
> > I'll bite - why do you think adding a space before labels is a "fairly
> > horrible style"?  Adding them gives a tangible benefit for unified diffs and
> > patches because it's much more useful to know that a change is in a given
> > function than that it follows a label called "out", which could be defined
> > many times in a given file.  Again, the example:
> 
> It's a space based indent in a tab based world.  And with git you can
> get your enhanced diff output with just a small tweak to the git setting.
> Which we're incidentally about to finally get for the kernel tree.

What setting /is/ that, by the way?

--D

> --
> To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH 06/10] dax: provide an iomap based fault handler
@ 2016-09-15  5:30                       ` Darrick J. Wong
  0 siblings, 0 replies; 58+ messages in thread
From: Darrick J. Wong @ 2016-09-15  5:30 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: Ross Zwisler, linux-xfs, linux-fsdevel, linux-nvdimm

On Thu, Sep 15, 2016 at 07:12:29AM +0200, Christoph Hellwig wrote:
> On Wed, Sep 14, 2016 at 11:07:59AM -0600, Ross Zwisler wrote:
> > I'll bite - why do you think adding a space before labels is a "fairly
> > horrible style"?  Adding them gives a tangible benefit for unified diffs and
> > patches because it's much more useful to know that a change is in a given
> > function than that it follows a label called "out", which could be defined
> > many times in a given file.  Again, the example:
> 
> It's a space based indent in a tab based world.  And with git you can
> get your enhanced diff output with just a small tweak to the git setting.
> Which we're incidentally about to finally get for the kernel tree.

What setting /is/ that, by the way?

--D

> --
> To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH 06/10] dax: provide an iomap based fault handler
  2016-09-14  7:06                 ` Christoph Hellwig
@ 2016-09-23 21:02                     ` Ross Zwisler
  -1 siblings, 0 replies; 58+ messages in thread
From: Ross Zwisler @ 2016-09-23 21:02 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: linux-xfs-u79uwXL29TY76Z2rM5mHXA,
	linux-fsdevel-u79uwXL29TY76Z2rM5mHXA, Dave Chinner,
	linux-nvdimm-y27Ovi1pjclAfugRpC6u6w

On Wed, Sep 14, 2016 at 09:06:33AM +0200, Christoph Hellwig wrote:
> On Tue, Sep 13, 2016 at 09:51:26AM -0600, Ross Zwisler wrote:
> > I'm working on this right now.  I expect that most/all of the infrastructure
> > between the bh+get_block_t version and the iomap version to be shared, it'll
> > just be a matter of having a PMD version of the iomap fault handler.  This
> > should be pretty minor.
> 
> Yes, I looked at it (although I didn't do any work yet), and the work
> should be fairly easy.
> 
> > Let's see how it goes, but right now my plan is to have both - I'd like to
> > keep feature parity between ext2/ext4 and XFS, and that means having PMD
> > faults in ext4 via bh+get_block_t until they move over to iomap.
> > 
> > Regarding coordination, the PMD v2 series hasn't gotten much review so far, so
> > I'm not sure it'll go in for v4.9.  At this point I'm planning on just
> > rebasing on top of your iomap series, though if it gets taken sooner I
> > wouldn't object.
> 
> So let's do iomap first.  I've got stable ext2 support, as well as support
> for the block device, although I'm not sure what the proper testing
> protocol for that is.  I've started ext4 and read / zero was easy, but
> now I'm stuck in the convoluted mess that is the ext4 direct I/O and
> DAX path.
> 
> Maybe we should get the iomap work into 4.9 and then convert over ext4
> as well as adding PMD fault support in the next release.

I was doing some testing of my PMD patches, and was surprised to see that
ext4 + PMDs + generic/074 now takes 23 minutes to complete.  With ext4 and
PTEs faults this test takes ~50 seconds in the same setup, and XFS + PMDs
takes 27 seconds.

The root cause is that ext4 is using the direct I/O path for reads and writes,
and that the DIO path thinks it needs to flush dirty data from the radix tree
on each I/O.

Each read ends up writing back PMDs via:

  vfs_read()
    __vfs_read()
      new_sync_read
        generic_file_read_iter()
          filemap_write_and_wait_range()

I believe we have an analogous problem for writes.

This results in us flushing 12 TiB worth of data during the generic/074 test,
one cache line at a time...

With your recent changes to detangle the DAX and DIO faults paths in XFS, we
avoid this because XFS no longer uses generic_file_read_iter(), but instead
uses xfs_file_write_iter() which skips the writeback and instead just calls
xfs_file_dax_write() to do the I/O.

This is obviously an existing issue in ext4 that we need to address.  Even
with the PTE path we are doing tons of unnecessary flushing, but the move from
flushing PTEs to flushing PMDs is what killed us.

I can just add a hack to hop over the writeback in generic_file_read_iter(),
but I hesitate to do this because it seems like the correct thing to do is to
separate the ext4 DAX & DIO paths, which I think you are already doing.

I believe that my DAX PMD patches are ready to go, but because of this issue
they currently only support XFS.  I'm tempted to send them out as they are
right now since they add a bunch of complexity to DAX that we need to review,
and that review can fully happen with only XFS support. We can add ext4
support back in later when it's ready.

Thoughts?

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH 06/10] dax: provide an iomap based fault handler
@ 2016-09-23 21:02                     ` Ross Zwisler
  0 siblings, 0 replies; 58+ messages in thread
From: Ross Zwisler @ 2016-09-23 21:02 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Ross Zwisler, Dave Chinner, linux-xfs, linux-fsdevel, linux-nvdimm

On Wed, Sep 14, 2016 at 09:06:33AM +0200, Christoph Hellwig wrote:
> On Tue, Sep 13, 2016 at 09:51:26AM -0600, Ross Zwisler wrote:
> > I'm working on this right now.  I expect that most/all of the infrastructure
> > between the bh+get_block_t version and the iomap version to be shared, it'll
> > just be a matter of having a PMD version of the iomap fault handler.  This
> > should be pretty minor.
> 
> Yes, I looked at it (although I didn't do any work yet), and the work
> should be fairly easy.
> 
> > Let's see how it goes, but right now my plan is to have both - I'd like to
> > keep feature parity between ext2/ext4 and XFS, and that means having PMD
> > faults in ext4 via bh+get_block_t until they move over to iomap.
> > 
> > Regarding coordination, the PMD v2 series hasn't gotten much review so far, so
> > I'm not sure it'll go in for v4.9.  At this point I'm planning on just
> > rebasing on top of your iomap series, though if it gets taken sooner I
> > wouldn't object.
> 
> So let's do iomap first.  I've got stable ext2 support, as well as support
> for the block device, although I'm not sure what the proper testing
> protocol for that is.  I've started ext4 and read / zero was easy, but
> now I'm stuck in the convoluted mess that is the ext4 direct I/O and
> DAX path.
> 
> Maybe we should get the iomap work into 4.9 and then convert over ext4
> as well as adding PMD fault support in the next release.

I was doing some testing of my PMD patches, and was surprised to see that
ext4 + PMDs + generic/074 now takes 23 minutes to complete.  With ext4 and
PTEs faults this test takes ~50 seconds in the same setup, and XFS + PMDs
takes 27 seconds.

The root cause is that ext4 is using the direct I/O path for reads and writes,
and that the DIO path thinks it needs to flush dirty data from the radix tree
on each I/O.

Each read ends up writing back PMDs via:

  vfs_read()
    __vfs_read()
      new_sync_read
        generic_file_read_iter()
          filemap_write_and_wait_range()

I believe we have an analogous problem for writes.

This results in us flushing 12 TiB worth of data during the generic/074 test,
one cache line at a time...

With your recent changes to detangle the DAX and DIO faults paths in XFS, we
avoid this because XFS no longer uses generic_file_read_iter(), but instead
uses xfs_file_write_iter() which skips the writeback and instead just calls
xfs_file_dax_write() to do the I/O.

This is obviously an existing issue in ext4 that we need to address.  Even
with the PTE path we are doing tons of unnecessary flushing, but the move from
flushing PTEs to flushing PMDs is what killed us.

I can just add a hack to hop over the writeback in generic_file_read_iter(),
but I hesitate to do this because it seems like the correct thing to do is to
separate the ext4 DAX & DIO paths, which I think you are already doing.

I believe that my DAX PMD patches are ready to go, but because of this issue
they currently only support XFS.  I'm tempted to send them out as they are
right now since they add a bunch of complexity to DAX that we need to review,
and that review can fully happen with only XFS support. We can add ext4
support back in later when it's ready.

Thoughts?

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH 06/10] dax: provide an iomap based fault handler
  2016-09-14 17:07               ` Ross Zwisler
  (?)
  (?)
@ 2016-09-26  0:05               ` Christoph Hellwig
  -1 siblings, 0 replies; 58+ messages in thread
From: Christoph Hellwig @ 2016-09-26  0:05 UTC (permalink / raw)
  To: Ross Zwisler, Christoph Hellwig, linux-xfs, linux-fsdevel, linux-nvdimm

On Wed, Sep 14, 2016 at 11:07:59AM -0600, Ross Zwisler wrote:
> I'll bite - why do you think adding a space before labels is a "fairly
> horrible style"?

It introduces single whitespace indentation in a source base that is
all about tabs and uses whitespaces very sparingly (some people use
it to align function arguments, while others also consistently use tabs).

It's also a bit annoying when touching these error label chains for any
sort of major change.

> Adding them gives a tangible benefit for unified diffs and
> patches because it's much more useful to know that a change is in a given
> function than that it follows a label called "out", which could be defined
> many times in a given file.  Again, the example:

Yes, but this is just diff beeing stupid.  There is no reason why diff
couldn't handle this properly even without the space, and in fact at
least git diff can be tought easily to do just that.

There are a few threads about this label style going on on lkml in
the last week, and one of them mentions the git diff tweak.  Try searching
for "Clarify and complete chapter 7".

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH 06/10] dax: provide an iomap based fault handler
  2016-09-23 21:02                     ` Ross Zwisler
@ 2016-09-26  0:08                         ` Christoph Hellwig
  -1 siblings, 0 replies; 58+ messages in thread
From: Christoph Hellwig @ 2016-09-26  0:08 UTC (permalink / raw)
  To: Ross Zwisler, Christoph Hellwig, Dave Chinner,
	linux-xfs-u79uwXL29TY76Z2rM5mHXA,
	linux-fsdevel-u79uwXL29TY76Z2rM5mHXA,
	linux-nvdimm-y27Ovi1pjclAfugRpC6u6w

On Fri, Sep 23, 2016 at 03:02:37PM -0600, Ross Zwisler wrote:
> I can just add a hack to hop over the writeback in generic_file_read_iter(),
> but I hesitate to do this because it seems like the correct thing to do is to
> separate the ext4 DAX & DIO paths, which I think you are already doing.

I've started on it, and the read path works fine.  I'm a bit stuck on
the write side as my attempts at it seems to corrupt data fairly easily
when running xfstests.

> I believe that my DAX PMD patches are ready to go, but because of this issue
> they currently only support XFS.  I'm tempted to send them out as they are
> right now since they add a bunch of complexity to DAX that we need to review,
> and that review can fully happen with only XFS support. We can add ext4
> support back in later when it's ready.

Yes, please send them out.  We really should move ext4 over to iomap for
DAX.  I'd love to get some help from someone more familar with ext4,
and can send my code dump to get started.

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH 06/10] dax: provide an iomap based fault handler
@ 2016-09-26  0:08                         ` Christoph Hellwig
  0 siblings, 0 replies; 58+ messages in thread
From: Christoph Hellwig @ 2016-09-26  0:08 UTC (permalink / raw)
  To: Ross Zwisler, Christoph Hellwig, Dave Chinner, linux-xfs,
	linux-fsdevel, linux-nvdimm

On Fri, Sep 23, 2016 at 03:02:37PM -0600, Ross Zwisler wrote:
> I can just add a hack to hop over the writeback in generic_file_read_iter(),
> but I hesitate to do this because it seems like the correct thing to do is to
> separate the ext4 DAX & DIO paths, which I think you are already doing.

I've started on it, and the read path works fine.  I'm a bit stuck on
the write side as my attempts at it seems to corrupt data fairly easily
when running xfstests.

> I believe that my DAX PMD patches are ready to go, but because of this issue
> they currently only support XFS.  I'm tempted to send them out as they are
> right now since they add a bunch of complexity to DAX that we need to review,
> and that review can fully happen with only XFS support. We can add ext4
> support back in later when it's ready.

Yes, please send them out.  We really should move ext4 over to iomap for
DAX.  I'd love to get some help from someone more familar with ext4,
and can send my code dump to get started.

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH 06/10] dax: provide an iomap based fault handler
  2016-09-26  0:08                         ` Christoph Hellwig
@ 2016-09-26 14:28                             ` Jan Kara
  -1 siblings, 0 replies; 58+ messages in thread
From: Jan Kara @ 2016-09-26 14:28 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: linux-xfs-u79uwXL29TY76Z2rM5mHXA,
	linux-fsdevel-u79uwXL29TY76Z2rM5mHXA, Dave Chinner,
	linux-nvdimm-y27Ovi1pjclAfugRpC6u6w

On Mon 26-09-16 02:08:05, Christoph Hellwig wrote:
> On Fri, Sep 23, 2016 at 03:02:37PM -0600, Ross Zwisler wrote:
> > I can just add a hack to hop over the writeback in generic_file_read_iter(),
> > but I hesitate to do this because it seems like the correct thing to do is to
> > separate the ext4 DAX & DIO paths, which I think you are already doing.
> 
> I've started on it, and the read path works fine.  I'm a bit stuck on
> the write side as my attempts at it seems to corrupt data fairly easily
> when running xfstests.

Well, I have a bunch of data corruption fixes for DAX sitting in my tree
which I was able to trigger. E.g. there are some missing invalidations of
blockdev aliases, also some races between fault vs write path the can lead
to data loss. But at least the second fix requires clearing of dirty bits
in the radix tree for DAX and that is a big pile of patches I have to sift
through mm people (and related mm parts are in constant churn due to PMD
pages in pagecache work so it is a mess).

> > I believe that my DAX PMD patches are ready to go, but because of this issue
> > they currently only support XFS.  I'm tempted to send them out as they are
> > right now since they add a bunch of complexity to DAX that we need to review,
> > and that review can fully happen with only XFS support. We can add ext4
> > support back in later when it's ready.
> 
> Yes, please send them out.  We really should move ext4 over to iomap for
> DAX.  I'd love to get some help from someone more familar with ext4,
> and can send my code dump to get started.

Yeah, I'd also just implement it for iomap path and XFS as a start and then
I'll port ext4 over to iomap infrastructure for DAX. There should be no
principial problem.

									Honza
-- 
Jan Kara <jack-IBi9RG/b67k@public.gmane.org>
SUSE Labs, CR

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH 06/10] dax: provide an iomap based fault handler
@ 2016-09-26 14:28                             ` Jan Kara
  0 siblings, 0 replies; 58+ messages in thread
From: Jan Kara @ 2016-09-26 14:28 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Ross Zwisler, Dave Chinner, linux-xfs, linux-fsdevel, linux-nvdimm

On Mon 26-09-16 02:08:05, Christoph Hellwig wrote:
> On Fri, Sep 23, 2016 at 03:02:37PM -0600, Ross Zwisler wrote:
> > I can just add a hack to hop over the writeback in generic_file_read_iter(),
> > but I hesitate to do this because it seems like the correct thing to do is to
> > separate the ext4 DAX & DIO paths, which I think you are already doing.
> 
> I've started on it, and the read path works fine.  I'm a bit stuck on
> the write side as my attempts at it seems to corrupt data fairly easily
> when running xfstests.

Well, I have a bunch of data corruption fixes for DAX sitting in my tree
which I was able to trigger. E.g. there are some missing invalidations of
blockdev aliases, also some races between fault vs write path the can lead
to data loss. But at least the second fix requires clearing of dirty bits
in the radix tree for DAX and that is a big pile of patches I have to sift
through mm people (and related mm parts are in constant churn due to PMD
pages in pagecache work so it is a mess).

> > I believe that my DAX PMD patches are ready to go, but because of this issue
> > they currently only support XFS.  I'm tempted to send them out as they are
> > right now since they add a bunch of complexity to DAX that we need to review,
> > and that review can fully happen with only XFS support. We can add ext4
> > support back in later when it's ready.
> 
> Yes, please send them out.  We really should move ext4 over to iomap for
> DAX.  I'd love to get some help from someone more familar with ext4,
> and can send my code dump to get started.

Yeah, I'd also just implement it for iomap path and XFS as a start and then
I'll port ext4 over to iomap infrastructure for DAX. There should be no
principial problem.

									Honza
-- 
Jan Kara <jack@suse.com>
SUSE Labs, CR

^ permalink raw reply	[flat|nested] 58+ messages in thread

end of thread, other threads:[~2016-09-26 14:28 UTC | newest]

Thread overview: 58+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-09-09 16:34 iomap based DAX path Christoph Hellwig
     [not found] ` <1473438884-674-1-git-send-email-hch-jcswGhMUV9g@public.gmane.org>
2016-09-09 16:34   ` [PATCH 01/10] iomap: add IOMAP_F_NEW flag Christoph Hellwig
2016-09-09 16:34     ` Christoph Hellwig
     [not found]     ` <1473438884-674-2-git-send-email-hch-jcswGhMUV9g@public.gmane.org>
2016-09-13 22:43       ` Ross Zwisler
2016-09-13 22:43         ` Ross Zwisler
2016-09-14  7:08         ` Christoph Hellwig
2016-09-09 16:34   ` [PATCH 02/10] iomap: expose iomap_apply outside iomap.c Christoph Hellwig
2016-09-09 16:34     ` Christoph Hellwig
     [not found]     ` <1473438884-674-3-git-send-email-hch-jcswGhMUV9g@public.gmane.org>
2016-09-13 22:48       ` Ross Zwisler
2016-09-13 22:48         ` Ross Zwisler
2016-09-09 16:34   ` [PATCH 03/10] dax: don't pass buffer_head to dax_insert_mapping Christoph Hellwig
2016-09-09 16:34     ` Christoph Hellwig
     [not found]     ` <1473438884-674-4-git-send-email-hch-jcswGhMUV9g@public.gmane.org>
2016-09-13 22:53       ` Ross Zwisler
2016-09-13 22:53         ` Ross Zwisler
2016-09-09 16:34   ` [PATCH 04/10] dax: don't pass buffer_head to copy_user_dax Christoph Hellwig
2016-09-09 16:34     ` Christoph Hellwig
2016-09-13 22:54     ` Ross Zwisler
2016-09-09 16:34   ` [PATCH 05/10] dax: provide an iomap based dax read/write path Christoph Hellwig
2016-09-09 16:34     ` Christoph Hellwig
     [not found]     ` <1473438884-674-6-git-send-email-hch-jcswGhMUV9g@public.gmane.org>
2016-09-13 23:00       ` Ross Zwisler
2016-09-13 23:00         ` Ross Zwisler
2016-09-09 16:34   ` [PATCH 06/10] dax: provide an iomap based fault handler Christoph Hellwig
2016-09-09 16:34     ` Christoph Hellwig
     [not found]     ` <1473438884-674-7-git-send-email-hch-jcswGhMUV9g@public.gmane.org>
2016-09-09 22:55       ` Dave Chinner
2016-09-09 22:55         ` Dave Chinner
2016-09-10  7:36         ` Christoph Hellwig
2016-09-10  7:36           ` Christoph Hellwig
2016-09-13 15:51           ` Ross Zwisler
     [not found]             ` <20160913155126.GA10622-VuQAYsv1563Yd54FQh9/CA@public.gmane.org>
2016-09-14  7:06               ` Christoph Hellwig
2016-09-14  7:06                 ` Christoph Hellwig
     [not found]                 ` <20160914070633.GA17278-jcswGhMUV9g@public.gmane.org>
2016-09-14  9:53                   ` Christoph Hellwig
2016-09-14  9:53                     ` Christoph Hellwig
2016-09-23 21:02                   ` Ross Zwisler
2016-09-23 21:02                     ` Ross Zwisler
     [not found]                     ` <20160923210237.GA23346-VuQAYsv1563Yd54FQh9/CA@public.gmane.org>
2016-09-26  0:08                       ` Christoph Hellwig
2016-09-26  0:08                         ` Christoph Hellwig
     [not found]                         ` <20160926000805.GA32252-jcswGhMUV9g@public.gmane.org>
2016-09-26 14:28                           ` Jan Kara
2016-09-26 14:28                             ` Jan Kara
2016-09-10  1:38       ` Elliott, Robert (Persistent Memory)
2016-09-10  1:38         ` Elliott, Robert (Persistent Memory)
2016-09-13 23:10     ` Ross Zwisler
     [not found]       ` <20160913231039.GF26002-VuQAYsv1563Yd54FQh9/CA@public.gmane.org>
2016-09-14  7:19         ` Christoph Hellwig
2016-09-14  7:19           ` Christoph Hellwig
     [not found]           ` <20160914071910.GC17278-jcswGhMUV9g@public.gmane.org>
2016-09-14 17:07             ` Ross Zwisler
2016-09-14 17:07               ` Ross Zwisler
     [not found]               ` <20160914170759.GA14196-VuQAYsv1563Yd54FQh9/CA@public.gmane.org>
2016-09-15  5:12                 ` Christoph Hellwig
2016-09-15  5:12                   ` Christoph Hellwig
     [not found]                   ` <20160915051229.GD6188-jcswGhMUV9g@public.gmane.org>
2016-09-15  5:30                     ` Darrick J. Wong
2016-09-15  5:30                       ` Darrick J. Wong
2016-09-26  0:05               ` Christoph Hellwig
2016-09-09 16:34   ` [PATCH 07/10] xfs: fix locking for DAX writes Christoph Hellwig
2016-09-09 16:34     ` Christoph Hellwig
2016-09-09 16:34   ` [PATCH 08/10] xfs: take the ilock shared if possible in xfs_file_iomap_begin Christoph Hellwig
2016-09-09 16:34     ` Christoph Hellwig
2016-09-09 16:34 ` [PATCH 09/10] xfs: refactor xfs_setfilesize Christoph Hellwig
     [not found]   ` <1473438884-674-10-git-send-email-hch-jcswGhMUV9g@public.gmane.org>
2016-09-13 23:12     ` Ross Zwisler
2016-09-13 23:12       ` Ross Zwisler
2016-09-09 16:34 ` [PATCH 10/10] xfs: use iomap to implement DAX Christoph Hellwig

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.