All of lore.kernel.org
 help / color / mirror / Atom feed
* iomap based DAX path V3
@ 2016-09-16 11:27 Christoph Hellwig
       [not found] ` <1474025234-13804-1-git-send-email-hch-jcswGhMUV9g@public.gmane.org>
                   ` (3 more replies)
  0 siblings, 4 replies; 27+ messages in thread
From: Christoph Hellwig @ 2016-09-16 11:27 UTC (permalink / raw)
  To: linux-xfs, linux-fsdevel, linux-nvdimm; +Cc: ross.zwisler

This series adds a DAX I/O path based on the iomap interface.  This
allows more efficient block mapping including defined hole semantics,
and is an important step toward getting rid of buffer_heads in XFS.

Changes since V2:
 - feedback to various small comments from Ross
 - added Reviewed-by: tags

Changes since V1:
 - added a conversion of ext2 to the iomap interface
 - addresse feedback from Ross, Dave and Robert


^ permalink raw reply	[flat|nested] 27+ messages in thread

* [PATCH 01/12] iomap: add IOMAP_F_NEW flag
  2016-09-16 11:27 iomap based DAX path V3 Christoph Hellwig
@ 2016-09-16 11:27     ` Christoph Hellwig
  2016-09-16 11:27 ` [PATCH 05/12] dax: provide an iomap based dax read/write path Christoph Hellwig
                       ` (2 subsequent siblings)
  3 siblings, 0 replies; 27+ messages in thread
From: Christoph Hellwig @ 2016-09-16 11:27 UTC (permalink / raw)
  To: linux-xfs-u79uwXL29TY76Z2rM5mHXA,
	linux-fsdevel-u79uwXL29TY76Z2rM5mHXA,
	linux-nvdimm-y27Ovi1pjclAfugRpC6u6w

Signed-off-by: Christoph Hellwig <hch-jcswGhMUV9g@public.gmane.org>
Reviewed-by: Ross Zwisler <ross.zwisler-VuQAYsv1563Yd54FQh9/CA@public.gmane.org>
---
 fs/xfs/xfs_iomap.c    | 2 ++
 include/linux/iomap.h | 1 +
 2 files changed, 3 insertions(+)

diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index f96c8ff..5d06a2d 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -635,6 +635,7 @@ retry:
 	if (end_fsb != orig_end_fsb)
 		xfs_inode_set_eofblocks_tag(ip);
 
+	iomap->flags = IOMAP_F_NEW;
 	trace_xfs_iomap_alloc(ip, offset, count, 0, &got);
 done:
 	if (isnullstartblock(got.br_startblock))
@@ -1000,6 +1001,7 @@ xfs_file_iomap_begin(
 		if (error)
 			return error;
 
+		iomap->flags = IOMAP_F_NEW;
 		trace_xfs_iomap_alloc(ip, offset, length, 0, &imap);
 	} else {
 		ASSERT(nimaps);
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index 3d70ece..14d7067 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -22,6 +22,7 @@ struct vm_fault;
  * Flags for iomap mappings:
  */
 #define IOMAP_F_MERGED	0x01	/* contains multiple blocks/extents */
+#define IOMAP_F_NEW	0x02	/* blocks have been newly allocated */
 
 /*
  * Magic value for blkno:
-- 
2.1.4

^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [PATCH 01/12] iomap: add IOMAP_F_NEW flag
@ 2016-09-16 11:27     ` Christoph Hellwig
  0 siblings, 0 replies; 27+ messages in thread
From: Christoph Hellwig @ 2016-09-16 11:27 UTC (permalink / raw)
  To: linux-xfs, linux-fsdevel, linux-nvdimm; +Cc: ross.zwisler

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Ross Zwisler <ross.zwisler@linux.intel.com>
---
 fs/xfs/xfs_iomap.c    | 2 ++
 include/linux/iomap.h | 1 +
 2 files changed, 3 insertions(+)

diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index f96c8ff..5d06a2d 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -635,6 +635,7 @@ retry:
 	if (end_fsb != orig_end_fsb)
 		xfs_inode_set_eofblocks_tag(ip);
 
+	iomap->flags = IOMAP_F_NEW;
 	trace_xfs_iomap_alloc(ip, offset, count, 0, &got);
 done:
 	if (isnullstartblock(got.br_startblock))
@@ -1000,6 +1001,7 @@ xfs_file_iomap_begin(
 		if (error)
 			return error;
 
+		iomap->flags = IOMAP_F_NEW;
 		trace_xfs_iomap_alloc(ip, offset, length, 0, &imap);
 	} else {
 		ASSERT(nimaps);
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index 3d70ece..14d7067 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -22,6 +22,7 @@ struct vm_fault;
  * Flags for iomap mappings:
  */
 #define IOMAP_F_MERGED	0x01	/* contains multiple blocks/extents */
+#define IOMAP_F_NEW	0x02	/* blocks have been newly allocated */
 
 /*
  * Magic value for blkno:
-- 
2.1.4


^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [PATCH 02/12] iomap: expose iomap_apply outside iomap.c
  2016-09-16 11:27 iomap based DAX path V3 Christoph Hellwig
@ 2016-09-16 11:27     ` Christoph Hellwig
  2016-09-16 11:27 ` [PATCH 05/12] dax: provide an iomap based dax read/write path Christoph Hellwig
                       ` (2 subsequent siblings)
  3 siblings, 0 replies; 27+ messages in thread
From: Christoph Hellwig @ 2016-09-16 11:27 UTC (permalink / raw)
  To: linux-xfs-u79uwXL29TY76Z2rM5mHXA,
	linux-fsdevel-u79uwXL29TY76Z2rM5mHXA,
	linux-nvdimm-y27Ovi1pjclAfugRpC6u6w

This allows the DAX code to use it.

Signed-off-by: Christoph Hellwig <hch-jcswGhMUV9g@public.gmane.org>
Reviewed-by: Ross Zwisler <ross.zwisler-VuQAYsv1563Yd54FQh9/CA@public.gmane.org>
---
 fs/internal.h | 11 +++++++++++
 fs/iomap.c    |  5 +----
 2 files changed, 12 insertions(+), 4 deletions(-)

diff --git a/fs/internal.h b/fs/internal.h
index ba07376..8591786 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -12,6 +12,7 @@
 struct super_block;
 struct file_system_type;
 struct iomap;
+struct iomap_ops;
 struct linux_binprm;
 struct path;
 struct mount;
@@ -164,3 +165,13 @@ extern struct dentry_operations ns_dentry_operations;
 extern int do_vfs_ioctl(struct file *file, unsigned int fd, unsigned int cmd,
 		    unsigned long arg);
 extern long vfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
+
+/*
+ * iomap support:
+ */
+typedef loff_t (*iomap_actor_t)(struct inode *inode, loff_t pos, loff_t len,
+		void *data, struct iomap *iomap);
+
+loff_t iomap_apply(struct inode *inode, loff_t pos, loff_t length,
+		unsigned flags, struct iomap_ops *ops, void *data,
+		iomap_actor_t actor);
diff --git a/fs/iomap.c b/fs/iomap.c
index 706270f..f4df9c6 100644
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -27,9 +27,6 @@
 #include <linux/dax.h>
 #include "internal.h"
 
-typedef loff_t (*iomap_actor_t)(struct inode *inode, loff_t pos, loff_t len,
-		void *data, struct iomap *iomap);
-
 /*
  * Execute a iomap write on a segment of the mapping that spans a
  * contiguous range of pages that have identical block mapping state.
@@ -41,7 +38,7 @@ typedef loff_t (*iomap_actor_t)(struct inode *inode, loff_t pos, loff_t len,
  * resources they require in the iomap_begin call, and release them in the
  * iomap_end call.
  */
-static loff_t
+loff_t
 iomap_apply(struct inode *inode, loff_t pos, loff_t length, unsigned flags,
 		struct iomap_ops *ops, void *data, iomap_actor_t actor)
 {
-- 
2.1.4

^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [PATCH 02/12] iomap: expose iomap_apply outside iomap.c
@ 2016-09-16 11:27     ` Christoph Hellwig
  0 siblings, 0 replies; 27+ messages in thread
From: Christoph Hellwig @ 2016-09-16 11:27 UTC (permalink / raw)
  To: linux-xfs, linux-fsdevel, linux-nvdimm; +Cc: ross.zwisler

This allows the DAX code to use it.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Ross Zwisler <ross.zwisler@linux.intel.com>
---
 fs/internal.h | 11 +++++++++++
 fs/iomap.c    |  5 +----
 2 files changed, 12 insertions(+), 4 deletions(-)

diff --git a/fs/internal.h b/fs/internal.h
index ba07376..8591786 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -12,6 +12,7 @@
 struct super_block;
 struct file_system_type;
 struct iomap;
+struct iomap_ops;
 struct linux_binprm;
 struct path;
 struct mount;
@@ -164,3 +165,13 @@ extern struct dentry_operations ns_dentry_operations;
 extern int do_vfs_ioctl(struct file *file, unsigned int fd, unsigned int cmd,
 		    unsigned long arg);
 extern long vfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
+
+/*
+ * iomap support:
+ */
+typedef loff_t (*iomap_actor_t)(struct inode *inode, loff_t pos, loff_t len,
+		void *data, struct iomap *iomap);
+
+loff_t iomap_apply(struct inode *inode, loff_t pos, loff_t length,
+		unsigned flags, struct iomap_ops *ops, void *data,
+		iomap_actor_t actor);
diff --git a/fs/iomap.c b/fs/iomap.c
index 706270f..f4df9c6 100644
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -27,9 +27,6 @@
 #include <linux/dax.h>
 #include "internal.h"
 
-typedef loff_t (*iomap_actor_t)(struct inode *inode, loff_t pos, loff_t len,
-		void *data, struct iomap *iomap);
-
 /*
  * Execute a iomap write on a segment of the mapping that spans a
  * contiguous range of pages that have identical block mapping state.
@@ -41,7 +38,7 @@ typedef loff_t (*iomap_actor_t)(struct inode *inode, loff_t pos, loff_t len,
  * resources they require in the iomap_begin call, and release them in the
  * iomap_end call.
  */
-static loff_t
+loff_t
 iomap_apply(struct inode *inode, loff_t pos, loff_t length, unsigned flags,
 		struct iomap_ops *ops, void *data, iomap_actor_t actor)
 {
-- 
2.1.4


^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [PATCH 03/12] dax: don't pass buffer_head to dax_insert_mapping
  2016-09-16 11:27 iomap based DAX path V3 Christoph Hellwig
@ 2016-09-16 11:27     ` Christoph Hellwig
  2016-09-16 11:27 ` [PATCH 05/12] dax: provide an iomap based dax read/write path Christoph Hellwig
                       ` (2 subsequent siblings)
  3 siblings, 0 replies; 27+ messages in thread
From: Christoph Hellwig @ 2016-09-16 11:27 UTC (permalink / raw)
  To: linux-xfs-u79uwXL29TY76Z2rM5mHXA,
	linux-fsdevel-u79uwXL29TY76Z2rM5mHXA,
	linux-nvdimm-y27Ovi1pjclAfugRpC6u6w

This way we can use this helper for the iomap based DAX implementation
as well.

Signed-off-by: Christoph Hellwig <hch-jcswGhMUV9g@public.gmane.org>
Reviewed-by: Ross Zwisler <ross.zwisler-VuQAYsv1563Yd54FQh9/CA@public.gmane.org>
---
 fs/dax.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/fs/dax.c b/fs/dax.c
index 993dc6f..98463bb 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -790,14 +790,13 @@ int dax_writeback_mapping_range(struct address_space *mapping,
 EXPORT_SYMBOL_GPL(dax_writeback_mapping_range);
 
 static int dax_insert_mapping(struct address_space *mapping,
-			struct buffer_head *bh, void **entryp,
-			struct vm_area_struct *vma, struct vm_fault *vmf)
+		struct block_device *bdev, sector_t sector, size_t size,
+		void **entryp, struct vm_area_struct *vma, struct vm_fault *vmf)
 {
 	unsigned long vaddr = (unsigned long)vmf->virtual_address;
-	struct block_device *bdev = bh->b_bdev;
 	struct blk_dax_ctl dax = {
-		.sector = to_sector(bh, mapping->host),
-		.size = bh->b_size,
+		.sector = sector,
+		.size = size,
 	};
 	void *ret;
 	void *entry = *entryp;
@@ -898,7 +897,8 @@ int dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
 
 	/* Filesystem should not return unwritten buffers to us! */
 	WARN_ON_ONCE(buffer_unwritten(&bh) || buffer_new(&bh));
-	error = dax_insert_mapping(mapping, &bh, &entry, vma, vmf);
+	error = dax_insert_mapping(mapping, bh.b_bdev, to_sector(&bh, inode),
+			bh.b_size, &entry, vma, vmf);
  unlock_entry:
 	put_locked_mapping_entry(mapping, vmf->pgoff, entry);
  out:
-- 
2.1.4

^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [PATCH 03/12] dax: don't pass buffer_head to dax_insert_mapping
@ 2016-09-16 11:27     ` Christoph Hellwig
  0 siblings, 0 replies; 27+ messages in thread
From: Christoph Hellwig @ 2016-09-16 11:27 UTC (permalink / raw)
  To: linux-xfs, linux-fsdevel, linux-nvdimm; +Cc: ross.zwisler

This way we can use this helper for the iomap based DAX implementation
as well.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Ross Zwisler <ross.zwisler@linux.intel.com>
---
 fs/dax.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/fs/dax.c b/fs/dax.c
index 993dc6f..98463bb 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -790,14 +790,13 @@ int dax_writeback_mapping_range(struct address_space *mapping,
 EXPORT_SYMBOL_GPL(dax_writeback_mapping_range);
 
 static int dax_insert_mapping(struct address_space *mapping,
-			struct buffer_head *bh, void **entryp,
-			struct vm_area_struct *vma, struct vm_fault *vmf)
+		struct block_device *bdev, sector_t sector, size_t size,
+		void **entryp, struct vm_area_struct *vma, struct vm_fault *vmf)
 {
 	unsigned long vaddr = (unsigned long)vmf->virtual_address;
-	struct block_device *bdev = bh->b_bdev;
 	struct blk_dax_ctl dax = {
-		.sector = to_sector(bh, mapping->host),
-		.size = bh->b_size,
+		.sector = sector,
+		.size = size,
 	};
 	void *ret;
 	void *entry = *entryp;
@@ -898,7 +897,8 @@ int dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
 
 	/* Filesystem should not return unwritten buffers to us! */
 	WARN_ON_ONCE(buffer_unwritten(&bh) || buffer_new(&bh));
-	error = dax_insert_mapping(mapping, &bh, &entry, vma, vmf);
+	error = dax_insert_mapping(mapping, bh.b_bdev, to_sector(&bh, inode),
+			bh.b_size, &entry, vma, vmf);
  unlock_entry:
 	put_locked_mapping_entry(mapping, vmf->pgoff, entry);
  out:
-- 
2.1.4


^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [PATCH 04/12] dax: don't pass buffer_head to copy_user_dax
  2016-09-16 11:27 iomap based DAX path V3 Christoph Hellwig
@ 2016-09-16 11:27     ` Christoph Hellwig
  2016-09-16 11:27 ` [PATCH 05/12] dax: provide an iomap based dax read/write path Christoph Hellwig
                       ` (2 subsequent siblings)
  3 siblings, 0 replies; 27+ messages in thread
From: Christoph Hellwig @ 2016-09-16 11:27 UTC (permalink / raw)
  To: linux-xfs-u79uwXL29TY76Z2rM5mHXA,
	linux-fsdevel-u79uwXL29TY76Z2rM5mHXA,
	linux-nvdimm-y27Ovi1pjclAfugRpC6u6w

This way we can use this helper for the iomap based DAX implementation
as well.

Signed-off-by: Christoph Hellwig <hch-jcswGhMUV9g@public.gmane.org>
Reviewed-by: Ross Zwisler <ross.zwisler-VuQAYsv1563Yd54FQh9/CA@public.gmane.org>
---
 fs/dax.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/fs/dax.c b/fs/dax.c
index 98463bb..84343ce 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -580,14 +580,13 @@ static int dax_load_hole(struct address_space *mapping, void *entry,
 	return VM_FAULT_LOCKED;
 }
 
-static int copy_user_bh(struct page *to, struct inode *inode,
-		struct buffer_head *bh, unsigned long vaddr)
+static int copy_user_dax(struct block_device *bdev, sector_t sector, size_t size,
+		struct page *to, unsigned long vaddr)
 {
 	struct blk_dax_ctl dax = {
-		.sector = to_sector(bh, inode),
-		.size = bh->b_size,
+		.sector = sector,
+		.size = size,
 	};
-	struct block_device *bdev = bh->b_bdev;
 	void *vto;
 
 	if (dax_map_atomic(bdev, &dax) < 0)
@@ -867,7 +866,8 @@ int dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
 	if (vmf->cow_page) {
 		struct page *new_page = vmf->cow_page;
 		if (buffer_written(&bh))
-			error = copy_user_bh(new_page, inode, &bh, vaddr);
+			error = copy_user_dax(bh.b_bdev, to_sector(&bh, inode),
+					bh.b_size, new_page, vaddr);
 		else
 			clear_user_highpage(new_page, vaddr);
 		if (error)
-- 
2.1.4

^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [PATCH 04/12] dax: don't pass buffer_head to copy_user_dax
@ 2016-09-16 11:27     ` Christoph Hellwig
  0 siblings, 0 replies; 27+ messages in thread
From: Christoph Hellwig @ 2016-09-16 11:27 UTC (permalink / raw)
  To: linux-xfs, linux-fsdevel, linux-nvdimm; +Cc: ross.zwisler

This way we can use this helper for the iomap based DAX implementation
as well.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Ross Zwisler <ross.zwisler@linux.intel.com>
---
 fs/dax.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/fs/dax.c b/fs/dax.c
index 98463bb..84343ce 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -580,14 +580,13 @@ static int dax_load_hole(struct address_space *mapping, void *entry,
 	return VM_FAULT_LOCKED;
 }
 
-static int copy_user_bh(struct page *to, struct inode *inode,
-		struct buffer_head *bh, unsigned long vaddr)
+static int copy_user_dax(struct block_device *bdev, sector_t sector, size_t size,
+		struct page *to, unsigned long vaddr)
 {
 	struct blk_dax_ctl dax = {
-		.sector = to_sector(bh, inode),
-		.size = bh->b_size,
+		.sector = sector,
+		.size = size,
 	};
-	struct block_device *bdev = bh->b_bdev;
 	void *vto;
 
 	if (dax_map_atomic(bdev, &dax) < 0)
@@ -867,7 +866,8 @@ int dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
 	if (vmf->cow_page) {
 		struct page *new_page = vmf->cow_page;
 		if (buffer_written(&bh))
-			error = copy_user_bh(new_page, inode, &bh, vaddr);
+			error = copy_user_dax(bh.b_bdev, to_sector(&bh, inode),
+					bh.b_size, new_page, vaddr);
 		else
 			clear_user_highpage(new_page, vaddr);
 		if (error)
-- 
2.1.4


^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [PATCH 05/12] dax: provide an iomap based dax read/write path
  2016-09-16 11:27 iomap based DAX path V3 Christoph Hellwig
       [not found] ` <1474025234-13804-1-git-send-email-hch-jcswGhMUV9g@public.gmane.org>
@ 2016-09-16 11:27 ` Christoph Hellwig
  2016-09-16 11:27 ` [PATCH 08/12] xfs: take the ilock shared if possible in xfs_file_iomap_begin Christoph Hellwig
  2016-09-16 11:27 ` [PATCH 09/12] xfs: refactor xfs_setfilesize Christoph Hellwig
  3 siblings, 0 replies; 27+ messages in thread
From: Christoph Hellwig @ 2016-09-16 11:27 UTC (permalink / raw)
  To: linux-xfs, linux-fsdevel, linux-nvdimm; +Cc: ross.zwisler

This is a much simpler implementation of the DAX read/write path that makes
use of the iomap infrastructure.  It does not try to mirror the direct I/O
calling conventions and thus doesn't have to deal with i_dio_count or the
end_io handler, but instead leaves locking and filesystem-specific I/O
completion to the caller.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Ross Zwisler <ross.zwisler@linux.intel.com>
---
 fs/dax.c            | 114 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/dax.h |   4 ++
 2 files changed, 118 insertions(+)

diff --git a/fs/dax.c b/fs/dax.c
index 84343ce..1f9f2d4 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -31,6 +31,8 @@
 #include <linux/vmstat.h>
 #include <linux/pfn_t.h>
 #include <linux/sizes.h>
+#include <linux/iomap.h>
+#include "internal.h"
 
 /*
  * We use lowest available bit in exceptional entry for locking, other two
@@ -1241,3 +1243,115 @@ int dax_truncate_page(struct inode *inode, loff_t from, get_block_t get_block)
 	return dax_zero_page_range(inode, from, length, get_block);
 }
 EXPORT_SYMBOL_GPL(dax_truncate_page);
+
+#ifdef CONFIG_FS_IOMAP
+static loff_t
+iomap_dax_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
+		struct iomap *iomap)
+{
+	struct iov_iter *iter = data;
+	loff_t end = pos + length, done = 0;
+	ssize_t ret = 0;
+
+	if (iov_iter_rw(iter) == READ) {
+		end = min(end, i_size_read(inode));
+		if (pos >= end)
+			return 0;
+
+		if (iomap->type == IOMAP_HOLE || iomap->type == IOMAP_UNWRITTEN)
+			return iov_iter_zero(min(length, end - pos), iter);
+	}
+
+	if (WARN_ON_ONCE(iomap->type != IOMAP_MAPPED))
+		return -EIO;
+
+	while (pos < end) {
+		unsigned offset = pos & (PAGE_SIZE - 1);
+		struct blk_dax_ctl dax = { 0 };
+		ssize_t map_len;
+
+		dax.sector = iomap->blkno +
+			(((pos & PAGE_MASK) - iomap->offset) >> 9);
+		dax.size = (length + offset + PAGE_SIZE - 1) & PAGE_MASK;
+		map_len = dax_map_atomic(iomap->bdev, &dax);
+		if (map_len < 0) {
+			ret = map_len;
+			break;
+		}
+
+		dax.addr += offset;
+		map_len -= offset;
+		if (map_len > end - pos)
+			map_len = end - pos;
+
+		if (iov_iter_rw(iter) == WRITE)
+			map_len = copy_from_iter_pmem(dax.addr, map_len, iter);
+		else
+			map_len = copy_to_iter(dax.addr, map_len, iter);
+		dax_unmap_atomic(iomap->bdev, &dax);
+		if (map_len <= 0) {
+			ret = map_len ? map_len : -EFAULT;
+			break;
+		}
+
+		pos += map_len;
+		length -= map_len;
+		done += map_len;
+	}
+
+	return done ? done : ret;
+}
+
+/**
+ * iomap_dax_rw - Perform I/O to a DAX file
+ * @iocb:	The control block for this I/O
+ * @iter:	The addresses to do I/O from or to
+ * @ops:	iomap ops passed from the file system
+ *
+ * This function performs read and write operations to directly mapped
+ * persistent memory.  The callers needs to take care of read/write exclusion
+ * and evicting any page cache pages in the region under I/O.
+ */
+ssize_t
+iomap_dax_rw(struct kiocb *iocb, struct iov_iter *iter,
+		struct iomap_ops *ops)
+{
+	struct address_space *mapping = iocb->ki_filp->f_mapping;
+	struct inode *inode = mapping->host;
+	loff_t pos = iocb->ki_pos, ret = 0, done = 0;
+	unsigned flags = 0;
+
+	if (iov_iter_rw(iter) == WRITE)
+		flags |= IOMAP_WRITE;
+
+	/*
+	 * Yes, even DAX files can have page cache attached to them:  A zeroed
+	 * page is inserted into the pagecache when we have to serve a write
+	 * fault on a hole.  It should never be dirtied and can simply be
+	 * dropped from the pagecache once we get real data for the page.
+	 *
+	 * XXX: This is racy against mmap, and there's nothing we can do about
+	 * it. We'll eventually need to shift this down even further so that
+	 * we can check if we allocated blocks over a hole first.
+	 */
+	if (mapping->nrpages) {
+		ret = invalidate_inode_pages2_range(mapping,
+				pos >> PAGE_SHIFT,
+				(pos + iov_iter_count(iter) - 1) >> PAGE_SHIFT);
+		WARN_ON_ONCE(ret);
+	}
+
+	while (iov_iter_count(iter)) {
+		ret = iomap_apply(inode, pos, iov_iter_count(iter), flags, ops,
+				iter, iomap_dax_actor);
+		if (ret <= 0)
+			break;
+		pos += ret;
+		done += ret;
+	}
+
+	iocb->ki_pos += done;
+	return done ? done : ret;
+}
+EXPORT_SYMBOL_GPL(iomap_dax_rw);
+#endif /* CONFIG_FS_IOMAP */
diff --git a/include/linux/dax.h b/include/linux/dax.h
index 9c6dc77..a0595b4 100644
--- a/include/linux/dax.h
+++ b/include/linux/dax.h
@@ -6,9 +6,13 @@
 #include <linux/radix-tree.h>
 #include <asm/pgtable.h>
 
+struct iomap_ops;
+
 /* We use lowest available exceptional entry bit for locking */
 #define RADIX_DAX_ENTRY_LOCK (1 << RADIX_TREE_EXCEPTIONAL_SHIFT)
 
+ssize_t iomap_dax_rw(struct kiocb *iocb, struct iov_iter *iter,
+		struct iomap_ops *ops);
 ssize_t dax_do_io(struct kiocb *, struct inode *, struct iov_iter *,
 		  get_block_t, dio_iodone_t, int flags);
 int dax_zero_page_range(struct inode *, loff_t from, unsigned len, get_block_t);
-- 
2.1.4


^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [PATCH 06/12] dax: provide an iomap based fault handler
  2016-09-16 11:27 iomap based DAX path V3 Christoph Hellwig
@ 2016-09-16 11:27     ` Christoph Hellwig
  2016-09-16 11:27 ` [PATCH 05/12] dax: provide an iomap based dax read/write path Christoph Hellwig
                       ` (2 subsequent siblings)
  3 siblings, 0 replies; 27+ messages in thread
From: Christoph Hellwig @ 2016-09-16 11:27 UTC (permalink / raw)
  To: linux-xfs-u79uwXL29TY76Z2rM5mHXA,
	linux-fsdevel-u79uwXL29TY76Z2rM5mHXA,
	linux-nvdimm-y27Ovi1pjclAfugRpC6u6w

Very similar to the existing dax_fault function, but instead of using
the get_block callback we rely on the iomap_ops vector from iomap.c.
That also avoids having to do two calls into the file system for write
faults.

Signed-off-by: Christoph Hellwig <hch-jcswGhMUV9g@public.gmane.org>
Reviewed-by: Ross Zwisler <ross.zwisler-VuQAYsv1563Yd54FQh9/CA@public.gmane.org>
---
 fs/dax.c            | 114 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/dax.h |   2 +
 2 files changed, 116 insertions(+)

diff --git a/fs/dax.c b/fs/dax.c
index 1f9f2d4..cc025f8 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -1354,4 +1354,118 @@ iomap_dax_rw(struct kiocb *iocb, struct iov_iter *iter,
 	return done ? done : ret;
 }
 EXPORT_SYMBOL_GPL(iomap_dax_rw);
+
+/**
+ * iomap_dax_fault - handle a page fault on a DAX file
+ * @vma: The virtual memory area where the fault occurred
+ * @vmf: The description of the fault
+ * @ops: iomap ops passed from the file system
+ *
+ * When a page fault occurs, filesystems may call this helper in their fault
+ * or mkwrite handler for DAX files. Assumes the caller has done all the
+ * necessary locking for the page fault to proceed successfully.
+ */
+int iomap_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
+			struct iomap_ops *ops)
+{
+	struct address_space *mapping = vma->vm_file->f_mapping;
+	struct inode *inode = mapping->host;
+	unsigned long vaddr = (unsigned long)vmf->virtual_address;
+	loff_t pos = (loff_t)vmf->pgoff << PAGE_SHIFT;
+	sector_t sector;
+	struct iomap iomap = { 0 };
+	unsigned flags = 0;
+	int error, major = 0;
+	void *entry;
+
+	/*
+	 * Check whether offset isn't beyond end of file now. Caller is supposed
+	 * to hold locks serializing us with truncate / punch hole so this is
+	 * a reliable test.
+	 */
+	if (pos >= i_size_read(inode))
+		return VM_FAULT_SIGBUS;
+
+	entry = grab_mapping_entry(mapping, vmf->pgoff);
+	if (IS_ERR(entry)) {
+		error = PTR_ERR(entry);
+		goto out;
+	}
+
+	if ((vmf->flags & FAULT_FLAG_WRITE) && !vmf->cow_page)
+		flags |= IOMAP_WRITE;
+
+	/*
+	 * Note that we don't bother to use iomap_apply here: DAX required
+	 * the file system block size to be equal the page size, which means
+	 * that we never have to deal with more than a single extent here.
+	 */
+	error = ops->iomap_begin(inode, pos, PAGE_SIZE, flags, &iomap);
+	if (error)
+		goto unlock_entry;
+	if (WARN_ON_ONCE(iomap.offset + iomap.length < pos + PAGE_SIZE)) {
+		error = -EIO;		/* fs corruption? */
+		goto unlock_entry;
+	}
+
+	sector = iomap.blkno + (((pos & PAGE_MASK) - iomap.offset) >> 9);
+
+	if (vmf->cow_page) {
+		switch (iomap.type) {
+		case IOMAP_HOLE:
+		case IOMAP_UNWRITTEN:
+			clear_user_highpage(vmf->cow_page, vaddr);
+			break;
+		case IOMAP_MAPPED:
+			error = copy_user_dax(iomap.bdev, sector, PAGE_SIZE,
+					vmf->cow_page, vaddr);
+			break;
+		default:
+			WARN_ON_ONCE(1);
+			error = -EIO;
+			break;
+		}
+
+		if (error)
+			goto unlock_entry;
+		if (!radix_tree_exceptional_entry(entry)) {
+			vmf->page = entry;
+			return VM_FAULT_LOCKED;
+		}
+		vmf->entry = entry;
+		return VM_FAULT_DAX_LOCKED;
+	}
+
+	switch (iomap.type) {
+	case IOMAP_MAPPED:
+		if (iomap.flags & IOMAP_F_NEW) {
+			count_vm_event(PGMAJFAULT);
+			mem_cgroup_count_vm_event(vma->vm_mm, PGMAJFAULT);
+			major = VM_FAULT_MAJOR;
+		}
+		error = dax_insert_mapping(mapping, iomap.bdev, sector,
+				PAGE_SIZE, &entry, vma, vmf);
+		break;
+	case IOMAP_UNWRITTEN:
+	case IOMAP_HOLE:
+		if (!(vmf->flags & FAULT_FLAG_WRITE))
+			return dax_load_hole(mapping, entry, vmf);
+		/*FALLTHRU*/
+	default:
+		WARN_ON_ONCE(1);
+		error = -EIO;
+		break;
+	}
+
+ unlock_entry:
+	put_locked_mapping_entry(mapping, vmf->pgoff, entry);
+ out:
+	if (error == -ENOMEM)
+		return VM_FAULT_OOM | major;
+	/* -EBUSY is fine, somebody else faulted on the same PTE */
+	if (error < 0 && error != -EBUSY)
+		return VM_FAULT_SIGBUS | major;
+	return VM_FAULT_NOPAGE | major;
+}
+EXPORT_SYMBOL_GPL(iomap_dax_fault);
 #endif /* CONFIG_FS_IOMAP */
diff --git a/include/linux/dax.h b/include/linux/dax.h
index a0595b4..add6c4b 100644
--- a/include/linux/dax.h
+++ b/include/linux/dax.h
@@ -17,6 +17,8 @@ ssize_t dax_do_io(struct kiocb *, struct inode *, struct iov_iter *,
 		  get_block_t, dio_iodone_t, int flags);
 int dax_zero_page_range(struct inode *, loff_t from, unsigned len, get_block_t);
 int dax_truncate_page(struct inode *, loff_t from, get_block_t);
+int iomap_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
+			struct iomap_ops *ops);
 int dax_fault(struct vm_area_struct *, struct vm_fault *, get_block_t);
 int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index);
 void dax_wake_mapping_entry_waiter(struct address_space *mapping,
-- 
2.1.4

^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [PATCH 06/12] dax: provide an iomap based fault handler
@ 2016-09-16 11:27     ` Christoph Hellwig
  0 siblings, 0 replies; 27+ messages in thread
From: Christoph Hellwig @ 2016-09-16 11:27 UTC (permalink / raw)
  To: linux-xfs, linux-fsdevel, linux-nvdimm; +Cc: ross.zwisler

Very similar to the existing dax_fault function, but instead of using
the get_block callback we rely on the iomap_ops vector from iomap.c.
That also avoids having to do two calls into the file system for write
faults.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Ross Zwisler <ross.zwisler@linux.intel.com>
---
 fs/dax.c            | 114 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/dax.h |   2 +
 2 files changed, 116 insertions(+)

diff --git a/fs/dax.c b/fs/dax.c
index 1f9f2d4..cc025f8 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -1354,4 +1354,118 @@ iomap_dax_rw(struct kiocb *iocb, struct iov_iter *iter,
 	return done ? done : ret;
 }
 EXPORT_SYMBOL_GPL(iomap_dax_rw);
+
+/**
+ * iomap_dax_fault - handle a page fault on a DAX file
+ * @vma: The virtual memory area where the fault occurred
+ * @vmf: The description of the fault
+ * @ops: iomap ops passed from the file system
+ *
+ * When a page fault occurs, filesystems may call this helper in their fault
+ * or mkwrite handler for DAX files. Assumes the caller has done all the
+ * necessary locking for the page fault to proceed successfully.
+ */
+int iomap_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
+			struct iomap_ops *ops)
+{
+	struct address_space *mapping = vma->vm_file->f_mapping;
+	struct inode *inode = mapping->host;
+	unsigned long vaddr = (unsigned long)vmf->virtual_address;
+	loff_t pos = (loff_t)vmf->pgoff << PAGE_SHIFT;
+	sector_t sector;
+	struct iomap iomap = { 0 };
+	unsigned flags = 0;
+	int error, major = 0;
+	void *entry;
+
+	/*
+	 * Check whether offset isn't beyond end of file now. Caller is supposed
+	 * to hold locks serializing us with truncate / punch hole so this is
+	 * a reliable test.
+	 */
+	if (pos >= i_size_read(inode))
+		return VM_FAULT_SIGBUS;
+
+	entry = grab_mapping_entry(mapping, vmf->pgoff);
+	if (IS_ERR(entry)) {
+		error = PTR_ERR(entry);
+		goto out;
+	}
+
+	if ((vmf->flags & FAULT_FLAG_WRITE) && !vmf->cow_page)
+		flags |= IOMAP_WRITE;
+
+	/*
+	 * Note that we don't bother to use iomap_apply here: DAX required
+	 * the file system block size to be equal the page size, which means
+	 * that we never have to deal with more than a single extent here.
+	 */
+	error = ops->iomap_begin(inode, pos, PAGE_SIZE, flags, &iomap);
+	if (error)
+		goto unlock_entry;
+	if (WARN_ON_ONCE(iomap.offset + iomap.length < pos + PAGE_SIZE)) {
+		error = -EIO;		/* fs corruption? */
+		goto unlock_entry;
+	}
+
+	sector = iomap.blkno + (((pos & PAGE_MASK) - iomap.offset) >> 9);
+
+	if (vmf->cow_page) {
+		switch (iomap.type) {
+		case IOMAP_HOLE:
+		case IOMAP_UNWRITTEN:
+			clear_user_highpage(vmf->cow_page, vaddr);
+			break;
+		case IOMAP_MAPPED:
+			error = copy_user_dax(iomap.bdev, sector, PAGE_SIZE,
+					vmf->cow_page, vaddr);
+			break;
+		default:
+			WARN_ON_ONCE(1);
+			error = -EIO;
+			break;
+		}
+
+		if (error)
+			goto unlock_entry;
+		if (!radix_tree_exceptional_entry(entry)) {
+			vmf->page = entry;
+			return VM_FAULT_LOCKED;
+		}
+		vmf->entry = entry;
+		return VM_FAULT_DAX_LOCKED;
+	}
+
+	switch (iomap.type) {
+	case IOMAP_MAPPED:
+		if (iomap.flags & IOMAP_F_NEW) {
+			count_vm_event(PGMAJFAULT);
+			mem_cgroup_count_vm_event(vma->vm_mm, PGMAJFAULT);
+			major = VM_FAULT_MAJOR;
+		}
+		error = dax_insert_mapping(mapping, iomap.bdev, sector,
+				PAGE_SIZE, &entry, vma, vmf);
+		break;
+	case IOMAP_UNWRITTEN:
+	case IOMAP_HOLE:
+		if (!(vmf->flags & FAULT_FLAG_WRITE))
+			return dax_load_hole(mapping, entry, vmf);
+		/*FALLTHRU*/
+	default:
+		WARN_ON_ONCE(1);
+		error = -EIO;
+		break;
+	}
+
+ unlock_entry:
+	put_locked_mapping_entry(mapping, vmf->pgoff, entry);
+ out:
+	if (error == -ENOMEM)
+		return VM_FAULT_OOM | major;
+	/* -EBUSY is fine, somebody else faulted on the same PTE */
+	if (error < 0 && error != -EBUSY)
+		return VM_FAULT_SIGBUS | major;
+	return VM_FAULT_NOPAGE | major;
+}
+EXPORT_SYMBOL_GPL(iomap_dax_fault);
 #endif /* CONFIG_FS_IOMAP */
diff --git a/include/linux/dax.h b/include/linux/dax.h
index a0595b4..add6c4b 100644
--- a/include/linux/dax.h
+++ b/include/linux/dax.h
@@ -17,6 +17,8 @@ ssize_t dax_do_io(struct kiocb *, struct inode *, struct iov_iter *,
 		  get_block_t, dio_iodone_t, int flags);
 int dax_zero_page_range(struct inode *, loff_t from, unsigned len, get_block_t);
 int dax_truncate_page(struct inode *, loff_t from, get_block_t);
+int iomap_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
+			struct iomap_ops *ops);
 int dax_fault(struct vm_area_struct *, struct vm_fault *, get_block_t);
 int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index);
 void dax_wake_mapping_entry_waiter(struct address_space *mapping,
-- 
2.1.4


^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [PATCH 07/12] xfs: fix locking for DAX writes
  2016-09-16 11:27 iomap based DAX path V3 Christoph Hellwig
@ 2016-09-16 11:27     ` Christoph Hellwig
  2016-09-16 11:27 ` [PATCH 05/12] dax: provide an iomap based dax read/write path Christoph Hellwig
                       ` (2 subsequent siblings)
  3 siblings, 0 replies; 27+ messages in thread
From: Christoph Hellwig @ 2016-09-16 11:27 UTC (permalink / raw)
  To: linux-xfs-u79uwXL29TY76Z2rM5mHXA,
	linux-fsdevel-u79uwXL29TY76Z2rM5mHXA,
	linux-nvdimm-y27Ovi1pjclAfugRpC6u6w

So far DAX writes inherited the locking from direct I/O writes, but the direct
I/O model of using shared locks for writes is actually wrong for DAX.  For
direct I/O we're out of any standards and don't have to provide the Posix
required exclusion between writers, but for DAX which gets transparently
enable on applications without any knowledge of it we can't simply drop the
requirement.  Even worse this only happens for aligned writes and thus
doesn't show up for many typical use cases.

Signed-off-by: Christoph Hellwig <hch-jcswGhMUV9g@public.gmane.org>
---
 fs/xfs/xfs_file.c | 20 +-------------------
 1 file changed, 1 insertion(+), 19 deletions(-)

diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index e612a02..62649cc 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -714,24 +714,11 @@ xfs_file_dax_write(
 	struct address_space	*mapping = iocb->ki_filp->f_mapping;
 	struct inode		*inode = mapping->host;
 	struct xfs_inode	*ip = XFS_I(inode);
-	struct xfs_mount	*mp = ip->i_mount;
 	ssize_t			ret = 0;
-	int			unaligned_io = 0;
-	int			iolock;
+	int			iolock = XFS_IOLOCK_EXCL;
 	struct iov_iter		data;
 
-	/* "unaligned" here means not aligned to a filesystem block */
-	if ((iocb->ki_pos & mp->m_blockmask) ||
-	    ((iocb->ki_pos + iov_iter_count(from)) & mp->m_blockmask)) {
-		unaligned_io = 1;
-		iolock = XFS_IOLOCK_EXCL;
-	} else if (mapping->nrpages) {
-		iolock = XFS_IOLOCK_EXCL;
-	} else {
-		iolock = XFS_IOLOCK_SHARED;
-	}
 	xfs_rw_ilock(ip, iolock);
-
 	ret = xfs_file_aio_write_checks(iocb, from, &iolock);
 	if (ret)
 		goto out;
@@ -758,11 +745,6 @@ xfs_file_dax_write(
 		WARN_ON_ONCE(ret);
 	}
 
-	if (iolock == XFS_IOLOCK_EXCL && !unaligned_io) {
-		xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL);
-		iolock = XFS_IOLOCK_SHARED;
-	}
-
 	trace_xfs_file_dax_write(ip, iov_iter_count(from), iocb->ki_pos);
 
 	data = *from;
-- 
2.1.4

^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [PATCH 07/12] xfs: fix locking for DAX writes
@ 2016-09-16 11:27     ` Christoph Hellwig
  0 siblings, 0 replies; 27+ messages in thread
From: Christoph Hellwig @ 2016-09-16 11:27 UTC (permalink / raw)
  To: linux-xfs, linux-fsdevel, linux-nvdimm; +Cc: ross.zwisler

So far DAX writes inherited the locking from direct I/O writes, but the direct
I/O model of using shared locks for writes is actually wrong for DAX.  For
direct I/O we're out of any standards and don't have to provide the Posix
required exclusion between writers, but for DAX which gets transparently
enable on applications without any knowledge of it we can't simply drop the
requirement.  Even worse this only happens for aligned writes and thus
doesn't show up for many typical use cases.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/xfs/xfs_file.c | 20 +-------------------
 1 file changed, 1 insertion(+), 19 deletions(-)

diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index e612a02..62649cc 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -714,24 +714,11 @@ xfs_file_dax_write(
 	struct address_space	*mapping = iocb->ki_filp->f_mapping;
 	struct inode		*inode = mapping->host;
 	struct xfs_inode	*ip = XFS_I(inode);
-	struct xfs_mount	*mp = ip->i_mount;
 	ssize_t			ret = 0;
-	int			unaligned_io = 0;
-	int			iolock;
+	int			iolock = XFS_IOLOCK_EXCL;
 	struct iov_iter		data;
 
-	/* "unaligned" here means not aligned to a filesystem block */
-	if ((iocb->ki_pos & mp->m_blockmask) ||
-	    ((iocb->ki_pos + iov_iter_count(from)) & mp->m_blockmask)) {
-		unaligned_io = 1;
-		iolock = XFS_IOLOCK_EXCL;
-	} else if (mapping->nrpages) {
-		iolock = XFS_IOLOCK_EXCL;
-	} else {
-		iolock = XFS_IOLOCK_SHARED;
-	}
 	xfs_rw_ilock(ip, iolock);
-
 	ret = xfs_file_aio_write_checks(iocb, from, &iolock);
 	if (ret)
 		goto out;
@@ -758,11 +745,6 @@ xfs_file_dax_write(
 		WARN_ON_ONCE(ret);
 	}
 
-	if (iolock == XFS_IOLOCK_EXCL && !unaligned_io) {
-		xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL);
-		iolock = XFS_IOLOCK_SHARED;
-	}
-
 	trace_xfs_file_dax_write(ip, iov_iter_count(from), iocb->ki_pos);
 
 	data = *from;
-- 
2.1.4


^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [PATCH 08/12] xfs: take the ilock shared if possible in xfs_file_iomap_begin
  2016-09-16 11:27 iomap based DAX path V3 Christoph Hellwig
       [not found] ` <1474025234-13804-1-git-send-email-hch-jcswGhMUV9g@public.gmane.org>
  2016-09-16 11:27 ` [PATCH 05/12] dax: provide an iomap based dax read/write path Christoph Hellwig
@ 2016-09-16 11:27 ` Christoph Hellwig
  2016-09-16 11:27 ` [PATCH 09/12] xfs: refactor xfs_setfilesize Christoph Hellwig
  3 siblings, 0 replies; 27+ messages in thread
From: Christoph Hellwig @ 2016-09-16 11:27 UTC (permalink / raw)
  To: linux-xfs, linux-fsdevel, linux-nvdimm; +Cc: ross.zwisler

We always just read the extent first, and will later lock exlusively
after first dropping the lock in case we actually allocate blocks.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/xfs/xfs_iomap.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 5d06a2d..c3cc175 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -955,6 +955,7 @@ xfs_file_iomap_begin(
 	struct xfs_bmbt_irec	imap;
 	xfs_fileoff_t		offset_fsb, end_fsb;
 	int			nimaps = 1, error = 0;
+	unsigned		lockmode;
 
 	if (XFS_FORCED_SHUTDOWN(mp))
 		return -EIO;
@@ -964,7 +965,7 @@ xfs_file_iomap_begin(
 				iomap);
 	}
 
-	xfs_ilock(ip, XFS_ILOCK_EXCL);
+	lockmode = xfs_ilock_data_map_shared(ip);
 
 	ASSERT(offset <= mp->m_super->s_maxbytes);
 	if ((xfs_fsize_t)offset + length > mp->m_super->s_maxbytes)
@@ -975,7 +976,7 @@ xfs_file_iomap_begin(
 	error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, &imap,
 			       &nimaps, XFS_BMAPI_ENTIRE);
 	if (error) {
-		xfs_iunlock(ip, XFS_ILOCK_EXCL);
+		xfs_iunlock(ip, lockmode);
 		return error;
 	}
 
@@ -995,7 +996,8 @@ xfs_file_iomap_begin(
 		 * xfs_iomap_write_direct() expects the shared lock. It
 		 * is unlocked on return.
 		 */
-		xfs_ilock_demote(ip, XFS_ILOCK_EXCL);
+		if (lockmode == XFS_ILOCK_EXCL)
+			xfs_ilock_demote(ip, lockmode);
 		error = xfs_iomap_write_direct(ip, offset, length, &imap,
 				nimaps);
 		if (error)
@@ -1006,7 +1008,7 @@ xfs_file_iomap_begin(
 	} else {
 		ASSERT(nimaps);
 
-		xfs_iunlock(ip, XFS_ILOCK_EXCL);
+		xfs_iunlock(ip, lockmode);
 		trace_xfs_iomap_found(ip, offset, length, 0, &imap);
 	}
 
-- 
2.1.4


^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [PATCH 09/12] xfs: refactor xfs_setfilesize
  2016-09-16 11:27 iomap based DAX path V3 Christoph Hellwig
                   ` (2 preceding siblings ...)
  2016-09-16 11:27 ` [PATCH 08/12] xfs: take the ilock shared if possible in xfs_file_iomap_begin Christoph Hellwig
@ 2016-09-16 11:27 ` Christoph Hellwig
  3 siblings, 0 replies; 27+ messages in thread
From: Christoph Hellwig @ 2016-09-16 11:27 UTC (permalink / raw)
  To: linux-xfs, linux-fsdevel, linux-nvdimm; +Cc: ross.zwisler

Rename the current function to __xfs_setfilesize and add a non-static
wrapper that also takes care of creating the transaction.  This new
helper will be used by the new iomap-based DAX path.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/xfs/xfs_aops.c | 31 +++++++++++++++++++++----------
 fs/xfs/xfs_aops.h |  1 +
 2 files changed, 22 insertions(+), 10 deletions(-)

diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 7575cfc..4a28fa9 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -200,7 +200,7 @@ xfs_setfilesize_trans_alloc(
  * Update on-disk file size now that data has been written to disk.
  */
 STATIC int
-xfs_setfilesize(
+__xfs_setfilesize(
 	struct xfs_inode	*ip,
 	struct xfs_trans	*tp,
 	xfs_off_t		offset,
@@ -225,6 +225,23 @@ xfs_setfilesize(
 	return xfs_trans_commit(tp);
 }
 
+int
+xfs_setfilesize(
+	struct xfs_inode	*ip,
+	xfs_off_t		offset,
+	size_t			size)
+{
+	struct xfs_mount	*mp = ip->i_mount;
+	struct xfs_trans	*tp;
+	int			error;
+
+	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_fsyncts, 0, 0, 0, &tp);
+	if (error)
+		return error;
+
+	return __xfs_setfilesize(ip, tp, offset, size);
+}
+
 STATIC int
 xfs_setfilesize_ioend(
 	struct xfs_ioend	*ioend,
@@ -247,7 +264,7 @@ xfs_setfilesize_ioend(
 		return error;
 	}
 
-	return xfs_setfilesize(ip, tp, ioend->io_offset, ioend->io_size);
+	return __xfs_setfilesize(ip, tp, ioend->io_offset, ioend->io_size);
 }
 
 /*
@@ -1336,13 +1353,12 @@ xfs_end_io_direct_write(
 {
 	struct inode		*inode = file_inode(iocb->ki_filp);
 	struct xfs_inode	*ip = XFS_I(inode);
-	struct xfs_mount	*mp = ip->i_mount;
 	uintptr_t		flags = (uintptr_t)private;
 	int			error = 0;
 
 	trace_xfs_end_io_direct_write(ip, offset, size);
 
-	if (XFS_FORCED_SHUTDOWN(mp))
+	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
 		return -EIO;
 
 	if (size <= 0)
@@ -1380,14 +1396,9 @@ xfs_end_io_direct_write(
 
 		error = xfs_iomap_write_unwritten(ip, offset, size);
 	} else if (flags & XFS_DIO_FLAG_APPEND) {
-		struct xfs_trans *tp;
-
 		trace_xfs_end_io_direct_write_append(ip, offset, size);
 
-		error = xfs_trans_alloc(mp, &M_RES(mp)->tr_fsyncts, 0, 0, 0,
-				&tp);
-		if (!error)
-			error = xfs_setfilesize(ip, tp, offset, size);
+		error = xfs_setfilesize(ip, offset, size);
 	}
 
 	return error;
diff --git a/fs/xfs/xfs_aops.h b/fs/xfs/xfs_aops.h
index bf2d9a1..1950e3b 100644
--- a/fs/xfs/xfs_aops.h
+++ b/fs/xfs/xfs_aops.h
@@ -62,6 +62,7 @@ int	xfs_get_blocks_dax_fault(struct inode *inode, sector_t offset,
 
 int	xfs_end_io_direct_write(struct kiocb *iocb, loff_t offset,
 		ssize_t size, void *private);
+int	xfs_setfilesize(struct xfs_inode *ip, xfs_off_t offset, size_t size);
 
 extern void xfs_count_page_state(struct page *, int *, int *);
 extern struct block_device *xfs_find_bdev_for_inode(struct inode *);
-- 
2.1.4


^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [PATCH 10/12] xfs: use iomap to implement DAX
  2016-09-16 11:27 iomap based DAX path V3 Christoph Hellwig
@ 2016-09-16 11:27     ` Christoph Hellwig
  2016-09-16 11:27 ` [PATCH 05/12] dax: provide an iomap based dax read/write path Christoph Hellwig
                       ` (2 subsequent siblings)
  3 siblings, 0 replies; 27+ messages in thread
From: Christoph Hellwig @ 2016-09-16 11:27 UTC (permalink / raw)
  To: linux-xfs-u79uwXL29TY76Z2rM5mHXA,
	linux-fsdevel-u79uwXL29TY76Z2rM5mHXA,
	linux-nvdimm-y27Ovi1pjclAfugRpC6u6w

Another users of buffer_heads bytes the dust.

Signed-off-by: Christoph Hellwig <hch-jcswGhMUV9g@public.gmane.org>
Reviewed-by: Ross Zwisler <ross.zwisler-VuQAYsv1563Yd54FQh9/CA@public.gmane.org>
---
 fs/xfs/xfs_file.c  | 61 +++++++++++++++---------------------------------------
 fs/xfs/xfs_iomap.c | 11 ++++++----
 2 files changed, 24 insertions(+), 48 deletions(-)

diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 62649cc..f99d7fa 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -332,10 +332,7 @@ xfs_file_dax_read(
 	struct kiocb		*iocb,
 	struct iov_iter		*to)
 {
-	struct address_space	*mapping = iocb->ki_filp->f_mapping;
-	struct inode		*inode = mapping->host;
-	struct xfs_inode	*ip = XFS_I(inode);
-	struct iov_iter		data = *to;
+	struct xfs_inode	*ip = XFS_I(iocb->ki_filp->f_mapping->host);
 	size_t			count = iov_iter_count(to);
 	ssize_t			ret = 0;
 
@@ -345,11 +342,7 @@ xfs_file_dax_read(
 		return 0; /* skip atime */
 
 	xfs_rw_ilock(ip, XFS_IOLOCK_SHARED);
-	ret = dax_do_io(iocb, inode, &data, xfs_get_blocks_direct, NULL, 0);
-	if (ret > 0) {
-		iocb->ki_pos += ret;
-		iov_iter_advance(to, ret);
-	}
+	ret = iomap_dax_rw(iocb, to, &xfs_iomap_ops);
 	xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
 
 	file_accessed(iocb->ki_filp);
@@ -711,52 +704,32 @@ xfs_file_dax_write(
 	struct kiocb		*iocb,
 	struct iov_iter		*from)
 {
-	struct address_space	*mapping = iocb->ki_filp->f_mapping;
-	struct inode		*inode = mapping->host;
+	struct inode		*inode = iocb->ki_filp->f_mapping->host;
 	struct xfs_inode	*ip = XFS_I(inode);
-	ssize_t			ret = 0;
 	int			iolock = XFS_IOLOCK_EXCL;
-	struct iov_iter		data;
+	ssize_t			ret, error = 0;
+	size_t			count;
+	loff_t			pos;
 
 	xfs_rw_ilock(ip, iolock);
 	ret = xfs_file_aio_write_checks(iocb, from, &iolock);
 	if (ret)
 		goto out;
 
-	/*
-	 * Yes, even DAX files can have page cache attached to them:  A zeroed
-	 * page is inserted into the pagecache when we have to serve a write
-	 * fault on a hole.  It should never be dirtied and can simply be
-	 * dropped from the pagecache once we get real data for the page.
-	 *
-	 * XXX: This is racy against mmap, and there's nothing we can do about
-	 * it. dax_do_io() should really do this invalidation internally as
-	 * it will know if we've allocated over a holei for this specific IO and
-	 * if so it needs to update the mapping tree and invalidate existing
-	 * PTEs over the newly allocated range. Remove this invalidation when
-	 * dax_do_io() is fixed up.
-	 */
-	if (mapping->nrpages) {
-		loff_t end = iocb->ki_pos + iov_iter_count(from) - 1;
-
-		ret = invalidate_inode_pages2_range(mapping,
-						    iocb->ki_pos >> PAGE_SHIFT,
-						    end >> PAGE_SHIFT);
-		WARN_ON_ONCE(ret);
-	}
+	pos = iocb->ki_pos;
+	count = iov_iter_count(from);
 
-	trace_xfs_file_dax_write(ip, iov_iter_count(from), iocb->ki_pos);
+	trace_xfs_file_dax_write(ip, count, pos);
 
-	data = *from;
-	ret = dax_do_io(iocb, inode, &data, xfs_get_blocks_direct,
-			xfs_end_io_direct_write, 0);
-	if (ret > 0) {
-		iocb->ki_pos += ret;
-		iov_iter_advance(from, ret);
+	ret = iomap_dax_rw(iocb, from, &xfs_iomap_ops);
+	if (ret > 0 && iocb->ki_pos > i_size_read(inode)) {
+		i_size_write(inode, iocb->ki_pos);
+		error = xfs_setfilesize(ip, pos, ret);
 	}
+
 out:
 	xfs_rw_iunlock(ip, iolock);
-	return ret;
+	return error ? error : ret;
 }
 
 STATIC ssize_t
@@ -1495,7 +1468,7 @@ xfs_filemap_page_mkwrite(
 	xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
 
 	if (IS_DAX(inode)) {
-		ret = dax_mkwrite(vma, vmf, xfs_get_blocks_dax_fault);
+		ret = iomap_dax_fault(vma, vmf, &xfs_iomap_ops);
 	} else {
 		ret = iomap_page_mkwrite(vma, vmf, &xfs_iomap_ops);
 		ret = block_page_mkwrite_return(ret);
@@ -1529,7 +1502,7 @@ xfs_filemap_fault(
 		 * changes to xfs_get_blocks_direct() to map unwritten extent
 		 * ioend for conversion on read-only mappings.
 		 */
-		ret = dax_fault(vma, vmf, xfs_get_blocks_dax_fault);
+		ret = iomap_dax_fault(vma, vmf, &xfs_iomap_ops);
 	} else
 		ret = filemap_fault(vma, vmf);
 	xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index c3cc175..23f7811 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -935,11 +935,13 @@ error_on_bmapi_transaction:
 	return error;
 }
 
-static inline bool imap_needs_alloc(struct xfs_bmbt_irec *imap, int nimaps)
+static inline bool imap_needs_alloc(struct inode *inode,
+		struct xfs_bmbt_irec *imap, int nimaps)
 {
 	return !nimaps ||
 		imap->br_startblock == HOLESTARTBLOCK ||
-		imap->br_startblock == DELAYSTARTBLOCK;
+		imap->br_startblock == DELAYSTARTBLOCK ||
+		(IS_DAX(inode) && ISUNWRITTEN(imap));
 }
 
 static int
@@ -960,7 +962,8 @@ xfs_file_iomap_begin(
 	if (XFS_FORCED_SHUTDOWN(mp))
 		return -EIO;
 
-	if ((flags & IOMAP_WRITE) && !xfs_get_extsz_hint(ip)) {
+	if ((flags & IOMAP_WRITE) &&
+	    !IS_DAX(inode) && !xfs_get_extsz_hint(ip)) {
 		return xfs_file_iomap_begin_delay(inode, offset, length, flags,
 				iomap);
 	}
@@ -980,7 +983,7 @@ xfs_file_iomap_begin(
 		return error;
 	}
 
-	if ((flags & IOMAP_WRITE) && imap_needs_alloc(&imap, nimaps)) {
+	if ((flags & IOMAP_WRITE) && imap_needs_alloc(inode, &imap, nimaps)) {
 		/*
 		 * We cap the maximum length we map here to MAX_WRITEBACK_PAGES
 		 * pages to keep the chunks of work done where somewhat symmetric
-- 
2.1.4

^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [PATCH 10/12] xfs: use iomap to implement DAX
@ 2016-09-16 11:27     ` Christoph Hellwig
  0 siblings, 0 replies; 27+ messages in thread
From: Christoph Hellwig @ 2016-09-16 11:27 UTC (permalink / raw)
  To: linux-xfs, linux-fsdevel, linux-nvdimm; +Cc: ross.zwisler

Another users of buffer_heads bytes the dust.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Ross Zwisler <ross.zwisler@linux.intel.com>
---
 fs/xfs/xfs_file.c  | 61 +++++++++++++++---------------------------------------
 fs/xfs/xfs_iomap.c | 11 ++++++----
 2 files changed, 24 insertions(+), 48 deletions(-)

diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 62649cc..f99d7fa 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -332,10 +332,7 @@ xfs_file_dax_read(
 	struct kiocb		*iocb,
 	struct iov_iter		*to)
 {
-	struct address_space	*mapping = iocb->ki_filp->f_mapping;
-	struct inode		*inode = mapping->host;
-	struct xfs_inode	*ip = XFS_I(inode);
-	struct iov_iter		data = *to;
+	struct xfs_inode	*ip = XFS_I(iocb->ki_filp->f_mapping->host);
 	size_t			count = iov_iter_count(to);
 	ssize_t			ret = 0;
 
@@ -345,11 +342,7 @@ xfs_file_dax_read(
 		return 0; /* skip atime */
 
 	xfs_rw_ilock(ip, XFS_IOLOCK_SHARED);
-	ret = dax_do_io(iocb, inode, &data, xfs_get_blocks_direct, NULL, 0);
-	if (ret > 0) {
-		iocb->ki_pos += ret;
-		iov_iter_advance(to, ret);
-	}
+	ret = iomap_dax_rw(iocb, to, &xfs_iomap_ops);
 	xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
 
 	file_accessed(iocb->ki_filp);
@@ -711,52 +704,32 @@ xfs_file_dax_write(
 	struct kiocb		*iocb,
 	struct iov_iter		*from)
 {
-	struct address_space	*mapping = iocb->ki_filp->f_mapping;
-	struct inode		*inode = mapping->host;
+	struct inode		*inode = iocb->ki_filp->f_mapping->host;
 	struct xfs_inode	*ip = XFS_I(inode);
-	ssize_t			ret = 0;
 	int			iolock = XFS_IOLOCK_EXCL;
-	struct iov_iter		data;
+	ssize_t			ret, error = 0;
+	size_t			count;
+	loff_t			pos;
 
 	xfs_rw_ilock(ip, iolock);
 	ret = xfs_file_aio_write_checks(iocb, from, &iolock);
 	if (ret)
 		goto out;
 
-	/*
-	 * Yes, even DAX files can have page cache attached to them:  A zeroed
-	 * page is inserted into the pagecache when we have to serve a write
-	 * fault on a hole.  It should never be dirtied and can simply be
-	 * dropped from the pagecache once we get real data for the page.
-	 *
-	 * XXX: This is racy against mmap, and there's nothing we can do about
-	 * it. dax_do_io() should really do this invalidation internally as
-	 * it will know if we've allocated over a holei for this specific IO and
-	 * if so it needs to update the mapping tree and invalidate existing
-	 * PTEs over the newly allocated range. Remove this invalidation when
-	 * dax_do_io() is fixed up.
-	 */
-	if (mapping->nrpages) {
-		loff_t end = iocb->ki_pos + iov_iter_count(from) - 1;
-
-		ret = invalidate_inode_pages2_range(mapping,
-						    iocb->ki_pos >> PAGE_SHIFT,
-						    end >> PAGE_SHIFT);
-		WARN_ON_ONCE(ret);
-	}
+	pos = iocb->ki_pos;
+	count = iov_iter_count(from);
 
-	trace_xfs_file_dax_write(ip, iov_iter_count(from), iocb->ki_pos);
+	trace_xfs_file_dax_write(ip, count, pos);
 
-	data = *from;
-	ret = dax_do_io(iocb, inode, &data, xfs_get_blocks_direct,
-			xfs_end_io_direct_write, 0);
-	if (ret > 0) {
-		iocb->ki_pos += ret;
-		iov_iter_advance(from, ret);
+	ret = iomap_dax_rw(iocb, from, &xfs_iomap_ops);
+	if (ret > 0 && iocb->ki_pos > i_size_read(inode)) {
+		i_size_write(inode, iocb->ki_pos);
+		error = xfs_setfilesize(ip, pos, ret);
 	}
+
 out:
 	xfs_rw_iunlock(ip, iolock);
-	return ret;
+	return error ? error : ret;
 }
 
 STATIC ssize_t
@@ -1495,7 +1468,7 @@ xfs_filemap_page_mkwrite(
 	xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
 
 	if (IS_DAX(inode)) {
-		ret = dax_mkwrite(vma, vmf, xfs_get_blocks_dax_fault);
+		ret = iomap_dax_fault(vma, vmf, &xfs_iomap_ops);
 	} else {
 		ret = iomap_page_mkwrite(vma, vmf, &xfs_iomap_ops);
 		ret = block_page_mkwrite_return(ret);
@@ -1529,7 +1502,7 @@ xfs_filemap_fault(
 		 * changes to xfs_get_blocks_direct() to map unwritten extent
 		 * ioend for conversion on read-only mappings.
 		 */
-		ret = dax_fault(vma, vmf, xfs_get_blocks_dax_fault);
+		ret = iomap_dax_fault(vma, vmf, &xfs_iomap_ops);
 	} else
 		ret = filemap_fault(vma, vmf);
 	xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index c3cc175..23f7811 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -935,11 +935,13 @@ error_on_bmapi_transaction:
 	return error;
 }
 
-static inline bool imap_needs_alloc(struct xfs_bmbt_irec *imap, int nimaps)
+static inline bool imap_needs_alloc(struct inode *inode,
+		struct xfs_bmbt_irec *imap, int nimaps)
 {
 	return !nimaps ||
 		imap->br_startblock == HOLESTARTBLOCK ||
-		imap->br_startblock == DELAYSTARTBLOCK;
+		imap->br_startblock == DELAYSTARTBLOCK ||
+		(IS_DAX(inode) && ISUNWRITTEN(imap));
 }
 
 static int
@@ -960,7 +962,8 @@ xfs_file_iomap_begin(
 	if (XFS_FORCED_SHUTDOWN(mp))
 		return -EIO;
 
-	if ((flags & IOMAP_WRITE) && !xfs_get_extsz_hint(ip)) {
+	if ((flags & IOMAP_WRITE) &&
+	    !IS_DAX(inode) && !xfs_get_extsz_hint(ip)) {
 		return xfs_file_iomap_begin_delay(inode, offset, length, flags,
 				iomap);
 	}
@@ -980,7 +983,7 @@ xfs_file_iomap_begin(
 		return error;
 	}
 
-	if ((flags & IOMAP_WRITE) && imap_needs_alloc(&imap, nimaps)) {
+	if ((flags & IOMAP_WRITE) && imap_needs_alloc(inode, &imap, nimaps)) {
 		/*
 		 * We cap the maximum length we map here to MAX_WRITEBACK_PAGES
 		 * pages to keep the chunks of work done where somewhat symmetric
-- 
2.1.4


^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [PATCH 11/12] ext2: stop passing buffer_head to ext2_get_blocks
  2016-09-16 11:27 iomap based DAX path V3 Christoph Hellwig
@ 2016-09-16 11:27     ` Christoph Hellwig
  2016-09-16 11:27 ` [PATCH 05/12] dax: provide an iomap based dax read/write path Christoph Hellwig
                       ` (2 subsequent siblings)
  3 siblings, 0 replies; 27+ messages in thread
From: Christoph Hellwig @ 2016-09-16 11:27 UTC (permalink / raw)
  To: linux-xfs-u79uwXL29TY76Z2rM5mHXA,
	linux-fsdevel-u79uwXL29TY76Z2rM5mHXA,
	linux-nvdimm-y27Ovi1pjclAfugRpC6u6w

Signed-off-by: Christoph Hellwig <hch-jcswGhMUV9g@public.gmane.org>
Reviewed-by: Ross Zwisler <ross.zwisler-VuQAYsv1563Yd54FQh9/CA@public.gmane.org>
---
 fs/ext2/inode.c | 39 ++++++++++++++++++++++++---------------
 1 file changed, 24 insertions(+), 15 deletions(-)

diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index d5c7d09..2a69ab2 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -618,7 +618,7 @@ static void ext2_splice_branch(struct inode *inode,
  */
 static int ext2_get_blocks(struct inode *inode,
 			   sector_t iblock, unsigned long maxblocks,
-			   struct buffer_head *bh_result,
+			   u32 *bno, bool *new, bool *boundary,
 			   int create)
 {
 	int err = -EIO;
@@ -644,7 +644,6 @@ static int ext2_get_blocks(struct inode *inode,
 	/* Simplest case - block found, no allocation needed */
 	if (!partial) {
 		first_block = le32_to_cpu(chain[depth - 1].key);
-		clear_buffer_new(bh_result); /* What's this do? */
 		count++;
 		/*map more blocks*/
 		while (count < maxblocks && count <= blocks_to_boundary) {
@@ -699,7 +698,6 @@ static int ext2_get_blocks(struct inode *inode,
 			mutex_unlock(&ei->truncate_mutex);
 			if (err)
 				goto cleanup;
-			clear_buffer_new(bh_result);
 			goto got_it;
 		}
 	}
@@ -745,15 +743,16 @@ static int ext2_get_blocks(struct inode *inode,
 			mutex_unlock(&ei->truncate_mutex);
 			goto cleanup;
 		}
-	} else
-		set_buffer_new(bh_result);
+	} else {
+		*new = true;
+	}
 
 	ext2_splice_branch(inode, iblock, partial, indirect_blks, count);
 	mutex_unlock(&ei->truncate_mutex);
 got_it:
-	map_bh(bh_result, inode->i_sb, le32_to_cpu(chain[depth-1].key));
+	*bno = le32_to_cpu(chain[depth-1].key);
 	if (count > blocks_to_boundary)
-		set_buffer_boundary(bh_result);
+		*boundary = true;
 	err = count;
 	/* Clean up and exit */
 	partial = chain + depth - 1;	/* the whole chain */
@@ -765,16 +764,26 @@ cleanup:
 	return err;
 }
 
-int ext2_get_block(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create)
+int ext2_get_block(struct inode *inode, sector_t iblock,
+		struct buffer_head *bh_result, int create)
 {
 	unsigned max_blocks = bh_result->b_size >> inode->i_blkbits;
-	int ret = ext2_get_blocks(inode, iblock, max_blocks,
-			      bh_result, create);
-	if (ret > 0) {
-		bh_result->b_size = (ret << inode->i_blkbits);
-		ret = 0;
-	}
-	return ret;
+	bool new = false, boundary = false;
+	u32 bno;
+	int ret;
+
+	ret = ext2_get_blocks(inode, iblock, max_blocks, &bno, &new, &boundary,
+			create);
+	if (ret <= 0)
+		return ret;
+
+	map_bh(bh_result, inode->i_sb, bno);
+	bh_result->b_size = (ret << inode->i_blkbits);
+	if (new)
+		set_buffer_new(bh_result);
+	if (boundary)
+		set_buffer_boundary(bh_result);
+	return 0;
 
 }
 
-- 
2.1.4

^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [PATCH 11/12] ext2: stop passing buffer_head to ext2_get_blocks
@ 2016-09-16 11:27     ` Christoph Hellwig
  0 siblings, 0 replies; 27+ messages in thread
From: Christoph Hellwig @ 2016-09-16 11:27 UTC (permalink / raw)
  To: linux-xfs, linux-fsdevel, linux-nvdimm; +Cc: ross.zwisler

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Ross Zwisler <ross.zwisler@linux.intel.com>
---
 fs/ext2/inode.c | 39 ++++++++++++++++++++++++---------------
 1 file changed, 24 insertions(+), 15 deletions(-)

diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index d5c7d09..2a69ab2 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -618,7 +618,7 @@ static void ext2_splice_branch(struct inode *inode,
  */
 static int ext2_get_blocks(struct inode *inode,
 			   sector_t iblock, unsigned long maxblocks,
-			   struct buffer_head *bh_result,
+			   u32 *bno, bool *new, bool *boundary,
 			   int create)
 {
 	int err = -EIO;
@@ -644,7 +644,6 @@ static int ext2_get_blocks(struct inode *inode,
 	/* Simplest case - block found, no allocation needed */
 	if (!partial) {
 		first_block = le32_to_cpu(chain[depth - 1].key);
-		clear_buffer_new(bh_result); /* What's this do? */
 		count++;
 		/*map more blocks*/
 		while (count < maxblocks && count <= blocks_to_boundary) {
@@ -699,7 +698,6 @@ static int ext2_get_blocks(struct inode *inode,
 			mutex_unlock(&ei->truncate_mutex);
 			if (err)
 				goto cleanup;
-			clear_buffer_new(bh_result);
 			goto got_it;
 		}
 	}
@@ -745,15 +743,16 @@ static int ext2_get_blocks(struct inode *inode,
 			mutex_unlock(&ei->truncate_mutex);
 			goto cleanup;
 		}
-	} else
-		set_buffer_new(bh_result);
+	} else {
+		*new = true;
+	}
 
 	ext2_splice_branch(inode, iblock, partial, indirect_blks, count);
 	mutex_unlock(&ei->truncate_mutex);
 got_it:
-	map_bh(bh_result, inode->i_sb, le32_to_cpu(chain[depth-1].key));
+	*bno = le32_to_cpu(chain[depth-1].key);
 	if (count > blocks_to_boundary)
-		set_buffer_boundary(bh_result);
+		*boundary = true;
 	err = count;
 	/* Clean up and exit */
 	partial = chain + depth - 1;	/* the whole chain */
@@ -765,16 +764,26 @@ cleanup:
 	return err;
 }
 
-int ext2_get_block(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create)
+int ext2_get_block(struct inode *inode, sector_t iblock,
+		struct buffer_head *bh_result, int create)
 {
 	unsigned max_blocks = bh_result->b_size >> inode->i_blkbits;
-	int ret = ext2_get_blocks(inode, iblock, max_blocks,
-			      bh_result, create);
-	if (ret > 0) {
-		bh_result->b_size = (ret << inode->i_blkbits);
-		ret = 0;
-	}
-	return ret;
+	bool new = false, boundary = false;
+	u32 bno;
+	int ret;
+
+	ret = ext2_get_blocks(inode, iblock, max_blocks, &bno, &new, &boundary,
+			create);
+	if (ret <= 0)
+		return ret;
+
+	map_bh(bh_result, inode->i_sb, bno);
+	bh_result->b_size = (ret << inode->i_blkbits);
+	if (new)
+		set_buffer_new(bh_result);
+	if (boundary)
+		set_buffer_boundary(bh_result);
+	return 0;
 
 }
 
-- 
2.1.4


^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [PATCH 12/12] ext2: use iomap to implement DAX
  2016-09-16 11:27 iomap based DAX path V3 Christoph Hellwig
@ 2016-09-16 11:27     ` Christoph Hellwig
  2016-09-16 11:27 ` [PATCH 05/12] dax: provide an iomap based dax read/write path Christoph Hellwig
                       ` (2 subsequent siblings)
  3 siblings, 0 replies; 27+ messages in thread
From: Christoph Hellwig @ 2016-09-16 11:27 UTC (permalink / raw)
  To: linux-xfs-u79uwXL29TY76Z2rM5mHXA,
	linux-fsdevel-u79uwXL29TY76Z2rM5mHXA,
	linux-nvdimm-y27Ovi1pjclAfugRpC6u6w

Signed-off-by: Christoph Hellwig <hch-jcswGhMUV9g@public.gmane.org>
Reviewed-by: Ross Zwisler <ross.zwisler-VuQAYsv1563Yd54FQh9/CA@public.gmane.org>
---
 fs/ext2/Kconfig |  1 +
 fs/ext2/ext2.h  |  1 +
 fs/ext2/file.c  | 76 +++++++++++++++++++++++++++++++++++++++++++++++++++------
 fs/ext2/inode.c | 63 +++++++++++++++++++++++++++++++++++++++++++----
 4 files changed, 129 insertions(+), 12 deletions(-)

diff --git a/fs/ext2/Kconfig b/fs/ext2/Kconfig
index c634874e..36bea5a 100644
--- a/fs/ext2/Kconfig
+++ b/fs/ext2/Kconfig
@@ -1,5 +1,6 @@
 config EXT2_FS
 	tristate "Second extended fs support"
+	select FS_IOMAP if FS_DAX
 	help
 	  Ext2 is a standard Linux file system for hard disks.
 
diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h
index 06af2f9..37e2be7 100644
--- a/fs/ext2/ext2.h
+++ b/fs/ext2/ext2.h
@@ -814,6 +814,7 @@ extern const struct file_operations ext2_file_operations;
 /* inode.c */
 extern const struct address_space_operations ext2_aops;
 extern const struct address_space_operations ext2_nobh_aops;
+extern struct iomap_ops ext2_iomap_ops;
 
 /* namei.c */
 extern const struct inode_operations ext2_dir_inode_operations;
diff --git a/fs/ext2/file.c b/fs/ext2/file.c
index 5efeefe..423cc01 100644
--- a/fs/ext2/file.c
+++ b/fs/ext2/file.c
@@ -22,11 +22,59 @@
 #include <linux/pagemap.h>
 #include <linux/dax.h>
 #include <linux/quotaops.h>
+#include <linux/iomap.h>
+#include <linux/uio.h>
 #include "ext2.h"
 #include "xattr.h"
 #include "acl.h"
 
 #ifdef CONFIG_FS_DAX
+static ssize_t ext2_dax_read_iter(struct kiocb *iocb, struct iov_iter *to)
+{
+	struct inode *inode = iocb->ki_filp->f_mapping->host;
+	ssize_t ret;
+
+	if (!iov_iter_count(to))
+		return 0; /* skip atime */
+
+	inode_lock_shared(inode);
+	ret = iomap_dax_rw(iocb, to, &ext2_iomap_ops);
+	inode_unlock_shared(inode);
+
+	file_accessed(iocb->ki_filp);
+	return ret;
+}
+
+static ssize_t ext2_dax_write_iter(struct kiocb *iocb, struct iov_iter *from)
+{
+	struct file *file = iocb->ki_filp;
+	struct inode *inode = file->f_mapping->host;
+	ssize_t ret;
+
+	inode_lock(inode);
+	ret = generic_write_checks(iocb, from);
+	if (ret <= 0)
+		goto out_unlock;
+	ret = file_remove_privs(file);
+	if (ret)
+		goto out_unlock;
+	ret = file_update_time(file);
+	if (ret)
+		goto out_unlock;
+
+	ret = iomap_dax_rw(iocb, from, &ext2_iomap_ops);
+	if (ret > 0 && iocb->ki_pos > i_size_read(inode)) {
+		i_size_write(inode, iocb->ki_pos);
+		mark_inode_dirty(inode);
+	}
+
+out_unlock:
+	inode_unlock(inode);
+	if (ret > 0)
+		ret = generic_write_sync(iocb, ret);
+	return ret;
+}
+
 /*
  * The lock ordering for ext2 DAX fault paths is:
  *
@@ -51,7 +99,7 @@ static int ext2_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 	}
 	down_read(&ei->dax_sem);
 
-	ret = dax_fault(vma, vmf, ext2_get_block);
+	ret = iomap_dax_fault(vma, vmf, &ext2_iomap_ops);
 
 	up_read(&ei->dax_sem);
 	if (vmf->flags & FAULT_FLAG_WRITE)
@@ -156,14 +204,28 @@ int ext2_fsync(struct file *file, loff_t start, loff_t end, int datasync)
 	return ret;
 }
 
-/*
- * We have mostly NULL's here: the current defaults are ok for
- * the ext2 filesystem.
- */
+static ssize_t ext2_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
+{
+#ifdef CONFIG_FS_DAX
+	if (IS_DAX(iocb->ki_filp->f_mapping->host))
+		return ext2_dax_read_iter(iocb, to);
+#endif
+	return generic_file_read_iter(iocb, to);
+}
+
+static ssize_t ext2_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
+{
+#ifdef CONFIG_FS_DAX
+	if (IS_DAX(iocb->ki_filp->f_mapping->host))
+		return ext2_dax_write_iter(iocb, from);
+#endif
+	return generic_file_write_iter(iocb, from);
+}
+
 const struct file_operations ext2_file_operations = {
 	.llseek		= generic_file_llseek,
-	.read_iter	= generic_file_read_iter,
-	.write_iter	= generic_file_write_iter,
+	.read_iter	= ext2_file_read_iter,
+	.write_iter	= ext2_file_write_iter,
 	.unlocked_ioctl = ext2_ioctl,
 #ifdef CONFIG_COMPAT
 	.compat_ioctl	= ext2_compat_ioctl,
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index 2a69ab2..aae5f61 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -32,6 +32,7 @@
 #include <linux/buffer_head.h>
 #include <linux/mpage.h>
 #include <linux/fiemap.h>
+#include <linux/iomap.h>
 #include <linux/namei.h>
 #include <linux/uio.h>
 #include "ext2.h"
@@ -787,6 +788,59 @@ int ext2_get_block(struct inode *inode, sector_t iblock,
 
 }
 
+#ifdef CONFIG_FS_DAX
+static int ext2_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
+		unsigned flags, struct iomap *iomap)
+{
+	unsigned int blkbits = inode->i_blkbits;
+	unsigned long first_block = offset >> blkbits;
+	unsigned long max_blocks = (length + (1 << blkbits) - 1) >> blkbits;
+	bool new = false, boundary = false;
+	u32 bno;
+	int ret;
+
+	ret = ext2_get_blocks(inode, first_block, max_blocks,
+			&bno, &new, &boundary, flags & IOMAP_WRITE);
+	if (ret < 0)
+		return ret;
+
+	iomap->flags = 0;
+	iomap->bdev = inode->i_sb->s_bdev;
+	iomap->offset = first_block << blkbits;
+
+	if (ret == 0) {
+		iomap->type = IOMAP_HOLE;
+		iomap->blkno = IOMAP_NULL_BLOCK;
+		iomap->length = 1 << blkbits;
+	} else {
+		iomap->type = IOMAP_MAPPED;
+		iomap->blkno = (sector_t)bno << (blkbits - 9);
+		iomap->length = (u64)ret << blkbits;
+		iomap->flags |= IOMAP_F_MERGED;
+	}
+
+	if (new)
+		iomap->flags |= IOMAP_F_NEW;
+	return 0;
+}
+
+static int
+ext2_iomap_end(struct inode *inode, loff_t offset, loff_t length,
+		ssize_t written, unsigned flags, struct iomap *iomap)
+{
+	if (iomap->type == IOMAP_MAPPED &&
+	    written < length &&
+	    (flags & IOMAP_WRITE))
+		ext2_write_failed(inode->i_mapping, offset + length);
+	return 0;
+}
+
+struct iomap_ops ext2_iomap_ops = {
+	.iomap_begin		= ext2_iomap_begin,
+	.iomap_end		= ext2_iomap_end,
+};
+#endif /* CONFIG_FS_DAX */
+
 int ext2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
 		u64 start, u64 len)
 {
@@ -872,11 +926,10 @@ ext2_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
 	loff_t offset = iocb->ki_pos;
 	ssize_t ret;
 
-	if (IS_DAX(inode))
-		ret = dax_do_io(iocb, inode, iter, ext2_get_block, NULL,
-				DIO_LOCKING);
-	else
-		ret = blockdev_direct_IO(iocb, inode, iter, ext2_get_block);
+	if (WARN_ON_ONCE(IS_DAX(inode)))
+		return -EIO;
+
+	ret = blockdev_direct_IO(iocb, inode, iter, ext2_get_block);
 	if (ret < 0 && iov_iter_rw(iter) == WRITE)
 		ext2_write_failed(mapping, offset + count);
 	return ret;
-- 
2.1.4

^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [PATCH 12/12] ext2: use iomap to implement DAX
@ 2016-09-16 11:27     ` Christoph Hellwig
  0 siblings, 0 replies; 27+ messages in thread
From: Christoph Hellwig @ 2016-09-16 11:27 UTC (permalink / raw)
  To: linux-xfs, linux-fsdevel, linux-nvdimm; +Cc: ross.zwisler

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Ross Zwisler <ross.zwisler@linux.intel.com>
---
 fs/ext2/Kconfig |  1 +
 fs/ext2/ext2.h  |  1 +
 fs/ext2/file.c  | 76 +++++++++++++++++++++++++++++++++++++++++++++++++++------
 fs/ext2/inode.c | 63 +++++++++++++++++++++++++++++++++++++++++++----
 4 files changed, 129 insertions(+), 12 deletions(-)

diff --git a/fs/ext2/Kconfig b/fs/ext2/Kconfig
index c634874e..36bea5a 100644
--- a/fs/ext2/Kconfig
+++ b/fs/ext2/Kconfig
@@ -1,5 +1,6 @@
 config EXT2_FS
 	tristate "Second extended fs support"
+	select FS_IOMAP if FS_DAX
 	help
 	  Ext2 is a standard Linux file system for hard disks.
 
diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h
index 06af2f9..37e2be7 100644
--- a/fs/ext2/ext2.h
+++ b/fs/ext2/ext2.h
@@ -814,6 +814,7 @@ extern const struct file_operations ext2_file_operations;
 /* inode.c */
 extern const struct address_space_operations ext2_aops;
 extern const struct address_space_operations ext2_nobh_aops;
+extern struct iomap_ops ext2_iomap_ops;
 
 /* namei.c */
 extern const struct inode_operations ext2_dir_inode_operations;
diff --git a/fs/ext2/file.c b/fs/ext2/file.c
index 5efeefe..423cc01 100644
--- a/fs/ext2/file.c
+++ b/fs/ext2/file.c
@@ -22,11 +22,59 @@
 #include <linux/pagemap.h>
 #include <linux/dax.h>
 #include <linux/quotaops.h>
+#include <linux/iomap.h>
+#include <linux/uio.h>
 #include "ext2.h"
 #include "xattr.h"
 #include "acl.h"
 
 #ifdef CONFIG_FS_DAX
+static ssize_t ext2_dax_read_iter(struct kiocb *iocb, struct iov_iter *to)
+{
+	struct inode *inode = iocb->ki_filp->f_mapping->host;
+	ssize_t ret;
+
+	if (!iov_iter_count(to))
+		return 0; /* skip atime */
+
+	inode_lock_shared(inode);
+	ret = iomap_dax_rw(iocb, to, &ext2_iomap_ops);
+	inode_unlock_shared(inode);
+
+	file_accessed(iocb->ki_filp);
+	return ret;
+}
+
+static ssize_t ext2_dax_write_iter(struct kiocb *iocb, struct iov_iter *from)
+{
+	struct file *file = iocb->ki_filp;
+	struct inode *inode = file->f_mapping->host;
+	ssize_t ret;
+
+	inode_lock(inode);
+	ret = generic_write_checks(iocb, from);
+	if (ret <= 0)
+		goto out_unlock;
+	ret = file_remove_privs(file);
+	if (ret)
+		goto out_unlock;
+	ret = file_update_time(file);
+	if (ret)
+		goto out_unlock;
+
+	ret = iomap_dax_rw(iocb, from, &ext2_iomap_ops);
+	if (ret > 0 && iocb->ki_pos > i_size_read(inode)) {
+		i_size_write(inode, iocb->ki_pos);
+		mark_inode_dirty(inode);
+	}
+
+out_unlock:
+	inode_unlock(inode);
+	if (ret > 0)
+		ret = generic_write_sync(iocb, ret);
+	return ret;
+}
+
 /*
  * The lock ordering for ext2 DAX fault paths is:
  *
@@ -51,7 +99,7 @@ static int ext2_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 	}
 	down_read(&ei->dax_sem);
 
-	ret = dax_fault(vma, vmf, ext2_get_block);
+	ret = iomap_dax_fault(vma, vmf, &ext2_iomap_ops);
 
 	up_read(&ei->dax_sem);
 	if (vmf->flags & FAULT_FLAG_WRITE)
@@ -156,14 +204,28 @@ int ext2_fsync(struct file *file, loff_t start, loff_t end, int datasync)
 	return ret;
 }
 
-/*
- * We have mostly NULL's here: the current defaults are ok for
- * the ext2 filesystem.
- */
+static ssize_t ext2_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
+{
+#ifdef CONFIG_FS_DAX
+	if (IS_DAX(iocb->ki_filp->f_mapping->host))
+		return ext2_dax_read_iter(iocb, to);
+#endif
+	return generic_file_read_iter(iocb, to);
+}
+
+static ssize_t ext2_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
+{
+#ifdef CONFIG_FS_DAX
+	if (IS_DAX(iocb->ki_filp->f_mapping->host))
+		return ext2_dax_write_iter(iocb, from);
+#endif
+	return generic_file_write_iter(iocb, from);
+}
+
 const struct file_operations ext2_file_operations = {
 	.llseek		= generic_file_llseek,
-	.read_iter	= generic_file_read_iter,
-	.write_iter	= generic_file_write_iter,
+	.read_iter	= ext2_file_read_iter,
+	.write_iter	= ext2_file_write_iter,
 	.unlocked_ioctl = ext2_ioctl,
 #ifdef CONFIG_COMPAT
 	.compat_ioctl	= ext2_compat_ioctl,
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index 2a69ab2..aae5f61 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -32,6 +32,7 @@
 #include <linux/buffer_head.h>
 #include <linux/mpage.h>
 #include <linux/fiemap.h>
+#include <linux/iomap.h>
 #include <linux/namei.h>
 #include <linux/uio.h>
 #include "ext2.h"
@@ -787,6 +788,59 @@ int ext2_get_block(struct inode *inode, sector_t iblock,
 
 }
 
+#ifdef CONFIG_FS_DAX
+static int ext2_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
+		unsigned flags, struct iomap *iomap)
+{
+	unsigned int blkbits = inode->i_blkbits;
+	unsigned long first_block = offset >> blkbits;
+	unsigned long max_blocks = (length + (1 << blkbits) - 1) >> blkbits;
+	bool new = false, boundary = false;
+	u32 bno;
+	int ret;
+
+	ret = ext2_get_blocks(inode, first_block, max_blocks,
+			&bno, &new, &boundary, flags & IOMAP_WRITE);
+	if (ret < 0)
+		return ret;
+
+	iomap->flags = 0;
+	iomap->bdev = inode->i_sb->s_bdev;
+	iomap->offset = first_block << blkbits;
+
+	if (ret == 0) {
+		iomap->type = IOMAP_HOLE;
+		iomap->blkno = IOMAP_NULL_BLOCK;
+		iomap->length = 1 << blkbits;
+	} else {
+		iomap->type = IOMAP_MAPPED;
+		iomap->blkno = (sector_t)bno << (blkbits - 9);
+		iomap->length = (u64)ret << blkbits;
+		iomap->flags |= IOMAP_F_MERGED;
+	}
+
+	if (new)
+		iomap->flags |= IOMAP_F_NEW;
+	return 0;
+}
+
+static int
+ext2_iomap_end(struct inode *inode, loff_t offset, loff_t length,
+		ssize_t written, unsigned flags, struct iomap *iomap)
+{
+	if (iomap->type == IOMAP_MAPPED &&
+	    written < length &&
+	    (flags & IOMAP_WRITE))
+		ext2_write_failed(inode->i_mapping, offset + length);
+	return 0;
+}
+
+struct iomap_ops ext2_iomap_ops = {
+	.iomap_begin		= ext2_iomap_begin,
+	.iomap_end		= ext2_iomap_end,
+};
+#endif /* CONFIG_FS_DAX */
+
 int ext2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
 		u64 start, u64 len)
 {
@@ -872,11 +926,10 @@ ext2_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
 	loff_t offset = iocb->ki_pos;
 	ssize_t ret;
 
-	if (IS_DAX(inode))
-		ret = dax_do_io(iocb, inode, iter, ext2_get_block, NULL,
-				DIO_LOCKING);
-	else
-		ret = blockdev_direct_IO(iocb, inode, iter, ext2_get_block);
+	if (WARN_ON_ONCE(IS_DAX(inode)))
+		return -EIO;
+
+	ret = blockdev_direct_IO(iocb, inode, iter, ext2_get_block);
 	if (ret < 0 && iov_iter_rw(iter) == WRITE)
 		ext2_write_failed(mapping, offset + count);
 	return ret;
-- 
2.1.4


^ permalink raw reply related	[flat|nested] 27+ messages in thread

* Re: [PATCH 12/12] ext2: use iomap to implement DAX
  2016-09-16 11:27     ` Christoph Hellwig
@ 2016-09-26 14:49         ` Jan Kara
  -1 siblings, 0 replies; 27+ messages in thread
From: Jan Kara @ 2016-09-26 14:49 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: linux-xfs-u79uwXL29TY76Z2rM5mHXA,
	linux-fsdevel-u79uwXL29TY76Z2rM5mHXA,
	linux-nvdimm-y27Ovi1pjclAfugRpC6u6w

When looking through this patch I've noticed one bug:

On Fri 16-09-16 13:27:14, Christoph Hellwig wrote:
> +static int ext2_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
> +		unsigned flags, struct iomap *iomap)
> +{
...
> +	iomap->offset = first_block << blkbits;

Here is a missing cast to u64. Otherwise on 32-bit arch the shift could
overflow. I guess DAX is not expected to be used on 32-bit archs but
still... 

								Honza
-- 
Jan Kara <jack-IBi9RG/b67k@public.gmane.org>
SUSE Labs, CR

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 12/12] ext2: use iomap to implement DAX
@ 2016-09-26 14:49         ` Jan Kara
  0 siblings, 0 replies; 27+ messages in thread
From: Jan Kara @ 2016-09-26 14:49 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: linux-xfs, linux-fsdevel, linux-nvdimm, ross.zwisler

When looking through this patch I've noticed one bug:

On Fri 16-09-16 13:27:14, Christoph Hellwig wrote:
> +static int ext2_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
> +		unsigned flags, struct iomap *iomap)
> +{
...
> +	iomap->offset = first_block << blkbits;

Here is a missing cast to u64. Otherwise on 32-bit arch the shift could
overflow. I guess DAX is not expected to be used on 32-bit archs but
still... 

								Honza
-- 
Jan Kara <jack@suse.com>
SUSE Labs, CR

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 11/12] ext2: stop passing buffer_head to ext2_get_blocks
  2016-09-14 10:01 ` [PATCH 11/12] ext2: stop passing buffer_head to ext2_get_blocks Christoph Hellwig
@ 2016-09-14 22:42       ` Ross Zwisler
  0 siblings, 0 replies; 27+ messages in thread
From: Ross Zwisler @ 2016-09-14 22:42 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: linux-xfs-u79uwXL29TY76Z2rM5mHXA,
	linux-fsdevel-u79uwXL29TY76Z2rM5mHXA,
	linux-nvdimm-y27Ovi1pjclAfugRpC6u6w

On Wed, Sep 14, 2016 at 12:01:30PM +0200, Christoph Hellwig wrote:
> Signed-off-by: Christoph Hellwig <hch-jcswGhMUV9g@public.gmane.org>

Reviewed-by: Ross Zwisler <ross.zwisler-VuQAYsv1563Yd54FQh9/CA@public.gmane.org>

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 11/12] ext2: stop passing buffer_head to ext2_get_blocks
@ 2016-09-14 22:42       ` Ross Zwisler
  0 siblings, 0 replies; 27+ messages in thread
From: Ross Zwisler @ 2016-09-14 22:42 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: linux-xfs, linux-fsdevel, linux-nvdimm, ross.zwisler

On Wed, Sep 14, 2016 at 12:01:30PM +0200, Christoph Hellwig wrote:
> Signed-off-by: Christoph Hellwig <hch@lst.de>

Reviewed-by: Ross Zwisler <ross.zwisler@linux.intel.com>

^ permalink raw reply	[flat|nested] 27+ messages in thread

* [PATCH 11/12] ext2: stop passing buffer_head to ext2_get_blocks
  2016-09-14 10:01 iomap based DAX path V2 Christoph Hellwig
@ 2016-09-14 10:01 ` Christoph Hellwig
       [not found]   ` <1473847291-18913-12-git-send-email-hch-jcswGhMUV9g@public.gmane.org>
  0 siblings, 1 reply; 27+ messages in thread
From: Christoph Hellwig @ 2016-09-14 10:01 UTC (permalink / raw)
  To: linux-xfs, linux-fsdevel, linux-nvdimm; +Cc: ross.zwisler

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/ext2/inode.c | 39 ++++++++++++++++++++++++---------------
 1 file changed, 24 insertions(+), 15 deletions(-)

diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index d5c7d09..2a69ab2 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -618,7 +618,7 @@ static void ext2_splice_branch(struct inode *inode,
  */
 static int ext2_get_blocks(struct inode *inode,
 			   sector_t iblock, unsigned long maxblocks,
-			   struct buffer_head *bh_result,
+			   u32 *bno, bool *new, bool *boundary,
 			   int create)
 {
 	int err = -EIO;
@@ -644,7 +644,6 @@ static int ext2_get_blocks(struct inode *inode,
 	/* Simplest case - block found, no allocation needed */
 	if (!partial) {
 		first_block = le32_to_cpu(chain[depth - 1].key);
-		clear_buffer_new(bh_result); /* What's this do? */
 		count++;
 		/*map more blocks*/
 		while (count < maxblocks && count <= blocks_to_boundary) {
@@ -699,7 +698,6 @@ static int ext2_get_blocks(struct inode *inode,
 			mutex_unlock(&ei->truncate_mutex);
 			if (err)
 				goto cleanup;
-			clear_buffer_new(bh_result);
 			goto got_it;
 		}
 	}
@@ -745,15 +743,16 @@ static int ext2_get_blocks(struct inode *inode,
 			mutex_unlock(&ei->truncate_mutex);
 			goto cleanup;
 		}
-	} else
-		set_buffer_new(bh_result);
+	} else {
+		*new = true;
+	}
 
 	ext2_splice_branch(inode, iblock, partial, indirect_blks, count);
 	mutex_unlock(&ei->truncate_mutex);
 got_it:
-	map_bh(bh_result, inode->i_sb, le32_to_cpu(chain[depth-1].key));
+	*bno = le32_to_cpu(chain[depth-1].key);
 	if (count > blocks_to_boundary)
-		set_buffer_boundary(bh_result);
+		*boundary = true;
 	err = count;
 	/* Clean up and exit */
 	partial = chain + depth - 1;	/* the whole chain */
@@ -765,16 +764,26 @@ cleanup:
 	return err;
 }
 
-int ext2_get_block(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create)
+int ext2_get_block(struct inode *inode, sector_t iblock,
+		struct buffer_head *bh_result, int create)
 {
 	unsigned max_blocks = bh_result->b_size >> inode->i_blkbits;
-	int ret = ext2_get_blocks(inode, iblock, max_blocks,
-			      bh_result, create);
-	if (ret > 0) {
-		bh_result->b_size = (ret << inode->i_blkbits);
-		ret = 0;
-	}
-	return ret;
+	bool new = false, boundary = false;
+	u32 bno;
+	int ret;
+
+	ret = ext2_get_blocks(inode, iblock, max_blocks, &bno, &new, &boundary,
+			create);
+	if (ret <= 0)
+		return ret;
+
+	map_bh(bh_result, inode->i_sb, bno);
+	bh_result->b_size = (ret << inode->i_blkbits);
+	if (new)
+		set_buffer_new(bh_result);
+	if (boundary)
+		set_buffer_boundary(bh_result);
+	return 0;
 
 }
 
-- 
2.1.4


^ permalink raw reply related	[flat|nested] 27+ messages in thread

end of thread, other threads:[~2016-09-26 14:49 UTC | newest]

Thread overview: 27+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-09-16 11:27 iomap based DAX path V3 Christoph Hellwig
     [not found] ` <1474025234-13804-1-git-send-email-hch-jcswGhMUV9g@public.gmane.org>
2016-09-16 11:27   ` [PATCH 01/12] iomap: add IOMAP_F_NEW flag Christoph Hellwig
2016-09-16 11:27     ` Christoph Hellwig
2016-09-16 11:27   ` [PATCH 02/12] iomap: expose iomap_apply outside iomap.c Christoph Hellwig
2016-09-16 11:27     ` Christoph Hellwig
2016-09-16 11:27   ` [PATCH 03/12] dax: don't pass buffer_head to dax_insert_mapping Christoph Hellwig
2016-09-16 11:27     ` Christoph Hellwig
2016-09-16 11:27   ` [PATCH 04/12] dax: don't pass buffer_head to copy_user_dax Christoph Hellwig
2016-09-16 11:27     ` Christoph Hellwig
2016-09-16 11:27   ` [PATCH 06/12] dax: provide an iomap based fault handler Christoph Hellwig
2016-09-16 11:27     ` Christoph Hellwig
2016-09-16 11:27   ` [PATCH 07/12] xfs: fix locking for DAX writes Christoph Hellwig
2016-09-16 11:27     ` Christoph Hellwig
2016-09-16 11:27   ` [PATCH 10/12] xfs: use iomap to implement DAX Christoph Hellwig
2016-09-16 11:27     ` Christoph Hellwig
2016-09-16 11:27   ` [PATCH 11/12] ext2: stop passing buffer_head to ext2_get_blocks Christoph Hellwig
2016-09-16 11:27     ` Christoph Hellwig
2016-09-16 11:27   ` [PATCH 12/12] ext2: use iomap to implement DAX Christoph Hellwig
2016-09-16 11:27     ` Christoph Hellwig
     [not found]     ` <1474025234-13804-13-git-send-email-hch-jcswGhMUV9g@public.gmane.org>
2016-09-26 14:49       ` Jan Kara
2016-09-26 14:49         ` Jan Kara
2016-09-16 11:27 ` [PATCH 05/12] dax: provide an iomap based dax read/write path Christoph Hellwig
2016-09-16 11:27 ` [PATCH 08/12] xfs: take the ilock shared if possible in xfs_file_iomap_begin Christoph Hellwig
2016-09-16 11:27 ` [PATCH 09/12] xfs: refactor xfs_setfilesize Christoph Hellwig
  -- strict thread matches above, loose matches on Subject: below --
2016-09-14 10:01 iomap based DAX path V2 Christoph Hellwig
2016-09-14 10:01 ` [PATCH 11/12] ext2: stop passing buffer_head to ext2_get_blocks Christoph Hellwig
     [not found]   ` <1473847291-18913-12-git-send-email-hch-jcswGhMUV9g@public.gmane.org>
2016-09-14 22:42     ` Ross Zwisler
2016-09-14 22:42       ` Ross Zwisler

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.