* [PATCH 01/10] iomap: add IOMAP_F_NEW flag
2016-09-09 16:34 iomap based DAX path Christoph Hellwig
@ 2016-09-09 16:34 ` Christoph Hellwig
2016-09-09 16:34 ` [PATCH 09/10] xfs: refactor xfs_setfilesize Christoph Hellwig
2016-09-09 16:34 ` [PATCH 10/10] xfs: use iomap to implement DAX Christoph Hellwig
2 siblings, 0 replies; 58+ messages in thread
From: Christoph Hellwig @ 2016-09-09 16:34 UTC (permalink / raw)
To: linux-xfs-u79uwXL29TY76Z2rM5mHXA,
linux-fsdevel-u79uwXL29TY76Z2rM5mHXA,
linux-nvdimm-y27Ovi1pjclAfugRpC6u6w
Signed-off-by: Christoph Hellwig <hch-jcswGhMUV9g@public.gmane.org>
---
fs/xfs/xfs_iomap.c | 2 ++
include/linux/iomap.h | 1 +
2 files changed, 3 insertions(+)
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index f96c8ff..5d06a2d 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -635,6 +635,7 @@ retry:
if (end_fsb != orig_end_fsb)
xfs_inode_set_eofblocks_tag(ip);
+ iomap->flags = IOMAP_F_NEW;
trace_xfs_iomap_alloc(ip, offset, count, 0, &got);
done:
if (isnullstartblock(got.br_startblock))
@@ -1000,6 +1001,7 @@ xfs_file_iomap_begin(
if (error)
return error;
+ iomap->flags = IOMAP_F_NEW;
trace_xfs_iomap_alloc(ip, offset, length, 0, &imap);
} else {
ASSERT(nimaps);
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index 3d70ece..14d7067 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -22,6 +22,7 @@ struct vm_fault;
* Flags for iomap mappings:
*/
#define IOMAP_F_MERGED 0x01 /* contains multiple blocks/extents */
+#define IOMAP_F_NEW 0x02 /* blocks have been newly allocated */
/*
* Magic value for blkno:
--
2.1.4
^ permalink raw reply related [flat|nested] 58+ messages in thread
* [PATCH 01/10] iomap: add IOMAP_F_NEW flag
@ 2016-09-09 16:34 ` Christoph Hellwig
0 siblings, 0 replies; 58+ messages in thread
From: Christoph Hellwig @ 2016-09-09 16:34 UTC (permalink / raw)
To: linux-xfs, linux-fsdevel, linux-nvdimm
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
fs/xfs/xfs_iomap.c | 2 ++
include/linux/iomap.h | 1 +
2 files changed, 3 insertions(+)
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index f96c8ff..5d06a2d 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -635,6 +635,7 @@ retry:
if (end_fsb != orig_end_fsb)
xfs_inode_set_eofblocks_tag(ip);
+ iomap->flags = IOMAP_F_NEW;
trace_xfs_iomap_alloc(ip, offset, count, 0, &got);
done:
if (isnullstartblock(got.br_startblock))
@@ -1000,6 +1001,7 @@ xfs_file_iomap_begin(
if (error)
return error;
+ iomap->flags = IOMAP_F_NEW;
trace_xfs_iomap_alloc(ip, offset, length, 0, &imap);
} else {
ASSERT(nimaps);
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index 3d70ece..14d7067 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -22,6 +22,7 @@ struct vm_fault;
* Flags for iomap mappings:
*/
#define IOMAP_F_MERGED 0x01 /* contains multiple blocks/extents */
+#define IOMAP_F_NEW 0x02 /* blocks have been newly allocated */
/*
* Magic value for blkno:
--
2.1.4
^ permalink raw reply related [flat|nested] 58+ messages in thread
[parent not found: <1473438884-674-2-git-send-email-hch-jcswGhMUV9g@public.gmane.org>]
* Re: [PATCH 01/10] iomap: add IOMAP_F_NEW flag
2016-09-09 16:34 ` Christoph Hellwig
@ 2016-09-13 22:43 ` Ross Zwisler
-1 siblings, 0 replies; 58+ messages in thread
From: Ross Zwisler @ 2016-09-13 22:43 UTC (permalink / raw)
To: Christoph Hellwig
Cc: linux-xfs-u79uwXL29TY76Z2rM5mHXA,
linux-fsdevel-u79uwXL29TY76Z2rM5mHXA,
linux-nvdimm-y27Ovi1pjclAfugRpC6u6w
On Fri, Sep 09, 2016 at 06:34:35PM +0200, Christoph Hellwig wrote:
> Signed-off-by: Christoph Hellwig <hch-jcswGhMUV9g@public.gmane.org>
Reviewed-by: Ross Zwisler <ross.zwisler-VuQAYsv1563Yd54FQh9/CA@public.gmane.org>
Though I guess I've never seen a patch with a completely empty changelog
before? :) Maybe it's okay for really easy patches? I just thought I'd get
shouted down for doing it.
> ---
> fs/xfs/xfs_iomap.c | 2 ++
> include/linux/iomap.h | 1 +
> 2 files changed, 3 insertions(+)
>
> diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
> index f96c8ff..5d06a2d 100644
> --- a/fs/xfs/xfs_iomap.c
> +++ b/fs/xfs/xfs_iomap.c
> @@ -635,6 +635,7 @@ retry:
> if (end_fsb != orig_end_fsb)
> xfs_inode_set_eofblocks_tag(ip);
>
> + iomap->flags = IOMAP_F_NEW;
> trace_xfs_iomap_alloc(ip, offset, count, 0, &got);
> done:
> if (isnullstartblock(got.br_startblock))
> @@ -1000,6 +1001,7 @@ xfs_file_iomap_begin(
> if (error)
> return error;
>
> + iomap->flags = IOMAP_F_NEW;
> trace_xfs_iomap_alloc(ip, offset, length, 0, &imap);
> } else {
> ASSERT(nimaps);
> diff --git a/include/linux/iomap.h b/include/linux/iomap.h
> index 3d70ece..14d7067 100644
> --- a/include/linux/iomap.h
> +++ b/include/linux/iomap.h
> @@ -22,6 +22,7 @@ struct vm_fault;
> * Flags for iomap mappings:
> */
> #define IOMAP_F_MERGED 0x01 /* contains multiple blocks/extents */
> +#define IOMAP_F_NEW 0x02 /* blocks have been newly allocated */
>
> /*
> * Magic value for blkno:
> --
> 2.1.4
>
> _______________________________________________
> Linux-nvdimm mailing list
> Linux-nvdimm-hn68Rpc1hR1g9hUCZPvPmw@public.gmane.org
> https://lists.01.org/mailman/listinfo/linux-nvdimm
^ permalink raw reply [flat|nested] 58+ messages in thread
* Re: [PATCH 01/10] iomap: add IOMAP_F_NEW flag
@ 2016-09-13 22:43 ` Ross Zwisler
0 siblings, 0 replies; 58+ messages in thread
From: Ross Zwisler @ 2016-09-13 22:43 UTC (permalink / raw)
To: Christoph Hellwig; +Cc: linux-xfs, linux-fsdevel, linux-nvdimm
On Fri, Sep 09, 2016 at 06:34:35PM +0200, Christoph Hellwig wrote:
> Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Ross Zwisler <ross.zwisler@linux.intel.com>
Though I guess I've never seen a patch with a completely empty changelog
before? :) Maybe it's okay for really easy patches? I just thought I'd get
shouted down for doing it.
> ---
> fs/xfs/xfs_iomap.c | 2 ++
> include/linux/iomap.h | 1 +
> 2 files changed, 3 insertions(+)
>
> diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
> index f96c8ff..5d06a2d 100644
> --- a/fs/xfs/xfs_iomap.c
> +++ b/fs/xfs/xfs_iomap.c
> @@ -635,6 +635,7 @@ retry:
> if (end_fsb != orig_end_fsb)
> xfs_inode_set_eofblocks_tag(ip);
>
> + iomap->flags = IOMAP_F_NEW;
> trace_xfs_iomap_alloc(ip, offset, count, 0, &got);
> done:
> if (isnullstartblock(got.br_startblock))
> @@ -1000,6 +1001,7 @@ xfs_file_iomap_begin(
> if (error)
> return error;
>
> + iomap->flags = IOMAP_F_NEW;
> trace_xfs_iomap_alloc(ip, offset, length, 0, &imap);
> } else {
> ASSERT(nimaps);
> diff --git a/include/linux/iomap.h b/include/linux/iomap.h
> index 3d70ece..14d7067 100644
> --- a/include/linux/iomap.h
> +++ b/include/linux/iomap.h
> @@ -22,6 +22,7 @@ struct vm_fault;
> * Flags for iomap mappings:
> */
> #define IOMAP_F_MERGED 0x01 /* contains multiple blocks/extents */
> +#define IOMAP_F_NEW 0x02 /* blocks have been newly allocated */
>
> /*
> * Magic value for blkno:
> --
> 2.1.4
>
> _______________________________________________
> Linux-nvdimm mailing list
> Linux-nvdimm@lists.01.org
> https://lists.01.org/mailman/listinfo/linux-nvdimm
^ permalink raw reply [flat|nested] 58+ messages in thread
* Re: [PATCH 01/10] iomap: add IOMAP_F_NEW flag
2016-09-13 22:43 ` Ross Zwisler
(?)
@ 2016-09-14 7:08 ` Christoph Hellwig
-1 siblings, 0 replies; 58+ messages in thread
From: Christoph Hellwig @ 2016-09-14 7:08 UTC (permalink / raw)
To: Ross Zwisler, Christoph Hellwig, linux-xfs, linux-fsdevel, linux-nvdimm
On Tue, Sep 13, 2016 at 04:43:57PM -0600, Ross Zwisler wrote:
> On Fri, Sep 09, 2016 at 06:34:35PM +0200, Christoph Hellwig wrote:
> > Signed-off-by: Christoph Hellwig <hch@lst.de>
>
> Reviewed-by: Ross Zwisler <ross.zwisler@linux.intel.com>
>
> Though I guess I've never seen a patch with a completely empty changelog
> before? :) Maybe it's okay for really easy patches? I just thought I'd get
> shouted down for doing it.
I think it's ok and others do it a lot too (e.g. Al Viro). But I've been
shouted at for it from others nevertheless..
^ permalink raw reply [flat|nested] 58+ messages in thread
* [PATCH 02/10] iomap: expose iomap_apply outside iomap.c
2016-09-09 16:34 iomap based DAX path Christoph Hellwig
@ 2016-09-09 16:34 ` Christoph Hellwig
2016-09-09 16:34 ` [PATCH 09/10] xfs: refactor xfs_setfilesize Christoph Hellwig
2016-09-09 16:34 ` [PATCH 10/10] xfs: use iomap to implement DAX Christoph Hellwig
2 siblings, 0 replies; 58+ messages in thread
From: Christoph Hellwig @ 2016-09-09 16:34 UTC (permalink / raw)
To: linux-xfs-u79uwXL29TY76Z2rM5mHXA,
linux-fsdevel-u79uwXL29TY76Z2rM5mHXA,
linux-nvdimm-y27Ovi1pjclAfugRpC6u6w
This allows the DAX code to use it.
Signed-off-by: Christoph Hellwig <hch-jcswGhMUV9g@public.gmane.org>
---
fs/internal.h | 11 +++++++++++
fs/iomap.c | 5 +----
2 files changed, 12 insertions(+), 4 deletions(-)
diff --git a/fs/internal.h b/fs/internal.h
index ba07376..8591786 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -12,6 +12,7 @@
struct super_block;
struct file_system_type;
struct iomap;
+struct iomap_ops;
struct linux_binprm;
struct path;
struct mount;
@@ -164,3 +165,13 @@ extern struct dentry_operations ns_dentry_operations;
extern int do_vfs_ioctl(struct file *file, unsigned int fd, unsigned int cmd,
unsigned long arg);
extern long vfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
+
+/*
+ * iomap support:
+ */
+typedef loff_t (*iomap_actor_t)(struct inode *inode, loff_t pos, loff_t len,
+ void *data, struct iomap *iomap);
+
+loff_t iomap_apply(struct inode *inode, loff_t pos, loff_t length,
+ unsigned flags, struct iomap_ops *ops, void *data,
+ iomap_actor_t actor);
diff --git a/fs/iomap.c b/fs/iomap.c
index 706270f..f4df9c6 100644
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -27,9 +27,6 @@
#include <linux/dax.h>
#include "internal.h"
-typedef loff_t (*iomap_actor_t)(struct inode *inode, loff_t pos, loff_t len,
- void *data, struct iomap *iomap);
-
/*
* Execute a iomap write on a segment of the mapping that spans a
* contiguous range of pages that have identical block mapping state.
@@ -41,7 +38,7 @@ typedef loff_t (*iomap_actor_t)(struct inode *inode, loff_t pos, loff_t len,
* resources they require in the iomap_begin call, and release them in the
* iomap_end call.
*/
-static loff_t
+loff_t
iomap_apply(struct inode *inode, loff_t pos, loff_t length, unsigned flags,
struct iomap_ops *ops, void *data, iomap_actor_t actor)
{
--
2.1.4
^ permalink raw reply related [flat|nested] 58+ messages in thread
* [PATCH 02/10] iomap: expose iomap_apply outside iomap.c
@ 2016-09-09 16:34 ` Christoph Hellwig
0 siblings, 0 replies; 58+ messages in thread
From: Christoph Hellwig @ 2016-09-09 16:34 UTC (permalink / raw)
To: linux-xfs, linux-fsdevel, linux-nvdimm
This allows the DAX code to use it.
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
fs/internal.h | 11 +++++++++++
fs/iomap.c | 5 +----
2 files changed, 12 insertions(+), 4 deletions(-)
diff --git a/fs/internal.h b/fs/internal.h
index ba07376..8591786 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -12,6 +12,7 @@
struct super_block;
struct file_system_type;
struct iomap;
+struct iomap_ops;
struct linux_binprm;
struct path;
struct mount;
@@ -164,3 +165,13 @@ extern struct dentry_operations ns_dentry_operations;
extern int do_vfs_ioctl(struct file *file, unsigned int fd, unsigned int cmd,
unsigned long arg);
extern long vfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
+
+/*
+ * iomap support:
+ */
+typedef loff_t (*iomap_actor_t)(struct inode *inode, loff_t pos, loff_t len,
+ void *data, struct iomap *iomap);
+
+loff_t iomap_apply(struct inode *inode, loff_t pos, loff_t length,
+ unsigned flags, struct iomap_ops *ops, void *data,
+ iomap_actor_t actor);
diff --git a/fs/iomap.c b/fs/iomap.c
index 706270f..f4df9c6 100644
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -27,9 +27,6 @@
#include <linux/dax.h>
#include "internal.h"
-typedef loff_t (*iomap_actor_t)(struct inode *inode, loff_t pos, loff_t len,
- void *data, struct iomap *iomap);
-
/*
* Execute a iomap write on a segment of the mapping that spans a
* contiguous range of pages that have identical block mapping state.
@@ -41,7 +38,7 @@ typedef loff_t (*iomap_actor_t)(struct inode *inode, loff_t pos, loff_t len,
* resources they require in the iomap_begin call, and release them in the
* iomap_end call.
*/
-static loff_t
+loff_t
iomap_apply(struct inode *inode, loff_t pos, loff_t length, unsigned flags,
struct iomap_ops *ops, void *data, iomap_actor_t actor)
{
--
2.1.4
^ permalink raw reply related [flat|nested] 58+ messages in thread
[parent not found: <1473438884-674-3-git-send-email-hch-jcswGhMUV9g@public.gmane.org>]
* Re: [PATCH 02/10] iomap: expose iomap_apply outside iomap.c
2016-09-09 16:34 ` Christoph Hellwig
@ 2016-09-13 22:48 ` Ross Zwisler
-1 siblings, 0 replies; 58+ messages in thread
From: Ross Zwisler @ 2016-09-13 22:48 UTC (permalink / raw)
To: Christoph Hellwig
Cc: linux-xfs-u79uwXL29TY76Z2rM5mHXA,
linux-fsdevel-u79uwXL29TY76Z2rM5mHXA,
linux-nvdimm-y27Ovi1pjclAfugRpC6u6w
On Fri, Sep 09, 2016 at 06:34:36PM +0200, Christoph Hellwig wrote:
> This allows the DAX code to use it.
>
> Signed-off-by: Christoph Hellwig <hch-jcswGhMUV9g@public.gmane.org>
Reviewed-by: Ross Zwisler <ross.zwisler-VuQAYsv1563Yd54FQh9/CA@public.gmane.org>
> ---
> fs/internal.h | 11 +++++++++++
> fs/iomap.c | 5 +----
> 2 files changed, 12 insertions(+), 4 deletions(-)
>
> diff --git a/fs/internal.h b/fs/internal.h
> index ba07376..8591786 100644
> --- a/fs/internal.h
> +++ b/fs/internal.h
> @@ -12,6 +12,7 @@
> struct super_block;
> struct file_system_type;
> struct iomap;
> +struct iomap_ops;
> struct linux_binprm;
> struct path;
> struct mount;
> @@ -164,3 +165,13 @@ extern struct dentry_operations ns_dentry_operations;
> extern int do_vfs_ioctl(struct file *file, unsigned int fd, unsigned int cmd,
> unsigned long arg);
> extern long vfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
> +
> +/*
> + * iomap support:
> + */
> +typedef loff_t (*iomap_actor_t)(struct inode *inode, loff_t pos, loff_t len,
> + void *data, struct iomap *iomap);
> +
> +loff_t iomap_apply(struct inode *inode, loff_t pos, loff_t length,
> + unsigned flags, struct iomap_ops *ops, void *data,
> + iomap_actor_t actor);
> diff --git a/fs/iomap.c b/fs/iomap.c
> index 706270f..f4df9c6 100644
> --- a/fs/iomap.c
> +++ b/fs/iomap.c
> @@ -27,9 +27,6 @@
> #include <linux/dax.h>
> #include "internal.h"
>
> -typedef loff_t (*iomap_actor_t)(struct inode *inode, loff_t pos, loff_t len,
> - void *data, struct iomap *iomap);
> -
> /*
> * Execute a iomap write on a segment of the mapping that spans a
> * contiguous range of pages that have identical block mapping state.
> @@ -41,7 +38,7 @@ typedef loff_t (*iomap_actor_t)(struct inode *inode, loff_t pos, loff_t len,
> * resources they require in the iomap_begin call, and release them in the
> * iomap_end call.
> */
> -static loff_t
> +loff_t
> iomap_apply(struct inode *inode, loff_t pos, loff_t length, unsigned flags,
> struct iomap_ops *ops, void *data, iomap_actor_t actor)
> {
> --
> 2.1.4
>
> _______________________________________________
> Linux-nvdimm mailing list
> Linux-nvdimm-hn68Rpc1hR1g9hUCZPvPmw@public.gmane.org
> https://lists.01.org/mailman/listinfo/linux-nvdimm
^ permalink raw reply [flat|nested] 58+ messages in thread
* Re: [PATCH 02/10] iomap: expose iomap_apply outside iomap.c
@ 2016-09-13 22:48 ` Ross Zwisler
0 siblings, 0 replies; 58+ messages in thread
From: Ross Zwisler @ 2016-09-13 22:48 UTC (permalink / raw)
To: Christoph Hellwig; +Cc: linux-xfs, linux-fsdevel, linux-nvdimm
On Fri, Sep 09, 2016 at 06:34:36PM +0200, Christoph Hellwig wrote:
> This allows the DAX code to use it.
>
> Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Ross Zwisler <ross.zwisler@linux.intel.com>
> ---
> fs/internal.h | 11 +++++++++++
> fs/iomap.c | 5 +----
> 2 files changed, 12 insertions(+), 4 deletions(-)
>
> diff --git a/fs/internal.h b/fs/internal.h
> index ba07376..8591786 100644
> --- a/fs/internal.h
> +++ b/fs/internal.h
> @@ -12,6 +12,7 @@
> struct super_block;
> struct file_system_type;
> struct iomap;
> +struct iomap_ops;
> struct linux_binprm;
> struct path;
> struct mount;
> @@ -164,3 +165,13 @@ extern struct dentry_operations ns_dentry_operations;
> extern int do_vfs_ioctl(struct file *file, unsigned int fd, unsigned int cmd,
> unsigned long arg);
> extern long vfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
> +
> +/*
> + * iomap support:
> + */
> +typedef loff_t (*iomap_actor_t)(struct inode *inode, loff_t pos, loff_t len,
> + void *data, struct iomap *iomap);
> +
> +loff_t iomap_apply(struct inode *inode, loff_t pos, loff_t length,
> + unsigned flags, struct iomap_ops *ops, void *data,
> + iomap_actor_t actor);
> diff --git a/fs/iomap.c b/fs/iomap.c
> index 706270f..f4df9c6 100644
> --- a/fs/iomap.c
> +++ b/fs/iomap.c
> @@ -27,9 +27,6 @@
> #include <linux/dax.h>
> #include "internal.h"
>
> -typedef loff_t (*iomap_actor_t)(struct inode *inode, loff_t pos, loff_t len,
> - void *data, struct iomap *iomap);
> -
> /*
> * Execute a iomap write on a segment of the mapping that spans a
> * contiguous range of pages that have identical block mapping state.
> @@ -41,7 +38,7 @@ typedef loff_t (*iomap_actor_t)(struct inode *inode, loff_t pos, loff_t len,
> * resources they require in the iomap_begin call, and release them in the
> * iomap_end call.
> */
> -static loff_t
> +loff_t
> iomap_apply(struct inode *inode, loff_t pos, loff_t length, unsigned flags,
> struct iomap_ops *ops, void *data, iomap_actor_t actor)
> {
> --
> 2.1.4
>
> _______________________________________________
> Linux-nvdimm mailing list
> Linux-nvdimm@lists.01.org
> https://lists.01.org/mailman/listinfo/linux-nvdimm
^ permalink raw reply [flat|nested] 58+ messages in thread
* [PATCH 03/10] dax: don't pass buffer_head to dax_insert_mapping
2016-09-09 16:34 iomap based DAX path Christoph Hellwig
@ 2016-09-09 16:34 ` Christoph Hellwig
2016-09-09 16:34 ` [PATCH 09/10] xfs: refactor xfs_setfilesize Christoph Hellwig
2016-09-09 16:34 ` [PATCH 10/10] xfs: use iomap to implement DAX Christoph Hellwig
2 siblings, 0 replies; 58+ messages in thread
From: Christoph Hellwig @ 2016-09-09 16:34 UTC (permalink / raw)
To: linux-xfs-u79uwXL29TY76Z2rM5mHXA,
linux-fsdevel-u79uwXL29TY76Z2rM5mHXA,
linux-nvdimm-y27Ovi1pjclAfugRpC6u6w
This way we can use this helper for the iomap based DAX implementation
as well.
Signed-off-by: Christoph Hellwig <hch-jcswGhMUV9g@public.gmane.org>
---
fs/dax.c | 12 ++++++------
1 file changed, 6 insertions(+), 6 deletions(-)
diff --git a/fs/dax.c b/fs/dax.c
index 993dc6f..98463bb 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -790,14 +790,13 @@ int dax_writeback_mapping_range(struct address_space *mapping,
EXPORT_SYMBOL_GPL(dax_writeback_mapping_range);
static int dax_insert_mapping(struct address_space *mapping,
- struct buffer_head *bh, void **entryp,
- struct vm_area_struct *vma, struct vm_fault *vmf)
+ struct block_device *bdev, sector_t sector, size_t size,
+ void **entryp, struct vm_area_struct *vma, struct vm_fault *vmf)
{
unsigned long vaddr = (unsigned long)vmf->virtual_address;
- struct block_device *bdev = bh->b_bdev;
struct blk_dax_ctl dax = {
- .sector = to_sector(bh, mapping->host),
- .size = bh->b_size,
+ .sector = sector,
+ .size = size,
};
void *ret;
void *entry = *entryp;
@@ -898,7 +897,8 @@ int dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
/* Filesystem should not return unwritten buffers to us! */
WARN_ON_ONCE(buffer_unwritten(&bh) || buffer_new(&bh));
- error = dax_insert_mapping(mapping, &bh, &entry, vma, vmf);
+ error = dax_insert_mapping(mapping, bh.b_bdev, to_sector(&bh, inode),
+ bh.b_size, &entry, vma, vmf);
unlock_entry:
put_locked_mapping_entry(mapping, vmf->pgoff, entry);
out:
--
2.1.4
^ permalink raw reply related [flat|nested] 58+ messages in thread
* [PATCH 03/10] dax: don't pass buffer_head to dax_insert_mapping
@ 2016-09-09 16:34 ` Christoph Hellwig
0 siblings, 0 replies; 58+ messages in thread
From: Christoph Hellwig @ 2016-09-09 16:34 UTC (permalink / raw)
To: linux-xfs, linux-fsdevel, linux-nvdimm
This way we can use this helper for the iomap based DAX implementation
as well.
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
fs/dax.c | 12 ++++++------
1 file changed, 6 insertions(+), 6 deletions(-)
diff --git a/fs/dax.c b/fs/dax.c
index 993dc6f..98463bb 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -790,14 +790,13 @@ int dax_writeback_mapping_range(struct address_space *mapping,
EXPORT_SYMBOL_GPL(dax_writeback_mapping_range);
static int dax_insert_mapping(struct address_space *mapping,
- struct buffer_head *bh, void **entryp,
- struct vm_area_struct *vma, struct vm_fault *vmf)
+ struct block_device *bdev, sector_t sector, size_t size,
+ void **entryp, struct vm_area_struct *vma, struct vm_fault *vmf)
{
unsigned long vaddr = (unsigned long)vmf->virtual_address;
- struct block_device *bdev = bh->b_bdev;
struct blk_dax_ctl dax = {
- .sector = to_sector(bh, mapping->host),
- .size = bh->b_size,
+ .sector = sector,
+ .size = size,
};
void *ret;
void *entry = *entryp;
@@ -898,7 +897,8 @@ int dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
/* Filesystem should not return unwritten buffers to us! */
WARN_ON_ONCE(buffer_unwritten(&bh) || buffer_new(&bh));
- error = dax_insert_mapping(mapping, &bh, &entry, vma, vmf);
+ error = dax_insert_mapping(mapping, bh.b_bdev, to_sector(&bh, inode),
+ bh.b_size, &entry, vma, vmf);
unlock_entry:
put_locked_mapping_entry(mapping, vmf->pgoff, entry);
out:
--
2.1.4
^ permalink raw reply related [flat|nested] 58+ messages in thread
[parent not found: <1473438884-674-4-git-send-email-hch-jcswGhMUV9g@public.gmane.org>]
* Re: [PATCH 03/10] dax: don't pass buffer_head to dax_insert_mapping
2016-09-09 16:34 ` Christoph Hellwig
@ 2016-09-13 22:53 ` Ross Zwisler
-1 siblings, 0 replies; 58+ messages in thread
From: Ross Zwisler @ 2016-09-13 22:53 UTC (permalink / raw)
To: Christoph Hellwig
Cc: linux-xfs-u79uwXL29TY76Z2rM5mHXA,
linux-fsdevel-u79uwXL29TY76Z2rM5mHXA,
linux-nvdimm-y27Ovi1pjclAfugRpC6u6w
On Fri, Sep 09, 2016 at 06:34:37PM +0200, Christoph Hellwig wrote:
> This way we can use this helper for the iomap based DAX implementation
> as well.
>
> Signed-off-by: Christoph Hellwig <hch-jcswGhMUV9g@public.gmane.org>
Reviewed-by: Ross Zwisler <ross.zwisler-VuQAYsv1563Yd54FQh9/CA@public.gmane.org>
> ---
> fs/dax.c | 12 ++++++------
> 1 file changed, 6 insertions(+), 6 deletions(-)
>
> diff --git a/fs/dax.c b/fs/dax.c
> index 993dc6f..98463bb 100644
> --- a/fs/dax.c
> +++ b/fs/dax.c
> @@ -790,14 +790,13 @@ int dax_writeback_mapping_range(struct address_space *mapping,
> EXPORT_SYMBOL_GPL(dax_writeback_mapping_range);
>
> static int dax_insert_mapping(struct address_space *mapping,
> - struct buffer_head *bh, void **entryp,
> - struct vm_area_struct *vma, struct vm_fault *vmf)
> + struct block_device *bdev, sector_t sector, size_t size,
> + void **entryp, struct vm_area_struct *vma, struct vm_fault *vmf)
> {
> unsigned long vaddr = (unsigned long)vmf->virtual_address;
> - struct block_device *bdev = bh->b_bdev;
> struct blk_dax_ctl dax = {
> - .sector = to_sector(bh, mapping->host),
> - .size = bh->b_size,
> + .sector = sector,
> + .size = size,
> };
> void *ret;
> void *entry = *entryp;
> @@ -898,7 +897,8 @@ int dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
>
> /* Filesystem should not return unwritten buffers to us! */
> WARN_ON_ONCE(buffer_unwritten(&bh) || buffer_new(&bh));
> - error = dax_insert_mapping(mapping, &bh, &entry, vma, vmf);
> + error = dax_insert_mapping(mapping, bh.b_bdev, to_sector(&bh, inode),
> + bh.b_size, &entry, vma, vmf);
> unlock_entry:
> put_locked_mapping_entry(mapping, vmf->pgoff, entry);
> out:
> --
> 2.1.4
>
> _______________________________________________
> Linux-nvdimm mailing list
> Linux-nvdimm-hn68Rpc1hR1g9hUCZPvPmw@public.gmane.org
> https://lists.01.org/mailman/listinfo/linux-nvdimm
^ permalink raw reply [flat|nested] 58+ messages in thread
* Re: [PATCH 03/10] dax: don't pass buffer_head to dax_insert_mapping
@ 2016-09-13 22:53 ` Ross Zwisler
0 siblings, 0 replies; 58+ messages in thread
From: Ross Zwisler @ 2016-09-13 22:53 UTC (permalink / raw)
To: Christoph Hellwig; +Cc: linux-xfs, linux-fsdevel, linux-nvdimm
On Fri, Sep 09, 2016 at 06:34:37PM +0200, Christoph Hellwig wrote:
> This way we can use this helper for the iomap based DAX implementation
> as well.
>
> Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Ross Zwisler <ross.zwisler@linux.intel.com>
> ---
> fs/dax.c | 12 ++++++------
> 1 file changed, 6 insertions(+), 6 deletions(-)
>
> diff --git a/fs/dax.c b/fs/dax.c
> index 993dc6f..98463bb 100644
> --- a/fs/dax.c
> +++ b/fs/dax.c
> @@ -790,14 +790,13 @@ int dax_writeback_mapping_range(struct address_space *mapping,
> EXPORT_SYMBOL_GPL(dax_writeback_mapping_range);
>
> static int dax_insert_mapping(struct address_space *mapping,
> - struct buffer_head *bh, void **entryp,
> - struct vm_area_struct *vma, struct vm_fault *vmf)
> + struct block_device *bdev, sector_t sector, size_t size,
> + void **entryp, struct vm_area_struct *vma, struct vm_fault *vmf)
> {
> unsigned long vaddr = (unsigned long)vmf->virtual_address;
> - struct block_device *bdev = bh->b_bdev;
> struct blk_dax_ctl dax = {
> - .sector = to_sector(bh, mapping->host),
> - .size = bh->b_size,
> + .sector = sector,
> + .size = size,
> };
> void *ret;
> void *entry = *entryp;
> @@ -898,7 +897,8 @@ int dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
>
> /* Filesystem should not return unwritten buffers to us! */
> WARN_ON_ONCE(buffer_unwritten(&bh) || buffer_new(&bh));
> - error = dax_insert_mapping(mapping, &bh, &entry, vma, vmf);
> + error = dax_insert_mapping(mapping, bh.b_bdev, to_sector(&bh, inode),
> + bh.b_size, &entry, vma, vmf);
> unlock_entry:
> put_locked_mapping_entry(mapping, vmf->pgoff, entry);
> out:
> --
> 2.1.4
>
> _______________________________________________
> Linux-nvdimm mailing list
> Linux-nvdimm@lists.01.org
> https://lists.01.org/mailman/listinfo/linux-nvdimm
^ permalink raw reply [flat|nested] 58+ messages in thread
* [PATCH 04/10] dax: don't pass buffer_head to copy_user_dax
2016-09-09 16:34 iomap based DAX path Christoph Hellwig
@ 2016-09-09 16:34 ` Christoph Hellwig
2016-09-09 16:34 ` [PATCH 09/10] xfs: refactor xfs_setfilesize Christoph Hellwig
2016-09-09 16:34 ` [PATCH 10/10] xfs: use iomap to implement DAX Christoph Hellwig
2 siblings, 0 replies; 58+ messages in thread
From: Christoph Hellwig @ 2016-09-09 16:34 UTC (permalink / raw)
To: linux-xfs-u79uwXL29TY76Z2rM5mHXA,
linux-fsdevel-u79uwXL29TY76Z2rM5mHXA,
linux-nvdimm-y27Ovi1pjclAfugRpC6u6w
This way we can use this helper for the iomap based DAX implementation
as well.
Signed-off-by: Christoph Hellwig <hch-jcswGhMUV9g@public.gmane.org>
---
fs/dax.c | 12 ++++++------
1 file changed, 6 insertions(+), 6 deletions(-)
diff --git a/fs/dax.c b/fs/dax.c
index 98463bb..84343ce 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -580,14 +580,13 @@ static int dax_load_hole(struct address_space *mapping, void *entry,
return VM_FAULT_LOCKED;
}
-static int copy_user_bh(struct page *to, struct inode *inode,
- struct buffer_head *bh, unsigned long vaddr)
+static int copy_user_dax(struct block_device *bdev, sector_t sector, size_t size,
+ struct page *to, unsigned long vaddr)
{
struct blk_dax_ctl dax = {
- .sector = to_sector(bh, inode),
- .size = bh->b_size,
+ .sector = sector,
+ .size = size,
};
- struct block_device *bdev = bh->b_bdev;
void *vto;
if (dax_map_atomic(bdev, &dax) < 0)
@@ -867,7 +866,8 @@ int dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
if (vmf->cow_page) {
struct page *new_page = vmf->cow_page;
if (buffer_written(&bh))
- error = copy_user_bh(new_page, inode, &bh, vaddr);
+ error = copy_user_dax(bh.b_bdev, to_sector(&bh, inode),
+ bh.b_size, new_page, vaddr);
else
clear_user_highpage(new_page, vaddr);
if (error)
--
2.1.4
^ permalink raw reply related [flat|nested] 58+ messages in thread
* [PATCH 04/10] dax: don't pass buffer_head to copy_user_dax
@ 2016-09-09 16:34 ` Christoph Hellwig
0 siblings, 0 replies; 58+ messages in thread
From: Christoph Hellwig @ 2016-09-09 16:34 UTC (permalink / raw)
To: linux-xfs, linux-fsdevel, linux-nvdimm
This way we can use this helper for the iomap based DAX implementation
as well.
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
fs/dax.c | 12 ++++++------
1 file changed, 6 insertions(+), 6 deletions(-)
diff --git a/fs/dax.c b/fs/dax.c
index 98463bb..84343ce 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -580,14 +580,13 @@ static int dax_load_hole(struct address_space *mapping, void *entry,
return VM_FAULT_LOCKED;
}
-static int copy_user_bh(struct page *to, struct inode *inode,
- struct buffer_head *bh, unsigned long vaddr)
+static int copy_user_dax(struct block_device *bdev, sector_t sector, size_t size,
+ struct page *to, unsigned long vaddr)
{
struct blk_dax_ctl dax = {
- .sector = to_sector(bh, inode),
- .size = bh->b_size,
+ .sector = sector,
+ .size = size,
};
- struct block_device *bdev = bh->b_bdev;
void *vto;
if (dax_map_atomic(bdev, &dax) < 0)
@@ -867,7 +866,8 @@ int dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
if (vmf->cow_page) {
struct page *new_page = vmf->cow_page;
if (buffer_written(&bh))
- error = copy_user_bh(new_page, inode, &bh, vaddr);
+ error = copy_user_dax(bh.b_bdev, to_sector(&bh, inode),
+ bh.b_size, new_page, vaddr);
else
clear_user_highpage(new_page, vaddr);
if (error)
--
2.1.4
^ permalink raw reply related [flat|nested] 58+ messages in thread
* Re: [PATCH 04/10] dax: don't pass buffer_head to copy_user_dax
2016-09-09 16:34 ` Christoph Hellwig
(?)
@ 2016-09-13 22:54 ` Ross Zwisler
-1 siblings, 0 replies; 58+ messages in thread
From: Ross Zwisler @ 2016-09-13 22:54 UTC (permalink / raw)
To: Christoph Hellwig; +Cc: linux-xfs, linux-fsdevel, linux-nvdimm
On Fri, Sep 09, 2016 at 06:34:38PM +0200, Christoph Hellwig wrote:
> This way we can use this helper for the iomap based DAX implementation
> as well.
>
> Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Ross Zwisler <ross.zwisler@linux.intel.com>
> ---
> fs/dax.c | 12 ++++++------
> 1 file changed, 6 insertions(+), 6 deletions(-)
>
> diff --git a/fs/dax.c b/fs/dax.c
> index 98463bb..84343ce 100644
> --- a/fs/dax.c
> +++ b/fs/dax.c
> @@ -580,14 +580,13 @@ static int dax_load_hole(struct address_space *mapping, void *entry,
> return VM_FAULT_LOCKED;
> }
>
> -static int copy_user_bh(struct page *to, struct inode *inode,
> - struct buffer_head *bh, unsigned long vaddr)
> +static int copy_user_dax(struct block_device *bdev, sector_t sector, size_t size,
> + struct page *to, unsigned long vaddr)
> {
> struct blk_dax_ctl dax = {
> - .sector = to_sector(bh, inode),
> - .size = bh->b_size,
> + .sector = sector,
> + .size = size,
> };
> - struct block_device *bdev = bh->b_bdev;
> void *vto;
>
> if (dax_map_atomic(bdev, &dax) < 0)
> @@ -867,7 +866,8 @@ int dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
> if (vmf->cow_page) {
> struct page *new_page = vmf->cow_page;
> if (buffer_written(&bh))
> - error = copy_user_bh(new_page, inode, &bh, vaddr);
> + error = copy_user_dax(bh.b_bdev, to_sector(&bh, inode),
> + bh.b_size, new_page, vaddr);
> else
> clear_user_highpage(new_page, vaddr);
> if (error)
> --
> 2.1.4
>
> _______________________________________________
> Linux-nvdimm mailing list
> Linux-nvdimm@lists.01.org
> https://lists.01.org/mailman/listinfo/linux-nvdimm
^ permalink raw reply [flat|nested] 58+ messages in thread
* [PATCH 05/10] dax: provide an iomap based dax read/write path
2016-09-09 16:34 iomap based DAX path Christoph Hellwig
@ 2016-09-09 16:34 ` Christoph Hellwig
2016-09-09 16:34 ` [PATCH 09/10] xfs: refactor xfs_setfilesize Christoph Hellwig
2016-09-09 16:34 ` [PATCH 10/10] xfs: use iomap to implement DAX Christoph Hellwig
2 siblings, 0 replies; 58+ messages in thread
From: Christoph Hellwig @ 2016-09-09 16:34 UTC (permalink / raw)
To: linux-xfs-u79uwXL29TY76Z2rM5mHXA,
linux-fsdevel-u79uwXL29TY76Z2rM5mHXA,
linux-nvdimm-y27Ovi1pjclAfugRpC6u6w
This is a much simpler implementation of the DAX read/write path that makes
use of the iomap infrastructure. It does not try to mirror the direct I/O
calling conventions and thus doesn't have to deal with i_dio_count or the
end_io handler, but instead leaves locking and filesystem-specific I/O
completion to the caller.
Signed-off-by: Christoph Hellwig <hch-jcswGhMUV9g@public.gmane.org>
---
fs/dax.c | 103 ++++++++++++++++++++++++++++++++++++++++++++++++++
include/linux/iomap.h | 2 +
2 files changed, 105 insertions(+)
diff --git a/fs/dax.c b/fs/dax.c
index 84343ce..57ad456 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -31,6 +31,8 @@
#include <linux/vmstat.h>
#include <linux/pfn_t.h>
#include <linux/sizes.h>
+#include <linux/iomap.h>
+#include "internal.h"
/*
* We use lowest available bit in exceptional entry for locking, other two
@@ -1241,3 +1243,104 @@ int dax_truncate_page(struct inode *inode, loff_t from, get_block_t get_block)
return dax_zero_page_range(inode, from, length, get_block);
}
EXPORT_SYMBOL_GPL(dax_truncate_page);
+
+#ifdef CONFIG_FS_IOMAP
+static loff_t
+iomap_dax_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
+ struct iomap *iomap)
+{
+ struct iov_iter *iter = data;
+ loff_t end = pos + length, done = 0;
+ ssize_t ret = 0;
+
+ if (iov_iter_rw(iter) == READ) {
+ end = min(end, i_size_read(inode));
+ if (pos >= end)
+ return 0;
+
+ if (iomap->type == IOMAP_HOLE || iomap->type == IOMAP_UNWRITTEN)
+ return iov_iter_zero(min(length, end - pos), iter);
+ }
+
+ if (WARN_ON_ONCE(iomap->type != IOMAP_MAPPED))
+ return -EIO;
+
+ while (pos < end) {
+ unsigned offset = pos & (PAGE_SIZE - 1);
+ struct blk_dax_ctl dax = { 0 };
+ ssize_t map_len;
+
+ dax.sector = iomap->blkno +
+ (((pos & PAGE_MASK) - iomap->offset) >> 9);
+ dax.size = (length + offset + PAGE_SIZE - 1) & PAGE_MASK;
+ map_len = dax_map_atomic(iomap->bdev, &dax);
+ if (map_len < 0) {
+ ret = map_len;
+ break;
+ }
+
+ dax.addr += offset;
+ map_len -= offset;
+ if (map_len > end - pos)
+ map_len = end - pos;
+
+ if (iov_iter_rw(iter) == WRITE)
+ map_len = copy_from_iter_pmem(dax.addr, map_len, iter);
+ else
+ map_len = copy_to_iter(dax.addr, map_len, iter);
+ dax_unmap_atomic(iomap->bdev, &dax);
+ if (map_len <= 0) {
+ ret = map_len ? map_len : -EFAULT;
+ break;
+ }
+
+ pos += map_len;
+ length -= map_len;
+ done += map_len;
+ }
+
+ return done ? done : ret;
+}
+
+/**
+ * iomap_dax_rw - Perform I/O to a DAX file
+ * @iocb: The control block for this I/O
+ * @iter: The addresses to do I/O from or to
+ * @ops: iomap ops passed from the file system
+ *
+ * This funtions performs read and write operations to directly mapped
+ * persistent memory. The callers needs to take care of read/write exclusion
+ * and evicting any page cache pages in the region under I/O.
+ */
+ssize_t
+iomap_dax_rw(struct kiocb *iocb, struct iov_iter *iter,
+ struct iomap_ops *ops)
+{
+ struct inode *inode = iocb->ki_filp->f_mapping->host;
+ loff_t pos = iocb->ki_pos, ret = 0, done = 0;
+ size_t count = iov_iter_count(iter);
+ unsigned flags = 0;
+
+ if (!count)
+ return 0;
+
+ if (iov_iter_rw(iter) == WRITE)
+ flags |= IOMAP_WRITE;
+
+ do {
+ ret = iomap_apply(inode, pos, count, flags, ops, iter,
+ iomap_dax_actor);
+ if (ret <= 0)
+ break;
+ pos += ret;
+ done += ret;
+ } while ((count = iov_iter_count(iter)));
+
+ if (!done)
+ return ret;
+
+ iocb->ki_pos += done;
+ return done;
+}
+EXPORT_SYMBOL_GPL(iomap_dax_rw);
+#endif /* CONFIG_FS_IOMAP */
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index 14d7067..3d5f785 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -65,6 +65,8 @@ struct iomap_ops {
ssize_t iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *from,
struct iomap_ops *ops);
+ssize_t iomap_dax_rw(struct kiocb *iocb, struct iov_iter *iter,
+ struct iomap_ops *ops);
int iomap_zero_range(struct inode *inode, loff_t pos, loff_t len,
bool *did_zero, struct iomap_ops *ops);
int iomap_truncate_page(struct inode *inode, loff_t pos, bool *did_zero,
--
2.1.4
^ permalink raw reply related [flat|nested] 58+ messages in thread
* [PATCH 05/10] dax: provide an iomap based dax read/write path
@ 2016-09-09 16:34 ` Christoph Hellwig
0 siblings, 0 replies; 58+ messages in thread
From: Christoph Hellwig @ 2016-09-09 16:34 UTC (permalink / raw)
To: linux-xfs, linux-fsdevel, linux-nvdimm
This is a much simpler implementation of the DAX read/write path that makes
use of the iomap infrastructure. It does not try to mirror the direct I/O
calling conventions and thus doesn't have to deal with i_dio_count or the
end_io handler, but instead leaves locking and filesystem-specific I/O
completion to the caller.
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
fs/dax.c | 103 ++++++++++++++++++++++++++++++++++++++++++++++++++
include/linux/iomap.h | 2 +
2 files changed, 105 insertions(+)
diff --git a/fs/dax.c b/fs/dax.c
index 84343ce..57ad456 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -31,6 +31,8 @@
#include <linux/vmstat.h>
#include <linux/pfn_t.h>
#include <linux/sizes.h>
+#include <linux/iomap.h>
+#include "internal.h"
/*
* We use lowest available bit in exceptional entry for locking, other two
@@ -1241,3 +1243,104 @@ int dax_truncate_page(struct inode *inode, loff_t from, get_block_t get_block)
return dax_zero_page_range(inode, from, length, get_block);
}
EXPORT_SYMBOL_GPL(dax_truncate_page);
+
+#ifdef CONFIG_FS_IOMAP
+static loff_t
+iomap_dax_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
+ struct iomap *iomap)
+{
+ struct iov_iter *iter = data;
+ loff_t end = pos + length, done = 0;
+ ssize_t ret = 0;
+
+ if (iov_iter_rw(iter) == READ) {
+ end = min(end, i_size_read(inode));
+ if (pos >= end)
+ return 0;
+
+ if (iomap->type == IOMAP_HOLE || iomap->type == IOMAP_UNWRITTEN)
+ return iov_iter_zero(min(length, end - pos), iter);
+ }
+
+ if (WARN_ON_ONCE(iomap->type != IOMAP_MAPPED))
+ return -EIO;
+
+ while (pos < end) {
+ unsigned offset = pos & (PAGE_SIZE - 1);
+ struct blk_dax_ctl dax = { 0 };
+ ssize_t map_len;
+
+ dax.sector = iomap->blkno +
+ (((pos & PAGE_MASK) - iomap->offset) >> 9);
+ dax.size = (length + offset + PAGE_SIZE - 1) & PAGE_MASK;
+ map_len = dax_map_atomic(iomap->bdev, &dax);
+ if (map_len < 0) {
+ ret = map_len;
+ break;
+ }
+
+ dax.addr += offset;
+ map_len -= offset;
+ if (map_len > end - pos)
+ map_len = end - pos;
+
+ if (iov_iter_rw(iter) == WRITE)
+ map_len = copy_from_iter_pmem(dax.addr, map_len, iter);
+ else
+ map_len = copy_to_iter(dax.addr, map_len, iter);
+ dax_unmap_atomic(iomap->bdev, &dax);
+ if (map_len <= 0) {
+ ret = map_len ? map_len : -EFAULT;
+ break;
+ }
+
+ pos += map_len;
+ length -= map_len;
+ done += map_len;
+ }
+
+ return done ? done : ret;
+}
+
+/**
+ * iomap_dax_rw - Perform I/O to a DAX file
+ * @iocb: The control block for this I/O
+ * @iter: The addresses to do I/O from or to
+ * @ops: iomap ops passed from the file system
+ *
+ * This funtions performs read and write operations to directly mapped
+ * persistent memory. The callers needs to take care of read/write exclusion
+ * and evicting any page cache pages in the region under I/O.
+ */
+ssize_t
+iomap_dax_rw(struct kiocb *iocb, struct iov_iter *iter,
+ struct iomap_ops *ops)
+{
+ struct inode *inode = iocb->ki_filp->f_mapping->host;
+ loff_t pos = iocb->ki_pos, ret = 0, done = 0;
+ size_t count = iov_iter_count(iter);
+ unsigned flags = 0;
+
+ if (!count)
+ return 0;
+
+ if (iov_iter_rw(iter) == WRITE)
+ flags |= IOMAP_WRITE;
+
+ do {
+ ret = iomap_apply(inode, pos, count, flags, ops, iter,
+ iomap_dax_actor);
+ if (ret <= 0)
+ break;
+ pos += ret;
+ done += ret;
+ } while ((count = iov_iter_count(iter)));
+
+ if (!done)
+ return ret;
+
+ iocb->ki_pos += done;
+ return done;
+}
+EXPORT_SYMBOL_GPL(iomap_dax_rw);
+#endif /* CONFIG_FS_IOMAP */
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index 14d7067..3d5f785 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -65,6 +65,8 @@ struct iomap_ops {
ssize_t iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *from,
struct iomap_ops *ops);
+ssize_t iomap_dax_rw(struct kiocb *iocb, struct iov_iter *iter,
+ struct iomap_ops *ops);
int iomap_zero_range(struct inode *inode, loff_t pos, loff_t len,
bool *did_zero, struct iomap_ops *ops);
int iomap_truncate_page(struct inode *inode, loff_t pos, bool *did_zero,
--
2.1.4
^ permalink raw reply related [flat|nested] 58+ messages in thread
[parent not found: <1473438884-674-6-git-send-email-hch-jcswGhMUV9g@public.gmane.org>]
* Re: [PATCH 05/10] dax: provide an iomap based dax read/write path
2016-09-09 16:34 ` Christoph Hellwig
@ 2016-09-13 23:00 ` Ross Zwisler
-1 siblings, 0 replies; 58+ messages in thread
From: Ross Zwisler @ 2016-09-13 23:00 UTC (permalink / raw)
To: Christoph Hellwig
Cc: linux-xfs-u79uwXL29TY76Z2rM5mHXA,
linux-fsdevel-u79uwXL29TY76Z2rM5mHXA,
linux-nvdimm-y27Ovi1pjclAfugRpC6u6w
On Fri, Sep 09, 2016 at 06:34:39PM +0200, Christoph Hellwig wrote:
> This is a much simpler implementation of the DAX read/write path that makes
> use of the iomap infrastructure. It does not try to mirror the direct I/O
> calling conventions and thus doesn't have to deal with i_dio_count or the
> end_io handler, but instead leaves locking and filesystem-specific I/O
> completion to the caller.
>
> Signed-off-by: Christoph Hellwig <hch-jcswGhMUV9g@public.gmane.org>
> ---
> fs/dax.c | 103 ++++++++++++++++++++++++++++++++++++++++++++++++++
> include/linux/iomap.h | 2 +
> 2 files changed, 105 insertions(+)
>
> diff --git a/fs/dax.c b/fs/dax.c
> index 84343ce..57ad456 100644
> --- a/fs/dax.c
> +++ b/fs/dax.c
> @@ -31,6 +31,8 @@
> #include <linux/vmstat.h>
> #include <linux/pfn_t.h>
> #include <linux/sizes.h>
> +#include <linux/iomap.h>
> +#include "internal.h"
>
> /*
> * We use lowest available bit in exceptional entry for locking, other two
> @@ -1241,3 +1243,104 @@ int dax_truncate_page(struct inode *inode, loff_t from, get_block_t get_block)
> return dax_zero_page_range(inode, from, length, get_block);
> }
> EXPORT_SYMBOL_GPL(dax_truncate_page);
> +
> +#ifdef CONFIG_FS_IOMAP
> +static loff_t
> +iomap_dax_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
> + struct iomap *iomap)
> +{
> + struct iov_iter *iter = data;
> + loff_t end = pos + length, done = 0;
> + ssize_t ret = 0;
> +
> + if (iov_iter_rw(iter) == READ) {
> + end = min(end, i_size_read(inode));
> + if (pos >= end)
> + return 0;
> +
> + if (iomap->type == IOMAP_HOLE || iomap->type == IOMAP_UNWRITTEN)
> + return iov_iter_zero(min(length, end - pos), iter);
> + }
> +
> + if (WARN_ON_ONCE(iomap->type != IOMAP_MAPPED))
> + return -EIO;
> +
> + while (pos < end) {
> + unsigned offset = pos & (PAGE_SIZE - 1);
> + struct blk_dax_ctl dax = { 0 };
> + ssize_t map_len;
> +
> + dax.sector = iomap->blkno +
> + (((pos & PAGE_MASK) - iomap->offset) >> 9);
> + dax.size = (length + offset + PAGE_SIZE - 1) & PAGE_MASK;
> + map_len = dax_map_atomic(iomap->bdev, &dax);
> + if (map_len < 0) {
> + ret = map_len;
> + break;
> + }
> +
> + dax.addr += offset;
> + map_len -= offset;
> + if (map_len > end - pos)
> + map_len = end - pos;
> +
> + if (iov_iter_rw(iter) == WRITE)
> + map_len = copy_from_iter_pmem(dax.addr, map_len, iter);
> + else
> + map_len = copy_to_iter(dax.addr, map_len, iter);
> + dax_unmap_atomic(iomap->bdev, &dax);
> + if (map_len <= 0) {
> + ret = map_len ? map_len : -EFAULT;
> + break;
> + }
> +
> + pos += map_len;
> + length -= map_len;
> + done += map_len;
> + }
> +
> + return done ? done : ret;
> +}
> +
> +/**
> + * iomap_dax_rw - Perform I/O to a DAX file
> + * @iocb: The control block for this I/O
> + * @iter: The addresses to do I/O from or to
> + * @ops: iomap ops passed from the file system
> + *
> + * This funtions performs read and write operations to directly mapped
function
> + * persistent memory. The callers needs to take care of read/write exclusion
> + * and evicting any page cache pages in the region under I/O.
> + */
> +ssize_t
> +iomap_dax_rw(struct kiocb *iocb, struct iov_iter *iter,
> + struct iomap_ops *ops)
> +{
> + struct inode *inode = iocb->ki_filp->f_mapping->host;
> + loff_t pos = iocb->ki_pos, ret = 0, done = 0;
Just a note that 'ret' is loff_t about half the time in the iomap code and
ssize_t the other half. I guess it doesn't really matter since they should
both be big unsigned values (64 bits on x96_64), but it's a bit inconsistent.
> + size_t count = iov_iter_count(iter);
> + unsigned flags = 0;
> +
> + if (!count)
> + return 0;
> +
> + if (iov_iter_rw(iter) == WRITE)
> + flags |= IOMAP_WRITE;
> +
> + do {
> + ret = iomap_apply(inode, pos, count, flags, ops, iter,
> + iomap_dax_actor);
> + if (ret <= 0)
> + break;
> + pos += ret;
> + done += ret;
> + } while ((count = iov_iter_count(iter)));
> +
> + if (!done)
> + return ret;
> +
> + iocb->ki_pos += done;
> + return done;
> +}
I think you can remove the special casing around 'done' and 'count' and make
this a bit simpler:
ssize_t
iomap_dax_rw(struct kiocb *iocb, struct iov_iter *iter,
struct iomap_ops *ops)
{
struct inode *inode = iocb->ki_filp->f_mapping->host;
loff_t pos = iocb->ki_pos, ret = 0, done = 0;
unsigned flags = 0;
size_t count;
if (iov_iter_rw(iter) == WRITE)
flags |= IOMAP_WRITE;
while ((count = iov_iter_count(iter))) {
ret = iomap_apply(inode, pos, count, flags, ops, iter,
iomap_dax_actor);
if (ret <= 0)
break;
pos += ret;
done += ret;
}
iocb->ki_pos += done;
return done ? done : ret;
}
This is now very similar to iomap_file_buffered_write().
> +EXPORT_SYMBOL_GPL(iomap_dax_rw);
> +#endif /* CONFIG_FS_IOMAP */
> diff --git a/include/linux/iomap.h b/include/linux/iomap.h
> index 14d7067..3d5f785 100644
> --- a/include/linux/iomap.h
> +++ b/include/linux/iomap.h
> @@ -65,6 +65,8 @@ struct iomap_ops {
>
> ssize_t iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *from,
> struct iomap_ops *ops);
> +ssize_t iomap_dax_rw(struct kiocb *iocb, struct iov_iter *iter,
> + struct iomap_ops *ops);
> int iomap_zero_range(struct inode *inode, loff_t pos, loff_t len,
> bool *did_zero, struct iomap_ops *ops);
> int iomap_truncate_page(struct inode *inode, loff_t pos, bool *did_zero,
> --
> 2.1.4
>
> _______________________________________________
> Linux-nvdimm mailing list
> Linux-nvdimm-hn68Rpc1hR1g9hUCZPvPmw@public.gmane.org
> https://lists.01.org/mailman/listinfo/linux-nvdimm
^ permalink raw reply [flat|nested] 58+ messages in thread
* Re: [PATCH 05/10] dax: provide an iomap based dax read/write path
@ 2016-09-13 23:00 ` Ross Zwisler
0 siblings, 0 replies; 58+ messages in thread
From: Ross Zwisler @ 2016-09-13 23:00 UTC (permalink / raw)
To: Christoph Hellwig; +Cc: linux-xfs, linux-fsdevel, linux-nvdimm
On Fri, Sep 09, 2016 at 06:34:39PM +0200, Christoph Hellwig wrote:
> This is a much simpler implementation of the DAX read/write path that makes
> use of the iomap infrastructure. It does not try to mirror the direct I/O
> calling conventions and thus doesn't have to deal with i_dio_count or the
> end_io handler, but instead leaves locking and filesystem-specific I/O
> completion to the caller.
>
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
> fs/dax.c | 103 ++++++++++++++++++++++++++++++++++++++++++++++++++
> include/linux/iomap.h | 2 +
> 2 files changed, 105 insertions(+)
>
> diff --git a/fs/dax.c b/fs/dax.c
> index 84343ce..57ad456 100644
> --- a/fs/dax.c
> +++ b/fs/dax.c
> @@ -31,6 +31,8 @@
> #include <linux/vmstat.h>
> #include <linux/pfn_t.h>
> #include <linux/sizes.h>
> +#include <linux/iomap.h>
> +#include "internal.h"
>
> /*
> * We use lowest available bit in exceptional entry for locking, other two
> @@ -1241,3 +1243,104 @@ int dax_truncate_page(struct inode *inode, loff_t from, get_block_t get_block)
> return dax_zero_page_range(inode, from, length, get_block);
> }
> EXPORT_SYMBOL_GPL(dax_truncate_page);
> +
> +#ifdef CONFIG_FS_IOMAP
> +static loff_t
> +iomap_dax_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
> + struct iomap *iomap)
> +{
> + struct iov_iter *iter = data;
> + loff_t end = pos + length, done = 0;
> + ssize_t ret = 0;
> +
> + if (iov_iter_rw(iter) == READ) {
> + end = min(end, i_size_read(inode));
> + if (pos >= end)
> + return 0;
> +
> + if (iomap->type == IOMAP_HOLE || iomap->type == IOMAP_UNWRITTEN)
> + return iov_iter_zero(min(length, end - pos), iter);
> + }
> +
> + if (WARN_ON_ONCE(iomap->type != IOMAP_MAPPED))
> + return -EIO;
> +
> + while (pos < end) {
> + unsigned offset = pos & (PAGE_SIZE - 1);
> + struct blk_dax_ctl dax = { 0 };
> + ssize_t map_len;
> +
> + dax.sector = iomap->blkno +
> + (((pos & PAGE_MASK) - iomap->offset) >> 9);
> + dax.size = (length + offset + PAGE_SIZE - 1) & PAGE_MASK;
> + map_len = dax_map_atomic(iomap->bdev, &dax);
> + if (map_len < 0) {
> + ret = map_len;
> + break;
> + }
> +
> + dax.addr += offset;
> + map_len -= offset;
> + if (map_len > end - pos)
> + map_len = end - pos;
> +
> + if (iov_iter_rw(iter) == WRITE)
> + map_len = copy_from_iter_pmem(dax.addr, map_len, iter);
> + else
> + map_len = copy_to_iter(dax.addr, map_len, iter);
> + dax_unmap_atomic(iomap->bdev, &dax);
> + if (map_len <= 0) {
> + ret = map_len ? map_len : -EFAULT;
> + break;
> + }
> +
> + pos += map_len;
> + length -= map_len;
> + done += map_len;
> + }
> +
> + return done ? done : ret;
> +}
> +
> +/**
> + * iomap_dax_rw - Perform I/O to a DAX file
> + * @iocb: The control block for this I/O
> + * @iter: The addresses to do I/O from or to
> + * @ops: iomap ops passed from the file system
> + *
> + * This funtions performs read and write operations to directly mapped
function
> + * persistent memory. The callers needs to take care of read/write exclusion
> + * and evicting any page cache pages in the region under I/O.
> + */
> +ssize_t
> +iomap_dax_rw(struct kiocb *iocb, struct iov_iter *iter,
> + struct iomap_ops *ops)
> +{
> + struct inode *inode = iocb->ki_filp->f_mapping->host;
> + loff_t pos = iocb->ki_pos, ret = 0, done = 0;
Just a note that 'ret' is loff_t about half the time in the iomap code and
ssize_t the other half. I guess it doesn't really matter since they should
both be big unsigned values (64 bits on x96_64), but it's a bit inconsistent.
> + size_t count = iov_iter_count(iter);
> + unsigned flags = 0;
> +
> + if (!count)
> + return 0;
> +
> + if (iov_iter_rw(iter) == WRITE)
> + flags |= IOMAP_WRITE;
> +
> + do {
> + ret = iomap_apply(inode, pos, count, flags, ops, iter,
> + iomap_dax_actor);
> + if (ret <= 0)
> + break;
> + pos += ret;
> + done += ret;
> + } while ((count = iov_iter_count(iter)));
> +
> + if (!done)
> + return ret;
> +
> + iocb->ki_pos += done;
> + return done;
> +}
I think you can remove the special casing around 'done' and 'count' and make
this a bit simpler:
ssize_t
iomap_dax_rw(struct kiocb *iocb, struct iov_iter *iter,
struct iomap_ops *ops)
{
struct inode *inode = iocb->ki_filp->f_mapping->host;
loff_t pos = iocb->ki_pos, ret = 0, done = 0;
unsigned flags = 0;
size_t count;
if (iov_iter_rw(iter) == WRITE)
flags |= IOMAP_WRITE;
while ((count = iov_iter_count(iter))) {
ret = iomap_apply(inode, pos, count, flags, ops, iter,
iomap_dax_actor);
if (ret <= 0)
break;
pos += ret;
done += ret;
}
iocb->ki_pos += done;
return done ? done : ret;
}
This is now very similar to iomap_file_buffered_write().
> +EXPORT_SYMBOL_GPL(iomap_dax_rw);
> +#endif /* CONFIG_FS_IOMAP */
> diff --git a/include/linux/iomap.h b/include/linux/iomap.h
> index 14d7067..3d5f785 100644
> --- a/include/linux/iomap.h
> +++ b/include/linux/iomap.h
> @@ -65,6 +65,8 @@ struct iomap_ops {
>
> ssize_t iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *from,
> struct iomap_ops *ops);
> +ssize_t iomap_dax_rw(struct kiocb *iocb, struct iov_iter *iter,
> + struct iomap_ops *ops);
> int iomap_zero_range(struct inode *inode, loff_t pos, loff_t len,
> bool *did_zero, struct iomap_ops *ops);
> int iomap_truncate_page(struct inode *inode, loff_t pos, bool *did_zero,
> --
> 2.1.4
>
> _______________________________________________
> Linux-nvdimm mailing list
> Linux-nvdimm@lists.01.org
> https://lists.01.org/mailman/listinfo/linux-nvdimm
^ permalink raw reply [flat|nested] 58+ messages in thread
* [PATCH 06/10] dax: provide an iomap based fault handler
2016-09-09 16:34 iomap based DAX path Christoph Hellwig
@ 2016-09-09 16:34 ` Christoph Hellwig
2016-09-09 16:34 ` [PATCH 09/10] xfs: refactor xfs_setfilesize Christoph Hellwig
2016-09-09 16:34 ` [PATCH 10/10] xfs: use iomap to implement DAX Christoph Hellwig
2 siblings, 0 replies; 58+ messages in thread
From: Christoph Hellwig @ 2016-09-09 16:34 UTC (permalink / raw)
To: linux-xfs-u79uwXL29TY76Z2rM5mHXA,
linux-fsdevel-u79uwXL29TY76Z2rM5mHXA,
linux-nvdimm-y27Ovi1pjclAfugRpC6u6w
Very similar to the existing dax_fault function, but instead of using
the get_block callback we rely on the iomap_ops vector from iomap.c.
That also avoids having to do two calls into the file system for write
faults.
Signed-off-by: Christoph Hellwig <hch-jcswGhMUV9g@public.gmane.org>
---
fs/dax.c | 113 ++++++++++++++++++++++++++++++++++++++++++++++++++
include/linux/iomap.h | 2 +
2 files changed, 115 insertions(+)
diff --git a/fs/dax.c b/fs/dax.c
index 57ad456..a170a94 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -1343,4 +1343,117 @@ iomap_dax_rw(struct kiocb *iocb, struct iov_iter *iter,
return done;
}
EXPORT_SYMBOL_GPL(iomap_dax_rw);
+
+/**
+ * iomap_dax_fault - handle a page fault on a DAX file
+ * @vma: The virtual memory area where the fault occurred
+ * @vmf: The description of the fault
+ * @ops: iomap ops passed from the file system
+ *
+ * When a page fault occurs, filesystems may call this helper in their fault
+ * or mkwrite handler for DAX files. Sssumes the caller has done all the
+ * necessary locking for the page fault to proceed successfully.
+ */
+int iomap_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
+ struct iomap_ops *ops)
+{
+ struct address_space *mapping = vma->vm_file->f_mapping;
+ struct inode *inode = mapping->host;
+ unsigned long vaddr = (unsigned long)vmf->virtual_address;
+ loff_t pos = (loff_t)vmf->pgoff << PAGE_SHIFT;
+ sector_t sector;
+ struct iomap iomap = { 0 };
+ unsigned flags = 0;
+ int error, major = 0;
+ void *entry;
+
+ /*
+ * Check whether offset isn't beyond end of file now. Caller is supposed
+ * to hold locks serializing us with truncate / punch hole so this is
+ * a reliable test.
+ */
+ if (pos >= i_size_read(inode))
+ return VM_FAULT_SIGBUS;
+
+ entry = grab_mapping_entry(mapping, vmf->pgoff);
+ if (IS_ERR(entry)) {
+ error = PTR_ERR(entry);
+ goto out;
+ }
+
+ if ((vmf->flags & FAULT_FLAG_WRITE) && !vmf->cow_page)
+ flags |= IOMAP_WRITE;
+
+ /*
+ * Note that we don't bother to use iomap_apply here: DAX required
+ * the file system block size to be equal the page size, which means
+ * that we never have to deal with more than a single extent here.
+ */
+ error = ops->iomap_begin(inode, pos, PAGE_SIZE, flags, &iomap);
+ if (error)
+ goto unlock_entry;
+ if (WARN_ON_ONCE(iomap.offset + iomap.length < pos + PAGE_SIZE)) {
+ error = -EIO; /* fs corruption? */
+ goto unlock_entry;
+ }
+
+ sector = iomap.blkno + (((pos & PAGE_MASK) - iomap.offset) >> 9);
+
+ if (vmf->cow_page) {
+ switch (iomap.type) {
+ case IOMAP_HOLE:
+ case IOMAP_UNWRITTEN:
+ clear_user_highpage(vmf->cow_page, vaddr);
+ break;
+ case IOMAP_MAPPED:
+ error = copy_user_dax(iomap.bdev, sector, PAGE_SIZE,
+ vmf->cow_page, vaddr);
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ error = -EIO;
+ break;
+ }
+
+ if (error)
+ goto unlock_entry;
+ if (!radix_tree_exceptional_entry(entry)) {
+ vmf->page = entry;
+ return VM_FAULT_LOCKED;
+ }
+ vmf->entry = entry;
+ return VM_FAULT_DAX_LOCKED;
+ }
+
+ switch (iomap.type) {
+ case IOMAP_MAPPED:
+ if (iomap.flags & IOMAP_F_NEW) {
+ count_vm_event(PGMAJFAULT);
+ mem_cgroup_count_vm_event(vma->vm_mm, PGMAJFAULT);
+ major = VM_FAULT_MAJOR;
+ }
+ break;
+ case IOMAP_UNWRITTEN:
+ case IOMAP_HOLE:
+ if (!(vmf->flags & FAULT_FLAG_WRITE))
+ return dax_load_hole(mapping, entry, vmf);
+ default:
+ WARN_ON_ONCE(1);
+ error = -EIO;
+ }
+
+ /* Filesystem should not return unwritten buffers to us! */
+ error = dax_insert_mapping(mapping, iomap.bdev, sector, PAGE_SIZE,
+ &entry, vma, vmf);
+unlock_entry:
+ put_locked_mapping_entry(mapping, vmf->pgoff, entry);
+out:
+ if (error == -ENOMEM)
+ return VM_FAULT_OOM | major;
+ /* -EBUSY is fine, somebody else faulted on the same PTE */
+ if (error < 0 && error != -EBUSY)
+ return VM_FAULT_SIGBUS | major;
+ return VM_FAULT_NOPAGE | major;
+}
+EXPORT_SYMBOL_GPL(iomap_dax_fault);
#endif /* CONFIG_FS_IOMAP */
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index 3d5f785..a4ef953 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -73,6 +73,8 @@ int iomap_truncate_page(struct inode *inode, loff_t pos, bool *did_zero,
struct iomap_ops *ops);
int iomap_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
struct iomap_ops *ops);
+int iomap_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
+ struct iomap_ops *ops);
int iomap_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
loff_t start, loff_t len, struct iomap_ops *ops);
--
2.1.4
^ permalink raw reply related [flat|nested] 58+ messages in thread
* [PATCH 06/10] dax: provide an iomap based fault handler
@ 2016-09-09 16:34 ` Christoph Hellwig
0 siblings, 0 replies; 58+ messages in thread
From: Christoph Hellwig @ 2016-09-09 16:34 UTC (permalink / raw)
To: linux-xfs, linux-fsdevel, linux-nvdimm
Very similar to the existing dax_fault function, but instead of using
the get_block callback we rely on the iomap_ops vector from iomap.c.
That also avoids having to do two calls into the file system for write
faults.
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
fs/dax.c | 113 ++++++++++++++++++++++++++++++++++++++++++++++++++
include/linux/iomap.h | 2 +
2 files changed, 115 insertions(+)
diff --git a/fs/dax.c b/fs/dax.c
index 57ad456..a170a94 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -1343,4 +1343,117 @@ iomap_dax_rw(struct kiocb *iocb, struct iov_iter *iter,
return done;
}
EXPORT_SYMBOL_GPL(iomap_dax_rw);
+
+/**
+ * iomap_dax_fault - handle a page fault on a DAX file
+ * @vma: The virtual memory area where the fault occurred
+ * @vmf: The description of the fault
+ * @ops: iomap ops passed from the file system
+ *
+ * When a page fault occurs, filesystems may call this helper in their fault
+ * or mkwrite handler for DAX files. Sssumes the caller has done all the
+ * necessary locking for the page fault to proceed successfully.
+ */
+int iomap_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
+ struct iomap_ops *ops)
+{
+ struct address_space *mapping = vma->vm_file->f_mapping;
+ struct inode *inode = mapping->host;
+ unsigned long vaddr = (unsigned long)vmf->virtual_address;
+ loff_t pos = (loff_t)vmf->pgoff << PAGE_SHIFT;
+ sector_t sector;
+ struct iomap iomap = { 0 };
+ unsigned flags = 0;
+ int error, major = 0;
+ void *entry;
+
+ /*
+ * Check whether offset isn't beyond end of file now. Caller is supposed
+ * to hold locks serializing us with truncate / punch hole so this is
+ * a reliable test.
+ */
+ if (pos >= i_size_read(inode))
+ return VM_FAULT_SIGBUS;
+
+ entry = grab_mapping_entry(mapping, vmf->pgoff);
+ if (IS_ERR(entry)) {
+ error = PTR_ERR(entry);
+ goto out;
+ }
+
+ if ((vmf->flags & FAULT_FLAG_WRITE) && !vmf->cow_page)
+ flags |= IOMAP_WRITE;
+
+ /*
+ * Note that we don't bother to use iomap_apply here: DAX required
+ * the file system block size to be equal the page size, which means
+ * that we never have to deal with more than a single extent here.
+ */
+ error = ops->iomap_begin(inode, pos, PAGE_SIZE, flags, &iomap);
+ if (error)
+ goto unlock_entry;
+ if (WARN_ON_ONCE(iomap.offset + iomap.length < pos + PAGE_SIZE)) {
+ error = -EIO; /* fs corruption? */
+ goto unlock_entry;
+ }
+
+ sector = iomap.blkno + (((pos & PAGE_MASK) - iomap.offset) >> 9);
+
+ if (vmf->cow_page) {
+ switch (iomap.type) {
+ case IOMAP_HOLE:
+ case IOMAP_UNWRITTEN:
+ clear_user_highpage(vmf->cow_page, vaddr);
+ break;
+ case IOMAP_MAPPED:
+ error = copy_user_dax(iomap.bdev, sector, PAGE_SIZE,
+ vmf->cow_page, vaddr);
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ error = -EIO;
+ break;
+ }
+
+ if (error)
+ goto unlock_entry;
+ if (!radix_tree_exceptional_entry(entry)) {
+ vmf->page = entry;
+ return VM_FAULT_LOCKED;
+ }
+ vmf->entry = entry;
+ return VM_FAULT_DAX_LOCKED;
+ }
+
+ switch (iomap.type) {
+ case IOMAP_MAPPED:
+ if (iomap.flags & IOMAP_F_NEW) {
+ count_vm_event(PGMAJFAULT);
+ mem_cgroup_count_vm_event(vma->vm_mm, PGMAJFAULT);
+ major = VM_FAULT_MAJOR;
+ }
+ break;
+ case IOMAP_UNWRITTEN:
+ case IOMAP_HOLE:
+ if (!(vmf->flags & FAULT_FLAG_WRITE))
+ return dax_load_hole(mapping, entry, vmf);
+ default:
+ WARN_ON_ONCE(1);
+ error = -EIO;
+ }
+
+ /* Filesystem should not return unwritten buffers to us! */
+ error = dax_insert_mapping(mapping, iomap.bdev, sector, PAGE_SIZE,
+ &entry, vma, vmf);
+unlock_entry:
+ put_locked_mapping_entry(mapping, vmf->pgoff, entry);
+out:
+ if (error == -ENOMEM)
+ return VM_FAULT_OOM | major;
+ /* -EBUSY is fine, somebody else faulted on the same PTE */
+ if (error < 0 && error != -EBUSY)
+ return VM_FAULT_SIGBUS | major;
+ return VM_FAULT_NOPAGE | major;
+}
+EXPORT_SYMBOL_GPL(iomap_dax_fault);
#endif /* CONFIG_FS_IOMAP */
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index 3d5f785..a4ef953 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -73,6 +73,8 @@ int iomap_truncate_page(struct inode *inode, loff_t pos, bool *did_zero,
struct iomap_ops *ops);
int iomap_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
struct iomap_ops *ops);
+int iomap_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
+ struct iomap_ops *ops);
int iomap_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
loff_t start, loff_t len, struct iomap_ops *ops);
--
2.1.4
^ permalink raw reply related [flat|nested] 58+ messages in thread
[parent not found: <1473438884-674-7-git-send-email-hch-jcswGhMUV9g@public.gmane.org>]
* Re: [PATCH 06/10] dax: provide an iomap based fault handler
2016-09-09 16:34 ` Christoph Hellwig
@ 2016-09-09 22:55 ` Dave Chinner
-1 siblings, 0 replies; 58+ messages in thread
From: Dave Chinner @ 2016-09-09 22:55 UTC (permalink / raw)
To: Christoph Hellwig
Cc: linux-xfs-u79uwXL29TY76Z2rM5mHXA,
linux-fsdevel-u79uwXL29TY76Z2rM5mHXA,
linux-nvdimm-y27Ovi1pjclAfugRpC6u6w
On Fri, Sep 09, 2016 at 06:34:40PM +0200, Christoph Hellwig wrote:
> Very similar to the existing dax_fault function, but instead of using
> the get_block callback we rely on the iomap_ops vector from iomap.c.
> That also avoids having to do two calls into the file system for write
> faults.
>
> Signed-off-by: Christoph Hellwig <hch-jcswGhMUV9g@public.gmane.org>
Just noticed this on a quick initial browse...
> + if (vmf->cow_page) {
> + switch (iomap.type) {
> + case IOMAP_HOLE:
> + case IOMAP_UNWRITTEN:
> + clear_user_highpage(vmf->cow_page, vaddr);
> + break;
> + case IOMAP_MAPPED:
> + error = copy_user_dax(iomap.bdev, sector, PAGE_SIZE,
> + vmf->cow_page, vaddr);
> + break;
> + default:
> + WARN_ON_ONCE(1);
> + error = -EIO;
> + break;
> + }
> +
> + if (error)
> + goto unlock_entry;
> + if (!radix_tree_exceptional_entry(entry)) {
> + vmf->page = entry;
> + return VM_FAULT_LOCKED;
> + }
> + vmf->entry = entry;
> + return VM_FAULT_DAX_LOCKED;
> + }
> +
> + switch (iomap.type) {
> + case IOMAP_MAPPED:
> + if (iomap.flags & IOMAP_F_NEW) {
> + count_vm_event(PGMAJFAULT);
> + mem_cgroup_count_vm_event(vma->vm_mm, PGMAJFAULT);
> + major = VM_FAULT_MAJOR;
> + }
> + break;
> + case IOMAP_UNWRITTEN:
> + case IOMAP_HOLE:
> + if (!(vmf->flags & FAULT_FLAG_WRITE))
> + return dax_load_hole(mapping, entry, vmf);
> + default:
> + WARN_ON_ONCE(1);
> + error = -EIO;
> + }
THe errors from the above two cases are not acted on. they are
immediately overwritten by:
> +
> + /* Filesystem should not return unwritten buffers to us! */
> + error = dax_insert_mapping(mapping, iomap.bdev, sector, PAGE_SIZE,
> + &entry, vma, vmf);
> +unlock_entry:
> + put_locked_mapping_entry(mapping, vmf->pgoff, entry);
> +out:
> + if (error == -ENOMEM)
> + return VM_FAULT_OOM | major;
> + /* -EBUSY is fine, somebody else faulted on the same PTE */
> + if (error < 0 && error != -EBUSY)
> + return VM_FAULT_SIGBUS | major;
> + return VM_FAULT_NOPAGE | major;
> +}
Is there a missing "if (error) goto out;" check somewhere here?
I'm also wondering if you've looked at supporting the PMD fault case
with iomap?
Cheers,
Dave.
--
Dave Chinner
david-FqsqvQoI3Ljby3iVrkZq2A@public.gmane.org
^ permalink raw reply [flat|nested] 58+ messages in thread
* Re: [PATCH 06/10] dax: provide an iomap based fault handler
@ 2016-09-09 22:55 ` Dave Chinner
0 siblings, 0 replies; 58+ messages in thread
From: Dave Chinner @ 2016-09-09 22:55 UTC (permalink / raw)
To: Christoph Hellwig; +Cc: linux-xfs, linux-fsdevel, linux-nvdimm
On Fri, Sep 09, 2016 at 06:34:40PM +0200, Christoph Hellwig wrote:
> Very similar to the existing dax_fault function, but instead of using
> the get_block callback we rely on the iomap_ops vector from iomap.c.
> That also avoids having to do two calls into the file system for write
> faults.
>
> Signed-off-by: Christoph Hellwig <hch@lst.de>
Just noticed this on a quick initial browse...
> + if (vmf->cow_page) {
> + switch (iomap.type) {
> + case IOMAP_HOLE:
> + case IOMAP_UNWRITTEN:
> + clear_user_highpage(vmf->cow_page, vaddr);
> + break;
> + case IOMAP_MAPPED:
> + error = copy_user_dax(iomap.bdev, sector, PAGE_SIZE,
> + vmf->cow_page, vaddr);
> + break;
> + default:
> + WARN_ON_ONCE(1);
> + error = -EIO;
> + break;
> + }
> +
> + if (error)
> + goto unlock_entry;
> + if (!radix_tree_exceptional_entry(entry)) {
> + vmf->page = entry;
> + return VM_FAULT_LOCKED;
> + }
> + vmf->entry = entry;
> + return VM_FAULT_DAX_LOCKED;
> + }
> +
> + switch (iomap.type) {
> + case IOMAP_MAPPED:
> + if (iomap.flags & IOMAP_F_NEW) {
> + count_vm_event(PGMAJFAULT);
> + mem_cgroup_count_vm_event(vma->vm_mm, PGMAJFAULT);
> + major = VM_FAULT_MAJOR;
> + }
> + break;
> + case IOMAP_UNWRITTEN:
> + case IOMAP_HOLE:
> + if (!(vmf->flags & FAULT_FLAG_WRITE))
> + return dax_load_hole(mapping, entry, vmf);
> + default:
> + WARN_ON_ONCE(1);
> + error = -EIO;
> + }
THe errors from the above two cases are not acted on. they are
immediately overwritten by:
> +
> + /* Filesystem should not return unwritten buffers to us! */
> + error = dax_insert_mapping(mapping, iomap.bdev, sector, PAGE_SIZE,
> + &entry, vma, vmf);
> +unlock_entry:
> + put_locked_mapping_entry(mapping, vmf->pgoff, entry);
> +out:
> + if (error == -ENOMEM)
> + return VM_FAULT_OOM | major;
> + /* -EBUSY is fine, somebody else faulted on the same PTE */
> + if (error < 0 && error != -EBUSY)
> + return VM_FAULT_SIGBUS | major;
> + return VM_FAULT_NOPAGE | major;
> +}
Is there a missing "if (error) goto out;" check somewhere here?
I'm also wondering if you've looked at supporting the PMD fault case
with iomap?
Cheers,
Dave.
--
Dave Chinner
david@fromorbit.com
^ permalink raw reply [flat|nested] 58+ messages in thread
* Re: [PATCH 06/10] dax: provide an iomap based fault handler
2016-09-09 22:55 ` Dave Chinner
@ 2016-09-10 7:36 ` Christoph Hellwig
-1 siblings, 0 replies; 58+ messages in thread
From: Christoph Hellwig @ 2016-09-10 7:36 UTC (permalink / raw)
To: Dave Chinner
Cc: linux-xfs-u79uwXL29TY76Z2rM5mHXA,
linux-fsdevel-u79uwXL29TY76Z2rM5mHXA, Christoph Hellwig,
linux-nvdimm-y27Ovi1pjclAfugRpC6u6w
On Sat, Sep 10, 2016 at 08:55:57AM +1000, Dave Chinner wrote:
> THe errors from the above two cases are not acted on. they are
> immediately overwritten by:
Yes, Robert also pointed this out. Fix below.
> Is there a missing "if (error) goto out;" check somewhere here?
Just the one above.
> I'm also wondering if you've looked at supporting the PMD fault case
> with iomap?
PMD faults currently don't work at all. Ross has a series to resurrect
them, but we'll need to coordinate between the two series somehow. My
preference would be to not resurrect them for the bh path and only do
it for the iomap version.
diff --git a/fs/dax.c b/fs/dax.c
index a170a94..5534594 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -1440,6 +1440,7 @@ int iomap_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
default:
WARN_ON_ONCE(1);
error = -EIO;
+ goto unlock_entry;
}
/* Filesystem should not return unwritten buffers to us! */
^ permalink raw reply related [flat|nested] 58+ messages in thread
* Re: [PATCH 06/10] dax: provide an iomap based fault handler
@ 2016-09-10 7:36 ` Christoph Hellwig
0 siblings, 0 replies; 58+ messages in thread
From: Christoph Hellwig @ 2016-09-10 7:36 UTC (permalink / raw)
To: Dave Chinner
Cc: Christoph Hellwig, linux-xfs, linux-fsdevel, linux-nvdimm, elliott
On Sat, Sep 10, 2016 at 08:55:57AM +1000, Dave Chinner wrote:
> THe errors from the above two cases are not acted on. they are
> immediately overwritten by:
Yes, Robert also pointed this out. Fix below.
> Is there a missing "if (error) goto out;" check somewhere here?
Just the one above.
> I'm also wondering if you've looked at supporting the PMD fault case
> with iomap?
PMD faults currently don't work at all. Ross has a series to resurrect
them, but we'll need to coordinate between the two series somehow. My
preference would be to not resurrect them for the bh path and only do
it for the iomap version.
diff --git a/fs/dax.c b/fs/dax.c
index a170a94..5534594 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -1440,6 +1440,7 @@ int iomap_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
default:
WARN_ON_ONCE(1);
error = -EIO;
+ goto unlock_entry;
}
/* Filesystem should not return unwritten buffers to us! */
^ permalink raw reply related [flat|nested] 58+ messages in thread
* Re: [PATCH 06/10] dax: provide an iomap based fault handler
2016-09-10 7:36 ` Christoph Hellwig
(?)
@ 2016-09-13 15:51 ` Ross Zwisler
[not found] ` <20160913155126.GA10622-VuQAYsv1563Yd54FQh9/CA@public.gmane.org>
-1 siblings, 1 reply; 58+ messages in thread
From: Ross Zwisler @ 2016-09-13 15:51 UTC (permalink / raw)
To: Christoph Hellwig; +Cc: Dave Chinner, linux-xfs, linux-fsdevel, linux-nvdimm
On Sat, Sep 10, 2016 at 09:36:46AM +0200, Christoph Hellwig wrote:
> On Sat, Sep 10, 2016 at 08:55:57AM +1000, Dave Chinner wrote:
> > THe errors from the above two cases are not acted on. they are
> > immediately overwritten by:
>
> Yes, Robert also pointed this out. Fix below.
>
> > Is there a missing "if (error) goto out;" check somewhere here?
>
> Just the one above.
>
> > I'm also wondering if you've looked at supporting the PMD fault case
> > with iomap?
>
> PMD faults currently don't work at all. Ross has a series to resurrect
> them, but we'll need to coordinate between the two series somehow. My
> preference would be to not resurrect them for the bh path and only do
> it for the iomap version.
I'm working on this right now. I expect that most/all of the infrastructure
between the bh+get_block_t version and the iomap version to be shared, it'll
just be a matter of having a PMD version of the iomap fault handler. This
should be pretty minor.
Let's see how it goes, but right now my plan is to have both - I'd like to
keep feature parity between ext2/ext4 and XFS, and that means having PMD
faults in ext4 via bh+get_block_t until they move over to iomap.
Regarding coordination, the PMD v2 series hasn't gotten much review so far, so
I'm not sure it'll go in for v4.9. At this point I'm planning on just
rebasing on top of your iomap series, though if it gets taken sooner I
wouldn't object.
> diff --git a/fs/dax.c b/fs/dax.c
> index a170a94..5534594 100644
> --- a/fs/dax.c
> +++ b/fs/dax.c
> @@ -1440,6 +1440,7 @@ int iomap_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
> default:
> WARN_ON_ONCE(1);
> error = -EIO;
> + goto unlock_entry;
> }
>
> /* Filesystem should not return unwritten buffers to us! */
> _______________________________________________
> Linux-nvdimm mailing list
> Linux-nvdimm@lists.01.org
> https://lists.01.org/mailman/listinfo/linux-nvdimm
^ permalink raw reply [flat|nested] 58+ messages in thread
* RE: [PATCH 06/10] dax: provide an iomap based fault handler
2016-09-09 16:34 ` Christoph Hellwig
@ 2016-09-10 1:38 ` Elliott, Robert (Persistent Memory)
-1 siblings, 0 replies; 58+ messages in thread
From: Elliott, Robert (Persistent Memory) @ 2016-09-10 1:38 UTC (permalink / raw)
To: Christoph Hellwig, linux-xfs-u79uwXL29TY76Z2rM5mHXA,
linux-fsdevel-u79uwXL29TY76Z2rM5mHXA,
linux-nvdimm-y27Ovi1pjclAfugRpC6u6w
> -----Original Message-----
> From: Linux-nvdimm [mailto:linux-nvdimm-bounces-hn68Rpc1hR1g9hUCZPvPmw@public.gmane.org] On
> Behalf Of Christoph Hellwig
> Sent: Friday, September 09, 2016 11:35 AM
> Subject: [PATCH 06/10] dax: provide an iomap based fault handler
>
...
> diff --git a/fs/dax.c b/fs/dax.c
> @@ -1343,4 +1343,117 @@ iomap_dax_rw(struct kiocb *iocb, struct
...
> + default:
> + WARN_ON_ONCE(1);
> + error = -EIO;
> + }
> +
> + /* Filesystem should not return unwritten buffers to us! */
> + error = dax_insert_mapping(mapping, iomap.bdev, sector, PAGE_SIZE,
> + &entry, vma, vmf);
That -EIO value would be immediately overwritten.
^ permalink raw reply [flat|nested] 58+ messages in thread
* RE: [PATCH 06/10] dax: provide an iomap based fault handler
@ 2016-09-10 1:38 ` Elliott, Robert (Persistent Memory)
0 siblings, 0 replies; 58+ messages in thread
From: Elliott, Robert (Persistent Memory) @ 2016-09-10 1:38 UTC (permalink / raw)
To: Christoph Hellwig, linux-xfs, linux-fsdevel, linux-nvdimm
> -----Original Message-----
> From: Linux-nvdimm [mailto:linux-nvdimm-bounces@lists.01.org] On
> Behalf Of Christoph Hellwig
> Sent: Friday, September 09, 2016 11:35 AM
> Subject: [PATCH 06/10] dax: provide an iomap based fault handler
>
...
> diff --git a/fs/dax.c b/fs/dax.c
> @@ -1343,4 +1343,117 @@ iomap_dax_rw(struct kiocb *iocb, struct
...
> + default:
> + WARN_ON_ONCE(1);
> + error = -EIO;
> + }
> +
> + /* Filesystem should not return unwritten buffers to us! */
> + error = dax_insert_mapping(mapping, iomap.bdev, sector, PAGE_SIZE,
> + &entry, vma, vmf);
That -EIO value would be immediately overwritten.
^ permalink raw reply [flat|nested] 58+ messages in thread
* Re: [PATCH 06/10] dax: provide an iomap based fault handler
2016-09-09 16:34 ` Christoph Hellwig
(?)
(?)
@ 2016-09-13 23:10 ` Ross Zwisler
[not found] ` <20160913231039.GF26002-VuQAYsv1563Yd54FQh9/CA@public.gmane.org>
-1 siblings, 1 reply; 58+ messages in thread
From: Ross Zwisler @ 2016-09-13 23:10 UTC (permalink / raw)
To: Christoph Hellwig; +Cc: linux-xfs, linux-fsdevel, linux-nvdimm
On Fri, Sep 09, 2016 at 06:34:40PM +0200, Christoph Hellwig wrote:
> Very similar to the existing dax_fault function, but instead of using
> the get_block callback we rely on the iomap_ops vector from iomap.c.
> That also avoids having to do two calls into the file system for write
> faults.
>
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
> fs/dax.c | 113 ++++++++++++++++++++++++++++++++++++++++++++++++++
> include/linux/iomap.h | 2 +
> 2 files changed, 115 insertions(+)
>
> diff --git a/fs/dax.c b/fs/dax.c
> index 57ad456..a170a94 100644
> --- a/fs/dax.c
> +++ b/fs/dax.c
> @@ -1343,4 +1343,117 @@ iomap_dax_rw(struct kiocb *iocb, struct iov_iter *iter,
> return done;
> }
> EXPORT_SYMBOL_GPL(iomap_dax_rw);
> +
> +/**
> + * iomap_dax_fault - handle a page fault on a DAX file
> + * @vma: The virtual memory area where the fault occurred
> + * @vmf: The description of the fault
> + * @ops: iomap ops passed from the file system
> + *
> + * When a page fault occurs, filesystems may call this helper in their fault
> + * or mkwrite handler for DAX files. Sssumes the caller has done all the
Assumes
> + * necessary locking for the page fault to proceed successfully.
> + */
> +int iomap_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
> + struct iomap_ops *ops)
> +{
> + struct address_space *mapping = vma->vm_file->f_mapping;
> + struct inode *inode = mapping->host;
> + unsigned long vaddr = (unsigned long)vmf->virtual_address;
> + loff_t pos = (loff_t)vmf->pgoff << PAGE_SHIFT;
> + sector_t sector;
> + struct iomap iomap = { 0 };
> + unsigned flags = 0;
> + int error, major = 0;
> + void *entry;
> +
> + /*
> + * Check whether offset isn't beyond end of file now. Caller is supposed
> + * to hold locks serializing us with truncate / punch hole so this is
> + * a reliable test.
> + */
> + if (pos >= i_size_read(inode))
> + return VM_FAULT_SIGBUS;
> +
> + entry = grab_mapping_entry(mapping, vmf->pgoff);
> + if (IS_ERR(entry)) {
> + error = PTR_ERR(entry);
> + goto out;
> + }
> +
> + if ((vmf->flags & FAULT_FLAG_WRITE) && !vmf->cow_page)
> + flags |= IOMAP_WRITE;
> +
> + /*
> + * Note that we don't bother to use iomap_apply here: DAX required
> + * the file system block size to be equal the page size, which means
> + * that we never have to deal with more than a single extent here.
> + */
> + error = ops->iomap_begin(inode, pos, PAGE_SIZE, flags, &iomap);
> + if (error)
> + goto unlock_entry;
> + if (WARN_ON_ONCE(iomap.offset + iomap.length < pos + PAGE_SIZE)) {
> + error = -EIO; /* fs corruption? */
> + goto unlock_entry;
> + }
> +
> + sector = iomap.blkno + (((pos & PAGE_MASK) - iomap.offset) >> 9);
> +
> + if (vmf->cow_page) {
> + switch (iomap.type) {
> + case IOMAP_HOLE:
> + case IOMAP_UNWRITTEN:
> + clear_user_highpage(vmf->cow_page, vaddr);
> + break;
> + case IOMAP_MAPPED:
> + error = copy_user_dax(iomap.bdev, sector, PAGE_SIZE,
> + vmf->cow_page, vaddr);
> + break;
> + default:
> + WARN_ON_ONCE(1);
> + error = -EIO;
> + break;
> + }
> +
> + if (error)
> + goto unlock_entry;
> + if (!radix_tree_exceptional_entry(entry)) {
> + vmf->page = entry;
> + return VM_FAULT_LOCKED;
> + }
> + vmf->entry = entry;
> + return VM_FAULT_DAX_LOCKED;
> + }
> +
> + switch (iomap.type) {
> + case IOMAP_MAPPED:
> + if (iomap.flags & IOMAP_F_NEW) {
> + count_vm_event(PGMAJFAULT);
> + mem_cgroup_count_vm_event(vma->vm_mm, PGMAJFAULT);
> + major = VM_FAULT_MAJOR;
> + }
> + break;
> + case IOMAP_UNWRITTEN:
> + case IOMAP_HOLE:
> + if (!(vmf->flags & FAULT_FLAG_WRITE))
> + return dax_load_hole(mapping, entry, vmf);
> + default:
> + WARN_ON_ONCE(1);
> + error = -EIO;
> + }
> +
> + /* Filesystem should not return unwritten buffers to us! */
This comment is above a WARN_ON_ONCE() that checks for unwritten buffers in
dax_fault(), but that test isn't present in this code nor do we disallow
unwritten buffers (apparently, based on the IOMAP_UNWRITTEN handling above).
This comment should probably be removed.
> + error = dax_insert_mapping(mapping, iomap.bdev, sector, PAGE_SIZE,
> + &entry, vma, vmf);
> +unlock_entry:
If you stick a space in front of the labels (as is done in the rest of dax.c)
it prevents future patches from using them at the beginning of hunks. Here's a
patch adding a random space as an example:
@@ -908,6 +908,7 @@ out:
return VM_FAULT_OOM | major;
/* -EBUSY is fine, somebody else faulted on the same PTE */
if ((error < 0) && (error != -EBUSY))
return VM_FAULT_SIGBUS | major;
+
return VM_FAULT_NOPAGE | major;
}
vs
@@ -908,6 +908,7 @@ int dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
return VM_FAULT_OOM | major;
/* -EBUSY is fine, somebody else faulted on the same PTE */
if ((error < 0) && (error != -EBUSY))
return VM_FAULT_SIGBUS | major;
+
return VM_FAULT_NOPAGE | major;
}
where 'out' is a label without a leading space in the first case and with a
leading space in the second.
With these few nits addressed:
Reviewed-by: Ross Zwisler <ross.zwisler@linux.intel.com>
> + put_locked_mapping_entry(mapping, vmf->pgoff, entry);
> +out:
> + if (error == -ENOMEM)
> + return VM_FAULT_OOM | major;
> + /* -EBUSY is fine, somebody else faulted on the same PTE */
> + if (error < 0 && error != -EBUSY)
> + return VM_FAULT_SIGBUS | major;
> + return VM_FAULT_NOPAGE | major;
> +}
> +EXPORT_SYMBOL_GPL(iomap_dax_fault);
> #endif /* CONFIG_FS_IOMAP */
> diff --git a/include/linux/iomap.h b/include/linux/iomap.h
> index 3d5f785..a4ef953 100644
> --- a/include/linux/iomap.h
> +++ b/include/linux/iomap.h
> @@ -73,6 +73,8 @@ int iomap_truncate_page(struct inode *inode, loff_t pos, bool *did_zero,
> struct iomap_ops *ops);
> int iomap_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
> struct iomap_ops *ops);
> +int iomap_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
> + struct iomap_ops *ops);
> int iomap_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
> loff_t start, loff_t len, struct iomap_ops *ops);
>
> --
> 2.1.4
>
> _______________________________________________
> Linux-nvdimm mailing list
> Linux-nvdimm@lists.01.org
> https://lists.01.org/mailman/listinfo/linux-nvdimm
^ permalink raw reply [flat|nested] 58+ messages in thread
* [PATCH 07/10] xfs: fix locking for DAX writes
2016-09-09 16:34 iomap based DAX path Christoph Hellwig
@ 2016-09-09 16:34 ` Christoph Hellwig
2016-09-09 16:34 ` [PATCH 09/10] xfs: refactor xfs_setfilesize Christoph Hellwig
2016-09-09 16:34 ` [PATCH 10/10] xfs: use iomap to implement DAX Christoph Hellwig
2 siblings, 0 replies; 58+ messages in thread
From: Christoph Hellwig @ 2016-09-09 16:34 UTC (permalink / raw)
To: linux-xfs-u79uwXL29TY76Z2rM5mHXA,
linux-fsdevel-u79uwXL29TY76Z2rM5mHXA,
linux-nvdimm-y27Ovi1pjclAfugRpC6u6w
So far DAX writes inherited the locking from direct I/O writes, but the direct
I/O model of using shared locks for writes is actually wrong for DAX. For
direct I/O we're out of any standards and don't have to provide the Posix
required exclusion between writers, but for DAX which gets transparently
enable on applications without any knowledge of it we can't simply drop the
requirement. Even worse this only happens for aligned writes and thus
doesn't show up for many typical use cases.
Signed-off-by: Christoph Hellwig <hch-jcswGhMUV9g@public.gmane.org>
---
fs/xfs/xfs_file.c | 20 +-------------------
1 file changed, 1 insertion(+), 19 deletions(-)
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index e612a02..62649cc 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -714,24 +714,11 @@ xfs_file_dax_write(
struct address_space *mapping = iocb->ki_filp->f_mapping;
struct inode *inode = mapping->host;
struct xfs_inode *ip = XFS_I(inode);
- struct xfs_mount *mp = ip->i_mount;
ssize_t ret = 0;
- int unaligned_io = 0;
- int iolock;
+ int iolock = XFS_IOLOCK_EXCL;
struct iov_iter data;
- /* "unaligned" here means not aligned to a filesystem block */
- if ((iocb->ki_pos & mp->m_blockmask) ||
- ((iocb->ki_pos + iov_iter_count(from)) & mp->m_blockmask)) {
- unaligned_io = 1;
- iolock = XFS_IOLOCK_EXCL;
- } else if (mapping->nrpages) {
- iolock = XFS_IOLOCK_EXCL;
- } else {
- iolock = XFS_IOLOCK_SHARED;
- }
xfs_rw_ilock(ip, iolock);
-
ret = xfs_file_aio_write_checks(iocb, from, &iolock);
if (ret)
goto out;
@@ -758,11 +745,6 @@ xfs_file_dax_write(
WARN_ON_ONCE(ret);
}
- if (iolock == XFS_IOLOCK_EXCL && !unaligned_io) {
- xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL);
- iolock = XFS_IOLOCK_SHARED;
- }
-
trace_xfs_file_dax_write(ip, iov_iter_count(from), iocb->ki_pos);
data = *from;
--
2.1.4
^ permalink raw reply related [flat|nested] 58+ messages in thread
* [PATCH 07/10] xfs: fix locking for DAX writes
@ 2016-09-09 16:34 ` Christoph Hellwig
0 siblings, 0 replies; 58+ messages in thread
From: Christoph Hellwig @ 2016-09-09 16:34 UTC (permalink / raw)
To: linux-xfs, linux-fsdevel, linux-nvdimm
So far DAX writes inherited the locking from direct I/O writes, but the direct
I/O model of using shared locks for writes is actually wrong for DAX. For
direct I/O we're out of any standards and don't have to provide the Posix
required exclusion between writers, but for DAX which gets transparently
enable on applications without any knowledge of it we can't simply drop the
requirement. Even worse this only happens for aligned writes and thus
doesn't show up for many typical use cases.
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
fs/xfs/xfs_file.c | 20 +-------------------
1 file changed, 1 insertion(+), 19 deletions(-)
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index e612a02..62649cc 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -714,24 +714,11 @@ xfs_file_dax_write(
struct address_space *mapping = iocb->ki_filp->f_mapping;
struct inode *inode = mapping->host;
struct xfs_inode *ip = XFS_I(inode);
- struct xfs_mount *mp = ip->i_mount;
ssize_t ret = 0;
- int unaligned_io = 0;
- int iolock;
+ int iolock = XFS_IOLOCK_EXCL;
struct iov_iter data;
- /* "unaligned" here means not aligned to a filesystem block */
- if ((iocb->ki_pos & mp->m_blockmask) ||
- ((iocb->ki_pos + iov_iter_count(from)) & mp->m_blockmask)) {
- unaligned_io = 1;
- iolock = XFS_IOLOCK_EXCL;
- } else if (mapping->nrpages) {
- iolock = XFS_IOLOCK_EXCL;
- } else {
- iolock = XFS_IOLOCK_SHARED;
- }
xfs_rw_ilock(ip, iolock);
-
ret = xfs_file_aio_write_checks(iocb, from, &iolock);
if (ret)
goto out;
@@ -758,11 +745,6 @@ xfs_file_dax_write(
WARN_ON_ONCE(ret);
}
- if (iolock == XFS_IOLOCK_EXCL && !unaligned_io) {
- xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL);
- iolock = XFS_IOLOCK_SHARED;
- }
-
trace_xfs_file_dax_write(ip, iov_iter_count(from), iocb->ki_pos);
data = *from;
--
2.1.4
^ permalink raw reply related [flat|nested] 58+ messages in thread
* [PATCH 08/10] xfs: take the ilock shared if possible in xfs_file_iomap_begin
2016-09-09 16:34 iomap based DAX path Christoph Hellwig
@ 2016-09-09 16:34 ` Christoph Hellwig
2016-09-09 16:34 ` [PATCH 09/10] xfs: refactor xfs_setfilesize Christoph Hellwig
2016-09-09 16:34 ` [PATCH 10/10] xfs: use iomap to implement DAX Christoph Hellwig
2 siblings, 0 replies; 58+ messages in thread
From: Christoph Hellwig @ 2016-09-09 16:34 UTC (permalink / raw)
To: linux-xfs-u79uwXL29TY76Z2rM5mHXA,
linux-fsdevel-u79uwXL29TY76Z2rM5mHXA,
linux-nvdimm-y27Ovi1pjclAfugRpC6u6w
We always just read the extent first, and will later lock exlusively
after first dropping the lock in case we actually allocate blocks.
Signed-off-by: Christoph Hellwig <hch-jcswGhMUV9g@public.gmane.org>
---
fs/xfs/xfs_iomap.c | 10 ++++++----
1 file changed, 6 insertions(+), 4 deletions(-)
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 5d06a2d..c3cc175 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -955,6 +955,7 @@ xfs_file_iomap_begin(
struct xfs_bmbt_irec imap;
xfs_fileoff_t offset_fsb, end_fsb;
int nimaps = 1, error = 0;
+ unsigned lockmode;
if (XFS_FORCED_SHUTDOWN(mp))
return -EIO;
@@ -964,7 +965,7 @@ xfs_file_iomap_begin(
iomap);
}
- xfs_ilock(ip, XFS_ILOCK_EXCL);
+ lockmode = xfs_ilock_data_map_shared(ip);
ASSERT(offset <= mp->m_super->s_maxbytes);
if ((xfs_fsize_t)offset + length > mp->m_super->s_maxbytes)
@@ -975,7 +976,7 @@ xfs_file_iomap_begin(
error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, &imap,
&nimaps, XFS_BMAPI_ENTIRE);
if (error) {
- xfs_iunlock(ip, XFS_ILOCK_EXCL);
+ xfs_iunlock(ip, lockmode);
return error;
}
@@ -995,7 +996,8 @@ xfs_file_iomap_begin(
* xfs_iomap_write_direct() expects the shared lock. It
* is unlocked on return.
*/
- xfs_ilock_demote(ip, XFS_ILOCK_EXCL);
+ if (lockmode == XFS_ILOCK_EXCL)
+ xfs_ilock_demote(ip, lockmode);
error = xfs_iomap_write_direct(ip, offset, length, &imap,
nimaps);
if (error)
@@ -1006,7 +1008,7 @@ xfs_file_iomap_begin(
} else {
ASSERT(nimaps);
- xfs_iunlock(ip, XFS_ILOCK_EXCL);
+ xfs_iunlock(ip, lockmode);
trace_xfs_iomap_found(ip, offset, length, 0, &imap);
}
--
2.1.4
^ permalink raw reply related [flat|nested] 58+ messages in thread
* [PATCH 08/10] xfs: take the ilock shared if possible in xfs_file_iomap_begin
@ 2016-09-09 16:34 ` Christoph Hellwig
0 siblings, 0 replies; 58+ messages in thread
From: Christoph Hellwig @ 2016-09-09 16:34 UTC (permalink / raw)
To: linux-xfs, linux-fsdevel, linux-nvdimm
We always just read the extent first, and will later lock exlusively
after first dropping the lock in case we actually allocate blocks.
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
fs/xfs/xfs_iomap.c | 10 ++++++----
1 file changed, 6 insertions(+), 4 deletions(-)
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 5d06a2d..c3cc175 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -955,6 +955,7 @@ xfs_file_iomap_begin(
struct xfs_bmbt_irec imap;
xfs_fileoff_t offset_fsb, end_fsb;
int nimaps = 1, error = 0;
+ unsigned lockmode;
if (XFS_FORCED_SHUTDOWN(mp))
return -EIO;
@@ -964,7 +965,7 @@ xfs_file_iomap_begin(
iomap);
}
- xfs_ilock(ip, XFS_ILOCK_EXCL);
+ lockmode = xfs_ilock_data_map_shared(ip);
ASSERT(offset <= mp->m_super->s_maxbytes);
if ((xfs_fsize_t)offset + length > mp->m_super->s_maxbytes)
@@ -975,7 +976,7 @@ xfs_file_iomap_begin(
error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, &imap,
&nimaps, XFS_BMAPI_ENTIRE);
if (error) {
- xfs_iunlock(ip, XFS_ILOCK_EXCL);
+ xfs_iunlock(ip, lockmode);
return error;
}
@@ -995,7 +996,8 @@ xfs_file_iomap_begin(
* xfs_iomap_write_direct() expects the shared lock. It
* is unlocked on return.
*/
- xfs_ilock_demote(ip, XFS_ILOCK_EXCL);
+ if (lockmode == XFS_ILOCK_EXCL)
+ xfs_ilock_demote(ip, lockmode);
error = xfs_iomap_write_direct(ip, offset, length, &imap,
nimaps);
if (error)
@@ -1006,7 +1008,7 @@ xfs_file_iomap_begin(
} else {
ASSERT(nimaps);
- xfs_iunlock(ip, XFS_ILOCK_EXCL);
+ xfs_iunlock(ip, lockmode);
trace_xfs_iomap_found(ip, offset, length, 0, &imap);
}
--
2.1.4
^ permalink raw reply related [flat|nested] 58+ messages in thread