* cleanup the page fault path V2
@ 2017-08-29 16:26 Christoph Hellwig
2017-08-29 16:26 ` [PATCH 1/2] iomap: return VM_FAULT_* codes from iomap_page_mkwrite Christoph Hellwig
` (2 more replies)
0 siblings, 3 replies; 9+ messages in thread
From: Christoph Hellwig @ 2017-08-29 16:26 UTC (permalink / raw)
To: linux-xfs; +Cc: jack
Hi all,
these two patches simplify the xfs/iomap page fault path. They came up in
the context of MAP_SYNC support, but are useful on their own, and might be
good to get into 4.14 before the real MAP_SYNC work starts hitting the
tree in some form.
Changes since V1:
- drop the incorrect pfn consolidation
^ permalink raw reply [flat|nested] 9+ messages in thread
* [PATCH 1/2] iomap: return VM_FAULT_* codes from iomap_page_mkwrite
2017-08-29 16:26 cleanup the page fault path V2 Christoph Hellwig
@ 2017-08-29 16:26 ` Christoph Hellwig
2017-08-30 8:50 ` Jan Kara
2017-08-29 16:26 ` [PATCH 2/2] xfs: consolidate the various page fault handlers Christoph Hellwig
2017-08-30 16:09 ` cleanup the page fault path V2 Darrick J. Wong
2 siblings, 1 reply; 9+ messages in thread
From: Christoph Hellwig @ 2017-08-29 16:26 UTC (permalink / raw)
To: linux-xfs; +Cc: jack
All callers will need the VM_FAULT_* flags, so convert in the helper.
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Ross Zwisler <ross.zwisler@linux.intel.com>
---
fs/iomap.c | 4 ++--
fs/xfs/xfs_file.c | 1 -
2 files changed, 2 insertions(+), 3 deletions(-)
diff --git a/fs/iomap.c b/fs/iomap.c
index 59cc98ad7577..8554a8d75281 100644
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -477,10 +477,10 @@ int iomap_page_mkwrite(struct vm_fault *vmf, const struct iomap_ops *ops)
set_page_dirty(page);
wait_for_stable_page(page);
- return 0;
+ return VM_FAULT_LOCKED;
out_unlock:
unlock_page(page);
- return ret;
+ return block_page_mkwrite_return(ret);
}
EXPORT_SYMBOL_GPL(iomap_page_mkwrite);
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index c4893e226fd8..8b0181c6d2a6 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -1035,7 +1035,6 @@ xfs_filemap_page_mkwrite(
ret = dax_iomap_fault(vmf, PE_SIZE_PTE, &xfs_iomap_ops);
} else {
ret = iomap_page_mkwrite(vmf, &xfs_iomap_ops);
- ret = block_page_mkwrite_return(ret);
}
xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
--
2.11.0
^ permalink raw reply related [flat|nested] 9+ messages in thread
* [PATCH 2/2] xfs: consolidate the various page fault handlers
2017-08-29 16:26 cleanup the page fault path V2 Christoph Hellwig
2017-08-29 16:26 ` [PATCH 1/2] iomap: return VM_FAULT_* codes from iomap_page_mkwrite Christoph Hellwig
@ 2017-08-29 16:26 ` Christoph Hellwig
2017-08-29 18:48 ` Darrick J. Wong
2017-08-30 9:34 ` Jan Kara
2017-08-30 16:09 ` cleanup the page fault path V2 Darrick J. Wong
2 siblings, 2 replies; 9+ messages in thread
From: Christoph Hellwig @ 2017-08-29 16:26 UTC (permalink / raw)
To: linux-xfs; +Cc: jack
Add a new __xfs_filemap_fault helper that implements all four page fault
callouts, and make these methods themselves small stubs that set the
correct write_fault flag, and exit early for the non-DAX case for the
hugepage related ones.
Also remove the extra size checking in the pfn_fault path, which is now
handled in the core DAX code.
Life would be so much simpler if we only had one method for all this.
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Ross Zwisler <ross.zwisler@linux.intel.com>
---
fs/xfs/xfs_file.c | 96 +++++++++++++++++++-----------------------------------
fs/xfs/xfs_trace.h | 29 +++++++++++++++--
2 files changed, 60 insertions(+), 65 deletions(-)
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 8b0181c6d2a6..0debbc7e3f03 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -1011,95 +1011,67 @@ xfs_file_llseek(
* page_lock (MM)
* i_lock (XFS - extent map serialisation)
*/
-
-/*
- * mmap()d file has taken write protection fault and is being made writable. We
- * can set the page state up correctly for a writable page, which means we can
- * do correct delalloc accounting (ENOSPC checking!) and unwritten extent
- * mapping.
- */
-STATIC int
-xfs_filemap_page_mkwrite(
- struct vm_fault *vmf)
+static int
+__xfs_filemap_fault(
+ struct vm_fault *vmf,
+ enum page_entry_size pe_size,
+ bool write_fault)
{
struct inode *inode = file_inode(vmf->vma->vm_file);
+ struct xfs_inode *ip = XFS_I(inode);
int ret;
- trace_xfs_filemap_page_mkwrite(XFS_I(inode));
+ trace_xfs_filemap_fault(ip, pe_size, write_fault);
- sb_start_pagefault(inode->i_sb);
- file_update_time(vmf->vma->vm_file);
- xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
+ if (write_fault) {
+ sb_start_pagefault(inode->i_sb);
+ file_update_time(vmf->vma->vm_file);
+ }
+ xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
if (IS_DAX(inode)) {
- ret = dax_iomap_fault(vmf, PE_SIZE_PTE, &xfs_iomap_ops);
+ ret = dax_iomap_fault(vmf, pe_size, &xfs_iomap_ops);
} else {
- ret = iomap_page_mkwrite(vmf, &xfs_iomap_ops);
+ if (write_fault)
+ ret = iomap_page_mkwrite(vmf, &xfs_iomap_ops);
+ else
+ ret = filemap_fault(vmf);
}
-
xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
- sb_end_pagefault(inode->i_sb);
+ if (write_fault)
+ sb_end_pagefault(inode->i_sb);
return ret;
}
-STATIC int
+static int
xfs_filemap_fault(
struct vm_fault *vmf)
{
- struct inode *inode = file_inode(vmf->vma->vm_file);
- int ret;
-
- trace_xfs_filemap_fault(XFS_I(inode));
-
/* DAX can shortcut the normal fault path on write faults! */
- if ((vmf->flags & FAULT_FLAG_WRITE) && IS_DAX(inode))
- return xfs_filemap_page_mkwrite(vmf);
-
- xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
- if (IS_DAX(inode))
- ret = dax_iomap_fault(vmf, PE_SIZE_PTE, &xfs_iomap_ops);
- else
- ret = filemap_fault(vmf);
- xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
-
- return ret;
+ return __xfs_filemap_fault(vmf, PE_SIZE_PTE,
+ IS_DAX(file_inode(vmf->vma->vm_file)) &&
+ (vmf->flags & FAULT_FLAG_WRITE));
}
-/*
- * Similar to xfs_filemap_fault(), the DAX fault path can call into here on
- * both read and write faults. Hence we need to handle both cases. There is no
- * ->huge_mkwrite callout for huge pages, so we have a single function here to
- * handle both cases here. @flags carries the information on the type of fault
- * occuring.
- */
-STATIC int
+static int
xfs_filemap_huge_fault(
struct vm_fault *vmf,
enum page_entry_size pe_size)
{
- struct inode *inode = file_inode(vmf->vma->vm_file);
- struct xfs_inode *ip = XFS_I(inode);
- int ret;
-
- if (!IS_DAX(inode))
+ if (!IS_DAX(file_inode(vmf->vma->vm_file)))
return VM_FAULT_FALLBACK;
- trace_xfs_filemap_huge_fault(ip);
-
- if (vmf->flags & FAULT_FLAG_WRITE) {
- sb_start_pagefault(inode->i_sb);
- file_update_time(vmf->vma->vm_file);
- }
-
- xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
- ret = dax_iomap_fault(vmf, pe_size, &xfs_iomap_ops);
- xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
-
- if (vmf->flags & FAULT_FLAG_WRITE)
- sb_end_pagefault(inode->i_sb);
+ /* DAX can shortcut the normal fault path on write faults! */
+ return __xfs_filemap_fault(vmf, pe_size,
+ (vmf->flags & FAULT_FLAG_WRITE));
+}
- return ret;
+static int
+xfs_filemap_page_mkwrite(
+ struct vm_fault *vmf)
+{
+ return __xfs_filemap_fault(vmf, PE_SIZE_PTE, true);
}
/*
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index bcc3cdf8e1c5..bae243ad7f3b 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -689,11 +689,34 @@ DEFINE_INODE_EVENT(xfs_inode_set_cowblocks_tag);
DEFINE_INODE_EVENT(xfs_inode_clear_cowblocks_tag);
DEFINE_INODE_EVENT(xfs_inode_free_cowblocks_invalid);
-DEFINE_INODE_EVENT(xfs_filemap_fault);
-DEFINE_INODE_EVENT(xfs_filemap_huge_fault);
-DEFINE_INODE_EVENT(xfs_filemap_page_mkwrite);
DEFINE_INODE_EVENT(xfs_filemap_pfn_mkwrite);
+TRACE_EVENT(xfs_filemap_fault,
+ TP_PROTO(struct xfs_inode *ip, enum page_entry_size pe_size,
+ bool write_fault),
+ TP_ARGS(ip, pe_size, write_fault),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(xfs_ino_t, ino)
+ __field(enum page_entry_size, pe_size)
+ __field(bool, write_fault)
+ ),
+ TP_fast_assign(
+ __entry->dev = VFS_I(ip)->i_sb->s_dev;
+ __entry->ino = ip->i_ino;
+ __entry->pe_size = pe_size;
+ __entry->write_fault = write_fault;
+ ),
+ TP_printk("dev %d:%d ino 0x%llx %s write_fault %d",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->ino,
+ __print_symbolic(__entry->pe_size,
+ { PE_SIZE_PTE, "PTE" },
+ { PE_SIZE_PMD, "PMD" },
+ { PE_SIZE_PUD, "PUD" }),
+ __entry->write_fault)
+)
+
DECLARE_EVENT_CLASS(xfs_iref_class,
TP_PROTO(struct xfs_inode *ip, unsigned long caller_ip),
TP_ARGS(ip, caller_ip),
--
2.11.0
^ permalink raw reply related [flat|nested] 9+ messages in thread
* Re: [PATCH 2/2] xfs: consolidate the various page fault handlers
2017-08-29 16:26 ` [PATCH 2/2] xfs: consolidate the various page fault handlers Christoph Hellwig
@ 2017-08-29 18:48 ` Darrick J. Wong
2017-08-30 9:34 ` Jan Kara
1 sibling, 0 replies; 9+ messages in thread
From: Darrick J. Wong @ 2017-08-29 18:48 UTC (permalink / raw)
To: Christoph Hellwig; +Cc: linux-xfs, jack
On Tue, Aug 29, 2017 at 06:26:13PM +0200, Christoph Hellwig wrote:
> Add a new __xfs_filemap_fault helper that implements all four page fault
> callouts, and make these methods themselves small stubs that set the
> correct write_fault flag, and exit early for the non-DAX case for the
> hugepage related ones.
>
> Also remove the extra size checking in the pfn_fault path, which is now
> handled in the core DAX code.
>
> Life would be so much simpler if we only had one method for all this.
>
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> Reviewed-by: Ross Zwisler <ross.zwisler@linux.intel.com>
Looks ok, will test (again),
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
> ---
> fs/xfs/xfs_file.c | 96 +++++++++++++++++++-----------------------------------
> fs/xfs/xfs_trace.h | 29 +++++++++++++++--
> 2 files changed, 60 insertions(+), 65 deletions(-)
>
> diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
> index 8b0181c6d2a6..0debbc7e3f03 100644
> --- a/fs/xfs/xfs_file.c
> +++ b/fs/xfs/xfs_file.c
> @@ -1011,95 +1011,67 @@ xfs_file_llseek(
> * page_lock (MM)
> * i_lock (XFS - extent map serialisation)
> */
> -
> -/*
> - * mmap()d file has taken write protection fault and is being made writable. We
> - * can set the page state up correctly for a writable page, which means we can
> - * do correct delalloc accounting (ENOSPC checking!) and unwritten extent
> - * mapping.
> - */
> -STATIC int
> -xfs_filemap_page_mkwrite(
> - struct vm_fault *vmf)
> +static int
> +__xfs_filemap_fault(
> + struct vm_fault *vmf,
> + enum page_entry_size pe_size,
> + bool write_fault)
> {
> struct inode *inode = file_inode(vmf->vma->vm_file);
> + struct xfs_inode *ip = XFS_I(inode);
> int ret;
>
> - trace_xfs_filemap_page_mkwrite(XFS_I(inode));
> + trace_xfs_filemap_fault(ip, pe_size, write_fault);
>
> - sb_start_pagefault(inode->i_sb);
> - file_update_time(vmf->vma->vm_file);
> - xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
> + if (write_fault) {
> + sb_start_pagefault(inode->i_sb);
> + file_update_time(vmf->vma->vm_file);
> + }
>
> + xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
> if (IS_DAX(inode)) {
> - ret = dax_iomap_fault(vmf, PE_SIZE_PTE, &xfs_iomap_ops);
> + ret = dax_iomap_fault(vmf, pe_size, &xfs_iomap_ops);
> } else {
> - ret = iomap_page_mkwrite(vmf, &xfs_iomap_ops);
> + if (write_fault)
> + ret = iomap_page_mkwrite(vmf, &xfs_iomap_ops);
> + else
> + ret = filemap_fault(vmf);
> }
> -
> xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
> - sb_end_pagefault(inode->i_sb);
>
> + if (write_fault)
> + sb_end_pagefault(inode->i_sb);
> return ret;
> }
>
> -STATIC int
> +static int
> xfs_filemap_fault(
> struct vm_fault *vmf)
> {
> - struct inode *inode = file_inode(vmf->vma->vm_file);
> - int ret;
> -
> - trace_xfs_filemap_fault(XFS_I(inode));
> -
> /* DAX can shortcut the normal fault path on write faults! */
> - if ((vmf->flags & FAULT_FLAG_WRITE) && IS_DAX(inode))
> - return xfs_filemap_page_mkwrite(vmf);
> -
> - xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
> - if (IS_DAX(inode))
> - ret = dax_iomap_fault(vmf, PE_SIZE_PTE, &xfs_iomap_ops);
> - else
> - ret = filemap_fault(vmf);
> - xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
> -
> - return ret;
> + return __xfs_filemap_fault(vmf, PE_SIZE_PTE,
> + IS_DAX(file_inode(vmf->vma->vm_file)) &&
> + (vmf->flags & FAULT_FLAG_WRITE));
> }
>
> -/*
> - * Similar to xfs_filemap_fault(), the DAX fault path can call into here on
> - * both read and write faults. Hence we need to handle both cases. There is no
> - * ->huge_mkwrite callout for huge pages, so we have a single function here to
> - * handle both cases here. @flags carries the information on the type of fault
> - * occuring.
> - */
> -STATIC int
> +static int
> xfs_filemap_huge_fault(
> struct vm_fault *vmf,
> enum page_entry_size pe_size)
> {
> - struct inode *inode = file_inode(vmf->vma->vm_file);
> - struct xfs_inode *ip = XFS_I(inode);
> - int ret;
> -
> - if (!IS_DAX(inode))
> + if (!IS_DAX(file_inode(vmf->vma->vm_file)))
> return VM_FAULT_FALLBACK;
>
> - trace_xfs_filemap_huge_fault(ip);
> -
> - if (vmf->flags & FAULT_FLAG_WRITE) {
> - sb_start_pagefault(inode->i_sb);
> - file_update_time(vmf->vma->vm_file);
> - }
> -
> - xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
> - ret = dax_iomap_fault(vmf, pe_size, &xfs_iomap_ops);
> - xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
> -
> - if (vmf->flags & FAULT_FLAG_WRITE)
> - sb_end_pagefault(inode->i_sb);
> + /* DAX can shortcut the normal fault path on write faults! */
> + return __xfs_filemap_fault(vmf, pe_size,
> + (vmf->flags & FAULT_FLAG_WRITE));
> +}
>
> - return ret;
> +static int
> +xfs_filemap_page_mkwrite(
> + struct vm_fault *vmf)
> +{
> + return __xfs_filemap_fault(vmf, PE_SIZE_PTE, true);
> }
>
> /*
> diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
> index bcc3cdf8e1c5..bae243ad7f3b 100644
> --- a/fs/xfs/xfs_trace.h
> +++ b/fs/xfs/xfs_trace.h
> @@ -689,11 +689,34 @@ DEFINE_INODE_EVENT(xfs_inode_set_cowblocks_tag);
> DEFINE_INODE_EVENT(xfs_inode_clear_cowblocks_tag);
> DEFINE_INODE_EVENT(xfs_inode_free_cowblocks_invalid);
>
> -DEFINE_INODE_EVENT(xfs_filemap_fault);
> -DEFINE_INODE_EVENT(xfs_filemap_huge_fault);
> -DEFINE_INODE_EVENT(xfs_filemap_page_mkwrite);
> DEFINE_INODE_EVENT(xfs_filemap_pfn_mkwrite);
>
> +TRACE_EVENT(xfs_filemap_fault,
> + TP_PROTO(struct xfs_inode *ip, enum page_entry_size pe_size,
> + bool write_fault),
> + TP_ARGS(ip, pe_size, write_fault),
> + TP_STRUCT__entry(
> + __field(dev_t, dev)
> + __field(xfs_ino_t, ino)
> + __field(enum page_entry_size, pe_size)
> + __field(bool, write_fault)
> + ),
> + TP_fast_assign(
> + __entry->dev = VFS_I(ip)->i_sb->s_dev;
> + __entry->ino = ip->i_ino;
> + __entry->pe_size = pe_size;
> + __entry->write_fault = write_fault;
> + ),
> + TP_printk("dev %d:%d ino 0x%llx %s write_fault %d",
> + MAJOR(__entry->dev), MINOR(__entry->dev),
> + __entry->ino,
> + __print_symbolic(__entry->pe_size,
> + { PE_SIZE_PTE, "PTE" },
> + { PE_SIZE_PMD, "PMD" },
> + { PE_SIZE_PUD, "PUD" }),
> + __entry->write_fault)
> +)
> +
> DECLARE_EVENT_CLASS(xfs_iref_class,
> TP_PROTO(struct xfs_inode *ip, unsigned long caller_ip),
> TP_ARGS(ip, caller_ip),
> --
> 2.11.0
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH 1/2] iomap: return VM_FAULT_* codes from iomap_page_mkwrite
2017-08-29 16:26 ` [PATCH 1/2] iomap: return VM_FAULT_* codes from iomap_page_mkwrite Christoph Hellwig
@ 2017-08-30 8:50 ` Jan Kara
0 siblings, 0 replies; 9+ messages in thread
From: Jan Kara @ 2017-08-30 8:50 UTC (permalink / raw)
To: Christoph Hellwig; +Cc: linux-xfs, jack
On Tue 29-08-17 18:26:12, Christoph Hellwig wrote:
> All callers will need the VM_FAULT_* flags, so convert in the helper.
>
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> Reviewed-by: Ross Zwisler <ross.zwisler@linux.intel.com>
Looks good. You can add:
Reviewed-by: Jan Kara <jack@suse.cz>
Honza
> ---
> fs/iomap.c | 4 ++--
> fs/xfs/xfs_file.c | 1 -
> 2 files changed, 2 insertions(+), 3 deletions(-)
>
> diff --git a/fs/iomap.c b/fs/iomap.c
> index 59cc98ad7577..8554a8d75281 100644
> --- a/fs/iomap.c
> +++ b/fs/iomap.c
> @@ -477,10 +477,10 @@ int iomap_page_mkwrite(struct vm_fault *vmf, const struct iomap_ops *ops)
>
> set_page_dirty(page);
> wait_for_stable_page(page);
> - return 0;
> + return VM_FAULT_LOCKED;
> out_unlock:
> unlock_page(page);
> - return ret;
> + return block_page_mkwrite_return(ret);
> }
> EXPORT_SYMBOL_GPL(iomap_page_mkwrite);
>
> diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
> index c4893e226fd8..8b0181c6d2a6 100644
> --- a/fs/xfs/xfs_file.c
> +++ b/fs/xfs/xfs_file.c
> @@ -1035,7 +1035,6 @@ xfs_filemap_page_mkwrite(
> ret = dax_iomap_fault(vmf, PE_SIZE_PTE, &xfs_iomap_ops);
> } else {
> ret = iomap_page_mkwrite(vmf, &xfs_iomap_ops);
> - ret = block_page_mkwrite_return(ret);
> }
>
> xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
> --
> 2.11.0
>
--
Jan Kara <jack@suse.com>
SUSE Labs, CR
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH 2/2] xfs: consolidate the various page fault handlers
2017-08-29 16:26 ` [PATCH 2/2] xfs: consolidate the various page fault handlers Christoph Hellwig
2017-08-29 18:48 ` Darrick J. Wong
@ 2017-08-30 9:34 ` Jan Kara
1 sibling, 0 replies; 9+ messages in thread
From: Jan Kara @ 2017-08-30 9:34 UTC (permalink / raw)
To: Christoph Hellwig; +Cc: linux-xfs, jack
On Tue 29-08-17 18:26:13, Christoph Hellwig wrote:
> Add a new __xfs_filemap_fault helper that implements all four page fault
> callouts, and make these methods themselves small stubs that set the
> correct write_fault flag, and exit early for the non-DAX case for the
> hugepage related ones.
>
> Also remove the extra size checking in the pfn_fault path, which is now
> handled in the core DAX code.
>
> Life would be so much simpler if we only had one method for all this.
>
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> Reviewed-by: Ross Zwisler <ross.zwisler@linux.intel.com>
Looks good to me. You can add:
Reviewed-by: Jan Kara <jack@suse.cz>
Just I'd note that the check for IS_DAX in xfs_filemap_fault() (to set
write_fault argument) is racy wrt changing of the inode flag (as it was
before) but that's a problem to be fixed separately I guess.
Honza
> ---
> fs/xfs/xfs_file.c | 96 +++++++++++++++++++-----------------------------------
> fs/xfs/xfs_trace.h | 29 +++++++++++++++--
> 2 files changed, 60 insertions(+), 65 deletions(-)
>
> diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
> index 8b0181c6d2a6..0debbc7e3f03 100644
> --- a/fs/xfs/xfs_file.c
> +++ b/fs/xfs/xfs_file.c
> @@ -1011,95 +1011,67 @@ xfs_file_llseek(
> * page_lock (MM)
> * i_lock (XFS - extent map serialisation)
> */
> -
> -/*
> - * mmap()d file has taken write protection fault and is being made writable. We
> - * can set the page state up correctly for a writable page, which means we can
> - * do correct delalloc accounting (ENOSPC checking!) and unwritten extent
> - * mapping.
> - */
> -STATIC int
> -xfs_filemap_page_mkwrite(
> - struct vm_fault *vmf)
> +static int
> +__xfs_filemap_fault(
> + struct vm_fault *vmf,
> + enum page_entry_size pe_size,
> + bool write_fault)
> {
> struct inode *inode = file_inode(vmf->vma->vm_file);
> + struct xfs_inode *ip = XFS_I(inode);
> int ret;
>
> - trace_xfs_filemap_page_mkwrite(XFS_I(inode));
> + trace_xfs_filemap_fault(ip, pe_size, write_fault);
>
> - sb_start_pagefault(inode->i_sb);
> - file_update_time(vmf->vma->vm_file);
> - xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
> + if (write_fault) {
> + sb_start_pagefault(inode->i_sb);
> + file_update_time(vmf->vma->vm_file);
> + }
>
> + xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
> if (IS_DAX(inode)) {
> - ret = dax_iomap_fault(vmf, PE_SIZE_PTE, &xfs_iomap_ops);
> + ret = dax_iomap_fault(vmf, pe_size, &xfs_iomap_ops);
> } else {
> - ret = iomap_page_mkwrite(vmf, &xfs_iomap_ops);
> + if (write_fault)
> + ret = iomap_page_mkwrite(vmf, &xfs_iomap_ops);
> + else
> + ret = filemap_fault(vmf);
> }
> -
> xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
> - sb_end_pagefault(inode->i_sb);
>
> + if (write_fault)
> + sb_end_pagefault(inode->i_sb);
> return ret;
> }
>
> -STATIC int
> +static int
> xfs_filemap_fault(
> struct vm_fault *vmf)
> {
> - struct inode *inode = file_inode(vmf->vma->vm_file);
> - int ret;
> -
> - trace_xfs_filemap_fault(XFS_I(inode));
> -
> /* DAX can shortcut the normal fault path on write faults! */
> - if ((vmf->flags & FAULT_FLAG_WRITE) && IS_DAX(inode))
> - return xfs_filemap_page_mkwrite(vmf);
> -
> - xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
> - if (IS_DAX(inode))
> - ret = dax_iomap_fault(vmf, PE_SIZE_PTE, &xfs_iomap_ops);
> - else
> - ret = filemap_fault(vmf);
> - xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
> -
> - return ret;
> + return __xfs_filemap_fault(vmf, PE_SIZE_PTE,
> + IS_DAX(file_inode(vmf->vma->vm_file)) &&
> + (vmf->flags & FAULT_FLAG_WRITE));
> }
>
> -/*
> - * Similar to xfs_filemap_fault(), the DAX fault path can call into here on
> - * both read and write faults. Hence we need to handle both cases. There is no
> - * ->huge_mkwrite callout for huge pages, so we have a single function here to
> - * handle both cases here. @flags carries the information on the type of fault
> - * occuring.
> - */
> -STATIC int
> +static int
> xfs_filemap_huge_fault(
> struct vm_fault *vmf,
> enum page_entry_size pe_size)
> {
> - struct inode *inode = file_inode(vmf->vma->vm_file);
> - struct xfs_inode *ip = XFS_I(inode);
> - int ret;
> -
> - if (!IS_DAX(inode))
> + if (!IS_DAX(file_inode(vmf->vma->vm_file)))
> return VM_FAULT_FALLBACK;
>
> - trace_xfs_filemap_huge_fault(ip);
> -
> - if (vmf->flags & FAULT_FLAG_WRITE) {
> - sb_start_pagefault(inode->i_sb);
> - file_update_time(vmf->vma->vm_file);
> - }
> -
> - xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
> - ret = dax_iomap_fault(vmf, pe_size, &xfs_iomap_ops);
> - xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
> -
> - if (vmf->flags & FAULT_FLAG_WRITE)
> - sb_end_pagefault(inode->i_sb);
> + /* DAX can shortcut the normal fault path on write faults! */
> + return __xfs_filemap_fault(vmf, pe_size,
> + (vmf->flags & FAULT_FLAG_WRITE));
> +}
>
> - return ret;
> +static int
> +xfs_filemap_page_mkwrite(
> + struct vm_fault *vmf)
> +{
> + return __xfs_filemap_fault(vmf, PE_SIZE_PTE, true);
> }
>
> /*
> diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
> index bcc3cdf8e1c5..bae243ad7f3b 100644
> --- a/fs/xfs/xfs_trace.h
> +++ b/fs/xfs/xfs_trace.h
> @@ -689,11 +689,34 @@ DEFINE_INODE_EVENT(xfs_inode_set_cowblocks_tag);
> DEFINE_INODE_EVENT(xfs_inode_clear_cowblocks_tag);
> DEFINE_INODE_EVENT(xfs_inode_free_cowblocks_invalid);
>
> -DEFINE_INODE_EVENT(xfs_filemap_fault);
> -DEFINE_INODE_EVENT(xfs_filemap_huge_fault);
> -DEFINE_INODE_EVENT(xfs_filemap_page_mkwrite);
> DEFINE_INODE_EVENT(xfs_filemap_pfn_mkwrite);
>
> +TRACE_EVENT(xfs_filemap_fault,
> + TP_PROTO(struct xfs_inode *ip, enum page_entry_size pe_size,
> + bool write_fault),
> + TP_ARGS(ip, pe_size, write_fault),
> + TP_STRUCT__entry(
> + __field(dev_t, dev)
> + __field(xfs_ino_t, ino)
> + __field(enum page_entry_size, pe_size)
> + __field(bool, write_fault)
> + ),
> + TP_fast_assign(
> + __entry->dev = VFS_I(ip)->i_sb->s_dev;
> + __entry->ino = ip->i_ino;
> + __entry->pe_size = pe_size;
> + __entry->write_fault = write_fault;
> + ),
> + TP_printk("dev %d:%d ino 0x%llx %s write_fault %d",
> + MAJOR(__entry->dev), MINOR(__entry->dev),
> + __entry->ino,
> + __print_symbolic(__entry->pe_size,
> + { PE_SIZE_PTE, "PTE" },
> + { PE_SIZE_PMD, "PMD" },
> + { PE_SIZE_PUD, "PUD" }),
> + __entry->write_fault)
> +)
> +
> DECLARE_EVENT_CLASS(xfs_iref_class,
> TP_PROTO(struct xfs_inode *ip, unsigned long caller_ip),
> TP_ARGS(ip, caller_ip),
> --
> 2.11.0
>
--
Jan Kara <jack@suse.com>
SUSE Labs, CR
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: cleanup the page fault path V2
2017-08-29 16:26 cleanup the page fault path V2 Christoph Hellwig
2017-08-29 16:26 ` [PATCH 1/2] iomap: return VM_FAULT_* codes from iomap_page_mkwrite Christoph Hellwig
2017-08-29 16:26 ` [PATCH 2/2] xfs: consolidate the various page fault handlers Christoph Hellwig
@ 2017-08-30 16:09 ` Darrick J. Wong
2 siblings, 0 replies; 9+ messages in thread
From: Darrick J. Wong @ 2017-08-30 16:09 UTC (permalink / raw)
To: Christoph Hellwig; +Cc: linux-xfs, jack
On Tue, Aug 29, 2017 at 06:26:11PM +0200, Christoph Hellwig wrote:
> Hi all,
>
> these two patches simplify the xfs/iomap page fault path. They came up in
> the context of MAP_SYNC support, but are useful on their own, and might be
> good to get into 4.14 before the real MAP_SYNC work starts hitting the
> tree in some form.
Seems to have passed a testing cycle, will queue for next...
--D
>
> Changes since V1:
> - drop the incorrect pfn consolidation
> --
> To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH 1/2] iomap: return VM_FAULT_* codes from iomap_page_mkwrite
2017-08-28 6:36 ` [PATCH 1/2] iomap: return VM_FAULT_* codes from iomap_page_mkwrite Christoph Hellwig
@ 2017-08-28 16:33 ` Darrick J. Wong
0 siblings, 0 replies; 9+ messages in thread
From: Darrick J. Wong @ 2017-08-28 16:33 UTC (permalink / raw)
To: Christoph Hellwig; +Cc: linux-xfs, jack
On Mon, Aug 28, 2017 at 08:36:58AM +0200, Christoph Hellwig wrote:
> All callers will need the VM_FAULT_* flags, so convert in the helper.
>
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> Reviewed-by: Ross Zwisler <ross.zwisler@linux.intel.com>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
> ---
> fs/iomap.c | 4 ++--
> fs/xfs/xfs_file.c | 1 -
> 2 files changed, 2 insertions(+), 3 deletions(-)
>
> diff --git a/fs/iomap.c b/fs/iomap.c
> index 59cc98ad7577..8554a8d75281 100644
> --- a/fs/iomap.c
> +++ b/fs/iomap.c
> @@ -477,10 +477,10 @@ int iomap_page_mkwrite(struct vm_fault *vmf, const struct iomap_ops *ops)
>
> set_page_dirty(page);
> wait_for_stable_page(page);
> - return 0;
> + return VM_FAULT_LOCKED;
> out_unlock:
> unlock_page(page);
> - return ret;
> + return block_page_mkwrite_return(ret);
> }
> EXPORT_SYMBOL_GPL(iomap_page_mkwrite);
>
> diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
> index c4893e226fd8..8b0181c6d2a6 100644
> --- a/fs/xfs/xfs_file.c
> +++ b/fs/xfs/xfs_file.c
> @@ -1035,7 +1035,6 @@ xfs_filemap_page_mkwrite(
> ret = dax_iomap_fault(vmf, PE_SIZE_PTE, &xfs_iomap_ops);
> } else {
> ret = iomap_page_mkwrite(vmf, &xfs_iomap_ops);
> - ret = block_page_mkwrite_return(ret);
> }
>
> xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
> --
> 2.11.0
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply [flat|nested] 9+ messages in thread
* [PATCH 1/2] iomap: return VM_FAULT_* codes from iomap_page_mkwrite
2017-08-28 6:36 cleanup the page fault path Christoph Hellwig
@ 2017-08-28 6:36 ` Christoph Hellwig
2017-08-28 16:33 ` Darrick J. Wong
0 siblings, 1 reply; 9+ messages in thread
From: Christoph Hellwig @ 2017-08-28 6:36 UTC (permalink / raw)
To: linux-xfs; +Cc: jack
All callers will need the VM_FAULT_* flags, so convert in the helper.
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Ross Zwisler <ross.zwisler@linux.intel.com>
---
fs/iomap.c | 4 ++--
fs/xfs/xfs_file.c | 1 -
2 files changed, 2 insertions(+), 3 deletions(-)
diff --git a/fs/iomap.c b/fs/iomap.c
index 59cc98ad7577..8554a8d75281 100644
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -477,10 +477,10 @@ int iomap_page_mkwrite(struct vm_fault *vmf, const struct iomap_ops *ops)
set_page_dirty(page);
wait_for_stable_page(page);
- return 0;
+ return VM_FAULT_LOCKED;
out_unlock:
unlock_page(page);
- return ret;
+ return block_page_mkwrite_return(ret);
}
EXPORT_SYMBOL_GPL(iomap_page_mkwrite);
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index c4893e226fd8..8b0181c6d2a6 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -1035,7 +1035,6 @@ xfs_filemap_page_mkwrite(
ret = dax_iomap_fault(vmf, PE_SIZE_PTE, &xfs_iomap_ops);
} else {
ret = iomap_page_mkwrite(vmf, &xfs_iomap_ops);
- ret = block_page_mkwrite_return(ret);
}
xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
--
2.11.0
^ permalink raw reply related [flat|nested] 9+ messages in thread
end of thread, other threads:[~2017-08-30 16:09 UTC | newest]
Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-08-29 16:26 cleanup the page fault path V2 Christoph Hellwig
2017-08-29 16:26 ` [PATCH 1/2] iomap: return VM_FAULT_* codes from iomap_page_mkwrite Christoph Hellwig
2017-08-30 8:50 ` Jan Kara
2017-08-29 16:26 ` [PATCH 2/2] xfs: consolidate the various page fault handlers Christoph Hellwig
2017-08-29 18:48 ` Darrick J. Wong
2017-08-30 9:34 ` Jan Kara
2017-08-30 16:09 ` cleanup the page fault path V2 Darrick J. Wong
-- strict thread matches above, loose matches on Subject: below --
2017-08-28 6:36 cleanup the page fault path Christoph Hellwig
2017-08-28 6:36 ` [PATCH 1/2] iomap: return VM_FAULT_* codes from iomap_page_mkwrite Christoph Hellwig
2017-08-28 16:33 ` Darrick J. Wong
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.