All of lore.kernel.org
 help / color / mirror / Atom feed
From: Michal Hocko <mhocko@suse.com>
To: Dave Jiang <dave.jiang@intel.com>
Cc: jack@suse.cz, linux-nvdimm@lists.01.org, david@fromorbit.com,
	linux-mm@kvack.org, tytso@mit.edu, akpm@linux-foundation.org,
	hch@lst.de
Subject: Re: [PATCH v3 1/3] dax: masking off __GFP_FS in fs DAX handlers
Date: Fri, 16 Dec 2016 09:34:42 +0100	[thread overview]
Message-ID: <20161216083442.GA14105@dhcp22.suse.cz> (raw)
In-Reply-To: <148183505925.96369.9987658623875784437.stgit@djiang5-desk3.ch.intel.com>

On Thu 15-12-16 13:50:59, Dave Jiang wrote:
> The caller into dax needs to clear the __GFP_FS mask bit since it's
> responsible for acquiring locks / transactions that block __GFP_FS
> allocations.  The caller will restore the original mask when the dax
> function returns.

Could you have a look at [1]? Does the new API look like a fit for your
use case here? It at least sounds a bit easier from the code POV to me.

[1] http://lkml.kernel.org/r/20161215140715.12732-1-mhocko@kernel.org

> 
> Signed-off-by: Dave Jiang <dave.jiang@intel.com>
> Reviewed-by: Ross Zwisler <ross.zwisler@linux.intel.com>
> Reviewed-by: Jan Kara <jack@suse.cz>
> ---
>  fs/dax.c          |    1 +
>  fs/ext2/file.c    |    9 ++++++++-
>  fs/ext4/file.c    |   10 +++++++++-
>  fs/xfs/xfs_file.c |   14 +++++++++++++-
>  4 files changed, 31 insertions(+), 3 deletions(-)
> 
> diff --git a/fs/dax.c b/fs/dax.c
> index d3fe880..6395bc6 100644
> --- a/fs/dax.c
> +++ b/fs/dax.c
> @@ -1380,6 +1380,7 @@ int dax_iomap_pmd_fault(struct vm_area_struct *vma, unsigned long address,
>  	vmf.pgoff = pgoff;
>  	vmf.flags = flags;
>  	vmf.gfp_mask = mapping_gfp_mask(mapping) | __GFP_IO;
> +	vmf.gfp_mask &= ~__GFP_FS;
>  
>  	switch (iomap.type) {
>  	case IOMAP_MAPPED:
> diff --git a/fs/ext2/file.c b/fs/ext2/file.c
> index b0f2415..8422d5f 100644
> --- a/fs/ext2/file.c
> +++ b/fs/ext2/file.c
> @@ -92,16 +92,19 @@ static int ext2_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
>  	struct inode *inode = file_inode(vma->vm_file);
>  	struct ext2_inode_info *ei = EXT2_I(inode);
>  	int ret;
> +	gfp_t old_gfp = vmf->gfp_mask;
>  
>  	if (vmf->flags & FAULT_FLAG_WRITE) {
>  		sb_start_pagefault(inode->i_sb);
>  		file_update_time(vma->vm_file);
>  	}
> +	vmf->gfp_mask &= ~__GFP_FS;
>  	down_read(&ei->dax_sem);
>  
>  	ret = dax_iomap_fault(vma, vmf, &ext2_iomap_ops);
>  
>  	up_read(&ei->dax_sem);
> +	vmf->gfp_mask = old_gfp;
>  	if (vmf->flags & FAULT_FLAG_WRITE)
>  		sb_end_pagefault(inode->i_sb);
>  	return ret;
> @@ -114,6 +117,7 @@ static int ext2_dax_pfn_mkwrite(struct vm_area_struct *vma,
>  	struct ext2_inode_info *ei = EXT2_I(inode);
>  	loff_t size;
>  	int ret;
> +	gfp_t old_gfp = vmf->gfp_mask;
>  
>  	sb_start_pagefault(inode->i_sb);
>  	file_update_time(vma->vm_file);
> @@ -123,8 +127,11 @@ static int ext2_dax_pfn_mkwrite(struct vm_area_struct *vma,
>  	size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT;
>  	if (vmf->pgoff >= size)
>  		ret = VM_FAULT_SIGBUS;
> -	else
> +	else {
> +		vmf->gfp_mask &= ~__GFP_FS;
>  		ret = dax_pfn_mkwrite(vma, vmf);
> +		vmf->gfp_mask = old_gfp;
> +	}
>  
>  	up_read(&ei->dax_sem);
>  	sb_end_pagefault(inode->i_sb);
> diff --git a/fs/ext4/file.c b/fs/ext4/file.c
> index d663d3d..a3f2bf0 100644
> --- a/fs/ext4/file.c
> +++ b/fs/ext4/file.c
> @@ -261,14 +261,17 @@ static int ext4_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
>  	struct inode *inode = file_inode(vma->vm_file);
>  	struct super_block *sb = inode->i_sb;
>  	bool write = vmf->flags & FAULT_FLAG_WRITE;
> +	gfp_t old_gfp = vmf->gfp_mask;
>  
>  	if (write) {
>  		sb_start_pagefault(sb);
>  		file_update_time(vma->vm_file);
>  	}
> +	vmf->gfp_mask &= ~__GFP_FS;
>  	down_read(&EXT4_I(inode)->i_mmap_sem);
>  	result = dax_iomap_fault(vma, vmf, &ext4_iomap_ops);
>  	up_read(&EXT4_I(inode)->i_mmap_sem);
> +	vmf->gfp_mask = old_gfp;
>  	if (write)
>  		sb_end_pagefault(sb);
>  
> @@ -320,8 +323,13 @@ static int ext4_dax_pfn_mkwrite(struct vm_area_struct *vma,
>  	size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT;
>  	if (vmf->pgoff >= size)
>  		ret = VM_FAULT_SIGBUS;
> -	else
> +	else {
> +		gfp_t old_gfp = vmf->gfp_mask;
> +
> +		vmf->gfp_mask &= ~__GFP_FS;
>  		ret = dax_pfn_mkwrite(vma, vmf);
> +		vmf->gfp_mask = old_gfp;
> +	}
>  	up_read(&EXT4_I(inode)->i_mmap_sem);
>  	sb_end_pagefault(sb);
>  
> diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
> index d818c16..52202b4 100644
> --- a/fs/xfs/xfs_file.c
> +++ b/fs/xfs/xfs_file.c
> @@ -1474,7 +1474,11 @@ xfs_filemap_page_mkwrite(
>  	xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
>  
>  	if (IS_DAX(inode)) {
> +		gfp_t old_gfp = vmf->gfp_mask;
> +
> +		vmf->gfp_mask &= ~__GFP_FS;
>  		ret = dax_iomap_fault(vma, vmf, &xfs_iomap_ops);
> +		vmf->gfp_mask = old_gfp;
>  	} else {
>  		ret = iomap_page_mkwrite(vma, vmf, &xfs_iomap_ops);
>  		ret = block_page_mkwrite_return(ret);
> @@ -1502,13 +1506,16 @@ xfs_filemap_fault(
>  
>  	xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
>  	if (IS_DAX(inode)) {
> +		gfp_t old_gfp = vmf->gfp_mask;
>  		/*
>  		 * we do not want to trigger unwritten extent conversion on read
>  		 * faults - that is unnecessary overhead and would also require
>  		 * changes to xfs_get_blocks_direct() to map unwritten extent
>  		 * ioend for conversion on read-only mappings.
>  		 */
> +		vmf->gfp_mask &= ~__GFP_FS;
>  		ret = dax_iomap_fault(vma, vmf, &xfs_iomap_ops);
> +		vmf->gfp_mask = old_gfp;
>  	} else
>  		ret = filemap_fault(vma, vmf);
>  	xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
> @@ -1581,8 +1588,13 @@ xfs_filemap_pfn_mkwrite(
>  	size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT;
>  	if (vmf->pgoff >= size)
>  		ret = VM_FAULT_SIGBUS;
> -	else if (IS_DAX(inode))
> +	else if (IS_DAX(inode)) {
> +		gfp_t old_gfp = vmf->gfp_mask;
> +
> +		vmf->gfp_mask &= ~__GFP_FS;
>  		ret = dax_pfn_mkwrite(vma, vmf);
> +		vmf->gfp_mask = old_gfp;
> +	}
>  	xfs_iunlock(ip, XFS_MMAPLOCK_SHARED);
>  	sb_end_pagefault(inode->i_sb);
>  	return ret;
> 
> --
> To unsubscribe, send a message with 'unsubscribe linux-mm' in
> the body to majordomo@kvack.org.  For more info on Linux MM,
> see: http://www.linux-mm.org/ .
> Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

-- 
Michal Hocko
SUSE Labs
_______________________________________________
Linux-nvdimm mailing list
Linux-nvdimm@lists.01.org
https://lists.01.org/mailman/listinfo/linux-nvdimm

WARNING: multiple messages have this Message-ID (diff)
From: Michal Hocko <mhocko@suse.com>
To: Dave Jiang <dave.jiang@intel.com>
Cc: akpm@linux-foundation.org, jack@suse.cz,
	linux-nvdimm@lists.01.org, david@fromorbit.com, hch@lst.de,
	linux-mm@kvack.org, tytso@mit.edu, ross.zwisler@linux.intel.com,
	dan.j.williams@intel.com
Subject: Re: [PATCH v3 1/3] dax: masking off __GFP_FS in fs DAX handlers
Date: Fri, 16 Dec 2016 09:34:42 +0100	[thread overview]
Message-ID: <20161216083442.GA14105@dhcp22.suse.cz> (raw)
In-Reply-To: <148183505925.96369.9987658623875784437.stgit@djiang5-desk3.ch.intel.com>

On Thu 15-12-16 13:50:59, Dave Jiang wrote:
> The caller into dax needs to clear the __GFP_FS mask bit since it's
> responsible for acquiring locks / transactions that block __GFP_FS
> allocations.  The caller will restore the original mask when the dax
> function returns.

Could you have a look at [1]? Does the new API look like a fit for your
use case here? It at least sounds a bit easier from the code POV to me.

[1] http://lkml.kernel.org/r/20161215140715.12732-1-mhocko@kernel.org

> 
> Signed-off-by: Dave Jiang <dave.jiang@intel.com>
> Reviewed-by: Ross Zwisler <ross.zwisler@linux.intel.com>
> Reviewed-by: Jan Kara <jack@suse.cz>
> ---
>  fs/dax.c          |    1 +
>  fs/ext2/file.c    |    9 ++++++++-
>  fs/ext4/file.c    |   10 +++++++++-
>  fs/xfs/xfs_file.c |   14 +++++++++++++-
>  4 files changed, 31 insertions(+), 3 deletions(-)
> 
> diff --git a/fs/dax.c b/fs/dax.c
> index d3fe880..6395bc6 100644
> --- a/fs/dax.c
> +++ b/fs/dax.c
> @@ -1380,6 +1380,7 @@ int dax_iomap_pmd_fault(struct vm_area_struct *vma, unsigned long address,
>  	vmf.pgoff = pgoff;
>  	vmf.flags = flags;
>  	vmf.gfp_mask = mapping_gfp_mask(mapping) | __GFP_IO;
> +	vmf.gfp_mask &= ~__GFP_FS;
>  
>  	switch (iomap.type) {
>  	case IOMAP_MAPPED:
> diff --git a/fs/ext2/file.c b/fs/ext2/file.c
> index b0f2415..8422d5f 100644
> --- a/fs/ext2/file.c
> +++ b/fs/ext2/file.c
> @@ -92,16 +92,19 @@ static int ext2_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
>  	struct inode *inode = file_inode(vma->vm_file);
>  	struct ext2_inode_info *ei = EXT2_I(inode);
>  	int ret;
> +	gfp_t old_gfp = vmf->gfp_mask;
>  
>  	if (vmf->flags & FAULT_FLAG_WRITE) {
>  		sb_start_pagefault(inode->i_sb);
>  		file_update_time(vma->vm_file);
>  	}
> +	vmf->gfp_mask &= ~__GFP_FS;
>  	down_read(&ei->dax_sem);
>  
>  	ret = dax_iomap_fault(vma, vmf, &ext2_iomap_ops);
>  
>  	up_read(&ei->dax_sem);
> +	vmf->gfp_mask = old_gfp;
>  	if (vmf->flags & FAULT_FLAG_WRITE)
>  		sb_end_pagefault(inode->i_sb);
>  	return ret;
> @@ -114,6 +117,7 @@ static int ext2_dax_pfn_mkwrite(struct vm_area_struct *vma,
>  	struct ext2_inode_info *ei = EXT2_I(inode);
>  	loff_t size;
>  	int ret;
> +	gfp_t old_gfp = vmf->gfp_mask;
>  
>  	sb_start_pagefault(inode->i_sb);
>  	file_update_time(vma->vm_file);
> @@ -123,8 +127,11 @@ static int ext2_dax_pfn_mkwrite(struct vm_area_struct *vma,
>  	size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT;
>  	if (vmf->pgoff >= size)
>  		ret = VM_FAULT_SIGBUS;
> -	else
> +	else {
> +		vmf->gfp_mask &= ~__GFP_FS;
>  		ret = dax_pfn_mkwrite(vma, vmf);
> +		vmf->gfp_mask = old_gfp;
> +	}
>  
>  	up_read(&ei->dax_sem);
>  	sb_end_pagefault(inode->i_sb);
> diff --git a/fs/ext4/file.c b/fs/ext4/file.c
> index d663d3d..a3f2bf0 100644
> --- a/fs/ext4/file.c
> +++ b/fs/ext4/file.c
> @@ -261,14 +261,17 @@ static int ext4_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
>  	struct inode *inode = file_inode(vma->vm_file);
>  	struct super_block *sb = inode->i_sb;
>  	bool write = vmf->flags & FAULT_FLAG_WRITE;
> +	gfp_t old_gfp = vmf->gfp_mask;
>  
>  	if (write) {
>  		sb_start_pagefault(sb);
>  		file_update_time(vma->vm_file);
>  	}
> +	vmf->gfp_mask &= ~__GFP_FS;
>  	down_read(&EXT4_I(inode)->i_mmap_sem);
>  	result = dax_iomap_fault(vma, vmf, &ext4_iomap_ops);
>  	up_read(&EXT4_I(inode)->i_mmap_sem);
> +	vmf->gfp_mask = old_gfp;
>  	if (write)
>  		sb_end_pagefault(sb);
>  
> @@ -320,8 +323,13 @@ static int ext4_dax_pfn_mkwrite(struct vm_area_struct *vma,
>  	size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT;
>  	if (vmf->pgoff >= size)
>  		ret = VM_FAULT_SIGBUS;
> -	else
> +	else {
> +		gfp_t old_gfp = vmf->gfp_mask;
> +
> +		vmf->gfp_mask &= ~__GFP_FS;
>  		ret = dax_pfn_mkwrite(vma, vmf);
> +		vmf->gfp_mask = old_gfp;
> +	}
>  	up_read(&EXT4_I(inode)->i_mmap_sem);
>  	sb_end_pagefault(sb);
>  
> diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
> index d818c16..52202b4 100644
> --- a/fs/xfs/xfs_file.c
> +++ b/fs/xfs/xfs_file.c
> @@ -1474,7 +1474,11 @@ xfs_filemap_page_mkwrite(
>  	xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
>  
>  	if (IS_DAX(inode)) {
> +		gfp_t old_gfp = vmf->gfp_mask;
> +
> +		vmf->gfp_mask &= ~__GFP_FS;
>  		ret = dax_iomap_fault(vma, vmf, &xfs_iomap_ops);
> +		vmf->gfp_mask = old_gfp;
>  	} else {
>  		ret = iomap_page_mkwrite(vma, vmf, &xfs_iomap_ops);
>  		ret = block_page_mkwrite_return(ret);
> @@ -1502,13 +1506,16 @@ xfs_filemap_fault(
>  
>  	xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
>  	if (IS_DAX(inode)) {
> +		gfp_t old_gfp = vmf->gfp_mask;
>  		/*
>  		 * we do not want to trigger unwritten extent conversion on read
>  		 * faults - that is unnecessary overhead and would also require
>  		 * changes to xfs_get_blocks_direct() to map unwritten extent
>  		 * ioend for conversion on read-only mappings.
>  		 */
> +		vmf->gfp_mask &= ~__GFP_FS;
>  		ret = dax_iomap_fault(vma, vmf, &xfs_iomap_ops);
> +		vmf->gfp_mask = old_gfp;
>  	} else
>  		ret = filemap_fault(vma, vmf);
>  	xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
> @@ -1581,8 +1588,13 @@ xfs_filemap_pfn_mkwrite(
>  	size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT;
>  	if (vmf->pgoff >= size)
>  		ret = VM_FAULT_SIGBUS;
> -	else if (IS_DAX(inode))
> +	else if (IS_DAX(inode)) {
> +		gfp_t old_gfp = vmf->gfp_mask;
> +
> +		vmf->gfp_mask &= ~__GFP_FS;
>  		ret = dax_pfn_mkwrite(vma, vmf);
> +		vmf->gfp_mask = old_gfp;
> +	}
>  	xfs_iunlock(ip, XFS_MMAPLOCK_SHARED);
>  	sb_end_pagefault(inode->i_sb);
>  	return ret;
> 
> --
> To unsubscribe, send a message with 'unsubscribe linux-mm' in
> the body to majordomo@kvack.org.  For more info on Linux MM,
> see: http://www.linux-mm.org/ .
> Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

-- 
Michal Hocko
SUSE Labs

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

  parent reply	other threads:[~2016-12-16  8:34 UTC|newest]

Thread overview: 14+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-12-15 20:50 [PATCH v3 1/3] dax: masking off __GFP_FS in fs DAX handlers Dave Jiang
2016-12-15 20:50 ` Dave Jiang
2016-12-15 20:51 ` [PATCH v3 2/3] mm, dax: make pmd_fault() and friends to be the same as fault() Dave Jiang
2016-12-15 20:51   ` Dave Jiang
2016-12-15 23:23   ` Ross Zwisler
2016-12-15 23:23     ` Ross Zwisler
2016-12-15 20:51 ` [PATCH v3 3/3] mm, dax: move pmd_fault() to take only vmf parameter Dave Jiang
2016-12-15 20:51   ` Dave Jiang
2016-12-15 23:23   ` Ross Zwisler
2016-12-15 23:23     ` Ross Zwisler
2016-12-16  7:17   ` Jan Kara
2016-12-16  7:17     ` Jan Kara
2016-12-16  8:34 ` Michal Hocko [this message]
2016-12-16  8:34   ` [PATCH v3 1/3] dax: masking off __GFP_FS in fs DAX handlers Michal Hocko

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20161216083442.GA14105@dhcp22.suse.cz \
    --to=mhocko@suse.com \
    --cc=akpm@linux-foundation.org \
    --cc=dave.jiang@intel.com \
    --cc=david@fromorbit.com \
    --cc=hch@lst.de \
    --cc=jack@suse.cz \
    --cc=linux-mm@kvack.org \
    --cc=linux-nvdimm@lists.01.org \
    --cc=tytso@mit.edu \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.