All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Kirill A. Shutemov" <kirill@shutemov.name>
To: Matthew Wilcox <matthew.r.wilcox@intel.com>
Cc: linux-fsdevel@vger.kernel.org, linux-kernel@vger.kernel.org,
	linux-mm@kvack.org, Matthew Wilcox <willy@linux.intel.com>
Subject: Re: [PATCH 03/10] thp: Prepare for DAX huge pages
Date: Sun, 19 Jul 2015 14:03:05 +0300	[thread overview]
Message-ID: <20150719110305.GA2341@node.dhcp.inet.fi> (raw)
In-Reply-To: <1436560165-8943-4-git-send-email-matthew.r.wilcox@intel.com>

On Fri, Jul 10, 2015 at 04:29:18PM -0400, Matthew Wilcox wrote:
> From: Matthew Wilcox <willy@linux.intel.com>
> 
> Add a vma_is_dax() helper macro to test whether the VMA is DAX, and use
> it in zap_huge_pmd() and __split_huge_page_pmd().
> 
> Signed-off-by: Matthew Wilcox <willy@linux.intel.com>
> ---
>  include/linux/dax.h |  4 ++++
>  mm/huge_memory.c    | 46 ++++++++++++++++++++++++++++------------------
>  2 files changed, 32 insertions(+), 18 deletions(-)
> 
> diff --git a/include/linux/dax.h b/include/linux/dax.h
> index 4f27d3d..9b51f9d 100644
> --- a/include/linux/dax.h
> +++ b/include/linux/dax.h
> @@ -18,4 +18,8 @@ int dax_pfn_mkwrite(struct vm_area_struct *, struct vm_fault *);
>  #define dax_mkwrite(vma, vmf, gb, iod)		dax_fault(vma, vmf, gb, iod)
>  #define __dax_mkwrite(vma, vmf, gb, iod)	__dax_fault(vma, vmf, gb, iod)
>  
> +static inline bool vma_is_dax(struct vm_area_struct *vma)
> +{
> +	return vma->vm_file && IS_DAX(vma->vm_file->f_mapping->host);
> +}
>  #endif
> diff --git a/mm/huge_memory.c b/mm/huge_memory.c
> index 911071b..b7bd855 100644
> --- a/mm/huge_memory.c
> +++ b/mm/huge_memory.c
> @@ -23,6 +23,7 @@
>  #include <linux/pagemap.h>
>  #include <linux/migrate.h>
>  #include <linux/hashtable.h>
> +#include <linux/dax.h>
>  
>  #include <asm/tlb.h>
>  #include <asm/pgalloc.h>
> @@ -1391,7 +1392,6 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
>  	int ret = 0;
>  
>  	if (__pmd_trans_huge_lock(pmd, vma, &ptl) == 1) {
> -		struct page *page;
>  		pgtable_t pgtable;
>  		pmd_t orig_pmd;
>  		/*
> @@ -1403,13 +1403,22 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
>  		orig_pmd = pmdp_huge_get_and_clear_full(tlb->mm, addr, pmd,
>  							tlb->fullmm);
>  		tlb_remove_pmd_tlb_entry(tlb, pmd, addr);
> -		pgtable = pgtable_trans_huge_withdraw(tlb->mm, pmd);
> +		if (vma_is_dax(vma)) {
> +			if (is_huge_zero_pmd(orig_pmd)) {
> +				pgtable = NULL;

pgtable_t is not always a pointer. See arch/arc.

> +			} else {
> +				spin_unlock(ptl);
> +				return 1;
> +			}
> +		} else {
> +			pgtable = pgtable_trans_huge_withdraw(tlb->mm, pmd);
> +		}
>  		if (is_huge_zero_pmd(orig_pmd)) {
>  			atomic_long_dec(&tlb->mm->nr_ptes);
>  			spin_unlock(ptl);
>  			put_huge_zero_page();
>  		} else {
> -			page = pmd_page(orig_pmd);
> +			struct page *page = pmd_page(orig_pmd);
>  			page_remove_rmap(page);
>  			VM_BUG_ON_PAGE(page_mapcount(page) < 0, page);
>  			add_mm_counter(tlb->mm, MM_ANONPAGES, -HPAGE_PMD_NR);
> @@ -1418,7 +1427,8 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
>  			spin_unlock(ptl);
>  			tlb_remove_page(tlb, page);
>  		}
> -		pte_free(tlb->mm, pgtable);
> +		if (pgtable)
> +			pte_free(tlb->mm, pgtable);

It's better to drop "pgtable = NULL;" above and use "if (vma_is_dax(vma))"
here.

>  		ret = 1;
>  	}
>  	return ret;
> @@ -2887,7 +2897,7 @@ void __split_huge_page_pmd(struct vm_area_struct *vma, unsigned long address,
>  		pmd_t *pmd)
>  {
>  	spinlock_t *ptl;
> -	struct page *page;
> +	struct page *page = NULL;
>  	struct mm_struct *mm = vma->vm_mm;
>  	unsigned long haddr = address & HPAGE_PMD_MASK;
>  	unsigned long mmun_start;	/* For mmu_notifiers */
> @@ -2900,25 +2910,25 @@ void __split_huge_page_pmd(struct vm_area_struct *vma, unsigned long address,
>  again:
>  	mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
>  	ptl = pmd_lock(mm, pmd);
> -	if (unlikely(!pmd_trans_huge(*pmd))) {
> -		spin_unlock(ptl);
> -		mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
> -		return;
> -	}
> -	if (is_huge_zero_pmd(*pmd)) {
> +	if (unlikely(!pmd_trans_huge(*pmd)))
> +		goto unlock;
> +	if (vma_is_dax(vma)) {
> +		pmdp_huge_clear_flush(vma, haddr, pmd);

pmdp_huge_clear_flush_notify()

> +	} else if (is_huge_zero_pmd(*pmd)) {
>  		__split_huge_zero_page_pmd(vma, haddr, pmd);
> -		spin_unlock(ptl);
> -		mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
> -		return;
> +	} else {
> +		page = pmd_page(*pmd);
> +		VM_BUG_ON_PAGE(!page_count(page), page);
> +		get_page(page);
>  	}
> -	page = pmd_page(*pmd);
> -	VM_BUG_ON_PAGE(!page_count(page), page);
> -	get_page(page);
> + unlock:
>  	spin_unlock(ptl);
>  	mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
>  
> -	split_huge_page(page);
> +	if (!page)
> +		return;
>  
> +	split_huge_page(page);
>  	put_page(page);
>  
>  	/*
> -- 
> 2.1.4
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/

-- 
 Kirill A. Shutemov

WARNING: multiple messages have this Message-ID (diff)
From: "Kirill A. Shutemov" <kirill@shutemov.name>
To: Matthew Wilcox <matthew.r.wilcox@intel.com>
Cc: linux-fsdevel@vger.kernel.org, linux-kernel@vger.kernel.org,
	linux-mm@kvack.org, Matthew Wilcox <willy@linux.intel.com>
Subject: Re: [PATCH 03/10] thp: Prepare for DAX huge pages
Date: Sun, 19 Jul 2015 14:03:05 +0300	[thread overview]
Message-ID: <20150719110305.GA2341@node.dhcp.inet.fi> (raw)
In-Reply-To: <1436560165-8943-4-git-send-email-matthew.r.wilcox@intel.com>

On Fri, Jul 10, 2015 at 04:29:18PM -0400, Matthew Wilcox wrote:
> From: Matthew Wilcox <willy@linux.intel.com>
> 
> Add a vma_is_dax() helper macro to test whether the VMA is DAX, and use
> it in zap_huge_pmd() and __split_huge_page_pmd().
> 
> Signed-off-by: Matthew Wilcox <willy@linux.intel.com>
> ---
>  include/linux/dax.h |  4 ++++
>  mm/huge_memory.c    | 46 ++++++++++++++++++++++++++++------------------
>  2 files changed, 32 insertions(+), 18 deletions(-)
> 
> diff --git a/include/linux/dax.h b/include/linux/dax.h
> index 4f27d3d..9b51f9d 100644
> --- a/include/linux/dax.h
> +++ b/include/linux/dax.h
> @@ -18,4 +18,8 @@ int dax_pfn_mkwrite(struct vm_area_struct *, struct vm_fault *);
>  #define dax_mkwrite(vma, vmf, gb, iod)		dax_fault(vma, vmf, gb, iod)
>  #define __dax_mkwrite(vma, vmf, gb, iod)	__dax_fault(vma, vmf, gb, iod)
>  
> +static inline bool vma_is_dax(struct vm_area_struct *vma)
> +{
> +	return vma->vm_file && IS_DAX(vma->vm_file->f_mapping->host);
> +}
>  #endif
> diff --git a/mm/huge_memory.c b/mm/huge_memory.c
> index 911071b..b7bd855 100644
> --- a/mm/huge_memory.c
> +++ b/mm/huge_memory.c
> @@ -23,6 +23,7 @@
>  #include <linux/pagemap.h>
>  #include <linux/migrate.h>
>  #include <linux/hashtable.h>
> +#include <linux/dax.h>
>  
>  #include <asm/tlb.h>
>  #include <asm/pgalloc.h>
> @@ -1391,7 +1392,6 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
>  	int ret = 0;
>  
>  	if (__pmd_trans_huge_lock(pmd, vma, &ptl) == 1) {
> -		struct page *page;
>  		pgtable_t pgtable;
>  		pmd_t orig_pmd;
>  		/*
> @@ -1403,13 +1403,22 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
>  		orig_pmd = pmdp_huge_get_and_clear_full(tlb->mm, addr, pmd,
>  							tlb->fullmm);
>  		tlb_remove_pmd_tlb_entry(tlb, pmd, addr);
> -		pgtable = pgtable_trans_huge_withdraw(tlb->mm, pmd);
> +		if (vma_is_dax(vma)) {
> +			if (is_huge_zero_pmd(orig_pmd)) {
> +				pgtable = NULL;

pgtable_t is not always a pointer. See arch/arc.

> +			} else {
> +				spin_unlock(ptl);
> +				return 1;
> +			}
> +		} else {
> +			pgtable = pgtable_trans_huge_withdraw(tlb->mm, pmd);
> +		}
>  		if (is_huge_zero_pmd(orig_pmd)) {
>  			atomic_long_dec(&tlb->mm->nr_ptes);
>  			spin_unlock(ptl);
>  			put_huge_zero_page();
>  		} else {
> -			page = pmd_page(orig_pmd);
> +			struct page *page = pmd_page(orig_pmd);
>  			page_remove_rmap(page);
>  			VM_BUG_ON_PAGE(page_mapcount(page) < 0, page);
>  			add_mm_counter(tlb->mm, MM_ANONPAGES, -HPAGE_PMD_NR);
> @@ -1418,7 +1427,8 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
>  			spin_unlock(ptl);
>  			tlb_remove_page(tlb, page);
>  		}
> -		pte_free(tlb->mm, pgtable);
> +		if (pgtable)
> +			pte_free(tlb->mm, pgtable);

It's better to drop "pgtable = NULL;" above and use "if (vma_is_dax(vma))"
here.

>  		ret = 1;
>  	}
>  	return ret;
> @@ -2887,7 +2897,7 @@ void __split_huge_page_pmd(struct vm_area_struct *vma, unsigned long address,
>  		pmd_t *pmd)
>  {
>  	spinlock_t *ptl;
> -	struct page *page;
> +	struct page *page = NULL;
>  	struct mm_struct *mm = vma->vm_mm;
>  	unsigned long haddr = address & HPAGE_PMD_MASK;
>  	unsigned long mmun_start;	/* For mmu_notifiers */
> @@ -2900,25 +2910,25 @@ void __split_huge_page_pmd(struct vm_area_struct *vma, unsigned long address,
>  again:
>  	mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
>  	ptl = pmd_lock(mm, pmd);
> -	if (unlikely(!pmd_trans_huge(*pmd))) {
> -		spin_unlock(ptl);
> -		mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
> -		return;
> -	}
> -	if (is_huge_zero_pmd(*pmd)) {
> +	if (unlikely(!pmd_trans_huge(*pmd)))
> +		goto unlock;
> +	if (vma_is_dax(vma)) {
> +		pmdp_huge_clear_flush(vma, haddr, pmd);

pmdp_huge_clear_flush_notify()

> +	} else if (is_huge_zero_pmd(*pmd)) {
>  		__split_huge_zero_page_pmd(vma, haddr, pmd);
> -		spin_unlock(ptl);
> -		mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
> -		return;
> +	} else {
> +		page = pmd_page(*pmd);
> +		VM_BUG_ON_PAGE(!page_count(page), page);
> +		get_page(page);
>  	}
> -	page = pmd_page(*pmd);
> -	VM_BUG_ON_PAGE(!page_count(page), page);
> -	get_page(page);
> + unlock:
>  	spin_unlock(ptl);
>  	mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
>  
> -	split_huge_page(page);
> +	if (!page)
> +		return;
>  
> +	split_huge_page(page);
>  	put_page(page);
>  
>  	/*
> -- 
> 2.1.4
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/

-- 
 Kirill A. Shutemov

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href="mailto:dont@kvack.org">email@kvack.org</a>

  reply	other threads:[~2015-07-19 11:03 UTC|newest]

Thread overview: 32+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-07-10 20:29 [PATCH 00/10] Huge page support for DAX files Matthew Wilcox
2015-07-10 20:29 ` Matthew Wilcox
2015-07-10 20:29 ` [PATCH 01/10] thp: vma_adjust_trans_huge(): adjust file-backed VMA too Matthew Wilcox
2015-07-10 20:29   ` Matthew Wilcox
2015-07-10 20:29 ` [PATCH 02/10] dax: Move DAX-related functions to a new header Matthew Wilcox
2015-07-10 20:29   ` Matthew Wilcox
2015-07-10 20:29 ` [PATCH 03/10] thp: Prepare for DAX huge pages Matthew Wilcox
2015-07-10 20:29   ` Matthew Wilcox
2015-07-19 11:03   ` Kirill A. Shutemov [this message]
2015-07-19 11:03     ` Kirill A. Shutemov
2015-07-10 20:29 ` [PATCH 04/10] mm: Add a pmd_fault handler Matthew Wilcox
2015-07-10 20:29   ` Matthew Wilcox
2015-07-10 20:29 ` [PATCH 05/10] mm: Export various functions for the benefit of DAX Matthew Wilcox
2015-07-10 20:29   ` Matthew Wilcox
2015-07-10 20:29 ` [PATCH 06/10] mm: Add vmf_insert_pfn_pmd() Matthew Wilcox
2015-07-10 20:29   ` Matthew Wilcox
2015-07-13 13:23   ` Jeff Moyer
2015-07-13 13:23     ` Jeff Moyer
2015-07-13 15:02     ` Matthew Wilcox
2015-07-13 15:02       ` Matthew Wilcox
2015-07-10 20:29 ` [PATCH 07/10] dax: Add huge page fault support Matthew Wilcox
2015-07-10 20:29   ` Matthew Wilcox
2015-07-13 15:05   ` Jan Kara
2015-07-13 15:05     ` Jan Kara
2015-07-13 15:33     ` Matthew Wilcox
2015-07-13 15:33       ` Matthew Wilcox
2015-07-10 20:29 ` [PATCH 08/10] ext2: Huge " Matthew Wilcox
2015-07-10 20:29   ` Matthew Wilcox
2015-07-10 20:29 ` [PATCH 09/10] ext4: " Matthew Wilcox
2015-07-10 20:29   ` Matthew Wilcox
2015-07-10 20:29 ` [PATCH 10/10] xfs: " Matthew Wilcox
2015-07-10 20:29   ` Matthew Wilcox

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20150719110305.GA2341@node.dhcp.inet.fi \
    --to=kirill@shutemov.name \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=matthew.r.wilcox@intel.com \
    --cc=willy@linux.intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.