Linux-MIPS Archive on lore.kernel.org
 help / color / Atom feed
* [PATCH 1/3] mm: set page fault address for update_mmu_cache_pmd
@ 2020-06-24  9:26 Bibo Mao
  2020-06-24  9:26 ` [PATCH 2/3] mm/huge_memory.c: update tlb entry if pmd is changed Bibo Mao
                   ` (3 more replies)
  0 siblings, 4 replies; 12+ messages in thread
From: Bibo Mao @ 2020-06-24  9:26 UTC (permalink / raw)
  To: Andrew Morton, Thomas Bogendoerfer, Paul Burton,
	Anshuman Khandual, Mike Rapoport, Daniel Silsby
  Cc: linux-mips, linux-kernel, linux-mm

update_mmu_cache_pmd is used to update tlb for the pmd entry by
software. On MIPS system, the tlb entry indexed by page fault
address maybe exists already, only that tlb entry may be small
page, also it may be huge page. Before updating pmd entry with
huge page size, older tlb entry need to be invalidated.

Here page fault address is passed to function update_mmu_cache_pmd,
rather than pmd huge page start address. The page fault address
can be used for invalidating older tlb entry.

Signed-off-by: Bibo Mao <maobibo@loongson.cn>
---
 arch/mips/include/asm/pgtable.h | 9 +++++++++
 mm/huge_memory.c                | 7 ++++---
 mm/memory.c                     | 2 +-
 3 files changed, 14 insertions(+), 4 deletions(-)

diff --git a/arch/mips/include/asm/pgtable.h b/arch/mips/include/asm/pgtable.h
index dd7a0f5..bd81661 100644
--- a/arch/mips/include/asm/pgtable.h
+++ b/arch/mips/include/asm/pgtable.h
@@ -554,11 +554,20 @@ static inline void update_mmu_cache(struct vm_area_struct *vma,
 #define	__HAVE_ARCH_UPDATE_MMU_TLB
 #define update_mmu_tlb	update_mmu_cache
 
+extern void local_flush_tlb_page(struct vm_area_struct *vma,
+				unsigned long page);
 static inline void update_mmu_cache_pmd(struct vm_area_struct *vma,
 	unsigned long address, pmd_t *pmdp)
 {
 	pte_t pte = *(pte_t *)pmdp;
 
+	/*
+	 * If pmd_none is true, older tlb entry will be normal page.
+	 * here to invalidate older tlb entry indexed by address
+	 * parameter address must be page fault address rather than
+	 * start address of pmd huge page
+	 */
+	local_flush_tlb_page(vma, address);
 	__update_tlb(vma, address, pte);
 }
 
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 78c84be..0f9187b 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -780,6 +780,7 @@ static void insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr,
 		pgtable_t pgtable)
 {
 	struct mm_struct *mm = vma->vm_mm;
+	unsigned long start = addr & PMD_MASK;
 	pmd_t entry;
 	spinlock_t *ptl;
 
@@ -792,7 +793,7 @@ static void insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr,
 			}
 			entry = pmd_mkyoung(*pmd);
 			entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
-			if (pmdp_set_access_flags(vma, addr, pmd, entry, 1))
+			if (pmdp_set_access_flags(vma, start, pmd, entry, 1))
 				update_mmu_cache_pmd(vma, addr, pmd);
 		}
 
@@ -813,7 +814,7 @@ static void insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr,
 		pgtable = NULL;
 	}
 
-	set_pmd_at(mm, addr, pmd, entry);
+	set_pmd_at(mm, start, pmd, entry);
 	update_mmu_cache_pmd(vma, addr, pmd);
 
 out_unlock:
@@ -864,7 +865,7 @@ vm_fault_t vmf_insert_pfn_pmd_prot(struct vm_fault *vmf, pfn_t pfn,
 
 	track_pfn_insert(vma, &pgprot, pfn);
 
-	insert_pfn_pmd(vma, addr, vmf->pmd, pfn, pgprot, write, pgtable);
+	insert_pfn_pmd(vma, vmf->address, vmf->pmd, pfn, pgprot, write, pgtable);
 	return VM_FAULT_NOPAGE;
 }
 EXPORT_SYMBOL_GPL(vmf_insert_pfn_pmd_prot);
diff --git a/mm/memory.c b/mm/memory.c
index dc7f354..c703458 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3592,7 +3592,7 @@ static vm_fault_t do_set_pmd(struct vm_fault *vmf, struct page *page)
 
 	set_pmd_at(vma->vm_mm, haddr, vmf->pmd, entry);
 
-	update_mmu_cache_pmd(vma, haddr, vmf->pmd);
+	update_mmu_cache_pmd(vma, vmf->address, vmf->pmd);
 
 	/* fault is handled */
 	ret = 0;
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 12+ messages in thread

* [PATCH 2/3] mm/huge_memory.c: update tlb entry if pmd is changed
  2020-06-24  9:26 [PATCH 1/3] mm: set page fault address for update_mmu_cache_pmd Bibo Mao
@ 2020-06-24  9:26 ` Bibo Mao
  2020-06-25  0:30   ` Mike Kravetz
  2020-06-24  9:26 ` [PATCH 3/3] MIPS: Do not call flush_tlb_all when setting pmd entry Bibo Mao
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 12+ messages in thread
From: Bibo Mao @ 2020-06-24  9:26 UTC (permalink / raw)
  To: Andrew Morton, Thomas Bogendoerfer, Paul Burton,
	Anshuman Khandual, Mike Rapoport, Daniel Silsby
  Cc: linux-mips, linux-kernel, linux-mm

When set_pmd_at is called in function do_huge_pmd_anonymous_page,
new tlb entry can be added by software on MIPS platform.

Here add update_mmu_cache_pmd when pmd entry is set, and
update_mmu_cache_pmd is defined as empty excepts arc/mips platform.
This patch has no negative effect on other platforms except arc/mips
system.

Signed-off-by: Bibo Mao <maobibo@loongson.cn>
---
 mm/huge_memory.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 0f9187b..8b4ccf7 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -643,6 +643,7 @@ static vm_fault_t __do_huge_pmd_anonymous_page(struct vm_fault *vmf,
 		lru_cache_add_active_or_unevictable(page, vma);
 		pgtable_trans_huge_deposit(vma->vm_mm, vmf->pmd, pgtable);
 		set_pmd_at(vma->vm_mm, haddr, vmf->pmd, entry);
+		update_mmu_cache_pmd(vma, vmf->address, vmf->pmd);
 		add_mm_counter(vma->vm_mm, MM_ANONPAGES, HPAGE_PMD_NR);
 		mm_inc_nr_ptes(vma->vm_mm);
 		spin_unlock(vmf->ptl);
@@ -756,6 +757,7 @@ vm_fault_t do_huge_pmd_anonymous_page(struct vm_fault *vmf)
 			} else {
 				set_huge_zero_page(pgtable, vma->vm_mm, vma,
 						   haddr, vmf->pmd, zero_page);
+				update_mmu_cache_pmd(vma, vmf->address, vmf->pmd);
 				spin_unlock(vmf->ptl);
 				set = true;
 			}
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 12+ messages in thread

* [PATCH 3/3] MIPS: Do not call flush_tlb_all when setting pmd entry
  2020-06-24  9:26 [PATCH 1/3] mm: set page fault address for update_mmu_cache_pmd Bibo Mao
  2020-06-24  9:26 ` [PATCH 2/3] mm/huge_memory.c: update tlb entry if pmd is changed Bibo Mao
@ 2020-06-24  9:26 ` Bibo Mao
  2020-06-24 19:49 ` [PATCH 1/3] mm: set page fault address for update_mmu_cache_pmd Andrew Morton
  2020-06-30 10:09 ` Kirill A. Shutemov
  3 siblings, 0 replies; 12+ messages in thread
From: Bibo Mao @ 2020-06-24  9:26 UTC (permalink / raw)
  To: Andrew Morton, Thomas Bogendoerfer, Paul Burton,
	Anshuman Khandual, Mike Rapoport, Daniel Silsby
  Cc: linux-mips, linux-kernel, linux-mm

Function set_pmd_at is to set pmd entry, if tlb entry need to
be flushed, there exists pmdp_huge_clear_flush alike function
before set_pmd_at is called. So it is not necessary to call
flush_tlb_all in this function.

In these scenarios, tlb for the pmd range needs to be flushed:
1. privilege degrade such as wrprotect is set on the pmd entry
2. pmd entry is cleared
3. there is exception if set_pmd_at is issued by dup_mmap, since
flush_tlb_mm is called for parent process, it is not necessary
to flush tlb in function copy_huge_pmd.

Signed-off-by: Bibo Mao <maobibo@loongson.cn>
---
 arch/mips/mm/pgtable-32.c | 1 -
 arch/mips/mm/pgtable-64.c | 1 -
 2 files changed, 2 deletions(-)

diff --git a/arch/mips/mm/pgtable-32.c b/arch/mips/mm/pgtable-32.c
index bd4b065..61891af 100644
--- a/arch/mips/mm/pgtable-32.c
+++ b/arch/mips/mm/pgtable-32.c
@@ -45,7 +45,6 @@ void set_pmd_at(struct mm_struct *mm, unsigned long addr,
 		pmd_t *pmdp, pmd_t pmd)
 {
 	*pmdp = pmd;
-	flush_tlb_all();
 }
 #endif /* defined(CONFIG_TRANSPARENT_HUGEPAGE) */
 
diff --git a/arch/mips/mm/pgtable-64.c b/arch/mips/mm/pgtable-64.c
index 183ff9f..7536f78 100644
--- a/arch/mips/mm/pgtable-64.c
+++ b/arch/mips/mm/pgtable-64.c
@@ -100,7 +100,6 @@ void set_pmd_at(struct mm_struct *mm, unsigned long addr,
 		pmd_t *pmdp, pmd_t pmd)
 {
 	*pmdp = pmd;
-	flush_tlb_all();
 }
 
 void __init pagetable_init(void)
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 1/3] mm: set page fault address for update_mmu_cache_pmd
  2020-06-24  9:26 [PATCH 1/3] mm: set page fault address for update_mmu_cache_pmd Bibo Mao
  2020-06-24  9:26 ` [PATCH 2/3] mm/huge_memory.c: update tlb entry if pmd is changed Bibo Mao
  2020-06-24  9:26 ` [PATCH 3/3] MIPS: Do not call flush_tlb_all when setting pmd entry Bibo Mao
@ 2020-06-24 19:49 ` Andrew Morton
  2020-06-30 10:09 ` Kirill A. Shutemov
  3 siblings, 0 replies; 12+ messages in thread
From: Andrew Morton @ 2020-06-24 19:49 UTC (permalink / raw)
  To: Bibo Mao
  Cc: Thomas Bogendoerfer, Paul Burton, Anshuman Khandual,
	Mike Rapoport, Daniel Silsby, linux-mips, linux-kernel, linux-mm

On Wed, 24 Jun 2020 17:26:30 +0800 Bibo Mao <maobibo@loongson.cn> wrote:

> update_mmu_cache_pmd is used to update tlb for the pmd entry by
> software. On MIPS system, the tlb entry indexed by page fault
> address maybe exists already, only that tlb entry may be small
> page, also it may be huge page. Before updating pmd entry with
> huge page size, older tlb entry need to be invalidated.
> 
> Here page fault address is passed to function update_mmu_cache_pmd,
> rather than pmd huge page start address. The page fault address
> can be used for invalidating older tlb entry.
> 
> ...
>
> --- a/arch/mips/include/asm/pgtable.h
> +++ b/arch/mips/include/asm/pgtable.h
> @@ -554,11 +554,20 @@ static inline void update_mmu_cache(struct vm_area_struct *vma,
>  #define	__HAVE_ARCH_UPDATE_MMU_TLB
>  #define update_mmu_tlb	update_mmu_cache
>  
> +extern void local_flush_tlb_page(struct vm_area_struct *vma,
> +				unsigned long page);

This is unfortunate.  We can't #include <asm/'tlbflush.h>?

>  static inline void update_mmu_cache_pmd(struct vm_area_struct *vma,
>  	unsigned long address, pmd_t *pmdp)
>  {
>  	pte_t pte = *(pte_t *)pmdp;
>  
> +	/*
> +	 * If pmd_none is true, older tlb entry will be normal page.
> +	 * here to invalidate older tlb entry indexed by address
> +	 * parameter address must be page fault address rather than
> +	 * start address of pmd huge page
> +	 */
> +	local_flush_tlb_page(vma, address);
>  	__update_tlb(vma, address, pte);
>  }
>  


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 2/3] mm/huge_memory.c: update tlb entry if pmd is changed
  2020-06-24  9:26 ` [PATCH 2/3] mm/huge_memory.c: update tlb entry if pmd is changed Bibo Mao
@ 2020-06-25  0:30   ` Mike Kravetz
  2020-06-25  9:57     ` maobibo
  2020-06-25 12:01     ` Aneesh Kumar K.V
  0 siblings, 2 replies; 12+ messages in thread
From: Mike Kravetz @ 2020-06-25  0:30 UTC (permalink / raw)
  To: Bibo Mao, Andrew Morton, Thomas Bogendoerfer, Paul Burton,
	Anshuman Khandual, Mike Rapoport, Daniel Silsby
  Cc: linux-mips, linux-kernel, linux-mm

On 6/24/20 2:26 AM, Bibo Mao wrote:
> When set_pmd_at is called in function do_huge_pmd_anonymous_page,
> new tlb entry can be added by software on MIPS platform.
> 
> Here add update_mmu_cache_pmd when pmd entry is set, and
> update_mmu_cache_pmd is defined as empty excepts arc/mips platform.
> This patch has no negative effect on other platforms except arc/mips
> system.

I am confused by this comment.  It appears that update_mmu_cache_pmd
is defined as non-empty on arc, mips, powerpc and sparc architectures.
Am I missing something?

If those architectures do provide update_mmu_cache_pmd, then the previous
patch and this one now call update_mmu_cache_pmd with the actual faulting
address instead of the huge page aligned address.  This was intentional
for mips.  However, are there any potential issues on the other architectures?
I am no expert in any of those architectures.  arc looks like it could be
problematic as update_mmu_cache_pmd calls update_mmu_cache and then
operates on (address & PAGE_MASK).  That could now be different.

-- 
Mike Kravetz

> 
> Signed-off-by: Bibo Mao <maobibo@loongson.cn>
> ---
>  mm/huge_memory.c | 2 ++
>  1 file changed, 2 insertions(+)
> 
> diff --git a/mm/huge_memory.c b/mm/huge_memory.c
> index 0f9187b..8b4ccf7 100644
> --- a/mm/huge_memory.c
> +++ b/mm/huge_memory.c
> @@ -643,6 +643,7 @@ static vm_fault_t __do_huge_pmd_anonymous_page(struct vm_fault *vmf,
>  		lru_cache_add_active_or_unevictable(page, vma);
>  		pgtable_trans_huge_deposit(vma->vm_mm, vmf->pmd, pgtable);
>  		set_pmd_at(vma->vm_mm, haddr, vmf->pmd, entry);
> +		update_mmu_cache_pmd(vma, vmf->address, vmf->pmd);
>  		add_mm_counter(vma->vm_mm, MM_ANONPAGES, HPAGE_PMD_NR);
>  		mm_inc_nr_ptes(vma->vm_mm);
>  		spin_unlock(vmf->ptl);
> @@ -756,6 +757,7 @@ vm_fault_t do_huge_pmd_anonymous_page(struct vm_fault *vmf)
>  			} else {
>  				set_huge_zero_page(pgtable, vma->vm_mm, vma,
>  						   haddr, vmf->pmd, zero_page);
> +				update_mmu_cache_pmd(vma, vmf->address, vmf->pmd);
>  				spin_unlock(vmf->ptl);
>  				set = true;
>  			}
> 

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 2/3] mm/huge_memory.c: update tlb entry if pmd is changed
  2020-06-25  0:30   ` Mike Kravetz
@ 2020-06-25  9:57     ` maobibo
  2020-06-25 12:01     ` Aneesh Kumar K.V
  1 sibling, 0 replies; 12+ messages in thread
From: maobibo @ 2020-06-25  9:57 UTC (permalink / raw)
  To: Mike Kravetz, Andrew Morton, Thomas Bogendoerfer, Paul Burton,
	Anshuman Khandual, Mike Rapoport, Daniel Silsby
  Cc: linux-mips, linux-kernel, linux-mm



On 06/25/2020 08:30 AM, Mike Kravetz wrote:
> On 6/24/20 2:26 AM, Bibo Mao wrote:
>> When set_pmd_at is called in function do_huge_pmd_anonymous_page,
>> new tlb entry can be added by software on MIPS platform.
>>
>> Here add update_mmu_cache_pmd when pmd entry is set, and
>> update_mmu_cache_pmd is defined as empty excepts arc/mips platform.
>> This patch has no negative effect on other platforms except arc/mips
>> system.
> 
> I am confused by this comment.  It appears that update_mmu_cache_pmd
> is defined as non-empty on arc, mips, powerpc and sparc architectures.
> Am I missing something?
ohh, sparc is missing here, it not defined as empty. On powerpc it is defined
as empty.

> 
> If those architectures do provide update_mmu_cache_pmd, then the previous
> patch and this one now call update_mmu_cache_pmd with the actual faulting
> address instead of the huge page aligned address.  This was intentional
> for mips.  However, are there any potential issues on the other architectures?
It is not special for mips, only that fault address is useful on mips system.
In function huge_pmd_set_accessed/do_huge_pmd_wp_page, update_mmu_cache_pmd is
called with vmf->address, rather than start address of pmd page.

regards
bibo,mao

> I am no expert in any of those architectures.  arc looks like it could be
> problematic as update_mmu_cache_pmd calls update_mmu_cache and then
> operates on (address & PAGE_MASK).  That could now be different.


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 2/3] mm/huge_memory.c: update tlb entry if pmd is changed
  2020-06-25  0:30   ` Mike Kravetz
  2020-06-25  9:57     ` maobibo
@ 2020-06-25 12:01     ` Aneesh Kumar K.V
  2020-06-25 16:46       ` Mike Kravetz
  1 sibling, 1 reply; 12+ messages in thread
From: Aneesh Kumar K.V @ 2020-06-25 12:01 UTC (permalink / raw)
  To: Mike Kravetz, Bibo Mao, Andrew Morton, Thomas Bogendoerfer,
	Paul Burton, Anshuman Khandual, Mike Rapoport, Daniel Silsby
  Cc: linux-mips, linux-kernel, linux-mm

Mike Kravetz <mike.kravetz@oracle.com> writes:

> On 6/24/20 2:26 AM, Bibo Mao wrote:
>> When set_pmd_at is called in function do_huge_pmd_anonymous_page,
>> new tlb entry can be added by software on MIPS platform.
>> 
>> Here add update_mmu_cache_pmd when pmd entry is set, and
>> update_mmu_cache_pmd is defined as empty excepts arc/mips platform.
>> This patch has no negative effect on other platforms except arc/mips
>> system.
>
> I am confused by this comment.  It appears that update_mmu_cache_pmd
> is defined as non-empty on arc, mips, powerpc and sparc architectures.
> Am I missing something?
>
> If those architectures do provide update_mmu_cache_pmd, then the previous
> patch and this one now call update_mmu_cache_pmd with the actual faulting
> address instead of the huge page aligned address.  This was intentional
> for mips.  However, are there any potential issues on the other architectures?
> I am no expert in any of those architectures.  arc looks like it could be
> problematic as update_mmu_cache_pmd calls update_mmu_cache and then
> operates on (address & PAGE_MASK).  That could now be different.
>

Also we added update_mmu_cache_pmd to update a THP entry. That could be
different from a hugetlb entry on some architectures. If we need to do
hugetlb equivalent for update_mmu_cache, we should add a different
function.

-aneesh

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 2/3] mm/huge_memory.c: update tlb entry if pmd is changed
  2020-06-25 12:01     ` Aneesh Kumar K.V
@ 2020-06-25 16:46       ` Mike Kravetz
  2020-06-26  8:13         ` Aneesh Kumar K.V
  0 siblings, 1 reply; 12+ messages in thread
From: Mike Kravetz @ 2020-06-25 16:46 UTC (permalink / raw)
  To: Aneesh Kumar K.V, Bibo Mao, Andrew Morton, Thomas Bogendoerfer,
	Paul Burton, Anshuman Khandual, Mike Rapoport, Daniel Silsby
  Cc: linux-mips, linux-kernel, linux-mm

On 6/25/20 5:01 AM, Aneesh Kumar K.V wrote:
> Mike Kravetz <mike.kravetz@oracle.com> writes:
> 
>> On 6/24/20 2:26 AM, Bibo Mao wrote:
>>> When set_pmd_at is called in function do_huge_pmd_anonymous_page,
>>> new tlb entry can be added by software on MIPS platform.
>>>
>>> Here add update_mmu_cache_pmd when pmd entry is set, and
>>> update_mmu_cache_pmd is defined as empty excepts arc/mips platform.
>>> This patch has no negative effect on other platforms except arc/mips
>>> system.
>>
>> I am confused by this comment.  It appears that update_mmu_cache_pmd
>> is defined as non-empty on arc, mips, powerpc and sparc architectures.
>> Am I missing something?
>>
>> If those architectures do provide update_mmu_cache_pmd, then the previous
>> patch and this one now call update_mmu_cache_pmd with the actual faulting
>> address instead of the huge page aligned address.  This was intentional
>> for mips.  However, are there any potential issues on the other architectures?
>> I am no expert in any of those architectures.  arc looks like it could be
>> problematic as update_mmu_cache_pmd calls update_mmu_cache and then
>> operates on (address & PAGE_MASK).  That could now be different.
>>
> 
> Also we added update_mmu_cache_pmd to update a THP entry. That could be
> different from a hugetlb entry on some architectures. If we need to do
> hugetlb equivalent for update_mmu_cache, we should add a different
> function.

I do not know the mips architecture well enough or if the motivation for
this patch was based on THP or hugetlb pages.  However, it will change
the address passed to update_mmu_cache_pmd from huge page aligned to the
actual faulting address.  Will such a change in the passed address impact
the powerpc update_mmu_cache_pmd routine?

-- 
Mike Kravetz

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 2/3] mm/huge_memory.c: update tlb entry if pmd is changed
  2020-06-25 16:46       ` Mike Kravetz
@ 2020-06-26  8:13         ` Aneesh Kumar K.V
  0 siblings, 0 replies; 12+ messages in thread
From: Aneesh Kumar K.V @ 2020-06-26  8:13 UTC (permalink / raw)
  To: Mike Kravetz, Bibo Mao, Andrew Morton, Thomas Bogendoerfer,
	Paul Burton, Anshuman Khandual, Mike Rapoport, Daniel Silsby
  Cc: linux-mips, linux-kernel, linux-mm

On 6/25/20 10:16 PM, Mike Kravetz wrote:
> On 6/25/20 5:01 AM, Aneesh Kumar K.V wrote:
>> Mike Kravetz <mike.kravetz@oracle.com> writes:
>>
>>> On 6/24/20 2:26 AM, Bibo Mao wrote:
>>>> When set_pmd_at is called in function do_huge_pmd_anonymous_page,
>>>> new tlb entry can be added by software on MIPS platform.
>>>>
>>>> Here add update_mmu_cache_pmd when pmd entry is set, and
>>>> update_mmu_cache_pmd is defined as empty excepts arc/mips platform.
>>>> This patch has no negative effect on other platforms except arc/mips
>>>> system.
>>>
>>> I am confused by this comment.  It appears that update_mmu_cache_pmd
>>> is defined as non-empty on arc, mips, powerpc and sparc architectures.
>>> Am I missing something?
>>>
>>> If those architectures do provide update_mmu_cache_pmd, then the previous
>>> patch and this one now call update_mmu_cache_pmd with the actual faulting
>>> address instead of the huge page aligned address.  This was intentional
>>> for mips.  However, are there any potential issues on the other architectures?
>>> I am no expert in any of those architectures.  arc looks like it could be
>>> problematic as update_mmu_cache_pmd calls update_mmu_cache and then
>>> operates on (address & PAGE_MASK).  That could now be different.
>>>
>>
>> Also we added update_mmu_cache_pmd to update a THP entry. That could be
>> different from a hugetlb entry on some architectures. If we need to do
>> hugetlb equivalent for update_mmu_cache, we should add a different
>> function.
> 
> I do not know the mips architecture well enough or if the motivation for
> this patch was based on THP or hugetlb pages.  However, it will change
> the address passed to update_mmu_cache_pmd from huge page aligned to the
> actual faulting address.  Will such a change in the passed address impact
> the powerpc update_mmu_cache_pmd routine?
> 

Right now powerpc update_mmu_cache_pmd() is a dummy function. But I 
agree we should audit arch to make sure such a change can work with 
architectures. My comment was related to the fact that mmu cache update 
w.r.t THP and hugetlb can be different on some platforms. So we may
want to avoid using the same function for both.

-aneesh

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 1/3] mm: set page fault address for update_mmu_cache_pmd
  2020-06-24  9:26 [PATCH 1/3] mm: set page fault address for update_mmu_cache_pmd Bibo Mao
                   ` (2 preceding siblings ...)
  2020-06-24 19:49 ` [PATCH 1/3] mm: set page fault address for update_mmu_cache_pmd Andrew Morton
@ 2020-06-30 10:09 ` Kirill A. Shutemov
  2020-06-30 10:42   ` maobibo
  3 siblings, 1 reply; 12+ messages in thread
From: Kirill A. Shutemov @ 2020-06-30 10:09 UTC (permalink / raw)
  To: Bibo Mao
  Cc: Andrew Morton, Thomas Bogendoerfer, Paul Burton,
	Anshuman Khandual, Mike Rapoport, Daniel Silsby, linux-mips,
	linux-kernel, linux-mm

On Wed, Jun 24, 2020 at 05:26:30PM +0800, Bibo Mao wrote:
> update_mmu_cache_pmd is used to update tlb for the pmd entry by
> software. On MIPS system, the tlb entry indexed by page fault
> address maybe exists already, only that tlb entry may be small
> page, also it may be huge page. Before updating pmd entry with
> huge page size, older tlb entry need to be invalidated.
> 
> Here page fault address is passed to function update_mmu_cache_pmd,
> rather than pmd huge page start address. The page fault address
> can be used for invalidating older tlb entry.
> 
> Signed-off-by: Bibo Mao <maobibo@loongson.cn>
> ---
>  arch/mips/include/asm/pgtable.h | 9 +++++++++
>  mm/huge_memory.c                | 7 ++++---
>  mm/memory.c                     | 2 +-
>  3 files changed, 14 insertions(+), 4 deletions(-)
> 
> diff --git a/arch/mips/include/asm/pgtable.h b/arch/mips/include/asm/pgtable.h
> index dd7a0f5..bd81661 100644
> --- a/arch/mips/include/asm/pgtable.h
> +++ b/arch/mips/include/asm/pgtable.h
> @@ -554,11 +554,20 @@ static inline void update_mmu_cache(struct vm_area_struct *vma,
>  #define	__HAVE_ARCH_UPDATE_MMU_TLB
>  #define update_mmu_tlb	update_mmu_cache
>  
> +extern void local_flush_tlb_page(struct vm_area_struct *vma,
> +				unsigned long page);
>  static inline void update_mmu_cache_pmd(struct vm_area_struct *vma,
>  	unsigned long address, pmd_t *pmdp)
>  {
>  	pte_t pte = *(pte_t *)pmdp;
>  
> +	/*
> +	 * If pmd_none is true, older tlb entry will be normal page.
> +	 * here to invalidate older tlb entry indexed by address
> +	 * parameter address must be page fault address rather than
> +	 * start address of pmd huge page
> +	 */
> +	local_flush_tlb_page(vma, address);

Can't say I follow what is going on.

Why local? What happens on SMP?

And don't you want to flush PMD_SIZE range around the address?

>  	__update_tlb(vma, address, pte);
>  }
>  
> diff --git a/mm/huge_memory.c b/mm/huge_memory.c
> index 78c84be..0f9187b 100644
> --- a/mm/huge_memory.c
> +++ b/mm/huge_memory.c
> @@ -780,6 +780,7 @@ static void insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr,
>  		pgtable_t pgtable)
>  {
>  	struct mm_struct *mm = vma->vm_mm;
> +	unsigned long start = addr & PMD_MASK;
>  	pmd_t entry;
>  	spinlock_t *ptl;
>  
> @@ -792,7 +793,7 @@ static void insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr,
>  			}
>  			entry = pmd_mkyoung(*pmd);
>  			entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
> -			if (pmdp_set_access_flags(vma, addr, pmd, entry, 1))
> +			if (pmdp_set_access_flags(vma, start, pmd, entry, 1))
>  				update_mmu_cache_pmd(vma, addr, pmd);
>  		}
>  
> @@ -813,7 +814,7 @@ static void insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr,
>  		pgtable = NULL;
>  	}
>  
> -	set_pmd_at(mm, addr, pmd, entry);
> +	set_pmd_at(mm, start, pmd, entry);
>  	update_mmu_cache_pmd(vma, addr, pmd);
>  
>  out_unlock:
> @@ -864,7 +865,7 @@ vm_fault_t vmf_insert_pfn_pmd_prot(struct vm_fault *vmf, pfn_t pfn,
>  
>  	track_pfn_insert(vma, &pgprot, pfn);
>  
> -	insert_pfn_pmd(vma, addr, vmf->pmd, pfn, pgprot, write, pgtable);
> +	insert_pfn_pmd(vma, vmf->address, vmf->pmd, pfn, pgprot, write, pgtable);
>  	return VM_FAULT_NOPAGE;
>  }
>  EXPORT_SYMBOL_GPL(vmf_insert_pfn_pmd_prot);
> diff --git a/mm/memory.c b/mm/memory.c
> index dc7f354..c703458 100644
> --- a/mm/memory.c
> +++ b/mm/memory.c
> @@ -3592,7 +3592,7 @@ static vm_fault_t do_set_pmd(struct vm_fault *vmf, struct page *page)
>  
>  	set_pmd_at(vma->vm_mm, haddr, vmf->pmd, entry);
>  
> -	update_mmu_cache_pmd(vma, haddr, vmf->pmd);
> +	update_mmu_cache_pmd(vma, vmf->address, vmf->pmd);
>  
>  	/* fault is handled */
>  	ret = 0;
> -- 
> 1.8.3.1
> 
> 

-- 
 Kirill A. Shutemov

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 1/3] mm: set page fault address for update_mmu_cache_pmd
  2020-06-30 10:09 ` Kirill A. Shutemov
@ 2020-06-30 10:42   ` maobibo
  2020-07-01  2:54     ` maobibo
  0 siblings, 1 reply; 12+ messages in thread
From: maobibo @ 2020-06-30 10:42 UTC (permalink / raw)
  To: Kirill A. Shutemov
  Cc: Andrew Morton, Thomas Bogendoerfer, Paul Burton,
	Anshuman Khandual, Mike Rapoport, Daniel Silsby, linux-mips,
	linux-kernel, linux-mm



On 06/30/2020 06:09 PM, Kirill A. Shutemov wrote:
> On Wed, Jun 24, 2020 at 05:26:30PM +0800, Bibo Mao wrote:
>> update_mmu_cache_pmd is used to update tlb for the pmd entry by
>> software. On MIPS system, the tlb entry indexed by page fault
>> address maybe exists already, only that tlb entry may be small
>> page, also it may be huge page. Before updating pmd entry with
>> huge page size, older tlb entry need to be invalidated.
>>
>> Here page fault address is passed to function update_mmu_cache_pmd,
>> rather than pmd huge page start address. The page fault address
>> can be used for invalidating older tlb entry.
>>
>> Signed-off-by: Bibo Mao <maobibo@loongson.cn>
>> ---
>>  arch/mips/include/asm/pgtable.h | 9 +++++++++
>>  mm/huge_memory.c                | 7 ++++---
>>  mm/memory.c                     | 2 +-
>>  3 files changed, 14 insertions(+), 4 deletions(-)
>>
>> diff --git a/arch/mips/include/asm/pgtable.h b/arch/mips/include/asm/pgtable.h
>> index dd7a0f5..bd81661 100644
>> --- a/arch/mips/include/asm/pgtable.h
>> +++ b/arch/mips/include/asm/pgtable.h
>> @@ -554,11 +554,20 @@ static inline void update_mmu_cache(struct vm_area_struct *vma,
>>  #define	__HAVE_ARCH_UPDATE_MMU_TLB
>>  #define update_mmu_tlb	update_mmu_cache
>>  
>> +extern void local_flush_tlb_page(struct vm_area_struct *vma,
>> +				unsigned long page);
>>  static inline void update_mmu_cache_pmd(struct vm_area_struct *vma,
>>  	unsigned long address, pmd_t *pmdp)
>>  {
>>  	pte_t pte = *(pte_t *)pmdp;
>>  
>> +	/*
>> +	 * If pmd_none is true, older tlb entry will be normal page.
>> +	 * here to invalidate older tlb entry indexed by address
>> +	 * parameter address must be page fault address rather than
>> +	 * start address of pmd huge page
>> +	 */
>> +	local_flush_tlb_page(vma, address);
> 
> Can't say I follow what is going on.
> 
> Why local? What happens on SMP?
> 
> And don't you want to flush PMD_SIZE range around the address?
There exists two conditions:
1. The address is accessed for the first time, there will be one tlb entry with normal page
   size, and privilege for the tlb entry is none. If new tlb entry wants to be added with
   huge page size, the older tlb entry needs to be removed.  Local flushing is enough, if there
   are smp threads running, there will be page fault handing since privilege level is none. During
   page fault handling, the other threads will do the same work, flush local entry, update new entry
   with huge page size.

2. It is not accessed by the first time, there exists old tlb entry with huge page such as COW scenery.
   local_flush_tlb_page is not necessary here, old tlb with huge page will be replace with new tlb
   in function __update_tlb.

For PMD_SIZE range around the address, there exists one tlb entry with huge page size, or one tlb entry
with normal page size and zero privilege. It is impossible that there exists two or more tlb entries
with normal page within PMD_SIZE range, so we do not need flush pmd range size, just flush one tlb entry
is ok.

regards
bibo,mao

> 
>>  	__update_tlb(vma, address, pte);
>>  }
>>  
>> diff --git a/mm/huge_memory.c b/mm/huge_memory.c
>> index 78c84be..0f9187b 100644
>> --- a/mm/huge_memory.c
>> +++ b/mm/huge_memory.c
>> @@ -780,6 +780,7 @@ static void insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr,
>>  		pgtable_t pgtable)
>>  {
>>  	struct mm_struct *mm = vma->vm_mm;
>> +	unsigned long start = addr & PMD_MASK;
>>  	pmd_t entry;
>>  	spinlock_t *ptl;
>>  
>> @@ -792,7 +793,7 @@ static void insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr,
>>  			}
>>  			entry = pmd_mkyoung(*pmd);
>>  			entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
>> -			if (pmdp_set_access_flags(vma, addr, pmd, entry, 1))
>> +			if (pmdp_set_access_flags(vma, start, pmd, entry, 1))
>>  				update_mmu_cache_pmd(vma, addr, pmd);
>>  		}
>>  
>> @@ -813,7 +814,7 @@ static void insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr,
>>  		pgtable = NULL;
>>  	}
>>  
>> -	set_pmd_at(mm, addr, pmd, entry);
>> +	set_pmd_at(mm, start, pmd, entry);
>>  	update_mmu_cache_pmd(vma, addr, pmd);
>>  
>>  out_unlock:
>> @@ -864,7 +865,7 @@ vm_fault_t vmf_insert_pfn_pmd_prot(struct vm_fault *vmf, pfn_t pfn,
>>  
>>  	track_pfn_insert(vma, &pgprot, pfn);
>>  
>> -	insert_pfn_pmd(vma, addr, vmf->pmd, pfn, pgprot, write, pgtable);
>> +	insert_pfn_pmd(vma, vmf->address, vmf->pmd, pfn, pgprot, write, pgtable);
>>  	return VM_FAULT_NOPAGE;
>>  }
>>  EXPORT_SYMBOL_GPL(vmf_insert_pfn_pmd_prot);
>> diff --git a/mm/memory.c b/mm/memory.c
>> index dc7f354..c703458 100644
>> --- a/mm/memory.c
>> +++ b/mm/memory.c
>> @@ -3592,7 +3592,7 @@ static vm_fault_t do_set_pmd(struct vm_fault *vmf, struct page *page)
>>  
>>  	set_pmd_at(vma->vm_mm, haddr, vmf->pmd, entry);
>>  
>> -	update_mmu_cache_pmd(vma, haddr, vmf->pmd);
>> +	update_mmu_cache_pmd(vma, vmf->address, vmf->pmd);
>>  
>>  	/* fault is handled */
>>  	ret = 0;
>> -- 
>> 1.8.3.1
>>
>>
> 


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 1/3] mm: set page fault address for update_mmu_cache_pmd
  2020-06-30 10:42   ` maobibo
@ 2020-07-01  2:54     ` maobibo
  0 siblings, 0 replies; 12+ messages in thread
From: maobibo @ 2020-07-01  2:54 UTC (permalink / raw)
  To: Kirill A. Shutemov
  Cc: Andrew Morton, Thomas Bogendoerfer, Paul Burton,
	Anshuman Khandual, Mike Rapoport, Daniel Silsby, linux-mips,
	linux-kernel, linux-mm



On 06/30/2020 06:42 PM, maobibo wrote:
> 
> 
> On 06/30/2020 06:09 PM, Kirill A. Shutemov wrote:
>> On Wed, Jun 24, 2020 at 05:26:30PM +0800, Bibo Mao wrote:
>>> update_mmu_cache_pmd is used to update tlb for the pmd entry by
>>> software. On MIPS system, the tlb entry indexed by page fault
>>> address maybe exists already, only that tlb entry may be small
>>> page, also it may be huge page. Before updating pmd entry with
>>> huge page size, older tlb entry need to be invalidated.
>>>
>>> Here page fault address is passed to function update_mmu_cache_pmd,
>>> rather than pmd huge page start address. The page fault address
>>> can be used for invalidating older tlb entry.
>>>
>>> Signed-off-by: Bibo Mao <maobibo@loongson.cn>
>>> ---
>>>  arch/mips/include/asm/pgtable.h | 9 +++++++++
>>>  mm/huge_memory.c                | 7 ++++---
>>>  mm/memory.c                     | 2 +-
>>>  3 files changed, 14 insertions(+), 4 deletions(-)
>>>
>>> diff --git a/arch/mips/include/asm/pgtable.h b/arch/mips/include/asm/pgtable.h
>>> index dd7a0f5..bd81661 100644
>>> --- a/arch/mips/include/asm/pgtable.h
>>> +++ b/arch/mips/include/asm/pgtable.h
>>> @@ -554,11 +554,20 @@ static inline void update_mmu_cache(struct vm_area_struct *vma,
>>>  #define	__HAVE_ARCH_UPDATE_MMU_TLB
>>>  #define update_mmu_tlb	update_mmu_cache
>>>  
>>> +extern void local_flush_tlb_page(struct vm_area_struct *vma,
>>> +				unsigned long page);
>>>  static inline void update_mmu_cache_pmd(struct vm_area_struct *vma,
>>>  	unsigned long address, pmd_t *pmdp)
>>>  {
>>>  	pte_t pte = *(pte_t *)pmdp;
>>>  
>>> +	/*
>>> +	 * If pmd_none is true, older tlb entry will be normal page.
>>> +	 * here to invalidate older tlb entry indexed by address
>>> +	 * parameter address must be page fault address rather than
>>> +	 * start address of pmd huge page
>>> +	 */
>>> +	local_flush_tlb_page(vma, address);
>>
>> Can't say I follow what is going on.
>>
>> Why local? What happens on SMP?
>>
>> And don't you want to flush PMD_SIZE range around the address?
> There exists two conditions:
> 1. The address is accessed for the first time, there will be one tlb entry with normal page
>    size, and privilege for the tlb entry is none. If new tlb entry wants to be added with
>    huge page size, the older tlb entry needs to be removed.  Local flushing is enough, if there
>    are smp threads running, there will be page fault handing since privilege level is none. During
>    page fault handling, the other threads will do the same work, flush local entry, update new entry
>    with huge page size.
> 
> 2. It is not accessed by the first time, there exists old tlb entry with huge page such as COW scenery.
>    local_flush_tlb_page is not necessary here, old tlb with huge page will be replace with new tlb
>    in function __update_tlb.
> 
> For PMD_SIZE range around the address, there exists one tlb entry with huge page size, or one tlb entry
> with normal page size and zero privilege. It is impossible that there exists two or more tlb entries
> with normal page within PMD_SIZE range, so we do not need flush pmd range size, just flush one tlb entry
> is ok.
Sorry for the noise, please discard the patch.

Actually there exists two or more tlb entries with normal page within 
PMD_SIZE range. If multiple threads run on UP or one CPU, these threads
are access the same huge page but different normal pages. Page fault
happens on thread1 and thread1 is sched out during page fault handing.
thread2 is sched in and page fault happens again, there will be two
tlb entries with normal page. This problem exists even without the patch.


> 
> regards
> bibo,mao
> 
>>
>>>  	__update_tlb(vma, address, pte);
>>>  }
>>>  
>>> diff --git a/mm/huge_memory.c b/mm/huge_memory.c
>>> index 78c84be..0f9187b 100644
>>> --- a/mm/huge_memory.c
>>> +++ b/mm/huge_memory.c
>>> @@ -780,6 +780,7 @@ static void insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr,
>>>  		pgtable_t pgtable)
>>>  {
>>>  	struct mm_struct *mm = vma->vm_mm;
>>> +	unsigned long start = addr & PMD_MASK;
>>>  	pmd_t entry;
>>>  	spinlock_t *ptl;
>>>  
>>> @@ -792,7 +793,7 @@ static void insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr,
>>>  			}
>>>  			entry = pmd_mkyoung(*pmd);
>>>  			entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
>>> -			if (pmdp_set_access_flags(vma, addr, pmd, entry, 1))
>>> +			if (pmdp_set_access_flags(vma, start, pmd, entry, 1))
>>>  				update_mmu_cache_pmd(vma, addr, pmd);
>>>  		}
>>>  
>>> @@ -813,7 +814,7 @@ static void insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr,
>>>  		pgtable = NULL;
>>>  	}
>>>  
>>> -	set_pmd_at(mm, addr, pmd, entry);
>>> +	set_pmd_at(mm, start, pmd, entry);
>>>  	update_mmu_cache_pmd(vma, addr, pmd);
>>>  
>>>  out_unlock:
>>> @@ -864,7 +865,7 @@ vm_fault_t vmf_insert_pfn_pmd_prot(struct vm_fault *vmf, pfn_t pfn,
>>>  
>>>  	track_pfn_insert(vma, &pgprot, pfn);
>>>  
>>> -	insert_pfn_pmd(vma, addr, vmf->pmd, pfn, pgprot, write, pgtable);
>>> +	insert_pfn_pmd(vma, vmf->address, vmf->pmd, pfn, pgprot, write, pgtable);
>>>  	return VM_FAULT_NOPAGE;
>>>  }
>>>  EXPORT_SYMBOL_GPL(vmf_insert_pfn_pmd_prot);
>>> diff --git a/mm/memory.c b/mm/memory.c
>>> index dc7f354..c703458 100644
>>> --- a/mm/memory.c
>>> +++ b/mm/memory.c
>>> @@ -3592,7 +3592,7 @@ static vm_fault_t do_set_pmd(struct vm_fault *vmf, struct page *page)
>>>  
>>>  	set_pmd_at(vma->vm_mm, haddr, vmf->pmd, entry);
>>>  
>>> -	update_mmu_cache_pmd(vma, haddr, vmf->pmd);
>>> +	update_mmu_cache_pmd(vma, vmf->address, vmf->pmd);
>>>  
>>>  	/* fault is handled */
>>>  	ret = 0;
>>> -- 
>>> 1.8.3.1
>>>
>>>
>>


^ permalink raw reply	[flat|nested] 12+ messages in thread

end of thread, back to index

Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-06-24  9:26 [PATCH 1/3] mm: set page fault address for update_mmu_cache_pmd Bibo Mao
2020-06-24  9:26 ` [PATCH 2/3] mm/huge_memory.c: update tlb entry if pmd is changed Bibo Mao
2020-06-25  0:30   ` Mike Kravetz
2020-06-25  9:57     ` maobibo
2020-06-25 12:01     ` Aneesh Kumar K.V
2020-06-25 16:46       ` Mike Kravetz
2020-06-26  8:13         ` Aneesh Kumar K.V
2020-06-24  9:26 ` [PATCH 3/3] MIPS: Do not call flush_tlb_all when setting pmd entry Bibo Mao
2020-06-24 19:49 ` [PATCH 1/3] mm: set page fault address for update_mmu_cache_pmd Andrew Morton
2020-06-30 10:09 ` Kirill A. Shutemov
2020-06-30 10:42   ` maobibo
2020-07-01  2:54     ` maobibo

Linux-MIPS Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/linux-mips/0 linux-mips/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 linux-mips linux-mips/ https://lore.kernel.org/linux-mips \
		linux-mips@vger.kernel.org
	public-inbox-index linux-mips

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.linux-mips


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git