* [patch]x86: avoid unnecessary tlb flush
@ 2010-08-06  3:28 Shaohua Li
  2010-08-06  5:19 ` Andrew Morton
  0 siblings, 1 reply; 9+ messages in thread
From: Shaohua Li @ 2010-08-06  3:28 UTC (permalink / raw)
  To: lkml; +Cc: Ingo Molnar, Andi Kleen, Andrew Morton, hpa

On x86, the access and dirty bits are set automatically by the CPU when it
accesses memory. By the time we reach the flush_tlb_nonprotect_page() call
below, the dirty bit has already been set in the pte, so no TLB flush is
needed. Some CPUs may still hold a TLB entry without the dirty bit set, but
that does not matter: when those CPUs write to the page, the hardware checks
and sets the bit itself, with no software involvement.

On the other hand, flushing the TLB at this point is harmful. The test
creates one thread per CPU; each thread writes to the same, randomly chosen
address within the same vma range, and we measure the total time. On a
4-socket system the original time is 1.96s, while with the patch it is 0.8s.
On a 2-socket system there is a 20% reduction as well. perf shows that a
large share of the time is spent sending and handling IPIs for the TLB flush.
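
A minimal userspace sketch of such a test (the original benchmark is not
included in this mail, so the access pattern, sizes and names below are
assumptions): one thread per online CPU, all threads racing to write the
same randomly chosen page of one shared anonymous vma each round, with the
total wall time measured.

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/mman.h>
#include <time.h>
#include <unistd.h>

#define MAP_SIZE	(256UL << 20)
#define ROUNDS		100000

static char *map;
static long page_size;
static volatile unsigned long target;	/* offset every thread writes this round */
static pthread_barrier_t barrier;

static void *writer(void *arg)
{
	long round;

	(void)arg;
	for (round = 0; round < ROUNDS; round++) {
		pthread_barrier_wait(&barrier);
		/*
		 * Concurrent writes to the same page: the threads that lose
		 * the race fault on a pte that is already present and dirty.
		 */
		map[target] = 1;
		pthread_barrier_wait(&barrier);
	}
	return NULL;
}

int main(void)
{
	long ncpus = sysconf(_SC_NPROCESSORS_ONLN);
	pthread_t *threads = calloc(ncpus, sizeof(*threads));
	struct timespec t0, t1;
	long i, round;

	page_size = sysconf(_SC_PAGESIZE);
	map = mmap(NULL, MAP_SIZE, PROT_READ | PROT_WRITE,
		   MAP_SHARED | MAP_ANONYMOUS, -1, 0);
	pthread_barrier_init(&barrier, NULL, ncpus + 1);

	clock_gettime(CLOCK_MONOTONIC, &t0);
	for (i = 0; i < ncpus; i++)
		pthread_create(&threads[i], NULL, writer, NULL);
	for (round = 0; round < ROUNDS; round++) {
		target = (random() % (MAP_SIZE / page_size)) * page_size;
		pthread_barrier_wait(&barrier);	/* release this round's writes */
		pthread_barrier_wait(&barrier);	/* wait for them to finish */
	}
	for (i = 0; i < ncpus; i++)
		pthread_join(threads[i], NULL);
	clock_gettime(CLOCK_MONOTONIC, &t1);

	printf("total: %.2fs\n", (t1.tv_sec - t0.tv_sec) +
	       (t1.tv_nsec - t0.tv_nsec) / 1e9);
	free(threads);
	return 0;
}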

Signed-off-by: Shaohua Li <shaohua.li@intel.com>

---
 arch/x86/include/asm/pgtable.h |    3 +++
 include/asm-generic/pgtable.h  |    4 ++++
 mm/memory.c                    |    2 +-
 3 files changed, 8 insertions(+), 1 deletion(-)

Index: linux/arch/x86/include/asm/pgtable.h
===================================================================
--- linux.orig/arch/x86/include/asm/pgtable.h	2010-07-29 13:25:12.000000000 +0800
+++ linux/arch/x86/include/asm/pgtable.h	2010-08-03 09:02:07.000000000 +0800
@@ -603,6 +603,9 @@ static inline void ptep_set_wrprotect(st
 	pte_update(mm, addr, ptep);
 }
 
+#define __HAVE_ARCH_FLUSH_TLB_NONPROTECT_PAGE
+#define flush_tlb_nonprotect_page(vma, address)
+
 /*
  * clone_pgd_range(pgd_t *dst, pgd_t *src, int count);
  *
Index: linux/include/asm-generic/pgtable.h
===================================================================
--- linux.orig/include/asm-generic/pgtable.h	2010-07-29 13:25:12.000000000 +0800
+++ linux/include/asm-generic/pgtable.h	2010-08-03 09:02:07.000000000 +0800
@@ -129,6 +129,10 @@ static inline void ptep_set_wrprotect(st
 #define move_pte(pte, prot, old_addr, new_addr)	(pte)
 #endif
 
+#ifndef __HAVE_ARCH_FLUSH_TLB_NONPROTECT_PAGE
+#define flush_tlb_nonprotect_page(vma, address) flush_tlb_page(vma, address)
+#endif
+
 #ifndef pgprot_noncached
 #define pgprot_noncached(prot)	(prot)
 #endif
Index: linux/mm/memory.c
===================================================================
--- linux.orig/mm/memory.c	2010-08-02 08:50:05.000000000 +0800
+++ linux/mm/memory.c	2010-08-03 09:02:07.000000000 +0800
@@ -3116,7 +3116,7 @@ static inline int handle_pte_fault(struc
 		 * with threads.
 		 */
 		if (flags & FAULT_FLAG_WRITE)
-			flush_tlb_page(vma, address);
+			flush_tlb_nonprotect_page(vma, address);
 	}
 unlock:
 	pte_unmap_unlock(pte, ptl);




* Re: [patch]x86: avoid unnecessary tlb flush
  2010-08-06  3:28 [patch]x86: avoid unnecessary tlb flush Shaohua Li
@ 2010-08-06  5:19 ` Andrew Morton
  2010-08-13  0:47   ` Shaohua Li
  0 siblings, 1 reply; 9+ messages in thread
From: Andrew Morton @ 2010-08-06  5:19 UTC (permalink / raw)
  To: Shaohua Li; +Cc: lkml, Ingo Molnar, Andi Kleen, hpa

On Fri, 06 Aug 2010 11:28:28 +0800 Shaohua Li <shaohua.li@intel.com> wrote:

> On x86, the access and dirty bits are set automatically by the CPU when it
> accesses memory. By the time we reach the flush_tlb_nonprotect_page() call
> below, the dirty bit has already been set in the pte, so no TLB flush is
> needed. Some CPUs may still hold a TLB entry without the dirty bit set, but
> that does not matter: when those CPUs write to the page, the hardware checks
> and sets the bit itself, with no software involvement.
> 
> On the other hand, flushing the TLB at this point is harmful. The test
> creates one thread per CPU; each thread writes to the same, randomly chosen
> address within the same vma range, and we measure the total time. On a
> 4-socket system the original time is 1.96s, while with the patch it is 0.8s.
> On a 2-socket system there is a 20% reduction as well. perf shows that a
> large share of the time is spent sending and handling IPIs for the TLB flush.
> 
> Signed-off-by: Shaohua Li <shaohua.li@intel.com>
> 
> ---
>  arch/x86/include/asm/pgtable.h |    3 +++
>  include/asm-generic/pgtable.h  |    4 ++++
>  mm/memory.c                    |    2 +-
>  3 files changed, 8 insertions(+), 1 deletion(-)
> 
> Index: linux/arch/x86/include/asm/pgtable.h
> ===================================================================
> --- linux.orig/arch/x86/include/asm/pgtable.h	2010-07-29 13:25:12.000000000 +0800
> +++ linux/arch/x86/include/asm/pgtable.h	2010-08-03 09:02:07.000000000 +0800
> @@ -603,6 +603,9 @@ static inline void ptep_set_wrprotect(st
>  	pte_update(mm, addr, ptep);
>  }
>  
> +#define __HAVE_ARCH_FLUSH_TLB_NONPROTECT_PAGE
> +#define flush_tlb_nonprotect_page(vma, address)
> +
>  /*
>   * clone_pgd_range(pgd_t *dst, pgd_t *src, int count);
>   *
> Index: linux/include/asm-generic/pgtable.h
> ===================================================================
> --- linux.orig/include/asm-generic/pgtable.h	2010-07-29 13:25:12.000000000 +0800
> +++ linux/include/asm-generic/pgtable.h	2010-08-03 09:02:07.000000000 +0800
> @@ -129,6 +129,10 @@ static inline void ptep_set_wrprotect(st
>  #define move_pte(pte, prot, old_addr, new_addr)	(pte)
>  #endif
>  
> +#ifndef __HAVE_ARCH_FLUSH_TLB_NONPROTECT_PAGE
> +#define flush_tlb_nonprotect_page(vma, address) flush_tlb_page(vma, address)
> +#endif

The preferred technique here is

#ifndef flush_tlb_nonprotect_page
#define flush_tlb_nonprotect_page(vma, address) flush_tlb_page(vma, address)
#endif

so no need for __HAVE_ARCH_FLUSH_TLB_NONPROTECT_PAGE. 
include/asm-generic/pgtable.h uses a mix of the two techniques.



* Re: [patch]x86: avoid unnecessary tlb flush
  2010-08-06  5:19 ` Andrew Morton
@ 2010-08-13  0:47   ` Shaohua Li
  2010-08-13 19:29     ` Hugh Dickins
  0 siblings, 1 reply; 9+ messages in thread
From: Shaohua Li @ 2010-08-13  0:47 UTC (permalink / raw)
  To: Andrew Morton; +Cc: lkml, Ingo Molnar, Andi Kleen, hpa

On Fri, 2010-08-06 at 13:19 +0800, Andrew Morton wrote:
> On Fri, 06 Aug 2010 11:28:28 +0800 Shaohua Li <shaohua.li@intel.com> wrote:
> 
> > On x86, the access and dirty bits are set automatically by the CPU when it
> > accesses memory. By the time we reach the flush_tlb_nonprotect_page() call
> > below, the dirty bit has already been set in the pte, so no TLB flush is
> > needed. Some CPUs may still hold a TLB entry without the dirty bit set, but
> > that does not matter: when those CPUs write to the page, the hardware checks
> > and sets the bit itself, with no software involvement.
> > 
> > On the other hand, flushing the TLB at this point is harmful. The test
> > creates one thread per CPU; each thread writes to the same, randomly chosen
> > address within the same vma range, and we measure the total time. On a
> > 4-socket system the original time is 1.96s, while with the patch it is 0.8s.
> > On a 2-socket system there is a 20% reduction as well. perf shows that a
> > large share of the time is spent sending and handling IPIs for the TLB flush.
> > 
> > Signed-off-by: Shaohua Li <shaohua.li@intel.com>
> > 
> > ---
> >  arch/x86/include/asm/pgtable.h |    3 +++
> >  include/asm-generic/pgtable.h  |    4 ++++
> >  mm/memory.c                    |    2 +-
> >  3 files changed, 8 insertions(+), 1 deletion(-)
> > 
> > Index: linux/arch/x86/include/asm/pgtable.h
> > ===================================================================
> > --- linux.orig/arch/x86/include/asm/pgtable.h	2010-07-29 13:25:12.000000000 +0800
> > +++ linux/arch/x86/include/asm/pgtable.h	2010-08-03 09:02:07.000000000 +0800
> > @@ -603,6 +603,9 @@ static inline void ptep_set_wrprotect(st
> >  	pte_update(mm, addr, ptep);
> >  }
> >  
> > +#define __HAVE_ARCH_FLUSH_TLB_NONPROTECT_PAGE
> > +#define flush_tlb_nonprotect_page(vma, address)
> > +
> >  /*
> >   * clone_pgd_range(pgd_t *dst, pgd_t *src, int count);
> >   *
> > Index: linux/include/asm-generic/pgtable.h
> > ===================================================================
> > --- linux.orig/include/asm-generic/pgtable.h	2010-07-29 13:25:12.000000000 +0800
> > +++ linux/include/asm-generic/pgtable.h	2010-08-03 09:02:07.000000000 +0800
> > @@ -129,6 +129,10 @@ static inline void ptep_set_wrprotect(st
> >  #define move_pte(pte, prot, old_addr, new_addr)	(pte)
> >  #endif
> >  
> > +#ifndef __HAVE_ARCH_FLUSH_TLB_NONPROTECT_PAGE
> > +#define flush_tlb_nonprotect_page(vma, address) flush_tlb_page(vma, address)
> > +#endif
> 
> The preferred technique here is
> 
> #ifndef flush_tlb_nonprotect_page
> #define flush_tlb_nonprotect_page(vma, address) flush_tlb_page(vma, address)
> #endif
> 
> so no need for __HAVE_ARCH_FLUSH_TLB_NONPROTECT_PAGE. 
> include/asm-generic/pgtable.h uses a mix of the two techniques.
ok, updated the patch.


On x86, the access and dirty bits are set automatically by the CPU when it
accesses memory. By the time we reach the flush_tlb_nonprotect_page() call
below, the dirty bit has already been set in the pte, so no TLB flush is
needed. Some CPUs may still hold a TLB entry without the dirty bit set, but
that does not matter: when those CPUs write to the page, the hardware checks
and sets the bit itself, with no software involvement.

On the other hand, flushing the TLB at this point is harmful. The test
creates one thread per CPU; each thread writes to the same, randomly chosen
address within the same vma range, and we measure the total time. On a
4-socket system the original time is 1.96s, while with the patch it is 0.8s.
On a 2-socket system there is a 20% reduction as well. perf shows that a
large share of the time is spent sending and handling IPIs for the TLB flush.

Signed-off-by: Shaohua Li <shaohua.li@intel.com>

---
 arch/x86/include/asm/pgtable.h |    2 ++
 include/asm-generic/pgtable.h  |    4 ++++
 mm/memory.c                    |    2 +-
 3 files changed, 7 insertions(+), 1 deletion(-)

Index: linux/arch/x86/include/asm/pgtable.h
===================================================================
--- linux.orig/arch/x86/include/asm/pgtable.h	2010-08-13 08:23:13.000000000 +0800
+++ linux/arch/x86/include/asm/pgtable.h	2010-08-13 08:24:53.000000000 +0800
@@ -603,6 +603,8 @@ static inline void ptep_set_wrprotect(st
 	pte_update(mm, addr, ptep);
 }
 
+#define flush_tlb_nonprotect_page(vma, address)
+
 /*
  * clone_pgd_range(pgd_t *dst, pgd_t *src, int count);
  *
Index: linux/include/asm-generic/pgtable.h
===================================================================
--- linux.orig/include/asm-generic/pgtable.h	2010-08-13 08:23:13.000000000 +0800
+++ linux/include/asm-generic/pgtable.h	2010-08-13 08:24:53.000000000 +0800
@@ -129,6 +129,10 @@ static inline void ptep_set_wrprotect(st
 #define move_pte(pte, prot, old_addr, new_addr)	(pte)
 #endif
 
+#ifndef flush_tlb_nonprotect_page
+#define flush_tlb_nonprotect_page(vma, address) flush_tlb_page(vma, address)
+#endif
+
 #ifndef pgprot_noncached
 #define pgprot_noncached(prot)	(prot)
 #endif
Index: linux/mm/memory.c
===================================================================
--- linux.orig/mm/memory.c	2010-08-13 08:23:13.000000000 +0800
+++ linux/mm/memory.c	2010-08-13 08:24:53.000000000 +0800
@@ -3116,7 +3116,7 @@ static inline int handle_pte_fault(struc
 		 * with threads.
 		 */
 		if (flags & FAULT_FLAG_WRITE)
-			flush_tlb_page(vma, address);
+			flush_tlb_nonprotect_page(vma, address);
 	}
 unlock:
 	pte_unmap_unlock(pte, ptl);




* Re: [patch]x86: avoid unnecessary tlb flush
  2010-08-13  0:47   ` Shaohua Li
@ 2010-08-13 19:29     ` Hugh Dickins
  2010-08-13 21:08       ` H. Peter Anvin
  0 siblings, 1 reply; 9+ messages in thread
From: Hugh Dickins @ 2010-08-13 19:29 UTC (permalink / raw)
  To: Shaohua Li
  Cc: Andrea Arcangeli, Andrew Morton, lkml, Ingo Molnar, Andi Kleen, hpa

On Fri, 13 Aug 2010, Shaohua Li wrote:
> 
> On x86, the access and dirty bits are set automatically by the CPU when it
> accesses memory. By the time we reach the flush_tlb_nonprotect_page() call
> below, the dirty bit has already been set in the pte, so no TLB flush is
> needed. Some CPUs may still hold a TLB entry without the dirty bit set, but
> that does not matter: when those CPUs write to the page, the hardware checks
> and sets the bit itself, with no software involvement.
> 
> On the other hand, flushing the TLB at this point is harmful. The test
> creates one thread per CPU; each thread writes to the same, randomly chosen
> address within the same vma range, and we measure the total time. On a
> 4-socket system the original time is 1.96s, while with the patch it is 0.8s.
> On a 2-socket system there is a 20% reduction as well. perf shows that a
> large share of the time is spent sending and handling IPIs for the TLB flush.
> 
> Signed-off-by: Shaohua Li <shaohua.li@intel.com>
> 
> ---
>  arch/x86/include/asm/pgtable.h |    2 ++
>  include/asm-generic/pgtable.h  |    4 ++++
>  mm/memory.c                    |    2 +-
>  3 files changed, 7 insertions(+), 1 deletion(-)
> 
> Index: linux/arch/x86/include/asm/pgtable.h
> ===================================================================
> --- linux.orig/arch/x86/include/asm/pgtable.h	2010-08-13 08:23:13.000000000 +0800
> +++ linux/arch/x86/include/asm/pgtable.h	2010-08-13 08:24:53.000000000 +0800
> @@ -603,6 +603,8 @@ static inline void ptep_set_wrprotect(st
>  	pte_update(mm, addr, ptep);
>  }
>  
> +#define flush_tlb_nonprotect_page(vma, address)
> +
>  /*
>   * clone_pgd_range(pgd_t *dst, pgd_t *src, int count);
>   *
> Index: linux/include/asm-generic/pgtable.h
> ===================================================================
> --- linux.orig/include/asm-generic/pgtable.h	2010-08-13 08:23:13.000000000 +0800
> +++ linux/include/asm-generic/pgtable.h	2010-08-13 08:24:53.000000000 +0800
> @@ -129,6 +129,10 @@ static inline void ptep_set_wrprotect(st
>  #define move_pte(pte, prot, old_addr, new_addr)	(pte)
>  #endif
>  
> +#ifndef flush_tlb_nonprotect_page
> +#define flush_tlb_nonprotect_page(vma, address) flush_tlb_page(vma, address)
> +#endif
> +
>  #ifndef pgprot_noncached
>  #define pgprot_noncached(prot)	(prot)
>  #endif
> Index: linux/mm/memory.c
> ===================================================================
> --- linux.orig/mm/memory.c	2010-08-13 08:23:13.000000000 +0800
> +++ linux/mm/memory.c	2010-08-13 08:24:53.000000000 +0800
> @@ -3116,7 +3116,7 @@ static inline int handle_pte_fault(struc
>  		 * with threads.
>  		 */
>  		if (flags & FAULT_FLAG_WRITE)
> -			flush_tlb_page(vma, address);
> +			flush_tlb_nonprotect_page(vma, address);
>  	}
>  unlock:
>  	pte_unmap_unlock(pte, ptl);

Just added Andrea to the Cc list: he did that TLB flush in 1a44e149;
I'd feel more comfortable noop-ing it on x86 if you've convinced him.

Hugh


* Re: [patch]x86: avoid unnecessary tlb flush
  2010-08-13 19:29     ` Hugh Dickins
@ 2010-08-13 21:08       ` H. Peter Anvin
  2010-08-13 23:00         ` Suresh Siddha
  0 siblings, 1 reply; 9+ messages in thread
From: H. Peter Anvin @ 2010-08-13 21:08 UTC (permalink / raw)
  To: Hugh Dickins
  Cc: Shaohua Li, Andrea Arcangeli, Andrew Morton, lkml, Ingo Molnar,
	Andi Kleen

On 08/13/2010 12:29 PM, Hugh Dickins wrote:
> 
> Just added Andrea to the Cc list: he did that TLB flush in 1a44e149;
> I'd feel more comfortable noop-ing it on x86 if you've convinced him.
> 
> Hugh

Andrea is probably on his way back from LinuxCon, but looking at the
original patch it might be something that non-x86 architectures need,
but which can be optimized specifically on x86, since x86 has explicit
"no flush needed when going to more permissive" semantics.

	-hpa


* Re: [patch]x86: avoid unnecessary tlb flush
  2010-08-13 21:08       ` H. Peter Anvin
@ 2010-08-13 23:00         ` Suresh Siddha
  2010-08-16  1:16           ` Shaohua Li
  0 siblings, 1 reply; 9+ messages in thread
From: Suresh Siddha @ 2010-08-13 23:00 UTC (permalink / raw)
  To: H. Peter Anvin
  Cc: Hugh Dickins, Li, Shaohua, Andrea Arcangeli, Andrew Morton, lkml,
	Ingo Molnar, Andi Kleen

On Fri, 2010-08-13 at 14:08 -0700, H. Peter Anvin wrote:
> On 08/13/2010 12:29 PM, Hugh Dickins wrote:
> > 
> > Just added Andrea to the Cc list: he did that TLB flush in 1a44e149;
> > I'd feel more comfortable noop-ing it on x86 if you've convinced him.
> > 
> > Hugh
> 
> Andrea is probably on his way back from LinuxCon, but looking at the
> original patch it might be something that non-x86 architectures need,
> but which can be optimized specifically on x86, since x86 has explicit
> "no flush needed when going to more permissive" semantics.

Yes. I don't see a problem with the proposed patch. This is the case of
parallel threads taking spurious write-protection faults on the same page:
the pte entry is already up to date, and on x86 the fault itself has
already flushed the stale TLB entry.
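
For reference, the surrounding code in handle_pte_fault() looks roughly like
this (abridged from the hunk context above; the lines outside the posted hunk
are reconstructed here and may not match the tree exactly):

	entry = pte_mkyoung(entry);
	if (ptep_set_access_flags(vma, address, pte, entry,
				  flags & FAULT_FLAG_WRITE)) {
		/* the pte really changed: propagate it to the MMU/TLB */
		update_mmu_cache(vma, address, pte);
	} else {
		/*
		 * Nothing changed: this write fault was spurious.  The
		 * current code still does flush_tlb_page() here, which on
		 * x86 means IPIs to every CPU using this mm, even though
		 * taking the fault already evicted the faulting CPU's
		 * stale TLB entry.  The posted patch makes this a no-op
		 * on x86 via the new arch-overridable macro.
		 */
		if (flags & FAULT_FLAG_WRITE)
			flush_tlb_page(vma, address);
	}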

I would prefer a better name for the new flush_tlb_nonprotect_page() that
reflects the above; something like tlb_fix_spurious_fault()?

Also, for other architectures, do we really need a global TLB flush in
this case, or just a local one?
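
Purely as an illustration of that last question (not something proposed in
this thread), a local-only fallback could look like the sketch below;
local_flush_tlb_page() is assumed here and is not a helper every
architecture actually provides under that name:

#ifndef flush_tlb_nonprotect_page
/* hypothetical: flush only the local CPU's TLB entry for this address */
#define flush_tlb_nonprotect_page(vma, address) \
	local_flush_tlb_page(vma, address)
#endif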

Acked-by: Suresh Siddha <suresh.b.siddha@intel.com>



* Re: [patch]x86: avoid unnecessary tlb flush
  2010-08-13 23:00         ` Suresh Siddha
@ 2010-08-16  1:16           ` Shaohua Li
  2010-08-23  0:43             ` Shaohua Li
  2010-08-23 17:57             ` [tip:x86/mm] x86, mm: Avoid unnecessary TLB flush tip-bot for Shaohua Li
  0 siblings, 2 replies; 9+ messages in thread
From: Shaohua Li @ 2010-08-16  1:16 UTC (permalink / raw)
  To: Siddha, Suresh B
  Cc: H. Peter Anvin, Hugh Dickins, Andrea Arcangeli, Andrew Morton,
	lkml, Ingo Molnar, Andi Kleen

On Sat, Aug 14, 2010 at 07:00:37AM +0800, Siddha, Suresh B wrote:
> On Fri, 2010-08-13 at 14:08 -0700, H. Peter Anvin wrote:
> > On 08/13/2010 12:29 PM, Hugh Dickins wrote:
> > > 
> > > Just added Andrea to the Cc list: he did that TLB flush in 1a44e149;
> > > I'd feel more comfortable noop-ing it on x86 if you've convinced him.
> > > 
> > > Hugh
> > 
> > Andrea is probably on his way back from LinuxCon, but looking at the
> > original patch it might be something that non-x86 architectures need,
> > but which can be optimized specifically on x86, since x86 has explicit
> > "no flush needed when going to more permissive" semantics.
> 
> Yes. I don't see a problem with the proposed patch. This is the case of
> parallel threads taking spurious write-protection faults on the same page:
> the pte entry is already up to date, and on x86 the fault itself has
> already flushed the stale TLB entry.
> 
> I would prefer a better name for the new flush_tlb_nonprotect_page() that
> reflects the above; something like tlb_fix_spurious_fault()?
this name is better.


On x86, the access and dirty bits are set automatically by the CPU when it
accesses memory. By the time we reach the flush_tlb_fix_spurious_fault()
call below, the dirty bit has already been set in the pte, so no TLB flush
is needed. Some CPUs may still hold a TLB entry without the dirty bit set,
but that does not matter: when those CPUs write to the page, the hardware
checks and sets the bit itself, with no software involvement.

On the other hand, flushing the TLB at this point is harmful. The test
creates one thread per CPU; each thread writes to the same, randomly chosen
address within the same vma range, and we measure the total time. On a
4-socket system the original time is 1.96s, while with the patch it is 0.8s.
On a 2-socket system there is a 20% reduction as well. perf shows that a
large share of the time is spent sending and handling IPIs for the TLB flush.

Signed-off-by: Shaohua Li <shaohua.li@intel.com>
Acked-by: Suresh Siddha <suresh.b.siddha@intel.com>

---
 arch/x86/include/asm/pgtable.h |    2 ++
 include/asm-generic/pgtable.h  |    4 ++++
 mm/memory.c                    |    2 +-
 3 files changed, 7 insertions(+), 1 deletion(-)

Index: linux/arch/x86/include/asm/pgtable.h
===================================================================
--- linux.orig/arch/x86/include/asm/pgtable.h	2010-08-16 09:00:02.000000000 +0800
+++ linux/arch/x86/include/asm/pgtable.h	2010-08-16 09:03:41.000000000 +0800
@@ -603,6 +603,8 @@ static inline void ptep_set_wrprotect(st
 	pte_update(mm, addr, ptep);
 }
 
+#define flush_tlb_fix_spurious_fault(vma, address)
+
 /*
  * clone_pgd_range(pgd_t *dst, pgd_t *src, int count);
  *
Index: linux/include/asm-generic/pgtable.h
===================================================================
--- linux.orig/include/asm-generic/pgtable.h	2010-08-16 09:00:02.000000000 +0800
+++ linux/include/asm-generic/pgtable.h	2010-08-16 09:03:41.000000000 +0800
@@ -129,6 +129,10 @@ static inline void ptep_set_wrprotect(st
 #define move_pte(pte, prot, old_addr, new_addr)	(pte)
 #endif
 
+#ifndef flush_tlb_fix_spurious_fault
+#define flush_tlb_fix_spurious_fault(vma, address) flush_tlb_page(vma, address)
+#endif
+
 #ifndef pgprot_noncached
 #define pgprot_noncached(prot)	(prot)
 #endif
Index: linux/mm/memory.c
===================================================================
--- linux.orig/mm/memory.c	2010-08-16 09:03:08.000000000 +0800
+++ linux/mm/memory.c	2010-08-16 09:03:41.000000000 +0800
@@ -3140,7 +3140,7 @@ static inline int handle_pte_fault(struc
 		 * with threads.
 		 */
 		if (flags & FAULT_FLAG_WRITE)
-			flush_tlb_page(vma, address);
+			flush_tlb_fix_spurious_fault(vma, address);
 	}
 unlock:
 	pte_unmap_unlock(pte, ptl);


* Re: [patch]x86: avoid unnecessary tlb flush
  2010-08-16  1:16           ` Shaohua Li
@ 2010-08-23  0:43             ` Shaohua Li
  2010-08-23 17:57             ` [tip:x86/mm] x86, mm: Avoid unnecessary TLB flush tip-bot for Shaohua Li
  1 sibling, 0 replies; 9+ messages in thread
From: Shaohua Li @ 2010-08-23  0:43 UTC (permalink / raw)
  To: Siddha, Suresh B
  Cc: H. Peter Anvin, Hugh Dickins, Andrea Arcangeli, Andrew Morton,
	lkml, Ingo Molnar, Andi Kleen

On Mon, Aug 16, 2010 at 09:16:55AM +0800, Shaohua Li wrote:
> On Sat, Aug 14, 2010 at 07:00:37AM +0800, Siddha, Suresh B wrote:
> > On Fri, 2010-08-13 at 14:08 -0700, H. Peter Anvin wrote:
> > > On 08/13/2010 12:29 PM, Hugh Dickins wrote:
> > > > 
> > > > Just added Andrea to the Cc list: he did that TLB flush in 1a44e149;
> > > > I'd feel more comfortable noop-ing it on x86 if you've convinced him.
> > > > 
> > > > Hugh
> > > 
> > > Andrea is probably on his way back from LinuxCon, but looking at the
> > > original patch it might be something that non-x86 architectures need,
> > > but which can be optimized specifically on x86, since x86 has explicit
> > > "no flush needed when going to more permissive" semantics.
> > 
> > Yes. I don't see a problem with the proposed patch. This is the case of
> > parallel threads taking spurious write-protection faults on the same page:
> > the pte entry is already up to date, and on x86 the fault itself has
> > already flushed the stale TLB entry.
> > 
> > I would prefer a better name for the new flush_tlb_nonprotect_page() that
> > reflects the above; something like tlb_fix_spurious_fault()?
> this name is better.
Hi Andrea,
can you look at this patch?

Thanks,
Shaohua
 
> On x86, the access and dirty bits are set automatically by the CPU when it
> accesses memory. By the time we reach the flush_tlb_fix_spurious_fault()
> call below, the dirty bit has already been set in the pte, so no TLB flush
> is needed. Some CPUs may still hold a TLB entry without the dirty bit set,
> but that does not matter: when those CPUs write to the page, the hardware
> checks and sets the bit itself, with no software involvement.
> 
> On the other hand, flushing the TLB at this point is harmful. The test
> creates one thread per CPU; each thread writes to the same, randomly chosen
> address within the same vma range, and we measure the total time. On a
> 4-socket system the original time is 1.96s, while with the patch it is 0.8s.
> On a 2-socket system there is a 20% reduction as well. perf shows that a
> large share of the time is spent sending and handling IPIs for the TLB flush.
> 
> Signed-off-by: Shaohua Li <shaohua.li@intel.com>
> Acked-by: Suresh Siddha <suresh.b.siddha@intel.com>
> 
> ---
>  arch/x86/include/asm/pgtable.h |    2 ++
>  include/asm-generic/pgtable.h  |    4 ++++
>  mm/memory.c                    |    2 +-
>  3 files changed, 7 insertions(+), 1 deletion(-)
> 
> Index: linux/arch/x86/include/asm/pgtable.h
> ===================================================================
> --- linux.orig/arch/x86/include/asm/pgtable.h	2010-08-16 09:00:02.000000000 +0800
> +++ linux/arch/x86/include/asm/pgtable.h	2010-08-16 09:03:41.000000000 +0800
> @@ -603,6 +603,8 @@ static inline void ptep_set_wrprotect(st
>  	pte_update(mm, addr, ptep);
>  }
>  
> +#define flush_tlb_fix_spurious_fault(vma, address)
> +
>  /*
>   * clone_pgd_range(pgd_t *dst, pgd_t *src, int count);
>   *
> Index: linux/include/asm-generic/pgtable.h
> ===================================================================
> --- linux.orig/include/asm-generic/pgtable.h	2010-08-16 09:00:02.000000000 +0800
> +++ linux/include/asm-generic/pgtable.h	2010-08-16 09:03:41.000000000 +0800
> @@ -129,6 +129,10 @@ static inline void ptep_set_wrprotect(st
>  #define move_pte(pte, prot, old_addr, new_addr)	(pte)
>  #endif
>  
> +#ifndef flush_tlb_fix_spurious_fault
> +#define flush_tlb_fix_spurious_fault(vma, address) flush_tlb_page(vma, address)
> +#endif
> +
>  #ifndef pgprot_noncached
>  #define pgprot_noncached(prot)	(prot)
>  #endif
> Index: linux/mm/memory.c
> ===================================================================
> --- linux.orig/mm/memory.c	2010-08-16 09:03:08.000000000 +0800
> +++ linux/mm/memory.c	2010-08-16 09:03:41.000000000 +0800
> @@ -3140,7 +3140,7 @@ static inline int handle_pte_fault(struc
>  		 * with threads.
>  		 */
>  		if (flags & FAULT_FLAG_WRITE)
> -			flush_tlb_page(vma, address);
> +			flush_tlb_fix_spurious_fault(vma, address);
>  	}
>  unlock:
>  	pte_unmap_unlock(pte, ptl);


* [tip:x86/mm] x86, mm: Avoid unnecessary TLB flush
  2010-08-16  1:16           ` Shaohua Li
  2010-08-23  0:43             ` Shaohua Li
@ 2010-08-23 17:57             ` tip-bot for Shaohua Li
  1 sibling, 0 replies; 9+ messages in thread
From: tip-bot for Shaohua Li @ 2010-08-23 17:57 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: linux-kernel, hpa, mingo, aarcange, suresh.b.siddha, shaohua.li, tglx

Commit-ID:  61c77326d1df079f202fa79403c3ccd8c5966a81
Gitweb:     http://git.kernel.org/tip/61c77326d1df079f202fa79403c3ccd8c5966a81
Author:     Shaohua Li <shaohua.li@intel.com>
AuthorDate: Mon, 16 Aug 2010 09:16:55 +0800
Committer:  H. Peter Anvin <hpa@zytor.com>
CommitDate: Mon, 23 Aug 2010 10:04:57 -0700

x86, mm: Avoid unnecessary TLB flush

On x86, the access and dirty bits are set automatically by the CPU when it
accesses memory. By the time we reach the flush_tlb_fix_spurious_fault()
call below, the dirty bit has already been set in the pte, so no TLB flush
is needed. Some CPUs may still hold a TLB entry without the dirty bit set,
but that does not matter: when those CPUs write to the page, the hardware
checks and sets the bit itself, with no software involvement.

On the other hand, flushing the TLB at this point is harmful. The test
creates one thread per CPU; each thread writes to the same, randomly chosen
address within the same vma range, and we measure the total time. On a
4-socket system the original time is 1.96s, while with the patch it is 0.8s.
On a 2-socket system there is a 20% reduction as well. perf shows that a
large share of the time is spent sending and handling IPIs for the TLB flush.

Signed-off-by: Shaohua Li <shaohua.li@intel.com>
LKML-Reference: <20100816011655.GA362@sli10-desk.sh.intel.com>
Acked-by: Suresh Siddha <suresh.b.siddha@intel.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 arch/x86/include/asm/pgtable.h |    2 ++
 include/asm-generic/pgtable.h  |    4 ++++
 mm/memory.c                    |    2 +-
 3 files changed, 7 insertions(+), 1 deletions(-)

diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index a34c785..2d0a33b 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -603,6 +603,8 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm,
 	pte_update(mm, addr, ptep);
 }
 
+#define flush_tlb_fix_spurious_fault(vma, address)
+
 /*
  * clone_pgd_range(pgd_t *dst, pgd_t *src, int count);
  *
diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
index e2bd73e..f4d4120 100644
--- a/include/asm-generic/pgtable.h
+++ b/include/asm-generic/pgtable.h
@@ -129,6 +129,10 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addres
 #define move_pte(pte, prot, old_addr, new_addr)	(pte)
 #endif
 
+#ifndef flush_tlb_fix_spurious_fault
+#define flush_tlb_fix_spurious_fault(vma, address) flush_tlb_page(vma, address)
+#endif
+
 #ifndef pgprot_noncached
 #define pgprot_noncached(prot)	(prot)
 #endif
diff --git a/mm/memory.c b/mm/memory.c
index 2ed2267..a40da69 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3147,7 +3147,7 @@ static inline int handle_pte_fault(struct mm_struct *mm,
 		 * with threads.
 		 */
 		if (flags & FAULT_FLAG_WRITE)
-			flush_tlb_page(vma, address);
+			flush_tlb_fix_spurious_fault(vma, address);
 	}
 unlock:
 	pte_unmap_unlock(pte, ptl);

