* [PATCH v2 1/3] powerpc/mmu_gather: Enable RCU_TABLE_FREE even for !SMP case
@ 2019-12-18  5:35 ` Aneesh Kumar K.V
  0 siblings, 0 replies; 18+ messages in thread
From: Aneesh Kumar K.V @ 2019-12-18  5:35 UTC (permalink / raw)
  To: akpm, npiggin, mpe, peterz, will
  Cc: linux-mm, linux-kernel, linuxppc-dev, linux-arch, Aneesh Kumar K.V

A follow-up patch is going to make sure we correctly invalidate the page walk
cache before we free page table pages. In order to keep things simple, enable
RCU_TABLE_FREE even for !SMP so that we don't have to fix up the !SMP case
differently in the follow-up patch.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
---
 arch/powerpc/Kconfig                         | 2 +-
 arch/powerpc/include/asm/book3s/32/pgalloc.h | 8 --------
 arch/powerpc/include/asm/book3s/64/pgalloc.h | 2 --
 arch/powerpc/include/asm/nohash/pgalloc.h    | 8 --------
 arch/powerpc/mm/book3s64/pgtable.c           | 7 -------
 5 files changed, 1 insertion(+), 26 deletions(-)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 1ec34e16ed65..04240205f38c 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -222,7 +222,7 @@ config PPC
 	select HAVE_HARDLOCKUP_DETECTOR_PERF	if PERF_EVENTS && HAVE_PERF_EVENTS_NMI && !HAVE_HARDLOCKUP_DETECTOR_ARCH
 	select HAVE_PERF_REGS
 	select HAVE_PERF_USER_STACK_DUMP
-	select HAVE_RCU_TABLE_FREE		if SMP
+	select HAVE_RCU_TABLE_FREE
 	select HAVE_RCU_TABLE_NO_INVALIDATE	if HAVE_RCU_TABLE_FREE
 	select HAVE_MMU_GATHER_PAGE_SIZE
 	select HAVE_REGS_AND_STACK_ACCESS_API
diff --git a/arch/powerpc/include/asm/book3s/32/pgalloc.h b/arch/powerpc/include/asm/book3s/32/pgalloc.h
index 998317702630..dc5c039eb28e 100644
--- a/arch/powerpc/include/asm/book3s/32/pgalloc.h
+++ b/arch/powerpc/include/asm/book3s/32/pgalloc.h
@@ -49,7 +49,6 @@ static inline void pgtable_free(void *table, unsigned index_size)
 
 #define get_hugepd_cache_index(x)  (x)
 
-#ifdef CONFIG_SMP
 static inline void pgtable_free_tlb(struct mmu_gather *tlb,
 				    void *table, int shift)
 {
@@ -66,13 +65,6 @@ static inline void __tlb_remove_table(void *_table)
 
 	pgtable_free(table, shift);
 }
-#else
-static inline void pgtable_free_tlb(struct mmu_gather *tlb,
-				    void *table, int shift)
-{
-	pgtable_free(table, shift);
-}
-#endif
 
 static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t table,
 				  unsigned long address)
diff --git a/arch/powerpc/include/asm/book3s/64/pgalloc.h b/arch/powerpc/include/asm/book3s/64/pgalloc.h
index f6968c811026..a41e91bd0580 100644
--- a/arch/powerpc/include/asm/book3s/64/pgalloc.h
+++ b/arch/powerpc/include/asm/book3s/64/pgalloc.h
@@ -19,9 +19,7 @@ extern struct vmemmap_backing *vmemmap_list;
 extern pmd_t *pmd_fragment_alloc(struct mm_struct *, unsigned long);
 extern void pmd_fragment_free(unsigned long *);
 extern void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift);
-#ifdef CONFIG_SMP
 extern void __tlb_remove_table(void *_table);
-#endif
 void pte_frag_destroy(void *pte_frag);
 
 static inline pgd_t *radix__pgd_alloc(struct mm_struct *mm)
diff --git a/arch/powerpc/include/asm/nohash/pgalloc.h b/arch/powerpc/include/asm/nohash/pgalloc.h
index 332b13b4ecdb..29c43665a753 100644
--- a/arch/powerpc/include/asm/nohash/pgalloc.h
+++ b/arch/powerpc/include/asm/nohash/pgalloc.h
@@ -46,7 +46,6 @@ static inline void pgtable_free(void *table, int shift)
 
 #define get_hugepd_cache_index(x)	(x)
 
-#ifdef CONFIG_SMP
 static inline void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift)
 {
 	unsigned long pgf = (unsigned long)table;
@@ -64,13 +63,6 @@ static inline void __tlb_remove_table(void *_table)
 	pgtable_free(table, shift);
 }
 
-#else
-static inline void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift)
-{
-	pgtable_free(table, shift);
-}
-#endif
-
 static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t table,
 				  unsigned long address)
 {
diff --git a/arch/powerpc/mm/book3s64/pgtable.c b/arch/powerpc/mm/book3s64/pgtable.c
index 75483b40fcb1..2bf7e1b4fd82 100644
--- a/arch/powerpc/mm/book3s64/pgtable.c
+++ b/arch/powerpc/mm/book3s64/pgtable.c
@@ -378,7 +378,6 @@ static inline void pgtable_free(void *table, int index)
 	}
 }
 
-#ifdef CONFIG_SMP
 void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int index)
 {
 	unsigned long pgf = (unsigned long)table;
@@ -395,12 +394,6 @@ void __tlb_remove_table(void *_table)
 
 	return pgtable_free(table, index);
 }
-#else
-void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int index)
-{
-	return pgtable_free(table, index);
-}
-#endif
 
 #ifdef CONFIG_PROC_FS
 atomic_long_t direct_pages_count[MMU_PAGE_COUNT];
-- 
2.23.0
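
A note for readers skimming the hunks above: pgtable_free_tlb() ors the shift
(an index, on book3s/64) into the low bits of the page-aligned table pointer
before handing it to tlb_remove_table(), and __tlb_remove_table() masks the two
apart again when the table is finally freed. A minimal user-space sketch of
that encode/decode step, with hypothetical names and a stand-in 0xf mask
modelled on powerpc's MAX_PGTABLE_INDEX_SIZE (not kernel code):

#include <assert.h>
#include <stdio.h>
#include <stdlib.h>

#define INDEX_MASK 0xfUL   /* low bits of an aligned pointer are free */

/* Encode the size/cache index into the low bits of an aligned pointer. */
static void *table_encode(void *table, unsigned long index)
{
        unsigned long pgf = (unsigned long)table;

        assert((pgf & INDEX_MASK) == 0);   /* table must be aligned */
        assert(index <= INDEX_MASK);
        return (void *)(pgf | index);
}

/* Decode it again, as a __tlb_remove_table()-style callback would. */
static void table_decode(void *_table, void **table, unsigned long *index)
{
        unsigned long pgf = (unsigned long)_table;

        *index = pgf & INDEX_MASK;
        *table = (void *)(pgf & ~INDEX_MASK);
}

int main(void)
{
        void *table = aligned_alloc(4096, 4096);  /* stand-in for a table page */
        void *cooked = table_encode(table, 3);
        void *raw;
        unsigned long index;

        table_decode(cooked, &raw, &index);
        printf("table=%p raw=%p index=%lu\n", table, raw, index);
        free(raw);
        return 0;
}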



* [PATCH v2 2/3] mm/mmu_gather: Invalidate TLB correctly on batch allocation failure and flush
  2019-12-18  5:35 ` Aneesh Kumar K.V
@ 2019-12-18  5:35   ` Aneesh Kumar K.V
  -1 siblings, 0 replies; 18+ messages in thread
From: Aneesh Kumar K.V @ 2019-12-18  5:35 UTC (permalink / raw)
  To: akpm, npiggin, mpe, peterz, will
  Cc: linux-mm, linux-kernel, linuxppc-dev, linux-arch, Aneesh Kumar K . V

From: Peter Zijlstra <peterz@infradead.org>

Architectures with hardware walkers of the Linux page tables should flush the
TLB on mmu gather batch allocation failures and batch flush. Some
architectures, like POWER, support multiple translation modes (hash and radix),
and in the case of POWER only the radix translation mode needs the above TLBI.
This is because in hash translation mode the kernel wants to avoid this extra
flush, since there are no hardware walkers of the Linux page tables. With radix
translation, the hardware also walks the Linux page tables, so the kernel needs
to make sure to invalidate the TLB page walk cache before page table pages are
freed.

More details in commit d86564a2f085 ("mm/tlb, x86/mm: Support invalidating TLB
caches for RCU_TABLE_FREE").

Fixes: a46cc7a90fd8 ("powerpc/mm/radix: Improve TLB/PWC flushes")
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
---
 arch/Kconfig                    |  3 ---
 arch/powerpc/Kconfig            |  1 -
 arch/powerpc/include/asm/tlb.h  | 11 +++++++++++
 arch/sparc/Kconfig              |  1 -
 arch/sparc/include/asm/tlb_64.h |  9 +++++++++
 include/asm-generic/tlb.h       | 22 +++++++++++++++-------
 mm/mmu_gather.c                 | 16 ++++++++--------
 7 files changed, 43 insertions(+), 20 deletions(-)

diff --git a/arch/Kconfig b/arch/Kconfig
index 48b5e103bdb0..208aad121630 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -396,9 +396,6 @@ config HAVE_ARCH_JUMP_LABEL_RELATIVE
 config HAVE_RCU_TABLE_FREE
 	bool
 
-config HAVE_RCU_TABLE_NO_INVALIDATE
-	bool
-
 config HAVE_MMU_GATHER_PAGE_SIZE
 	bool
 
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 04240205f38c..f9970f87612e 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -223,7 +223,6 @@ config PPC
 	select HAVE_PERF_REGS
 	select HAVE_PERF_USER_STACK_DUMP
 	select HAVE_RCU_TABLE_FREE
-	select HAVE_RCU_TABLE_NO_INVALIDATE	if HAVE_RCU_TABLE_FREE
 	select HAVE_MMU_GATHER_PAGE_SIZE
 	select HAVE_REGS_AND_STACK_ACCESS_API
 	select HAVE_RELIABLE_STACKTRACE		if PPC_BOOK3S_64 && CPU_LITTLE_ENDIAN
diff --git a/arch/powerpc/include/asm/tlb.h b/arch/powerpc/include/asm/tlb.h
index b2c0be93929d..7f3a8b902325 100644
--- a/arch/powerpc/include/asm/tlb.h
+++ b/arch/powerpc/include/asm/tlb.h
@@ -26,6 +26,17 @@
 
 #define tlb_flush tlb_flush
 extern void tlb_flush(struct mmu_gather *tlb);
+/*
+ * book3s:
+ * Hash does not use the linux page-tables, so we can avoid
+ * the TLB invalidate for page-table freeing, Radix otoh does use the
+ * page-tables and needs the TLBI.
+ *
+ * nohash:
+ * We still do TLB invalidate in the __pte_free_tlb routine before we
+ * add the page table pages to mmu gather table batch.
+ */
+#define tlb_needs_table_invalidate()	radix_enabled()
 
 /* Get the generic bits... */
 #include <asm-generic/tlb.h>
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
index eb24cb1afc11..18e9fb6fcf1b 100644
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -65,7 +65,6 @@ config SPARC64
 	select HAVE_KRETPROBES
 	select HAVE_KPROBES
 	select HAVE_RCU_TABLE_FREE if SMP
-	select HAVE_RCU_TABLE_NO_INVALIDATE if HAVE_RCU_TABLE_FREE
 	select HAVE_MEMBLOCK_NODE_MAP
 	select HAVE_ARCH_TRANSPARENT_HUGEPAGE
 	select HAVE_DYNAMIC_FTRACE
diff --git a/arch/sparc/include/asm/tlb_64.h b/arch/sparc/include/asm/tlb_64.h
index a2f3fa61ee36..8cb8f3833239 100644
--- a/arch/sparc/include/asm/tlb_64.h
+++ b/arch/sparc/include/asm/tlb_64.h
@@ -28,6 +28,15 @@ void flush_tlb_pending(void);
 #define __tlb_remove_tlb_entry(tlb, ptep, address) do { } while (0)
 #define tlb_flush(tlb)	flush_tlb_pending()
 
+/*
+ * SPARC64's hardware TLB fill does not use the Linux page-tables
+ * and therefore we don't need a TLBI when freeing page-table pages.
+ */
+
+#ifdef CONFIG_HAVE_RCU_TABLE_FREE
+#define tlb_needs_table_invalidate()	(false)
+#endif
+
 #include <asm-generic/tlb.h>
 
 #endif /* _SPARC64_TLB_H */
diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h
index 2b10036fefd0..9e22ac369d1d 100644
--- a/include/asm-generic/tlb.h
+++ b/include/asm-generic/tlb.h
@@ -137,13 +137,6 @@
  *  When used, an architecture is expected to provide __tlb_remove_table()
  *  which does the actual freeing of these pages.
  *
- *  HAVE_RCU_TABLE_NO_INVALIDATE
- *
- *  This makes HAVE_RCU_TABLE_FREE avoid calling tlb_flush_mmu_tlbonly() before
- *  freeing the page-table pages. This can be avoided if you use
- *  HAVE_RCU_TABLE_FREE and your architecture does _NOT_ use the Linux
- *  page-tables natively.
- *
  *  MMU_GATHER_NO_RANGE
  *
  *  Use this if your architecture lacks an efficient flush_tlb_range().
@@ -189,8 +182,23 @@ struct mmu_table_batch {
 
 extern void tlb_remove_table(struct mmu_gather *tlb, void *table);
 
+/*
+ * This allows an architecture that does not use the linux page-tables for
+ * hardware to skip the TLBI when freeing page tables.
+ */
+#ifndef tlb_needs_table_invalidate
+#define tlb_needs_table_invalidate() (true)
+#endif
+
+#else
+
+#ifdef tlb_needs_table_invalidate
+#error tlb_needs_table_invalidate() requires HAVE_RCU_TABLE_FREE
 #endif
 
+#endif /* CONFIG_HAVE_RCU_TABLE_FREE */
+
+
 #ifndef CONFIG_HAVE_MMU_GATHER_NO_GATHER
 /*
  * If we can't allocate a page to make a big batch of page pointers
diff --git a/mm/mmu_gather.c b/mm/mmu_gather.c
index 7d70e5c78f97..7c1b8f67af7b 100644
--- a/mm/mmu_gather.c
+++ b/mm/mmu_gather.c
@@ -102,14 +102,14 @@ bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *page, int page_
  */
 static inline void tlb_table_invalidate(struct mmu_gather *tlb)
 {
-#ifndef CONFIG_HAVE_RCU_TABLE_NO_INVALIDATE
-	/*
-	 * Invalidate page-table caches used by hardware walkers. Then we still
-	 * need to RCU-sched wait while freeing the pages because software
-	 * walkers can still be in-flight.
-	 */
-	tlb_flush_mmu_tlbonly(tlb);
-#endif
+	if (tlb_needs_table_invalidate()) {
+		/*
+		 * Invalidate page-table caches used by hardware walkers. Then
+		 * we still need to RCU-sched wait while freeing the pages
+		 * because software walkers can still be in-flight.
+		 */
+		tlb_flush_mmu_tlbonly(tlb);
+	}
 }
 
 static void tlb_remove_table_smp_sync(void *arg)
-- 
2.23.0
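
For context on the "batch allocation failure" case in the subject:
tlb_remove_table() falls back to an immediate, IPI-synchronised free when it
cannot allocate a batch page, and that fallback is one of the callers of the
tlb_table_invalidate() changed above. Roughly, paraphrasing mm/mmu_gather.c of
this era (a sketch for orientation, not part of the patch):

void tlb_remove_table(struct mmu_gather *tlb, void *table)
{
        struct mmu_table_batch **batch = &tlb->batch;

        if (*batch == NULL) {
                *batch = (struct mmu_table_batch *)
                        __get_free_page(GFP_NOWAIT | __GFP_NOWARN);
                if (*batch == NULL) {
                        /* Batch allocation failed: invalidate, then free now. */
                        tlb_table_invalidate(tlb);
                        tlb_remove_table_one(table);
                        return;
                }
                (*batch)->nr = 0;
        }

        (*batch)->pgs[(*batch)->nr++] = table;
        if ((*batch)->nr == MAX_TABLE_BATCH)
                tlb_table_flush(tlb);           /* the "batch flush" case */
}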



* [PATCH v2 3/3] asm-generic/tlb: Avoid potential double flush
  2019-12-18  5:35 ` Aneesh Kumar K.V
@ 2019-12-18  5:35   ` Aneesh Kumar K.V
  -1 siblings, 0 replies; 18+ messages in thread
From: Aneesh Kumar K.V @ 2019-12-18  5:35 UTC (permalink / raw)
  To: akpm, npiggin, mpe, peterz, will
  Cc: linux-mm, linux-kernel, linuxppc-dev, linux-arch, Aneesh Kumar K.V

From: Peter Zijlstra <peterz@infradead.org>

Aneesh reported that:

	tlb_flush_mmu()
	  tlb_flush_mmu_tlbonly()
	    tlb_flush()			<-- #1
	  tlb_flush_mmu_free()
	    tlb_table_flush()
	      tlb_table_invalidate()
		tlb_flush_mmu_tlbonly()
		  tlb_flush()		<-- #2

does two TLBIs when tlb->fullmm, because __tlb_reset_range() will not
clear tlb->end in that case.

Observe that any caller to __tlb_adjust_range() also sets at least one
of the tlb->freed_tables || tlb->cleared_p* bits, and those are
unconditionally cleared by __tlb_reset_range().

Change the condition for actually issuing TLBI to having one of those
bits set, as opposed to having tlb->end != 0.

Reported-by: "Aneesh Kumar K.V" <aneesh.kumar@linux.ibm.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
---
 include/asm-generic/tlb.h | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h
index 9e22ac369d1d..b36b3bef5661 100644
--- a/include/asm-generic/tlb.h
+++ b/include/asm-generic/tlb.h
@@ -402,7 +402,12 @@ tlb_update_vma_flags(struct mmu_gather *tlb, struct vm_area_struct *vma) { }
 
 static inline void tlb_flush_mmu_tlbonly(struct mmu_gather *tlb)
 {
-	if (!tlb->end)
+	/*
+	 * Anything calling __tlb_adjust_range() also sets at least one of
+	 * these bits.
+	 */
+	if (!(tlb->freed_tables || tlb->cleared_ptes || tlb->cleared_pmds ||
+	      tlb->cleared_puds || tlb->cleared_p4ds))
 		return;
 
 	tlb_flush(tlb);
-- 
2.23.0
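
The step the call chain above leans on, namely that __tlb_reset_range() leaves
tlb->end non-zero for a fullmm gather while unconditionally clearing the
freed_tables/cleared_* bits, looks roughly like this in mm/mmu_gather.c of this
era (paraphrased for orientation, not part of the patch):

static void __tlb_reset_range(struct mmu_gather *tlb)
{
        if (tlb->fullmm) {
                tlb->start = tlb->end = ~0;     /* end stays non-zero */
        } else {
                tlb->start = TASK_SIZE;
                tlb->end = 0;
        }

        /* ... whereas these are cleared unconditionally: */
        tlb->freed_tables = 0;
        tlb->cleared_ptes = 0;
        tlb->cleared_pmds = 0;
        tlb->cleared_puds = 0;
        tlb->cleared_p4ds = 0;
}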



* Re: [PATCH v2 1/3] powerpc/mmu_gather: Enable RCU_TABLE_FREE even for !SMP case
  2019-12-18  5:35 ` Aneesh Kumar K.V
@ 2019-12-18  9:14   ` Peter Zijlstra
  -1 siblings, 0 replies; 18+ messages in thread
From: Peter Zijlstra @ 2019-12-18  9:14 UTC (permalink / raw)
  To: Aneesh Kumar K.V
  Cc: akpm, npiggin, mpe, will, linux-mm, linux-kernel, linuxppc-dev,
	linux-arch


I'm going to assume you'll take these 3 patches through the powerpc tree
for convenience. A few small nits, but otherwise ACK on the lot.

On Wed, Dec 18, 2019 at 11:05:28AM +0530, Aneesh Kumar K.V wrote:
> A follow up patch is going to make sure we correctly invalidate page walk cache
> before we free page table pages. In order to keep things simple enable
> RCU_TABLE_FREE even for !SMP so that we don't have to fixup the !SMP case
> differently in the followup patch

Perhaps more clearly state that !SMP is broken for Radix/PWC. The above
sorta implies it is, but it's not spelled out in any detail.

> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>

Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>


* Re: [PATCH v2 2/3] mm/mmu_gather: Invalidate TLB correctly on batch allocation failure and flush
  2019-12-18  5:35   ` Aneesh Kumar K.V
@ 2019-12-18  9:17     ` Peter Zijlstra
  -1 siblings, 0 replies; 18+ messages in thread
From: Peter Zijlstra @ 2019-12-18  9:17 UTC (permalink / raw)
  To: Aneesh Kumar K.V
  Cc: akpm, npiggin, mpe, will, linux-mm, linux-kernel, linuxppc-dev,
	linux-arch

On Wed, Dec 18, 2019 at 11:05:29AM +0530, Aneesh Kumar K.V wrote:
> From: Peter Zijlstra <peterz@infradead.org>
> 
> Architectures for which we have hardware walkers of Linux page table should
> flush TLB on mmu gather batch allocation failures and batch flush. Some
> architectures like POWER supports multiple translation modes (hash and radix)

nohash, hash and radix in fact :-)

> and in the case of POWER only radix translation mode needs the above TLBI.

> This is because for hash translation mode kernel wants to avoid this extra
> flush since there are no hardware walkers of linux page table. With radix
> translation, the hardware also walks linux page table and with that, kernel
> needs to make sure to TLB invalidate page walk cache before page table pages are
> freed.
> 
> More details in
> commit: d86564a2f085 ("mm/tlb, x86/mm: Support invalidating TLB caches for RCU_TABLE_FREE")
> 
> Fixes: a46cc7a90fd8 ("powerpc/mm/radix: Improve TLB/PWC flushes")
> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org
> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
> ---

> diff --git a/arch/powerpc/include/asm/tlb.h b/arch/powerpc/include/asm/tlb.h
> index b2c0be93929d..7f3a8b902325 100644
> --- a/arch/powerpc/include/asm/tlb.h
> +++ b/arch/powerpc/include/asm/tlb.h
> @@ -26,6 +26,17 @@
>  
>  #define tlb_flush tlb_flush
>  extern void tlb_flush(struct mmu_gather *tlb);
> +/*
> + * book3s:
> + * Hash does not use the linux page-tables, so we can avoid
> + * the TLB invalidate for page-table freeing, Radix otoh does use the
> + * page-tables and needs the TLBI.
> + *
> + * nohash:
> + * We still do TLB invalidate in the __pte_free_tlb routine before we
> + * add the page table pages to mmu gather table batch.

I'm a little confused though; if nohash is a software TLB fill, why do
you need a TLBI for tables?

> + */
> +#define tlb_needs_table_invalidate()	radix_enabled()
>  
>  /* Get the generic bits... */
>  #include <asm-generic/tlb.h>


* Re: [PATCH v2 3/3] asm-generic/tlb: Avoid potential double flush
  2019-12-18  5:35   ` Aneesh Kumar K.V
@ 2019-12-18  9:19     ` Peter Zijlstra
  -1 siblings, 0 replies; 18+ messages in thread
From: Peter Zijlstra @ 2019-12-18  9:19 UTC (permalink / raw)
  To: Aneesh Kumar K.V
  Cc: akpm, npiggin, mpe, will, linux-mm, linux-kernel, linuxppc-dev,
	linux-arch

On Wed, Dec 18, 2019 at 11:05:30AM +0530, Aneesh Kumar K.V wrote:

> --- a/include/asm-generic/tlb.h
> +++ b/include/asm-generic/tlb.h
> @@ -402,7 +402,12 @@ tlb_update_vma_flags(struct mmu_gather *tlb, struct vm_area_struct *vma) { }
>  
>  static inline void tlb_flush_mmu_tlbonly(struct mmu_gather *tlb)
>  {
> -	if (!tlb->end)
> +	/*
> +	 * Anything calling __tlb_adjust_range() also sets at least one of
> +	 * these bits.
> +	 */
> +	if (!(tlb->freed_tables || tlb->cleared_ptes || tlb->cleared_pmds ||
> +	      tlb->cleared_puds || tlb->cleared_p4ds))
>  		return;

FWIW I looked at the GCC-generated assembly output for this (x86_64) and
it did a single load and mask as expected.
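
A stand-alone way to reproduce that observation, assuming the flags are laid
out as adjacent single-bit bitfields the way struct mmu_gather declares them:
compile a sketch like the one below with -O2 and inspect the code generated for
needs_flush().

#include <stdio.h>

struct flags {                          /* mirrors the mmu_gather flag layout */
        unsigned int fullmm       : 1;
        unsigned int freed_tables : 1;
        unsigned int cleared_ptes : 1;
        unsigned int cleared_pmds : 1;
        unsigned int cleared_puds : 1;
        unsigned int cleared_p4ds : 1;
};

/*
 * At -O2 the compiler can evaluate this as one load of the bitfield word and
 * one AND against the combined mask, rather than five separate tests.
 */
int needs_flush(const struct flags *f)
{
        return f->freed_tables || f->cleared_ptes || f->cleared_pmds ||
               f->cleared_puds || f->cleared_p4ds;
}

int main(void)
{
        struct flags f = { .cleared_pmds = 1 };

        printf("%d\n", needs_flush(&f));
        return 0;
}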


* Re: [PATCH v2 2/3] mm/mmu_gather: Invalidate TLB correctly on batch allocation failure and flush
  2019-12-18  9:17     ` Peter Zijlstra
@ 2019-12-18 11:37       ` Aneesh Kumar K.V
  -1 siblings, 0 replies; 18+ messages in thread
From: Aneesh Kumar K.V @ 2019-12-18 11:37 UTC (permalink / raw)
  To: Peter Zijlstra, Paul Mackerras
  Cc: akpm, npiggin, mpe, will, linux-mm, linux-kernel, linuxppc-dev,
	linux-arch

On 12/18/19 2:47 PM, Peter Zijlstra wrote:
> On Wed, Dec 18, 2019 at 11:05:29AM +0530, Aneesh Kumar K.V wrote:
>> From: Peter Zijlstra <peterz@infradead.org>
>>
>> Architectures for which we have hardware walkers of Linux page table should
>> flush TLB on mmu gather batch allocation failures and batch flush. Some
>> architectures like POWER supports multiple translation modes (hash and radix)
> 
> nohash, hash and radix in fact :-)
> 
>> and in the case of POWER only radix translation mode needs the above TLBI.
> 
>> This is because for hash translation mode kernel wants to avoid this extra
>> flush since there are no hardware walkers of linux page table. With radix
>> translation, the hardware also walks linux page table and with that, kernel
>> needs to make sure to TLB invalidate page walk cache before page table pages are
>> freed.
>>
>> More details in
>> commit: d86564a2f085 ("mm/tlb, x86/mm: Support invalidating TLB caches for RCU_TABLE_FREE")
>>
>> Fixes: a46cc7a90fd8 ("powerpc/mm/radix: Improve TLB/PWC flushes")
>> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org
>> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
>> ---
> 
>> diff --git a/arch/powerpc/include/asm/tlb.h b/arch/powerpc/include/asm/tlb.h
>> index b2c0be93929d..7f3a8b902325 100644
>> --- a/arch/powerpc/include/asm/tlb.h
>> +++ b/arch/powerpc/include/asm/tlb.h
>> @@ -26,6 +26,17 @@
>>   
>>   #define tlb_flush tlb_flush
>>   extern void tlb_flush(struct mmu_gather *tlb);
>> +/*
>> + * book3s:
>> + * Hash does not use the linux page-tables, so we can avoid
>> + * the TLB invalidate for page-table freeing, Radix otoh does use the
>> + * page-tables and needs the TLBI.
>> + *
>> + * nohash:
>> + * We still do TLB invalidate in the __pte_free_tlb routine before we
>> + * add the page table pages to mmu gather table batch.
> 
> I'm a little confused though; if nohash is a software TLB fill, why do
> you need a TLBI for tables?
> 

nohash (AKA book3e) has different mmu modes. I don't follow all the 
details w.r.t book3e. Paul or Michael might be able to explain the need 
for table flush with book3e.

Documentation/powerpc/cpu_families.rst shows the different hardware-assisted
TLB fill support.

What I wanted to convey with the above comment is that we only handle the
radix translation mode with the tlb_needs_table_invalidate() check. The other
translations (hash, and the different variants of book3e) are all fine for the
reasons outlined above.

-aneesh


* Re: [PATCH v2 2/3] mm/mmu_gather: Invalidate TLB correctly on batch allocation failure and flush
  2019-12-18 11:37       ` Aneesh Kumar K.V
@ 2019-12-18 13:13         ` Michael Ellerman
  -1 siblings, 0 replies; 18+ messages in thread
From: Michael Ellerman @ 2019-12-18 13:13 UTC (permalink / raw)
  To: Aneesh Kumar K.V, Peter Zijlstra, Paul Mackerras
  Cc: akpm, npiggin, will, linux-mm, linux-kernel, linuxppc-dev,
	linux-arch, Scott Wood

"Aneesh Kumar K.V" <aneesh.kumar@linux.ibm.com> writes:
> On 12/18/19 2:47 PM, Peter Zijlstra wrote:
>> On Wed, Dec 18, 2019 at 11:05:29AM +0530, Aneesh Kumar K.V wrote:
>>> From: Peter Zijlstra <peterz@infradead.org>
>>>
>>> Architectures for which we have hardware walkers of Linux page table should
>>> flush TLB on mmu gather batch allocation failures and batch flush. Some
>>> architectures like POWER supports multiple translation modes (hash and radix)
>> 
>> nohash, hash and radix in fact :-)
>> 
>>> and in the case of POWER only radix translation mode needs the above TLBI.
>> 
>>> This is because for hash translation mode kernel wants to avoid this extra
>>> flush since there are no hardware walkers of linux page table. With radix
>>> translation, the hardware also walks linux page table and with that, kernel
>>> needs to make sure to TLB invalidate page walk cache before page table pages are
>>> freed.
>>>
>>> More details in
>>> commit: d86564a2f085 ("mm/tlb, x86/mm: Support invalidating TLB caches for RCU_TABLE_FREE")
>>>
>>> Fixes: a46cc7a90fd8 ("powerpc/mm/radix: Improve TLB/PWC flushes")
>>> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org
>>> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
>>> ---
>> 
>>> diff --git a/arch/powerpc/include/asm/tlb.h b/arch/powerpc/include/asm/tlb.h
>>> index b2c0be93929d..7f3a8b902325 100644
>>> --- a/arch/powerpc/include/asm/tlb.h
>>> +++ b/arch/powerpc/include/asm/tlb.h
>>> @@ -26,6 +26,17 @@
>>>   
>>>   #define tlb_flush tlb_flush
>>>   extern void tlb_flush(struct mmu_gather *tlb);
>>> +/*
>>> + * book3s:
>>> + * Hash does not use the linux page-tables, so we can avoid
>>> + * the TLB invalidate for page-table freeing, Radix otoh does use the
>>> + * page-tables and needs the TLBI.
>>> + *
>>> + * nohash:
>>> + * We still do TLB invalidate in the __pte_free_tlb routine before we
>>> + * add the page table pages to mmu gather table batch.
>> 
>> I'm a little confused though; if nohash is a software TLB fill, why do
>> you need a TLBI for tables?
>> 
>
> nohash (AKA book3e) has different mmu modes. I don't follow all the 
> details w.r.t book3e. Paul or Michael might be able to explain the need 
> for table flush with book3e.

Some of the Book3E CPUs have a partial hardware table walker. The IBM one (A2)
did, before we ripped that support out. And the Freescale (NXP) e6500
does; see e.g.:

  28efc35fe68d ("powerpc/e6500: TLB miss handler with hardware tablewalk support")

They only support walking one level IIRC, i.e. you can create a TLB entry
that points to a PTE page, and the hardware will dereference that to get
a PTE and load that into the TLB.

cheers


* Re: [PATCH v2 2/3] mm/mmu_gather: Invalidate TLB correctly on batch allocation failure and flush
  2019-12-18 13:13         ` Michael Ellerman
@ 2019-12-18 14:15           ` Peter Zijlstra
  -1 siblings, 0 replies; 18+ messages in thread
From: Peter Zijlstra @ 2019-12-18 14:15 UTC (permalink / raw)
  To: Michael Ellerman
  Cc: Aneesh Kumar K.V, Paul Mackerras, akpm, npiggin, will, linux-mm,
	linux-kernel, linuxppc-dev, linux-arch, Scott Wood

On Thu, Dec 19, 2019 at 12:13:48AM +1100, Michael Ellerman wrote:

> >> I'm a little confused though; if nohash is a software TLB fill, why do
> >> you need a TLBI for tables?
> >> 
> >
> > nohash (AKA book3e) has different mmu modes. I don't follow all the 
> > details w.r.t book3e. Paul or Michael might be able to explain the need 
> > for table flush with book3e.
> 
> Some of the Book3E CPUs have a partial hardware table walker. The IBM one (A2)
> did, before we ripped that support out. And the Freescale (NXP) e6500
> does, see eg:
> 
>   28efc35fe68d ("powerpc/e6500: TLB miss handler with hardware tablewalk support")
> 
> They only support walking one level IIRC, ie. you can create a TLB entry
> that points to a PTE page, and the hardware will dereference that to get
> a PTE and load that into the TLB.

Shiny! All the embedded goodness. Thanks for the info.
