* [PATCH v4 1/2] powerpc/64s/hash: Add stress_slb kernel boot option to increase SLB faults
@ 2020-05-11 12:58 Michael Ellerman
  2020-05-11 12:58 ` [PATCH v4 2/2] powerpc/64s/hash: Add stress_hpt kernel boot option to increase hash faults Michael Ellerman
  2020-06-09  5:29 ` [PATCH v4 1/2] powerpc/64s/hash: Add stress_slb kernel boot option to increase SLB faults Michael Ellerman
  0 siblings, 2 replies; 4+ messages in thread
From: Michael Ellerman @ 2020-05-11 12:58 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: npiggin

From: Nicholas Piggin <npiggin@gmail.com>

This option increases the number of SLB misses by limiting the number
of kernel SLB entries and by increasing the flushing of cached lookaside
information. This helps stress-test difficult-to-hit paths in the
kernel.

Reported-by: kbuild test robot <lkp@intel.com>
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
[mpe: Relocate the code into arch/powerpc/mm, s/torture/stress/]
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 .../admin-guide/kernel-parameters.txt         |   5 +
 arch/powerpc/mm/book3s64/hash_utils.c         |   6 +
 arch/powerpc/mm/book3s64/internal.h           |  16 ++
 arch/powerpc/mm/book3s64/slb.c                | 166 +++++++++++++-----
 4 files changed, 148 insertions(+), 45 deletions(-)
 create mode 100644 arch/powerpc/mm/book3s64/internal.h

v4: mpe: Relocate the code into arch/powerpc/mm, s/torture/stress/
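
The option is enabled by booting with "stress_slb" on the kernel command line.
Since the plumbing is spread across slb.c, hash_utils.c and the new internal.h,
here is the early_param + static key pattern collected in one place as a
condensed sketch (the stress_slb_init() wrapper is illustrative only; in the
patch the enablement happens inline in htab_initialize()):

#include <linux/init.h>
#include <linux/cache.h>
#include <linux/jump_label.h>

/* Set at early boot if "stress_slb" appears on the command line. */
static bool stress_slb_enabled __initdata;

static int __init parse_stress_slb(char *p)
{
	stress_slb_enabled = true;
	return 0;
}
early_param("stress_slb", parse_stress_slb);

__ro_after_init DEFINE_STATIC_KEY_FALSE(stress_slb_key);

/* Illustrative wrapper: the patch does this inline in htab_initialize(). */
static void __init stress_slb_init(void)
{
	if (stress_slb_enabled)
		static_branch_enable(&stress_slb_key);
}

/* Fast-path check, patched to a no-op branch when the option is off. */
static inline bool stress_slb(void)
{
	return static_branch_unlikely(&stress_slb_key);
}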

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index f2a93c8679e8..26ef1d74e642 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -871,6 +871,11 @@
 			can be useful when debugging issues that require an SLB
 			miss to occur.
 
+	stress_slb	[PPC]
+			Limits the number of kernel SLB entries, and flushes
+			them frequently to increase the rate of SLB faults
+			on kernel addresses.
+
 	disable=	[IPV6]
 			See Documentation/networking/ipv6.txt.
 
diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c
index 3d727f73a8db..622c6e8e9fa6 100644
--- a/arch/powerpc/mm/book3s64/hash_utils.c
+++ b/arch/powerpc/mm/book3s64/hash_utils.c
@@ -66,6 +66,9 @@
 
 #include <mm/mmu_decl.h>
 
+#include "internal.h"
+
+
 #ifdef DEBUG
 #define DBG(fmt...) udbg_printf(fmt)
 #else
@@ -870,6 +873,9 @@ static void __init htab_initialize(void)
 		printk(KERN_INFO "Using 1TB segments\n");
 	}
 
+	if (stress_slb_enabled)
+		static_branch_enable(&stress_slb_key);
+
 	/*
 	 * Calculate the required size of the htab.  We want the number of
 	 * PTEGs to equal one half the number of real pages.
diff --git a/arch/powerpc/mm/book3s64/internal.h b/arch/powerpc/mm/book3s64/internal.h
new file mode 100644
index 000000000000..7eda0d30d765
--- /dev/null
+++ b/arch/powerpc/mm/book3s64/internal.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef ARCH_POWERPC_MM_BOOK3S64_INTERNAL_H
+#define ARCH_POWERPC_MM_BOOK3S64_INTERNAL_H
+
+#include <linux/jump_label.h>
+
+extern bool stress_slb_enabled;
+
+DECLARE_STATIC_KEY_FALSE(stress_slb_key);
+
+static inline bool stress_slb(void)
+{
+	return static_branch_unlikely(&stress_slb_key);
+}
+
+#endif /* ARCH_POWERPC_MM_BOOK3S64_INTERNAL_H */
diff --git a/arch/powerpc/mm/book3s64/slb.c b/arch/powerpc/mm/book3s64/slb.c
index 716204aee3da..8141e8b40ee5 100644
--- a/arch/powerpc/mm/book3s64/slb.c
+++ b/arch/powerpc/mm/book3s64/slb.c
@@ -25,6 +25,9 @@
 #include <asm/udbg.h>
 #include <asm/code-patching.h>
 
+#include "internal.h"
+
+
 enum slb_index {
 	LINEAR_INDEX	= 0, /* Kernel linear map  (0xc000000000000000) */
 	KSTACK_INDEX	= 1, /* Kernel stack map */
@@ -54,6 +57,17 @@ static inline unsigned long mk_vsid_data(unsigned long ea, int ssize,
 	return __mk_vsid_data(get_kernel_vsid(ea, ssize), ssize, flags);
 }
 
+bool stress_slb_enabled __initdata;
+
+static int __init parse_stress_slb(char *p)
+{
+	stress_slb_enabled = true;
+	return 0;
+}
+early_param("stress_slb", parse_stress_slb);
+
+__ro_after_init DEFINE_STATIC_KEY_FALSE(stress_slb_key);
+
 static void assert_slb_presence(bool present, unsigned long ea)
 {
 #ifdef CONFIG_DEBUG_VM
@@ -68,7 +82,7 @@ static void assert_slb_presence(bool present, unsigned long ea)
 	 * slbfee. requires bit 24 (PPC bit 39) be clear in RB. Hardware
 	 * ignores all other bits from 0-27, so just clear them all.
 	 */
-	ea &= ~((1UL << 28) - 1);
+	ea &= ~((1UL << SID_SHIFT) - 1);
 	asm volatile(__PPC_SLBFEE_DOT(%0, %1) : "=r"(tmp) : "r"(ea) : "cr0");
 
 	WARN_ON(present == (tmp == 0));
@@ -153,14 +167,42 @@ void slb_flush_all_realmode(void)
 	asm volatile("slbmte %0,%0; slbia" : : "r" (0));
 }
 
+static __always_inline void __slb_flush_and_restore_bolted(bool preserve_kernel_lookaside)
+{
+	struct slb_shadow *p = get_slb_shadow();
+	unsigned long ksp_esid_data, ksp_vsid_data;
+	u32 ih;
+
+	/*
+	 * SLBIA IH=1 on ISA v2.05 and newer processors may preserve lookaside
+	 * information created with Class=0 entries, which we use for kernel
+	 * SLB entries (the SLB entries themselves are still invalidated).
+	 *
+	 * Older processors will ignore this optimisation. Over-invalidation
+	 * is fine because we never rely on lookaside information existing.
+	 */
+	if (preserve_kernel_lookaside)
+		ih = 1;
+	else
+		ih = 0;
+
+	ksp_esid_data = be64_to_cpu(p->save_area[KSTACK_INDEX].esid);
+	ksp_vsid_data = be64_to_cpu(p->save_area[KSTACK_INDEX].vsid);
+
+	asm volatile(PPC_SLBIA(%0)"	\n"
+		     "slbmte	%1, %2	\n"
+		     :: "i" (ih),
+			"r" (ksp_vsid_data),
+			"r" (ksp_esid_data)
+		     : "memory");
+}
+
 /*
  * This flushes non-bolted entries, it can be run in virtual mode. Must
  * be called with interrupts disabled.
  */
 void slb_flush_and_restore_bolted(void)
 {
-	struct slb_shadow *p = get_slb_shadow();
-
 	BUILD_BUG_ON(SLB_NUM_BOLTED != 2);
 
 	WARN_ON(!irqs_disabled());
@@ -171,13 +213,10 @@ void slb_flush_and_restore_bolted(void)
 	 */
 	hard_irq_disable();
 
-	asm volatile("isync\n"
-		     "slbia\n"
-		     "slbmte  %0, %1\n"
-		     "isync\n"
-		     :: "r" (be64_to_cpu(p->save_area[KSTACK_INDEX].vsid)),
-			"r" (be64_to_cpu(p->save_area[KSTACK_INDEX].esid))
-		     : "memory");
+	isync();
+	__slb_flush_and_restore_bolted(false);
+	isync();
+
 	assert_slb_presence(true, get_paca()->kstack);
 
 	get_paca()->slb_cache_ptr = 0;
@@ -400,6 +439,30 @@ void preload_new_slb_context(unsigned long start, unsigned long sp)
 	local_irq_enable();
 }
 
+static void slb_cache_slbie_kernel(unsigned int index)
+{
+	unsigned long slbie_data = get_paca()->slb_cache[index];
+	unsigned long ksp = get_paca()->kstack;
+
+	slbie_data <<= SID_SHIFT;
+	slbie_data |= 0xc000000000000000ULL;
+	if ((ksp & slb_esid_mask(mmu_kernel_ssize)) == slbie_data)
+		return;
+	slbie_data |= mmu_kernel_ssize << SLBIE_SSIZE_SHIFT;
+
+	asm volatile("slbie %0" : : "r" (slbie_data));
+}
+
+static void slb_cache_slbie_user(unsigned int index)
+{
+	unsigned long slbie_data = get_paca()->slb_cache[index];
+
+	slbie_data <<= SID_SHIFT;
+	slbie_data |= user_segment_size(slbie_data) << SLBIE_SSIZE_SHIFT;
+	slbie_data |= SLBIE_C; /* user slbs have C=1 */
+
+	asm volatile("slbie %0" : : "r" (slbie_data));
+}
 
 /* Flush all user entries from the segment table of the current processor. */
 void switch_slb(struct task_struct *tsk, struct mm_struct *mm)
@@ -414,8 +477,14 @@ void switch_slb(struct task_struct *tsk, struct mm_struct *mm)
 	 * which would update the slb_cache/slb_cache_ptr fields in the PACA.
 	 */
 	hard_irq_disable();
-	asm volatile("isync" : : : "memory");
-	if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+	isync();
+	if (stress_slb()) {
+		__slb_flush_and_restore_bolted(false);
+		isync();
+		get_paca()->slb_cache_ptr = 0;
+		get_paca()->slb_kern_bitmap = (1U << SLB_NUM_BOLTED) - 1;
+
+	} else if (cpu_has_feature(CPU_FTR_ARCH_300)) {
 		/*
 		 * SLBIA IH=3 invalidates all Class=1 SLBEs and their
 		 * associated lookaside structures, which matches what
@@ -423,47 +492,29 @@ void switch_slb(struct task_struct *tsk, struct mm_struct *mm)
 		 * cache.
 		 */
 		asm volatile(PPC_SLBIA(3));
+
 	} else {
 		unsigned long offset = get_paca()->slb_cache_ptr;
 
 		if (!mmu_has_feature(MMU_FTR_NO_SLBIE_B) &&
 		    offset <= SLB_CACHE_ENTRIES) {
-			unsigned long slbie_data = 0;
-
-			for (i = 0; i < offset; i++) {
-				unsigned long ea;
-
-				ea = (unsigned long)
-					get_paca()->slb_cache[i] << SID_SHIFT;
-				/*
-				 * Could assert_slb_presence(true) here, but
-				 * hypervisor or machine check could have come
-				 * in and removed the entry at this point.
-				 */
-
-				slbie_data = ea;
-				slbie_data |= user_segment_size(slbie_data)
-						<< SLBIE_SSIZE_SHIFT;
-				slbie_data |= SLBIE_C; /* user slbs have C=1 */
-				asm volatile("slbie %0" : : "r" (slbie_data));
-			}
+			/*
+			 * Could assert_slb_presence(true) here, but
+			 * hypervisor or machine check could have come
+			 * in and removed the entry at this point.
+			 */
+
+			for (i = 0; i < offset; i++)
+				slb_cache_slbie_user(i);
 
 			/* Workaround POWER5 < DD2.1 issue */
 			if (!cpu_has_feature(CPU_FTR_ARCH_207S) && offset == 1)
-				asm volatile("slbie %0" : : "r" (slbie_data));
+				slb_cache_slbie_user(0);
 
 		} else {
-			struct slb_shadow *p = get_slb_shadow();
-			unsigned long ksp_esid_data =
-				be64_to_cpu(p->save_area[KSTACK_INDEX].esid);
-			unsigned long ksp_vsid_data =
-				be64_to_cpu(p->save_area[KSTACK_INDEX].vsid);
-
-			asm volatile(PPC_SLBIA(1) "\n"
-				     "slbmte	%0,%1\n"
-				     "isync"
-				     :: "r"(ksp_vsid_data),
-					"r"(ksp_esid_data));
+			/* Flush but retain kernel lookaside information */
+			__slb_flush_and_restore_bolted(true);
+			isync();
 
 			get_paca()->slb_kern_bitmap = (1U << SLB_NUM_BOLTED) - 1;
 		}
@@ -503,7 +554,7 @@ void switch_slb(struct task_struct *tsk, struct mm_struct *mm)
 	 * address accesses by the kernel (user mode won't happen until
 	 * rfid, which is safe).
 	 */
-	asm volatile("isync" : : : "memory");
+	isync();
 }
 
 void slb_set_size(u16 size)
@@ -571,6 +622,9 @@ static void slb_cache_update(unsigned long esid_data)
 	if (cpu_has_feature(CPU_FTR_ARCH_300))
 		return; /* ISAv3.0B and later does not use slb_cache */
 
+	if (stress_slb())
+		return;
+
 	/*
 	 * Now update slb cache entries
 	 */
@@ -580,7 +634,7 @@ static void slb_cache_update(unsigned long esid_data)
 		 * We have space in slb cache for optimized switch_slb().
 		 * Top 36 bits from esid_data as per ISA
 		 */
-		local_paca->slb_cache[slb_cache_index++] = esid_data >> 28;
+		local_paca->slb_cache[slb_cache_index++] = esid_data >> SID_SHIFT;
 		local_paca->slb_cache_ptr++;
 	} else {
 		/*
@@ -671,6 +725,28 @@ static long slb_insert_entry(unsigned long ea, unsigned long context,
 	 * accesses user memory before it returns to userspace with rfid.
 	 */
 	assert_slb_presence(false, ea);
+	if (stress_slb()) {
+		int slb_cache_index = local_paca->slb_cache_ptr;
+
+		/*
+		 * stress_slb() does not use the slb cache, so repurpose it as
+		 * a cache of inserted (non-bolted) kernel SLB entries. All
+		 * non-bolted kernel entries are flushed on any user fault,
+		 * or if there are already 3 non-bolted kernel entries.
+		 */
+		BUILD_BUG_ON(SLB_CACHE_ENTRIES < 3);
+		if (!kernel || slb_cache_index == 3) {
+			int i;
+
+			for (i = 0; i < slb_cache_index; i++)
+				slb_cache_slbie_kernel(i);
+			slb_cache_index = 0;
+		}
+
+		if (kernel)
+			local_paca->slb_cache[slb_cache_index++] = esid_data >> SID_SHIFT;
+		local_paca->slb_cache_ptr = slb_cache_index;
+	}
 	asm volatile("slbmte %0, %1" : : "r" (vsid_data), "r" (esid_data));
 
 	barrier();
-- 
2.25.1



* [PATCH v4 2/2] powerpc/64s/hash: Add stress_hpt kernel boot option to increase hash faults
  2020-05-11 12:58 [PATCH v4 1/2] powerpc/64s/hash: Add stress_slb kernel boot option to increase SLB faults Michael Ellerman
@ 2020-05-11 12:58 ` Michael Ellerman
  2020-05-13  4:50   ` Nicholas Piggin
  2020-06-09  5:29 ` [PATCH v4 1/2] powerpc/64s/hash: Add stress_slb kernel boot option to increase SLB faults Michael Ellerman
  1 sibling, 1 reply; 4+ messages in thread
From: Michael Ellerman @ 2020-05-11 12:58 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: npiggin

From: Nicholas Piggin <npiggin@gmail.com>

This option increases the number of hash misses by limiting the number
of kernel HPT entries: the address is accessed immediately after the
PTE is installed, then the entry is removed again (except for CI
entries, which must not be accessed and are instead removed on the
next hash fault).

This helps stress-test difficult-to-hit paths in the kernel.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
[mpe: Relocate the code into arch/powerpc/mm, s/torture/stress/]
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 .../admin-guide/kernel-parameters.txt         |  9 ++++
 arch/powerpc/mm/book3s64/hash_4k.c            |  6 +++
 arch/powerpc/mm/book3s64/hash_64k.c           | 11 ++++
 arch/powerpc/mm/book3s64/hash_utils.c         | 54 +++++++++++++++++++
 arch/powerpc/mm/book3s64/internal.h           | 10 ++++
 5 files changed, 90 insertions(+)

v4: mpe: Relocate the code into arch/powerpc/mm, s/torture/stress/
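
As noted in the comment inside hpt_do_stress() below, the "access then remove"
scheme from the commit message cannot be implemented directly under QEMU TCG or
mambo, so the patch instead remembers the group and drains it on the next hash
fault. For orientation, a rough sketch of the two variants, assuming the
mmu_hash_ops and stress_hpt_last_group definitions from hash_utils.c; the
"ideal" helper is hypothetical (and would also have to skip the prefetch for CI
mappings), it is not what the patch implements:

#include <linux/prefetch.h>
#include <linux/smp.h>

/*
 * What we would like to do: pull the translation into the TLB, then drop the
 * HPTEs so the next TLB miss takes a hash fault again.
 */
static void stress_hpt_ideal(unsigned long ea, unsigned long hpte_group)
{
	prefetch((const void *)ea);
	while (mmu_hash_ops.hpte_remove(hpte_group) != -1)
		;
}

/*
 * What the patch does instead: remember the group and drain it at the start
 * of the next hash fault on this CPU (see hpt_do_stress() below).
 */
static void stress_hpt_deferred(unsigned long hpte_group)
{
	stress_hpt_last_group[raw_smp_processor_id()] = hpte_group;
}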

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 26ef1d74e642..c446a176f9c5 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -876,6 +876,15 @@
 			them frequently to increase the rate of SLB faults
 			on kernel addresses.
 
+	stress_hpt	[PPC]
+			Limits the number of kernel entries in the hash page
+			table to increase the rate of hash faults on kernel
+			addresses.
+
+			This may hang when run on processors / emulators which
+			do not have a TLB, or which flush it more often than
+			required; QEMU seems to have problems.
+
 	disable=	[IPV6]
 			See Documentation/networking/ipv6.txt.
 
diff --git a/arch/powerpc/mm/book3s64/hash_4k.c b/arch/powerpc/mm/book3s64/hash_4k.c
index 22e787123cdf..ff65b3028745 100644
--- a/arch/powerpc/mm/book3s64/hash_4k.c
+++ b/arch/powerpc/mm/book3s64/hash_4k.c
@@ -16,6 +16,9 @@
 #include <asm/machdep.h>
 #include <asm/mmu.h>
 
+#include "internal.h"
+
+
 int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
 		   pte_t *ptep, unsigned long trap, unsigned long flags,
 		   int ssize, int subpg_prot)
@@ -118,6 +121,9 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
 		}
 		new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | H_PAGE_HASHPTE;
 		new_pte |= pte_set_hidx(ptep, rpte, 0, slot, PTRS_PER_PTE);
+
+		if (stress_hpt())
+			hpt_do_stress(ea, access, rflags, hpte_group);
 	}
 	*ptep = __pte(new_pte & ~H_PAGE_BUSY);
 	return 0;
diff --git a/arch/powerpc/mm/book3s64/hash_64k.c b/arch/powerpc/mm/book3s64/hash_64k.c
index 7084ce2951e6..11d21ec4e14d 100644
--- a/arch/powerpc/mm/book3s64/hash_64k.c
+++ b/arch/powerpc/mm/book3s64/hash_64k.c
@@ -16,6 +16,9 @@
 #include <asm/machdep.h>
 #include <asm/mmu.h>
 
+#include "internal.h"
+
+
 /*
  * Return true, if the entry has a slot value which
  * the software considers as invalid.
@@ -216,6 +219,9 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
 	new_pte |= pte_set_hidx(ptep, rpte, subpg_index, slot, PTRS_PER_PTE);
 	new_pte |= H_PAGE_HASHPTE;
 
+	if (stress_hpt())
+		hpt_do_stress(ea, access, rflags, hpte_group);
+
 	*ptep = __pte(new_pte & ~H_PAGE_BUSY);
 	return 0;
 }
@@ -327,7 +333,12 @@ int __hash_page_64K(unsigned long ea, unsigned long access,
 
 		new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | H_PAGE_HASHPTE;
 		new_pte |= pte_set_hidx(ptep, rpte, 0, slot, PTRS_PER_PTE);
+
+		if (stress_hpt())
+			hpt_do_stress(ea, access, rflags, hpte_group);
 	}
+
 	*ptep = __pte(new_pte & ~H_PAGE_BUSY);
+
 	return 0;
 }
diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c
index 622c6e8e9fa6..f048d23338de 100644
--- a/arch/powerpc/mm/book3s64/hash_utils.c
+++ b/arch/powerpc/mm/book3s64/hash_utils.c
@@ -857,6 +857,20 @@ static void __init hash_init_partition_table(phys_addr_t hash_table,
 	pr_info("Partition table %p\n", partition_tb);
 }
 
+__ro_after_init DEFINE_STATIC_KEY_FALSE(stress_hpt_key);
+
+static bool stress_hpt_enabled __initdata;
+
+/* per-CPU array allocated if we enable stress_hpt. */
+static unsigned long *stress_hpt_last_group __ro_after_init;
+
+static int __init parse_stress_hpt(char *p)
+{
+	stress_hpt_enabled = true;
+	return 0;
+}
+early_param("stress_hpt", parse_stress_hpt);
+
 static void __init htab_initialize(void)
 {
 	unsigned long table;
@@ -876,6 +890,15 @@ static void __init htab_initialize(void)
 	if (stress_slb_enabled)
 		static_branch_enable(&stress_slb_key);
 
+	if (stress_hpt_enabled) {
+		unsigned long *tmp;
+		static_branch_enable(&stress_hpt_key);
+		tmp = memblock_alloc(sizeof(unsigned long) * NR_CPUS,
+				     sizeof(unsigned long));
+		memset(tmp, 0xff, sizeof(unsigned long) * NR_CPUS);
+		stress_hpt_last_group = tmp;
+	}
+
 	/*
 	 * Calculate the required size of the htab.  We want the number of
 	 * PTEGs to equal one half the number of real pages.
@@ -1860,6 +1883,37 @@ long hpte_insert_repeating(unsigned long hash, unsigned long vpn,
 	return slot;
 }
 
+void hpt_do_stress(unsigned long ea, unsigned long access,
+		   unsigned long rflags, unsigned long hpte_group)
+{
+	unsigned long last_group;
+	int cpu = raw_smp_processor_id();
+
+	last_group = stress_hpt_last_group[cpu];
+	if (last_group != -1UL) {
+		while (mmu_hash_ops.hpte_remove(last_group) != -1)
+			;
+		stress_hpt_last_group[cpu] = -1UL;
+	}
+
+	if (ea >= PAGE_OFFSET) {
+		/*
+		 * We would really like to prefetch here to get the TLB loaded,
+		 * then remove the PTE before returning to userspace, to
+		 * increase the hash fault rate.
+		 *
+		 * Unfortunately QEMU TCG does not model the TLB in a way that
+		 * makes this possible, and the systemsim (mambo) emulator does not
+		 * bring in TLBs with prefetches (although loads/stores do
+		 * work for non-CI PTEs).
+		 *
+		 * So remember this PTE and clear it on the next hash fault.
+		 */
+		stress_hpt_last_group[cpu] = hpte_group;
+	}
+}
+
+
 #ifdef CONFIG_DEBUG_PAGEALLOC
 static void kernel_map_linear_page(unsigned long vaddr, unsigned long lmi)
 {
diff --git a/arch/powerpc/mm/book3s64/internal.h b/arch/powerpc/mm/book3s64/internal.h
index 7eda0d30d765..de824375b555 100644
--- a/arch/powerpc/mm/book3s64/internal.h
+++ b/arch/powerpc/mm/book3s64/internal.h
@@ -13,4 +13,14 @@ static inline bool stress_slb(void)
 	return static_branch_unlikely(&stress_slb_key);
 }
 
+DECLARE_STATIC_KEY_FALSE(stress_hpt_key);
+
+static inline bool stress_hpt(void)
+{
+	return static_branch_unlikely(&stress_hpt_key);
+}
+
+void hpt_do_stress(unsigned long ea, unsigned long access,
+		   unsigned long rflags, unsigned long hpte_group);
+
 #endif /* ARCH_POWERPC_MM_BOOK3S64_INTERNAL_H */
-- 
2.25.1



* Re: [PATCH v4 2/2] powerpc/64s/hash: Add stress_hpt kernel boot option to increase hash faults
  2020-05-11 12:58 ` [PATCH v4 2/2] powerpc/64s/hash: Add stress_hpt kernel boot option to increase hash faults Michael Ellerman
@ 2020-05-13  4:50   ` Nicholas Piggin
  0 siblings, 0 replies; 4+ messages in thread
From: Nicholas Piggin @ 2020-05-13  4:50 UTC (permalink / raw)
  To: linuxppc-dev, Michael Ellerman

Excerpts from Michael Ellerman's message of May 11, 2020 10:58 pm:
> +void hpt_do_stress(unsigned long ea, unsigned long access,
> +		   unsigned long rflags, unsigned long hpte_group)
> +{
> +	unsigned long last_group;
> +	int cpu = raw_smp_processor_id();
> +
> +	last_group = stress_hpt_last_group[cpu];
> +	if (last_group != -1UL) {
> +		while (mmu_hash_ops.hpte_remove(last_group) != -1)
> +			;

This seems to cause hangs and livelocks, particularly on
SMP guests. I think another CPU taking a fault and inserting an HPTE
into this group can get stuck when it has its entry removed before it can
return from the fault and load its TLB, so it faults again.

The hpte_remove hypercall must be slow enough on a guest that this loop
doesn't finish before the other CPU comes in and puts another entry in
there, which explains why I didn't see it on bare metal.

Removing the loop doesn't end up generating a lot of faults because most 
HPTEs are userspace, so they end up overwhelming the kernel HPTE 
removal.

Using hpte_invalidate to invalidate the specific entry might be the go,
although it removes some element of randomness at least on PowerNV -- 
the kernel TLB is still there and will fault "some time" when the TLB 
entry drops out.
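
Something like the following, perhaps (untested, and hpt_do_stress() would need
to be handed the slot, vpn and page sizes, which it currently isn't):

static void stress_hpt_invalidate_prev(unsigned long slot, unsigned long vpn,
				       int psize, int ssize)
{
	/*
	 * Invalidate only the entry this CPU installed on the previous
	 * fault, rather than draining the whole group, so concurrent
	 * faults on the same group keep their HPTEs and can't livelock.
	 * On PowerNV the TLB entry may linger, so the next hash fault
	 * only arrives once that translation falls out of the TLB.
	 */
	mmu_hash_ops.hpte_invalidate(slot, vpn, psize, psize, ssize, 0);
}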

Maybe hpt stress should go into the hash implementation. I'm thinking 
about what to do.

Better drop this patch for now, but the SLB one should be good to go.

Thanks,
Nick


* Re: [PATCH v4 1/2] powerpc/64s/hash: Add stress_slb kernel boot option to increase SLB faults
  2020-05-11 12:58 [PATCH v4 1/2] powerpc/64s/hash: Add stress_slb kernel boot option to increase SLB faults Michael Ellerman
  2020-05-11 12:58 ` [PATCH v4 2/2] powerpc/64s/hash: Add stress_hpt kernel boot option to increase hash faults Michael Ellerman
@ 2020-06-09  5:29 ` Michael Ellerman
  1 sibling, 0 replies; 4+ messages in thread
From: Michael Ellerman @ 2020-06-09  5:29 UTC (permalink / raw)
  To: linuxppc-dev, Michael Ellerman; +Cc: npiggin

On Mon, 11 May 2020 22:58:24 +1000, Michael Ellerman wrote:
> This option increases the number of SLB misses by limiting the number
> of kernel SLB entries and by increasing the flushing of cached lookaside
> information. This helps stress-test difficult-to-hit paths in the
> kernel.
> 
> [mpe: Relocate the code into arch/powerpc/mm, s/torture/stress/]

Applied to powerpc/next.

[1/1] powerpc/64s/hash: Add stress_slb kernel boot option to increase SLB faults
      https://git.kernel.org/powerpc/c/82a1b8ed5604cccf30b6ff03bcd61640cd26369b

cheers
