All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/4] powerpc: mm_cpumask cleanups and lazy tlb mm
@ 2023-05-24  6:08 Nicholas Piggin
  2023-05-24  6:08 ` [PATCH 1/4] powerpc: Account mm_cpumask and active_cpus in init_mm Nicholas Piggin
                   ` (4 more replies)
  0 siblings, 5 replies; 9+ messages in thread
From: Nicholas Piggin @ 2023-05-24  6:08 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Nicholas Piggin

In the process of doing patch 4, I found a few things we could improve
and tighten up with mm_cpumask handling, so added those first. They're
mostly just debugging, no real fixes or dependency on patch 4 there.

Thanks,
Nick

Nicholas Piggin (4):
  powerpc: Account mm_cpumask and active_cpus in init_mm
  powerpc/64s: Use dec_mm_active_cpus helper
  powerpc: Add mm_cpumask warning when context switching
  powerpc/64s/radix: combine final TLB flush and lazy tlb mm shootdown
    IPIs

 arch/powerpc/include/asm/book3s/64/mmu.h |  2 +-
 arch/powerpc/include/asm/mmu_context.h   |  1 +
 arch/powerpc/kernel/setup-common.c       |  6 ++++-
 arch/powerpc/kernel/smp.c                | 12 ++++++++++
 arch/powerpc/mm/book3s64/radix_tlb.c     | 28 ++++++++++++++++++++++--
 arch/powerpc/mm/mmu_context.c            |  8 +++++--
 6 files changed, 51 insertions(+), 6 deletions(-)

-- 
2.40.1


^ permalink raw reply	[flat|nested] 9+ messages in thread

* [PATCH 1/4] powerpc: Account mm_cpumask and active_cpus in init_mm
  2023-05-24  6:08 [PATCH 0/4] powerpc: mm_cpumask cleanups and lazy tlb mm Nicholas Piggin
@ 2023-05-24  6:08 ` Nicholas Piggin
  2023-05-24  6:08 ` [PATCH 2/4] powerpc/64s: Use dec_mm_active_cpus helper Nicholas Piggin
                   ` (3 subsequent siblings)
  4 siblings, 0 replies; 9+ messages in thread
From: Nicholas Piggin @ 2023-05-24  6:08 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Nicholas Piggin

init_mm mm_cpumask and context.active_cpus is not maintained at boot
and hotplug. This seems to be harmless because init_mm does not have a
userspace and so never gets user TLBs flushed, but it looks odd and it
prevents some sanity checks being added.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 arch/powerpc/kernel/setup-common.c |  6 +++++-
 arch/powerpc/kernel/smp.c          | 12 ++++++++++++
 arch/powerpc/mm/mmu_context.c      |  1 +
 3 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
index d2a446216444..16843294d978 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -969,8 +969,12 @@ void __init setup_arch(char **cmdline_p)
 	klp_init_thread_info(&init_task);
 
 	setup_initial_init_mm(_stext, _etext, _edata, _end);
-
+	/* sched_init() does the mmgrab(&init_mm) for the primary CPU */
+	VM_WARN_ON(cpumask_test_cpu(smp_processor_id(), mm_cpumask(&init_mm)));
+	cpumask_set_cpu(smp_processor_id(), mm_cpumask(&init_mm));
+	inc_mm_active_cpus(&init_mm);
 	mm_iommu_init(&init_mm);
+
 	irqstack_early_init();
 	exc_lvl_early_init();
 	emergency_stack_init();
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index 265801a3e94c..76a57dc753c8 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -47,6 +47,7 @@
 #include <asm/smp.h>
 #include <asm/time.h>
 #include <asm/machdep.h>
+#include <asm/mmu_context.h>
 #include <asm/cputhreads.h>
 #include <asm/cputable.h>
 #include <asm/mpic.h>
@@ -1615,6 +1616,9 @@ void start_secondary(void *unused)
 
 	mmgrab_lazy_tlb(&init_mm);
 	current->active_mm = &init_mm;
+	VM_WARN_ON(cpumask_test_cpu(smp_processor_id(), mm_cpumask(&init_mm)));
+	cpumask_set_cpu(cpu, mm_cpumask(&init_mm));
+	inc_mm_active_cpus(&init_mm);
 
 	smp_store_cpu_info(cpu);
 	set_dec(tb_ticks_per_jiffy);
@@ -1750,6 +1754,14 @@ int __cpu_disable(void)
 
 void __cpu_die(unsigned int cpu)
 {
+	/*
+	 * This could perhaps be a generic call in idlea_task_dead(), but
+	 * that requires testing from all archs, so first put it here to
+	 */
+	VM_WARN_ON_ONCE(!cpumask_test_cpu(cpu, mm_cpumask(&init_mm)));
+	dec_mm_active_cpus(&init_mm);
+	cpumask_clear_cpu(cpu, mm_cpumask(&init_mm));
+
 	if (smp_ops->cpu_die)
 		smp_ops->cpu_die(cpu);
 }
diff --git a/arch/powerpc/mm/mmu_context.c b/arch/powerpc/mm/mmu_context.c
index 1fb9c99f8679..894468975a44 100644
--- a/arch/powerpc/mm/mmu_context.c
+++ b/arch/powerpc/mm/mmu_context.c
@@ -47,6 +47,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
 
 	/* Mark this context has been used on the new CPU */
 	if (!cpumask_test_cpu(smp_processor_id(), mm_cpumask(next))) {
+		VM_WARN_ON_ONCE(next == &init_mm);
 		cpumask_set_cpu(smp_processor_id(), mm_cpumask(next));
 		inc_mm_active_cpus(next);
 
-- 
2.40.1


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH 2/4] powerpc/64s: Use dec_mm_active_cpus helper
  2023-05-24  6:08 [PATCH 0/4] powerpc: mm_cpumask cleanups and lazy tlb mm Nicholas Piggin
  2023-05-24  6:08 ` [PATCH 1/4] powerpc: Account mm_cpumask and active_cpus in init_mm Nicholas Piggin
@ 2023-05-24  6:08 ` Nicholas Piggin
  2023-05-24  6:08 ` [PATCH 3/4] powerpc: Add mm_cpumask warning when context switching Nicholas Piggin
                   ` (2 subsequent siblings)
  4 siblings, 0 replies; 9+ messages in thread
From: Nicholas Piggin @ 2023-05-24  6:08 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Nicholas Piggin

Avoid open-coded atomic_dec on mm->context.active_cpus and use the
function made for it. Add CONFIG_DEBUG_VM underflow checking on the
counter.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 arch/powerpc/include/asm/book3s/64/mmu.h | 2 +-
 arch/powerpc/include/asm/mmu_context.h   | 1 +
 arch/powerpc/mm/book3s64/radix_tlb.c     | 2 +-
 3 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h b/arch/powerpc/include/asm/book3s/64/mmu.h
index 570a4960cf17..5cf0e9c953b3 100644
--- a/arch/powerpc/include/asm/book3s/64/mmu.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu.h
@@ -261,7 +261,7 @@ static inline void radix_init_pseries(void) { }
 #define arch_clear_mm_cpumask_cpu(cpu, mm)				\
 	do {								\
 		if (cpumask_test_cpu(cpu, mm_cpumask(mm))) {		\
-			atomic_dec(&(mm)->context.active_cpus);		\
+			dec_mm_active_cpus(mm);				\
 			cpumask_clear_cpu(cpu, mm_cpumask(mm));		\
 		}							\
 	} while (0)
diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h
index 57f5017111f4..37bffa0f7918 100644
--- a/arch/powerpc/include/asm/mmu_context.h
+++ b/arch/powerpc/include/asm/mmu_context.h
@@ -127,6 +127,7 @@ static inline void inc_mm_active_cpus(struct mm_struct *mm)
 
 static inline void dec_mm_active_cpus(struct mm_struct *mm)
 {
+	VM_WARN_ON_ONCE(atomic_read(&mm->context.active_cpus) <= 0);
 	atomic_dec(&mm->context.active_cpus);
 }
 
diff --git a/arch/powerpc/mm/book3s64/radix_tlb.c b/arch/powerpc/mm/book3s64/radix_tlb.c
index 90953cf9f648..8160c1630c3d 100644
--- a/arch/powerpc/mm/book3s64/radix_tlb.c
+++ b/arch/powerpc/mm/book3s64/radix_tlb.c
@@ -808,7 +808,7 @@ void exit_lazy_flush_tlb(struct mm_struct *mm, bool always_flush)
 	 * that's what the caller expects.
 	 */
 	if (cpumask_test_cpu(cpu, mm_cpumask(mm))) {
-		atomic_dec(&mm->context.active_cpus);
+		dec_mm_active_cpus(mm);
 		cpumask_clear_cpu(cpu, mm_cpumask(mm));
 		always_flush = true;
 	}
-- 
2.40.1


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH 3/4] powerpc: Add mm_cpumask warning when context switching
  2023-05-24  6:08 [PATCH 0/4] powerpc: mm_cpumask cleanups and lazy tlb mm Nicholas Piggin
  2023-05-24  6:08 ` [PATCH 1/4] powerpc: Account mm_cpumask and active_cpus in init_mm Nicholas Piggin
  2023-05-24  6:08 ` [PATCH 2/4] powerpc/64s: Use dec_mm_active_cpus helper Nicholas Piggin
@ 2023-05-24  6:08 ` Nicholas Piggin
  2023-08-18  7:22   ` Michael Ellerman
  2023-05-24  6:08 ` [PATCH 4/4] powerpc/64s/radix: combine final TLB flush and lazy tlb mm shootdown IPIs Nicholas Piggin
  2023-08-10  6:02 ` [PATCH 0/4] powerpc: mm_cpumask cleanups and lazy tlb mm Michael Ellerman
  4 siblings, 1 reply; 9+ messages in thread
From: Nicholas Piggin @ 2023-05-24  6:08 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Nicholas Piggin

When context switching away from an mm, add a CONFIG_DEBUG_VM warning
check to ensure this CPU is still set in the mask. This could catch
bugs where the mask is improperly trimmed while the CPU is still using
the mm.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 arch/powerpc/mm/mmu_context.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/mm/mmu_context.c b/arch/powerpc/mm/mmu_context.c
index 894468975a44..b24c19078eb1 100644
--- a/arch/powerpc/mm/mmu_context.c
+++ b/arch/powerpc/mm/mmu_context.c
@@ -43,12 +43,13 @@ static inline void switch_mm_pgdir(struct task_struct *tsk,
 void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
 			struct task_struct *tsk)
 {
+	int cpu = smp_processor_id();
 	bool new_on_cpu = false;
 
 	/* Mark this context has been used on the new CPU */
-	if (!cpumask_test_cpu(smp_processor_id(), mm_cpumask(next))) {
+	if (!cpumask_test_cpu(cpu, mm_cpumask(next))) {
 		VM_WARN_ON_ONCE(next == &init_mm);
-		cpumask_set_cpu(smp_processor_id(), mm_cpumask(next));
+		cpumask_set_cpu(cpu, mm_cpumask(next));
 		inc_mm_active_cpus(next);
 
 		/*
@@ -101,6 +102,8 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
 	 * sub architectures. Out of line for now
 	 */
 	switch_mmu_context(prev, next, tsk);
+
+	VM_WARN_ON_ONCE(!cpumask_test_cpu(cpu, mm_cpumask(prev)));
 }
 
 #ifndef CONFIG_PPC_BOOK3S_64
-- 
2.40.1


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH 4/4] powerpc/64s/radix: combine final TLB flush and lazy tlb mm shootdown IPIs
  2023-05-24  6:08 [PATCH 0/4] powerpc: mm_cpumask cleanups and lazy tlb mm Nicholas Piggin
                   ` (2 preceding siblings ...)
  2023-05-24  6:08 ` [PATCH 3/4] powerpc: Add mm_cpumask warning when context switching Nicholas Piggin
@ 2023-05-24  6:08 ` Nicholas Piggin
  2023-07-18  2:54   ` Michael Ellerman
  2023-08-10  6:02 ` [PATCH 0/4] powerpc: mm_cpumask cleanups and lazy tlb mm Michael Ellerman
  4 siblings, 1 reply; 9+ messages in thread
From: Nicholas Piggin @ 2023-05-24  6:08 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Nicholas Piggin

This performs lazy tlb mm shootdown when doing the exit TLB flush when
all mm users go away and user mappings are removed, which avoids having
to do the lazy tlb mm shootdown IPIs on the final mmput when all kernel
references disappear.

powerpc/64s uses a broadcast TLBIE for the exit TLB flush if remote CPUs
need to be invalidated (unless TLBIE is disabled), so this doesn't
necessarily save IPIs but it does avoid a broadcast TLBIE which is quite
expensive.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 arch/powerpc/mm/book3s64/radix_tlb.c | 26 +++++++++++++++++++++++++-
 1 file changed, 25 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/mm/book3s64/radix_tlb.c b/arch/powerpc/mm/book3s64/radix_tlb.c
index 8160c1630c3d..e2aaee6df1f6 100644
--- a/arch/powerpc/mm/book3s64/radix_tlb.c
+++ b/arch/powerpc/mm/book3s64/radix_tlb.c
@@ -1301,7 +1301,31 @@ void radix__tlb_flush(struct mmu_gather *tlb)
 	 * See the comment for radix in arch_exit_mmap().
 	 */
 	if (tlb->fullmm) {
-		__flush_all_mm(mm, true);
+		if (IS_ENABLED(CONFIG_MMU_LAZY_TLB_SHOOTDOWN)) {
+			/*
+			 * Shootdown based lazy tlb mm refcounting means we
+			 * have to IPI everyone in the mm_cpumask anyway soon
+			 * when the mm goes away, so might as well do it as
+			 * part of the final flush now.
+			 *
+			 * If lazy shootdown was improved to reduce IPIs (e.g.,
+			 * by batching), then it may end up being better to use
+			 * tlbies here instead.
+			 */
+			smp_mb(); /* see radix__flush_tlb_mm */
+			exit_flush_lazy_tlbs(mm);
+			_tlbiel_pid(mm->context.id, RIC_FLUSH_ALL);
+
+			/*
+			 * It should not be possible to have coprocessors still
+			 * attached here.
+			 */
+			if (WARN_ON_ONCE(atomic_read(&mm->context.copros) > 0))
+				__flush_all_mm(mm, true);
+		} else {
+			__flush_all_mm(mm, true);
+		}
+
 	} else if ( (psize = radix_get_mmu_psize(page_size)) == -1) {
 		if (!tlb->freed_tables)
 			radix__flush_tlb_mm(mm);
-- 
2.40.1


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* Re: [PATCH 4/4] powerpc/64s/radix: combine final TLB flush and lazy tlb mm shootdown IPIs
  2023-05-24  6:08 ` [PATCH 4/4] powerpc/64s/radix: combine final TLB flush and lazy tlb mm shootdown IPIs Nicholas Piggin
@ 2023-07-18  2:54   ` Michael Ellerman
  2023-07-21  1:37     ` Nicholas Piggin
  0 siblings, 1 reply; 9+ messages in thread
From: Michael Ellerman @ 2023-07-18  2:54 UTC (permalink / raw)
  To: Nicholas Piggin, linuxppc-dev; +Cc: Nicholas Piggin

Nicholas Piggin <npiggin@gmail.com> writes:
> This performs lazy tlb mm shootdown when doing the exit TLB flush when
> all mm users go away and user mappings are removed, which avoids having
> to do the lazy tlb mm shootdown IPIs on the final mmput when all kernel
> references disappear.
>
> powerpc/64s uses a broadcast TLBIE for the exit TLB flush if remote CPUs
> need to be invalidated (unless TLBIE is disabled), so this doesn't
> necessarily save IPIs but it does avoid a broadcast TLBIE which is quite
> expensive.
>
> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
> ---
>  arch/powerpc/mm/book3s64/radix_tlb.c | 26 +++++++++++++++++++++++++-
>  1 file changed, 25 insertions(+), 1 deletion(-)

This gives me:

[    1.438910][    T1] Run /init as init process
[    1.442759][   T96] ------------[ cut here ]------------
[    1.442836][   T96] WARNING: CPU: 0 PID: 96 at kernel/smp.c:748 smp_call_function_many_cond+0xe0/0xad0
[    1.442920][   T96] Modules linked in:
[    1.442960][   T96] CPU: 0 PID: 96 Comm: init Not tainted 6.5.0-rc2-g1954d181ea09 #168
[    1.443028][   T96] Hardware name: IBM pSeries (emulated by qemu) POWER9 (raw) 0x4e1202 0xf000005 of:SLOF,git-6b6c16 hv:linux,kvm pSeries
[    1.443126][   T96] NIP:  c0000000002aab20 LR: c0000000000a5fc4 CTR: 0000000000000000
[    1.443199][   T96] REGS: c00000000c36f5b0 TRAP: 0700   Not tainted  (6.5.0-rc2-g1954d181ea09)
[    1.443280][   T96] MSR:  8000000000029033 <SF,EE,ME,IR,DR,RI,LE>  CR: 44008244  XER: 20040000
[    1.443382][   T96] CFAR: c0000000002ab524 IRQMASK: 0
[    1.443382][   T96] GPR00: c0000000000a5fc4 c00000000c36f850 c0000000017f9000 c00000000617c580
[    1.443382][   T96] GPR04: c0000000000a55b0 c00000000617bd00 0000000000000001 0000000000000001
[    1.443382][   T96] GPR08: c0000000029fc88c c00000000c25aa00 0000000000000000 0000000044008244
[    1.443382][   T96] GPR12: 00000000fd780000 c0000000036c0000 0000000000000000 c000000004042a00
[    1.443382][   T96] GPR16: 0000000000000001 0000000000000000 0000000000000000 0000000000000000
[    1.443382][   T96] GPR20: ffffffffffffffff c0000000000a5fc4 0000000000000000 c0000000029f85d0
[    1.443382][   T96] GPR24: c00000000c25b518 0000000000000000 c00000000617be60 c00000000617bd00
[    1.443382][   T96] GPR28: c00000000617c580 c0000000000a55b0 0000000000000000 0000000000000000
[    1.443994][   T96] NIP [c0000000002aab20] smp_call_function_many_cond+0xe0/0xad0
[    1.444069][   T96] LR [c0000000000a5fc4] radix__tlb_flush+0xf4/0x190
[    1.444133][   T96] Call Trace:
[    1.444172][   T96] [c00000000c36f850] [ffffffffffffffff] 0xffffffffffffffff (unreliable)
[    1.444250][   T96] [c00000000c36f920] [c0000000029f7fe0] __cpu_possible_mask+0x0/0x100
[    1.444326][   T96] [c00000000c36f950] [c0000000004f346c] tlb_finish_mmu+0x16c/0x220
[    1.444402][   T96] [c00000000c36f980] [c0000000004ee894] exit_mmap+0x1b4/0x580
[    1.444474][   T96] [c00000000c36faa0] [c00000000014c140] __mmput+0x60/0x1c0
[    1.444546][   T96] [c00000000c36fae0] [c0000000005cf014] begin_new_exec+0x5d4/0xec0
[    1.444622][   T96] [c00000000c36fb60] [c00000000066c6e8] load_elf_binary+0x4a8/0x1cf0
[    1.444697][   T96] [c00000000c36fc60] [c0000000005cc410] bprm_execve+0x3b0/0xa60
[    1.444773][   T96] [c00000000c36fd30] [c0000000005ce3a0] do_execveat_common+0x1d0/0x300
[    1.444852][   T96] [c00000000c36fde0] [c0000000005ce524] sys_execve+0x54/0x70
[    1.444928][   T96] [c00000000c36fe10] [c000000000031c24] system_call_exception+0x134/0x360
[    1.445000][   T96] [c00000000c36fe50] [c00000000000d6a0] system_call_common+0x160/0x2c4
[    1.445070][   T96] --- interrupt: c00 at 0x7fffb664cc98
[    1.445119][   T96] NIP:  00007fffb664cc98 LR: 000000001004bcb0 CTR: 0000000000000000
[    1.445189][   T96] REGS: c00000000c36fe80 TRAP: 0c00   Not tainted  (6.5.0-rc2-g1954d181ea09)
[    1.445271][   T96] MSR:  800000000280f033 <SF,VEC,VSX,EE,PR,FP,ME,IR,DR,RI,LE>  CR: 22004842  XER: 00000000
[    1.445390][   T96] IRQMASK: 0
[    1.445390][   T96] GPR00: 000000000000000b 00007fffd9d11ec0 00007fffb6767300 000000002b3f06e8
[    1.445390][   T96] GPR04: 000000002b3f0780 000000002b3f07b0 0000000000000000 0000000000000000
[    1.445390][   T96] GPR08: 000000002b3f06e8 0000000000000000 0000000000000000 0000000000000000
[    1.445390][   T96] GPR12: 0000000000000000 00007fffb683a930 00000000100f0ff8 0000000000000000
[    1.445390][   T96] GPR16: 0000000000000000 00007fffd9d12020 000000002b3f0780 0000000000000000
[    1.445390][   T96] GPR20: 000000002b3f0778 000000002b3f1330 0000000000000000 00000000100c6cb0
[    1.445390][   T96] GPR24: 0000000000000000 0000000000000000 ffffffffffffffff ffffffffffffffff
[    1.445390][   T96] GPR28: 00000000100d34ae 00000000100c6cf8 000000002b3f0780 000000002b3f06e8
[    1.446042][   T96] NIP [00007fffb664cc98] 0x7fffb664cc98
[    1.446095][   T96] LR [000000001004bcb0] 0x1004bcb0
[    1.446147][   T96] --- interrupt: c00
[    1.446186][   T96] Code: 81490000 394a0001 91490000 e8ed0030 3d420097 394ae900 7cea382e 81490000 2c070000 394affff 91490000 41820044 <0fe00000> faa100e0 f8410018 f9c10040
[    1.446356][   T96] irq event stamp: 458
[    1.446395][   T96] hardirqs last  enabled at (457): [<c000000000568638>] __slab_free+0x228/0x560
[    1.446481][   T96] hardirqs last disabled at (458): [<c00000000002a6a0>] interrupt_enter_prepare+0x90/0x220
[    1.446577][   T96] softirqs last  enabled at (0): [<c00000000014f658>] copy_process+0x9f8/0x20b0
[    1.446661][   T96] softirqs last disabled at (0): [<0000000000000000>] 0x0
[    1.446723][   T96] ---[ end trace 0000000000000000 ]---

Which is:

static void smp_call_function_many_cond(const struct cpumask *mask,
					smp_call_func_t func, void *info,
					unsigned int scf_flags,
					smp_cond_func_t cond_func)
{
	int cpu, last_cpu, this_cpu = smp_processor_id();
	struct call_function_data *cfd;
	bool wait = scf_flags & SCF_WAIT;
	int nr_cpus = 0;
	bool run_remote = false;
	bool run_local = false;

	lockdep_assert_preemption_disabled();

Called from exit_flush_lazy_tlbs().

cheers

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 4/4] powerpc/64s/radix: combine final TLB flush and lazy tlb mm shootdown IPIs
  2023-07-18  2:54   ` Michael Ellerman
@ 2023-07-21  1:37     ` Nicholas Piggin
  0 siblings, 0 replies; 9+ messages in thread
From: Nicholas Piggin @ 2023-07-21  1:37 UTC (permalink / raw)
  To: Michael Ellerman, linuxppc-dev

On Tue Jul 18, 2023 at 12:54 PM AEST, Michael Ellerman wrote:
> Nicholas Piggin <npiggin@gmail.com> writes:
> > This performs lazy tlb mm shootdown when doing the exit TLB flush when
> > all mm users go away and user mappings are removed, which avoids having
> > to do the lazy tlb mm shootdown IPIs on the final mmput when all kernel
> > references disappear.
> >
> > powerpc/64s uses a broadcast TLBIE for the exit TLB flush if remote CPUs
> > need to be invalidated (unless TLBIE is disabled), so this doesn't
> > necessarily save IPIs but it does avoid a broadcast TLBIE which is quite
> > expensive.
> >
> > Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
> > ---
> >  arch/powerpc/mm/book3s64/radix_tlb.c | 26 +++++++++++++++++++++++++-
> >  1 file changed, 25 insertions(+), 1 deletion(-)
>
> This gives me:
>
> [    1.438910][    T1] Run /init as init process
> [    1.442759][   T96] ------------[ cut here ]------------
> [    1.442836][   T96] WARNING: CPU: 0 PID: 96 at kernel/smp.c:748 smp_call_function_many_cond+0xe0/0xad0
> [    1.442920][   T96] Modules linked in:
> [    1.442960][   T96] CPU: 0 PID: 96 Comm: init Not tainted 6.5.0-rc2-g1954d181ea09 #168
> [    1.443028][   T96] Hardware name: IBM pSeries (emulated by qemu) POWER9 (raw) 0x4e1202 0xf000005 of:SLOF,git-6b6c16 hv:linux,kvm pSeries
> [    1.443126][   T96] NIP:  c0000000002aab20 LR: c0000000000a5fc4 CTR: 0000000000000000
> [    1.443199][   T96] REGS: c00000000c36f5b0 TRAP: 0700   Not tainted  (6.5.0-rc2-g1954d181ea09)
> [    1.443280][   T96] MSR:  8000000000029033 <SF,EE,ME,IR,DR,RI,LE>  CR: 44008244  XER: 20040000
> [    1.443382][   T96] CFAR: c0000000002ab524 IRQMASK: 0
> [    1.443382][   T96] GPR00: c0000000000a5fc4 c00000000c36f850 c0000000017f9000 c00000000617c580
> [    1.443382][   T96] GPR04: c0000000000a55b0 c00000000617bd00 0000000000000001 0000000000000001
> [    1.443382][   T96] GPR08: c0000000029fc88c c00000000c25aa00 0000000000000000 0000000044008244
> [    1.443382][   T96] GPR12: 00000000fd780000 c0000000036c0000 0000000000000000 c000000004042a00
> [    1.443382][   T96] GPR16: 0000000000000001 0000000000000000 0000000000000000 0000000000000000
> [    1.443382][   T96] GPR20: ffffffffffffffff c0000000000a5fc4 0000000000000000 c0000000029f85d0
> [    1.443382][   T96] GPR24: c00000000c25b518 0000000000000000 c00000000617be60 c00000000617bd00
> [    1.443382][   T96] GPR28: c00000000617c580 c0000000000a55b0 0000000000000000 0000000000000000
> [    1.443994][   T96] NIP [c0000000002aab20] smp_call_function_many_cond+0xe0/0xad0
> [    1.444069][   T96] LR [c0000000000a5fc4] radix__tlb_flush+0xf4/0x190
> [    1.444133][   T96] Call Trace:
> [    1.444172][   T96] [c00000000c36f850] [ffffffffffffffff] 0xffffffffffffffff (unreliable)
> [    1.444250][   T96] [c00000000c36f920] [c0000000029f7fe0] __cpu_possible_mask+0x0/0x100
> [    1.444326][   T96] [c00000000c36f950] [c0000000004f346c] tlb_finish_mmu+0x16c/0x220
> [    1.444402][   T96] [c00000000c36f980] [c0000000004ee894] exit_mmap+0x1b4/0x580
> [    1.444474][   T96] [c00000000c36faa0] [c00000000014c140] __mmput+0x60/0x1c0
> [    1.444546][   T96] [c00000000c36fae0] [c0000000005cf014] begin_new_exec+0x5d4/0xec0
> [    1.444622][   T96] [c00000000c36fb60] [c00000000066c6e8] load_elf_binary+0x4a8/0x1cf0
> [    1.444697][   T96] [c00000000c36fc60] [c0000000005cc410] bprm_execve+0x3b0/0xa60
> [    1.444773][   T96] [c00000000c36fd30] [c0000000005ce3a0] do_execveat_common+0x1d0/0x300
> [    1.444852][   T96] [c00000000c36fde0] [c0000000005ce524] sys_execve+0x54/0x70
> [    1.444928][   T96] [c00000000c36fe10] [c000000000031c24] system_call_exception+0x134/0x360
> [    1.445000][   T96] [c00000000c36fe50] [c00000000000d6a0] system_call_common+0x160/0x2c4
> [    1.445070][   T96] --- interrupt: c00 at 0x7fffb664cc98
> [    1.445119][   T96] NIP:  00007fffb664cc98 LR: 000000001004bcb0 CTR: 0000000000000000
> [    1.445189][   T96] REGS: c00000000c36fe80 TRAP: 0c00   Not tainted  (6.5.0-rc2-g1954d181ea09)
> [    1.445271][   T96] MSR:  800000000280f033 <SF,VEC,VSX,EE,PR,FP,ME,IR,DR,RI,LE>  CR: 22004842  XER: 00000000
> [    1.445390][   T96] IRQMASK: 0
> [    1.445390][   T96] GPR00: 000000000000000b 00007fffd9d11ec0 00007fffb6767300 000000002b3f06e8
> [    1.445390][   T96] GPR04: 000000002b3f0780 000000002b3f07b0 0000000000000000 0000000000000000
> [    1.445390][   T96] GPR08: 000000002b3f06e8 0000000000000000 0000000000000000 0000000000000000
> [    1.445390][   T96] GPR12: 0000000000000000 00007fffb683a930 00000000100f0ff8 0000000000000000
> [    1.445390][   T96] GPR16: 0000000000000000 00007fffd9d12020 000000002b3f0780 0000000000000000
> [    1.445390][   T96] GPR20: 000000002b3f0778 000000002b3f1330 0000000000000000 00000000100c6cb0
> [    1.445390][   T96] GPR24: 0000000000000000 0000000000000000 ffffffffffffffff ffffffffffffffff
> [    1.445390][   T96] GPR28: 00000000100d34ae 00000000100c6cf8 000000002b3f0780 000000002b3f06e8
> [    1.446042][   T96] NIP [00007fffb664cc98] 0x7fffb664cc98
> [    1.446095][   T96] LR [000000001004bcb0] 0x1004bcb0
> [    1.446147][   T96] --- interrupt: c00
> [    1.446186][   T96] Code: 81490000 394a0001 91490000 e8ed0030 3d420097 394ae900 7cea382e 81490000 2c070000 394affff 91490000 41820044 <0fe00000> faa100e0 f8410018 f9c10040
> [    1.446356][   T96] irq event stamp: 458
> [    1.446395][   T96] hardirqs last  enabled at (457): [<c000000000568638>] __slab_free+0x228/0x560
> [    1.446481][   T96] hardirqs last disabled at (458): [<c00000000002a6a0>] interrupt_enter_prepare+0x90/0x220
> [    1.446577][   T96] softirqs last  enabled at (0): [<c00000000014f658>] copy_process+0x9f8/0x20b0
> [    1.446661][   T96] softirqs last disabled at (0): [<0000000000000000>] 0x0
> [    1.446723][   T96] ---[ end trace 0000000000000000 ]---
>
> Which is:
>
> static void smp_call_function_many_cond(const struct cpumask *mask,
> 					smp_call_func_t func, void *info,
> 					unsigned int scf_flags,
> 					smp_cond_func_t cond_func)
> {
> 	int cpu, last_cpu, this_cpu = smp_processor_id();
> 	struct call_function_data *cfd;
> 	bool wait = scf_flags & SCF_WAIT;
> 	int nr_cpus = 0;
> 	bool run_remote = false;
> 	bool run_local = false;
>
> 	lockdep_assert_preemption_disabled();
>
> Called from exit_flush_lazy_tlbs().

Hmm, I guess it just needs this?

diff --git a/arch/powerpc/mm/book3s64/radix_tlb.c b/arch/powerpc/mm/book3s64/radix_tlb.c
index f1a1ec0d9820..30480e248183 100644
--- a/arch/powerpc/mm/book3s64/radix_tlb.c
+++ b/arch/powerpc/mm/book3s64/radix_tlb.c
@@ -1322,6 +1322,7 @@ void radix__tlb_flush(struct mmu_gather *tlb)
                         * by batching), then it may end up being better to use
                         * tlbies here instead.
                         */
+                       preempt_disable();
                        smp_mb(); /* see radix__flush_tlb_mm */
                        exit_flush_lazy_tlbs(mm);
                        _tlbiel_pid(mm->context.id, RIC_FLUSH_ALL);
@@ -1332,6 +1333,7 @@ void radix__tlb_flush(struct mmu_gather *tlb)
                         */
                        if (WARN_ON_ONCE(atomic_read(&mm->context.copros) > 0))
                                __flush_all_mm(mm, true);
+                       preempt_enable();
                } else {
                        __flush_all_mm(mm, true);
                }


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* Re: [PATCH 0/4] powerpc: mm_cpumask cleanups and lazy tlb mm
  2023-05-24  6:08 [PATCH 0/4] powerpc: mm_cpumask cleanups and lazy tlb mm Nicholas Piggin
                   ` (3 preceding siblings ...)
  2023-05-24  6:08 ` [PATCH 4/4] powerpc/64s/radix: combine final TLB flush and lazy tlb mm shootdown IPIs Nicholas Piggin
@ 2023-08-10  6:02 ` Michael Ellerman
  4 siblings, 0 replies; 9+ messages in thread
From: Michael Ellerman @ 2023-08-10  6:02 UTC (permalink / raw)
  To: linuxppc-dev, Nicholas Piggin

On Wed, 24 May 2023 16:08:17 +1000, Nicholas Piggin wrote:
> In the process of doing patch 4, I found a few things we could improve
> and tighten up with mm_cpumask handling, so added those first. They're
> mostly just debugging, no real fixes or dependency on patch 4 there.
> 
> Thanks,
> Nick
> 
> [...]

Applied to powerpc/next.

[1/4] powerpc: Account mm_cpumask and active_cpus in init_mm
      https://git.kernel.org/powerpc/c/c3c2e93753484bb4e935ed8205c1f569907f5970
[2/4] powerpc/64s: Use dec_mm_active_cpus helper
      https://git.kernel.org/powerpc/c/f74b2a6c01a0b319070ccee7dea0cc4dad694041
[3/4] powerpc: Add mm_cpumask warning when context switching
      https://git.kernel.org/powerpc/c/177255afb40548fdf504384b361d18d6cbe35d1e
[4/4] powerpc/64s/radix: combine final TLB flush and lazy tlb mm shootdown IPIs
      https://git.kernel.org/powerpc/c/e43c0a0c3c2870e1ee29519dc249471adf19ab5f

cheers

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 3/4] powerpc: Add mm_cpumask warning when context switching
  2023-05-24  6:08 ` [PATCH 3/4] powerpc: Add mm_cpumask warning when context switching Nicholas Piggin
@ 2023-08-18  7:22   ` Michael Ellerman
  0 siblings, 0 replies; 9+ messages in thread
From: Michael Ellerman @ 2023-08-18  7:22 UTC (permalink / raw)
  To: Nicholas Piggin, linuxppc-dev; +Cc: Nicholas Piggin

Nicholas Piggin <npiggin@gmail.com> writes:
> When context switching away from an mm, add a CONFIG_DEBUG_VM warning
> check to ensure this CPU is still set in the mask. This could catch
> bugs where the mask is improperly trimmed while the CPU is still using
> the mm.
>
> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
> ---
>  arch/powerpc/mm/mmu_context.c | 7 +++++--
>  1 file changed, 5 insertions(+), 2 deletions(-)
>
> diff --git a/arch/powerpc/mm/mmu_context.c b/arch/powerpc/mm/mmu_context.c
> index 894468975a44..b24c19078eb1 100644
> --- a/arch/powerpc/mm/mmu_context.c
> +++ b/arch/powerpc/mm/mmu_context.c
> @@ -101,6 +102,8 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
>  	 * sub architectures. Out of line for now
>  	 */
>  	switch_mmu_context(prev, next, tsk);
> +
> +	VM_WARN_ON_ONCE(!cpumask_test_cpu(cpu, mm_cpumask(prev)));

This is popping during CPU hotunplug. I guess some confusion about when
the mask is cleared.

cheers


[  145.150374][    T0] ------------[ cut here ]------------
[  145.150459][    T0] WARNING: CPU: 5 PID: 0 at arch/powerpc/mm/mmu_context.c:106 switch_mm_irqs_off+0x320/0x340
[  145.150519][    T0] Modules linked in: bonding pseries_rng rng_core binfmt_misc aes_gcm_p10_crypto zram vmx_crypto gf128mul crc32c_vpmsum papr_scm ip6_tables ip_tables x_tables fuse autofs4
[  145.150588][    T0] CPU: 5 PID: 0 Comm: swapper/5 Not tainted 6.5.0-rc3-00084-g01477eb5e323 #47
[  145.150592][    T0] Hardware name: IBM,9080-HEX POWER10 (raw) 0x800200 0xf000006 of:IBM,FW1030.00 (NH1030_019) hv:phyp pSeries
[  145.150595][    T0] NIP:  c0000000000cbc30 LR: c0000000000cbab0 CTR: c000000000181a40
[  145.150598][    T0] REGS: c00000000985fb10 TRAP: 0700   Not tainted  (6.5.0-rc3-00084-g01477eb5e323)
[  145.150602][    T0] MSR:  800000000282b033 <SF,VEC,VSX,EE,FP,ME,IR,DR,RI,LE>  CR: 24000208  XER: 0000011e
[  145.150625][    T0] CFAR: c0000000000cbae4 IRQMASK: 1 
[  145.150625][    T0] GPR00: c0000000000cbab0 c00000000985fdb0 c0000000027aaf00 c000000b02955f00 
[  145.150625][    T0] GPR04: c0000000043e0b00 c0000000067c7000 0000000000000000 0000000000030e2d 
[  145.150625][    T0] GPR08: c00000000451af00 0000000000000001 c00000000451af00 c00000000451af00 
[  145.150625][    T0] GPR12: c000000000181a40 c00000050fffa300 0000000000000000 000000001eed51a0 
[  145.150625][    T0] GPR16: 0000000000000000 0000000000000000 0000000000000000 0000000000000000 
[  145.150625][    T0] GPR20: 0000000000000000 0000000000000000 0000000000000000 0000000000000001 
[  145.150625][    T0] GPR24: 0000000000000005 000000000000dedc c000000004468470 0000000000000001 
[  145.150625][    T0] GPR28: c0000000067c7000 0000000000000000 0000000000000005 c000000b02956780 
[  145.150711][    T0] NIP [c0000000000cbc30] switch_mm_irqs_off+0x320/0x340
[  145.150716][    T0] LR [c0000000000cbab0] switch_mm_irqs_off+0x1a0/0x340
[  145.150721][    T0] Call Trace:
[  145.150724][    T0] [c00000000985fdb0] [c000000000448688] __smp_call_single_queue+0x198/0x1f0 (unreliable)
[  145.150732][    T0] [c00000000985fdf0] [c0000000002af958] idle_task_exit+0xf8/0x230
[  145.150740][    T0] [c00000000985fe40] [c000000000181aac] pseries_cpu_offline_self+0x6c/0x230
[  145.150748][    T0] [c00000000985feb0] [c000000000092bb4] arch_cpu_idle_dead+0x64/0x90
[  145.150755][    T0] [c00000000985fee0] [c0000000002fc09c] do_idle+0x25c/0x740
[  145.150761][    T0] [c00000000985ff60] [c0000000002fcd14] cpu_startup_entry+0x84/0xa0
[  145.150765][    T0] [c00000000985ff90] [c000000000092500] start_secondary+0x4e0/0x510
[  145.150772][    T0] [c00000000985ffe0] [c00000000000e258] start_secondary_prolog+0x10/0x14
[  145.150788][    T0] Code: 0fe00000 3ce201d7 e947a198 394a0001 f947a198 4bfffd70 60000000 60420000 3d4201d7 e92aa210 39290001 f92aa210 <0fe00000> 3d4201d7 e8010050 e92aa218 
[  145.150828][    T0] irq event stamp: 49758
[  145.150831][    T0] hardirqs last  enabled at (49757): [<c0000000004330c8>] tick_nohz_idle_enter+0x118/0x2b0
[  145.150836][    T0] hardirqs last disabled at (49758): [<c0000000002fc038>] do_idle+0x1f8/0x740
[  145.150839][    T0] softirqs last  enabled at (49724): [<c000000001f66398>] __do_softirq+0x5c8/0x7f4
[  145.150847][    T0] softirqs last disabled at (49703): [<c00000000001bc90>] do_softirq_own_stack+0x50/0x80
[  145.150852][    T0] ---[ end trace 0000000000000000 ]---

^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2023-08-18  7:23 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-05-24  6:08 [PATCH 0/4] powerpc: mm_cpumask cleanups and lazy tlb mm Nicholas Piggin
2023-05-24  6:08 ` [PATCH 1/4] powerpc: Account mm_cpumask and active_cpus in init_mm Nicholas Piggin
2023-05-24  6:08 ` [PATCH 2/4] powerpc/64s: Use dec_mm_active_cpus helper Nicholas Piggin
2023-05-24  6:08 ` [PATCH 3/4] powerpc: Add mm_cpumask warning when context switching Nicholas Piggin
2023-08-18  7:22   ` Michael Ellerman
2023-05-24  6:08 ` [PATCH 4/4] powerpc/64s/radix: combine final TLB flush and lazy tlb mm shootdown IPIs Nicholas Piggin
2023-07-18  2:54   ` Michael Ellerman
2023-07-21  1:37     ` Nicholas Piggin
2023-08-10  6:02 ` [PATCH 0/4] powerpc: mm_cpumask cleanups and lazy tlb mm Michael Ellerman

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.