* [PATCH v2 0/5] shoot lazy tlbs
From: Nicholas Piggin @ 2020-12-14 6:53 UTC
To: linux-kernel
Cc: Nicholas Piggin, linux-arch, linuxppc-dev, linux-mm,
Anton Blanchard, Andy Lutomirski
This is another rebase, now on top of mainline (the asm-generic tree
is no longer needed) and without any x86 or membarrier changes. This
makes the series far smaller, more manageable, and free of the
controversial bits.
Thanks,
Nick
Nicholas Piggin (5):
lazy tlb: introduce lazy mm refcount helper functions
lazy tlb: allow lazy tlb mm switching to be configurable
lazy tlb: shoot lazies, a non-refcounting lazy tlb option
powerpc: use lazy mm refcount helper functions
powerpc/64s: enable MMU_LAZY_TLB_SHOOTDOWN
arch/Kconfig | 30 ++++++++++
arch/arm/mach-rpc/ecard.c | 2 +-
arch/powerpc/Kconfig | 1 +
arch/powerpc/kernel/smp.c | 2 +-
arch/powerpc/mm/book3s64/radix_tlb.c | 4 +-
fs/exec.c | 4 +-
include/linux/sched/mm.h | 20 +++++++
kernel/cpu.c | 2 +-
kernel/exit.c | 2 +-
kernel/fork.c | 52 ++++++++++++++++
kernel/kthread.c | 11 ++--
kernel/sched/core.c | 88 ++++++++++++++++++++--------
kernel/sched/sched.h | 4 +-
13 files changed, 184 insertions(+), 38 deletions(-)
--
2.23.0
* [PATCH v2 1/5] lazy tlb: introduce lazy mm refcount helper functions
From: Nicholas Piggin @ 2020-12-14 6:53 UTC
To: linux-kernel
Cc: Nicholas Piggin, linux-arch, linuxppc-dev, linux-mm,
Anton Blanchard, Andy Lutomirski
Add explicit _lazy_tlb annotated functions for lazy mm refcounting.
This makes the lazy tlb mm references easier to distinguish from
normal mm references, and allows the refcounting to be removed if it
is not used.
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
arch/arm/mach-rpc/ecard.c | 2 +-
arch/powerpc/mm/book3s64/radix_tlb.c | 4 ++--
fs/exec.c | 4 ++--
include/linux/sched/mm.h | 11 +++++++++++
kernel/cpu.c | 2 +-
kernel/exit.c | 2 +-
kernel/kthread.c | 11 +++++++----
kernel/sched/core.c | 15 ++++++++-------
8 files changed, 33 insertions(+), 18 deletions(-)
diff --git a/arch/arm/mach-rpc/ecard.c b/arch/arm/mach-rpc/ecard.c
index 827b50f1c73e..1b4a41aad793 100644
--- a/arch/arm/mach-rpc/ecard.c
+++ b/arch/arm/mach-rpc/ecard.c
@@ -253,7 +253,7 @@ static int ecard_init_mm(void)
current->mm = mm;
current->active_mm = mm;
activate_mm(active_mm, mm);
- mmdrop(active_mm);
+ mmdrop_lazy_tlb(active_mm);
ecard_init_pgtables(mm);
return 0;
}
diff --git a/arch/powerpc/mm/book3s64/radix_tlb.c b/arch/powerpc/mm/book3s64/radix_tlb.c
index b487b489d4b6..74708aef333e 100644
--- a/arch/powerpc/mm/book3s64/radix_tlb.c
+++ b/arch/powerpc/mm/book3s64/radix_tlb.c
@@ -658,10 +658,10 @@ static void do_exit_flush_lazy_tlb(void *arg)
if (current->active_mm == mm) {
WARN_ON_ONCE(current->mm != NULL);
/* Is a kernel thread and is using mm as the lazy tlb */
- mmgrab(&init_mm);
+ mmgrab_lazy_tlb(&init_mm);
current->active_mm = &init_mm;
switch_mm_irqs_off(mm, &init_mm, current);
- mmdrop(mm);
+ mmdrop_lazy_tlb(mm);
}
atomic_dec(&mm->context.active_cpus);
diff --git a/fs/exec.c b/fs/exec.c
index 547a2390baf5..56fc23dcbe4d 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1028,9 +1028,9 @@ static int exec_mmap(struct mm_struct *mm)
setmax_mm_hiwater_rss(&tsk->signal->maxrss, old_mm);
mm_update_next_owner(old_mm);
mmput(old_mm);
- return 0;
+ } else {
+ mmdrop_lazy_tlb(active_mm);
}
- mmdrop(active_mm);
return 0;
}
diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h
index d5ece7a9a403..94a117160083 100644
--- a/include/linux/sched/mm.h
+++ b/include/linux/sched/mm.h
@@ -49,6 +49,17 @@ static inline void mmdrop(struct mm_struct *mm)
__mmdrop(mm);
}
+/* Helpers for lazy TLB mm refcounting */
+static inline void mmgrab_lazy_tlb(struct mm_struct *mm)
+{
+ mmgrab(mm);
+}
+
+static inline void mmdrop_lazy_tlb(struct mm_struct *mm)
+{
+ mmdrop(mm);
+}
+
/**
* mmget() - Pin the address space associated with a &struct mm_struct.
* @mm: The address space to pin.
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 2b8d7a5db383..a54cdfa08d71 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -576,7 +576,7 @@ static int finish_cpu(unsigned int cpu)
*/
if (mm != &init_mm)
idle->active_mm = &init_mm;
- mmdrop(mm);
+ mmdrop_lazy_tlb(mm);
return 0;
}
diff --git a/kernel/exit.c b/kernel/exit.c
index 1f236ed375f8..3711a74fcf4a 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -474,7 +474,7 @@ static void exit_mm(void)
__set_current_state(TASK_RUNNING);
mmap_read_lock(mm);
}
- mmgrab(mm);
+ mmgrab_lazy_tlb(mm);
BUG_ON(mm != current->active_mm);
/* more a memory barrier than a real lock */
task_lock(current);
diff --git a/kernel/kthread.c b/kernel/kthread.c
index 933a625621b8..da189e0d26ed 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -1240,14 +1240,14 @@ void kthread_use_mm(struct mm_struct *mm)
WARN_ON_ONCE(!(tsk->flags & PF_KTHREAD));
WARN_ON_ONCE(tsk->mm);
+ mmgrab(mm);
+
task_lock(tsk);
/* Hold off tlb flush IPIs while switching mm's */
local_irq_disable();
active_mm = tsk->active_mm;
- if (active_mm != mm) {
- mmgrab(mm);
+ if (active_mm != mm)
tsk->active_mm = mm;
- }
tsk->mm = mm;
switch_mm_irqs_off(active_mm, mm, tsk);
local_irq_enable();
@@ -1257,7 +1257,7 @@ void kthread_use_mm(struct mm_struct *mm)
#endif
if (active_mm != mm)
- mmdrop(active_mm);
+ mmdrop_lazy_tlb(active_mm);
to_kthread(tsk)->oldfs = force_uaccess_begin();
}
@@ -1280,10 +1280,13 @@ void kthread_unuse_mm(struct mm_struct *mm)
sync_mm_rss(mm);
local_irq_disable();
tsk->mm = NULL;
+ mmgrab_lazy_tlb(mm);
/* active_mm is still 'mm' */
enter_lazy_tlb(mm, tsk);
local_irq_enable();
task_unlock(tsk);
+
+ mmdrop(mm);
}
EXPORT_SYMBOL_GPL(kthread_unuse_mm);
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index e7e453492cff..c2f8ea43d29b 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -3629,13 +3629,14 @@ static struct rq *finish_task_switch(struct task_struct *prev)
* rq->curr, before returning to userspace, so provide them here:
*
* - a full memory barrier for {PRIVATE,GLOBAL}_EXPEDITED, implicitly
- * provided by mmdrop(),
+ * provided by mmdrop_lazy_tlb(),
* - a sync_core for SYNC_CORE.
*/
if (mm) {
membarrier_mm_sync_core_before_usermode(mm);
- mmdrop(mm);
+ mmdrop_lazy_tlb(mm);
}
+
if (unlikely(prev_state == TASK_DEAD)) {
if (prev->sched_class->task_dead)
prev->sched_class->task_dead(prev);
@@ -3739,9 +3740,9 @@ context_switch(struct rq *rq, struct task_struct *prev,
/*
* kernel -> kernel lazy + transfer active
- * user -> kernel lazy + mmgrab() active
+ * user -> kernel lazy + mmgrab_lazy_tlb() active
*
- * kernel -> user switch + mmdrop() active
+ * kernel -> user switch + mmdrop_lazy_tlb() active
* user -> user switch
*/
if (!next->mm) { // to kernel
@@ -3749,7 +3750,7 @@ context_switch(struct rq *rq, struct task_struct *prev,
next->active_mm = prev->active_mm;
if (prev->mm) // from user
- mmgrab(prev->active_mm);
+ mmgrab_lazy_tlb(prev->active_mm);
else
prev->active_mm = NULL;
} else { // to user
@@ -3765,7 +3766,7 @@ context_switch(struct rq *rq, struct task_struct *prev,
switch_mm_irqs_off(prev->active_mm, next->mm, next);
if (!prev->mm) { // from kernel
- /* will mmdrop() in finish_task_switch(). */
+ /* will mmdrop_lazy_tlb() in finish_task_switch(). */
rq->prev_mm = prev->active_mm;
prev->active_mm = NULL;
}
@@ -7206,7 +7207,7 @@ void __init sched_init(void)
/*
* The boot idle thread does lazy MMU switching as well:
*/
- mmgrab(&init_mm);
+ mmgrab_lazy_tlb(&init_mm);
enter_lazy_tlb(&init_mm, current);
/*
--
2.23.0
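A minimal sketch of the pairing the new helpers make explicit, lifted
from the do_exit_flush_lazy_tlb() hunk above (kernel context is
assumed: mm is an mm the current kernel thread holds only as its lazy
active_mm, and interrupts are off):

	/*
	 * A CPU using an mm only as its lazy TLB mm holds a _lazy_tlb
	 * reference on it; switching away transfers that role to
	 * init_mm. Plain mmgrab()/mmdrop() stay reserved for "real"
	 * references such as tsk->mm.
	 */
	mmgrab_lazy_tlb(&init_mm);	/* begin using init_mm lazily */
	current->active_mm = &init_mm;
	switch_mm_irqs_off(mm, &init_mm, current);
	mmdrop_lazy_tlb(mm);		/* done using mm lazily */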
* [PATCH v2 2/5] lazy tlb: allow lazy tlb mm switching to be configurable
From: Nicholas Piggin @ 2020-12-14 6:53 UTC
To: linux-kernel
Cc: Nicholas Piggin, linux-arch, linuxppc-dev, linux-mm,
Anton Blanchard, Andy Lutomirski
Add CONFIG_MMU_LAZY_TLB, which can be configured out to disable the
lazy tlb mechanism entirely; in that case the kernel switches to
init_mm when switching to a kernel thread.
NOMMU systems could easily go without this and save a bit of code
and the refcount atomics, because their mm switch is a no-op. They
have not been switched over by default because the arch code needs
to be audited and tested for lazy tlb mm refcounting and converted
to _lazy_tlb refcounting if necessary.
CONFIG_MMU_LAZY_TLB_REFCOUNT is also added; it must always be enabled
while CONFIG_MMU_LAZY_TLB is enabled, until the next patch, which
provides an alternate scheme.
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
arch/Kconfig | 17 +++++++++
include/linux/sched/mm.h | 13 +++++--
kernel/sched/core.c | 75 ++++++++++++++++++++++++++++++----------
kernel/sched/sched.h | 4 ++-
4 files changed, 87 insertions(+), 22 deletions(-)
diff --git a/arch/Kconfig b/arch/Kconfig
index ba4e966484ab..84faaba66364 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -430,6 +430,23 @@ config ARCH_WANT_IRQS_OFF_ACTIVATE_MM
irqs disabled over activate_mm. Architectures that do IPI based TLB
shootdowns should enable this.
+# Should make this depend on MMU, because there is little use for lazy mm switching
+# with NOMMU. Must audit NOMMU architecture code for lazy mm refcounting first.
+config MMU_LAZY_TLB
+ def_bool y
+ help
+ Enable "lazy TLB" mmu context switching for kernel threads.
+ If this is disabled then switching to a kernel thread always
+ switches to init_mm. If mm switches are inexpensive or free
+ (in the case of NOMMU) then this could be disabled.
+
+config MMU_LAZY_TLB_REFCOUNT
+ def_bool y
+ depends on MMU_LAZY_TLB
+ help
+ This must be enabled if MMU_LAZY_TLB is enabled until the next
+ patch.
+
config ARCH_HAVE_NMI_SAFE_CMPXCHG
bool
diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h
index 94a117160083..5edf8e942c84 100644
--- a/include/linux/sched/mm.h
+++ b/include/linux/sched/mm.h
@@ -52,12 +52,21 @@ static inline void mmdrop(struct mm_struct *mm)
/* Helpers for lazy TLB mm refcounting */
static inline void mmgrab_lazy_tlb(struct mm_struct *mm)
{
- mmgrab(mm);
+ if (IS_ENABLED(CONFIG_MMU_LAZY_TLB_REFCOUNT))
+ mmgrab(mm);
}
static inline void mmdrop_lazy_tlb(struct mm_struct *mm)
{
- mmdrop(mm);
+ if (IS_ENABLED(CONFIG_MMU_LAZY_TLB_REFCOUNT)) {
+ mmdrop(mm);
+ } else {
+ /*
+ * mmdrop_lazy_tlb must provide a full memory barrier, see the
+ * membarrier comment in finish_task_switch() which relies on this.
+ */
+ smp_mb();
+ }
}
/**
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index c2f8ea43d29b..9c1dc9406e4b 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -3579,7 +3579,7 @@ static struct rq *finish_task_switch(struct task_struct *prev)
__releases(rq->lock)
{
struct rq *rq = this_rq();
- struct mm_struct *mm = rq->prev_mm;
+ struct mm_struct *mm = NULL;
long prev_state;
/*
@@ -3598,7 +3598,10 @@ static struct rq *finish_task_switch(struct task_struct *prev)
current->comm, current->pid, preempt_count()))
preempt_count_set(FORK_PREEMPT_COUNT);
- rq->prev_mm = NULL;
+#ifdef CONFIG_MMU_LAZY_TLB_REFCOUNT
+ mm = rq->prev_lazy_mm;
+ rq->prev_lazy_mm = NULL;
+#endif
/*
* A task struct has one reference for the use as "current".
@@ -3722,22 +3725,10 @@ asmlinkage __visible void schedule_tail(struct task_struct *prev)
calculate_sigpending();
}
-/*
- * context_switch - switch to the new MM and the new thread's register state.
- */
-static __always_inline struct rq *
-context_switch(struct rq *rq, struct task_struct *prev,
- struct task_struct *next, struct rq_flags *rf)
+static __always_inline void
+context_switch_mm(struct rq *rq, struct task_struct *prev,
+ struct task_struct *next)
{
- prepare_task_switch(rq, prev, next);
-
- /*
- * For paravirt, this is coupled with an exit in switch_to to
- * combine the page table reload and the switch backend into
- * one hypercall.
- */
- arch_start_context_switch(prev);
-
/*
* kernel -> kernel lazy + transfer active
* user -> kernel lazy + mmgrab_lazy_tlb() active
@@ -3766,11 +3757,57 @@ context_switch(struct rq *rq, struct task_struct *prev,
switch_mm_irqs_off(prev->active_mm, next->mm, next);
if (!prev->mm) { // from kernel
- /* will mmdrop_lazy_tlb() in finish_task_switch(). */
- rq->prev_mm = prev->active_mm;
+#ifdef CONFIG_MMU_LAZY_TLB_REFCOUNT
+ /* Will mmdrop_lazy_tlb() in finish_task_switch(). */
+ rq->prev_lazy_mm = prev->active_mm;
prev->active_mm = NULL;
+#else
+ /*
+ * Without MMU_LAZY_TLB_REFCOUNT there is no lazy
+ * tracking (because no rq->prev_lazy_mm) in
+ * finish_task_switch, so no mmdrop_lazy_tlb(),
+ * so no memory barrier for membarrier (see the
+ * membarrier comment in finish_task_switch()).
+ * Do it here.
+ */
+ smp_mb();
+#endif
}
}
+}
+
+static __always_inline void
+context_switch_mm_nolazy(struct rq *rq, struct task_struct *prev,
+ struct task_struct *next)
+{
+ if (!next->mm)
+ next->active_mm = &init_mm;
+ membarrier_switch_mm(rq, prev->active_mm, next->active_mm);
+ switch_mm_irqs_off(prev->active_mm, next->active_mm, next);
+ if (!prev->mm)
+ prev->active_mm = NULL;
+}
+
+/*
+ * context_switch - switch to the new MM and the new thread's register state.
+ */
+static __always_inline struct rq *
+context_switch(struct rq *rq, struct task_struct *prev,
+ struct task_struct *next, struct rq_flags *rf)
+{
+ prepare_task_switch(rq, prev, next);
+
+ /*
+ * For paravirt, this is coupled with an exit in switch_to to
+ * combine the page table reload and the switch backend into
+ * one hypercall.
+ */
+ arch_start_context_switch(prev);
+
+ if (IS_ENABLED(CONFIG_MMU_LAZY_TLB))
+ context_switch_mm(rq, prev, next);
+ else
+ context_switch_mm_nolazy(rq, prev, next);
rq->clock_update_flags &= ~(RQCF_ACT_SKIP|RQCF_REQ_SKIP);
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index df80bfcea92e..3b72aec5a2f2 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -950,7 +950,9 @@ struct rq {
struct task_struct *idle;
struct task_struct *stop;
unsigned long next_balance;
- struct mm_struct *prev_mm;
+#ifdef CONFIG_MMU_LAZY_TLB_REFCOUNT
+ struct mm_struct *prev_lazy_mm;
+#endif
unsigned int clock_update_flags;
u64 clock;
--
2.23.0
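Since IS_ENABLED() is a compile-time constant, the compiler discards
the disabled branch entirely. With CONFIG_MMU_LAZY_TLB_REFCOUNT=n the
drop helper above therefore reduces to the following (a sketch of the
effective code, not a literal definition from the patch):

	static inline void mmdrop_lazy_tlb(struct mm_struct *mm)
	{
		/*
		 * Refcount compiled out; only the full memory barrier
		 * that membarrier relies on (see the comment in
		 * finish_task_switch()) remains.
		 */
		smp_mb();
	}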
* [PATCH v2 3/5] lazy tlb: shoot lazies, a non-refcounting lazy tlb option
From: Nicholas Piggin @ 2020-12-14 6:53 UTC
To: linux-kernel
Cc: Nicholas Piggin, linux-arch, linuxppc-dev, linux-mm,
Anton Blanchard, Andy Lutomirski
On big systems, the mm refcount can become highly contended when doing
a lot of context switching with threaded applications (particularly
switching between the idle thread and an application thread).
Abandoning lazy tlb slows switching down quite a bit in the important
user->idle->user cases, so instead implement a non-refcounted scheme
that causes __mmdrop() to IPI all CPUs in the mm_cpumask and shoot down
any remaining lazy ones.
Shootdown IPIs are a concern, but they have not been observed to be
a big problem with this scheme (the powerpc implementation generated
314 additional interrupts on a 144 CPU system during a kernel compile).
There are a number of strategies that could be employed to reduce IPIs
if they turn out to be a problem for some workload.
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
arch/Kconfig | 17 +++++++++++++++--
kernel/fork.c | 52 +++++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 67 insertions(+), 2 deletions(-)
diff --git a/arch/Kconfig b/arch/Kconfig
index 84faaba66364..e69c974369cc 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -443,9 +443,22 @@ config MMU_LAZY_TLB
config MMU_LAZY_TLB_REFCOUNT
def_bool y
depends on MMU_LAZY_TLB
+ depends on !MMU_LAZY_TLB_SHOOTDOWN
help
- This must be enabled if MMU_LAZY_TLB is enabled until the next
- patch.
+ This refcounts the mm that is used as the lazy TLB mm when switching
+ switching to a kernel thread.
+
+config MMU_LAZY_TLB_SHOOTDOWN
+ bool
+ depends on MMU_LAZY_TLB
+ help
+ Instead of refcounting the "lazy tlb" mm struct, which can cause
+ contention with multi-threaded apps on large multiprocessor systems,
+ this option causes __mmdrop to IPI all CPUs in the mm_cpumask and
+ switch to init_mm if they were using the to-be-freed mm as the lazy
+ tlb. To implement this, architectures must use _lazy_tlb variants of
+ mm refcounting, and mm_cpumask must include at least all possible
+ CPUs in which mm might be lazy.
config ARCH_HAVE_NMI_SAFE_CMPXCHG
bool
diff --git a/kernel/fork.c b/kernel/fork.c
index 6d266388d380..74b972d2d8a9 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -669,6 +669,53 @@ static void check_mm(struct mm_struct *mm)
#define allocate_mm() (kmem_cache_alloc(mm_cachep, GFP_KERNEL))
#define free_mm(mm) (kmem_cache_free(mm_cachep, (mm)))
+static void do_shoot_lazy_tlb(void *arg)
+{
+ struct mm_struct *mm = arg;
+
+ if (current->active_mm == mm) {
+ WARN_ON_ONCE(current->mm);
+ current->active_mm = &init_mm;
+ switch_mm(mm, &init_mm, current);
+ }
+}
+
+static void do_check_lazy_tlb(void *arg)
+{
+ struct mm_struct *mm = arg;
+
+ WARN_ON_ONCE(current->active_mm == mm);
+}
+
+static void shoot_lazy_tlbs(struct mm_struct *mm)
+{
+ if (IS_ENABLED(CONFIG_MMU_LAZY_TLB_SHOOTDOWN)) {
+ /*
+ * IPI overheads have not been found to be expensive, but they could
+ * be reduced in a number of possible ways, for example (in
+ * roughly increasing order of complexity):
+ * - A batch of mms requiring IPIs could be gathered and freed
+ * at once.
+ * - CPUs could store their active mm somewhere that can be
+ * remotely checked without a lock, to filter out
+ * false-positives in the cpumask.
+ * - After mm_users or mm_count reaches zero, switching away
+ * from the mm could clear mm_cpumask to reduce some IPIs
+ * (some batching or delaying would help).
+ * - A delayed freeing and RCU-like quiescing sequence based on
+ * mm switching to avoid IPIs completely.
+ */
+ on_each_cpu_mask(mm_cpumask(mm), do_shoot_lazy_tlb, (void *)mm, 1);
+ if (IS_ENABLED(CONFIG_DEBUG_VM))
+ on_each_cpu(do_check_lazy_tlb, (void *)mm, 1);
+ } else {
+ /*
+ * In this case, lazy tlb mms are refcounted and would not reach
+ * __mmdrop until all CPUs have switched away and mmdrop()ed.
+ */
+ }
+}
+
/*
* Called when the last reference to the mm
* is dropped: either by a lazy thread or by
@@ -678,7 +725,12 @@ void __mmdrop(struct mm_struct *mm)
{
BUG_ON(mm == &init_mm);
WARN_ON_ONCE(mm == current->mm);
+
+ /* Ensure no CPUs are using this as their lazy tlb mm */
+ shoot_lazy_tlbs(mm);
+
WARN_ON_ONCE(mm == current->active_mm);
+
mm_free_pgd(mm);
destroy_context(mm);
mmu_notifier_subscriptions_destroy(mm);
--
2.23.0
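For reference, the shape of the IPI call used by shoot_lazy_tlbs()
above (this restates the patch, it is not new code):

	/*
	 * With wait == 1, on_each_cpu_mask() returns only after every
	 * CPU in mm_cpumask(mm) has run do_shoot_lazy_tlb(), so no CPU
	 * can still hold mm as its lazy active_mm when __mmdrop()
	 * continues and frees the page tables. This is why mm_cpumask
	 * must cover every CPU where mm might be lazy.
	 */
	on_each_cpu_mask(mm_cpumask(mm), do_shoot_lazy_tlb, (void *)mm, 1);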
* [PATCH v2 4/5] powerpc: use lazy mm refcount helper functions
From: Nicholas Piggin @ 2020-12-14 6:53 UTC
To: linux-kernel
Cc: Nicholas Piggin, linux-arch, linuxppc-dev, linux-mm,
Anton Blanchard, Andy Lutomirski
Use _lazy_tlb functions for lazy mm refcounting in powerpc, to prepare
to move to MMU_LAZY_TLB_SHOOTDOWN.
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
arch/powerpc/kernel/smp.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index 8c2857cbd960..93c0eaa6f4bf 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -1395,7 +1395,7 @@ void start_secondary(void *unused)
{
unsigned int cpu = raw_smp_processor_id();
- mmgrab(&init_mm);
+ mmgrab_lazy_tlb(&init_mm);
current->active_mm = &init_mm;
smp_store_cpu_info(cpu);
--
2.23.0
* [PATCH v2 5/5] powerpc/64s: enable MMU_LAZY_TLB_SHOOTDOWN
From: Nicholas Piggin @ 2020-12-14 6:53 UTC
To: linux-kernel
Cc: Nicholas Piggin, linux-arch, linuxppc-dev, linux-mm,
Anton Blanchard, Andy Lutomirski
On a 16-socket 192-core POWER8 system, a context switching benchmark
with as many software threads as CPUs (so each switch will go in and
out of idle), upstream can achieve a rate of about 1 million context
switches per second. After this patch it goes up to 118 million.
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
arch/powerpc/Kconfig | 1 +
1 file changed, 1 insertion(+)
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 5181872f9452..356138bdb5bb 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -232,6 +232,7 @@ config PPC
select HAVE_PERF_USER_STACK_DUMP
select MMU_GATHER_RCU_TABLE_FREE
select MMU_GATHER_PAGE_SIZE
+ select MMU_LAZY_TLB_SHOOTDOWN if PPC_BOOK3S_64
select HAVE_REGS_AND_STACK_ACCESS_API
select HAVE_RELIABLE_STACKTRACE if PPC_BOOK3S_64 && CPU_LITTLE_ENDIAN
select HAVE_SYSCALL_TRACEPOINTS
--
2.23.0
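For context on the benchmark, a minimal sketch of this kind of
context-switch microbenchmark: a pair of processes bounce a byte over
pipes, so each handoff sleeps and wakes (run one pair per CPU to get
the user->idle->user pattern). Illustrative only; this is not the
harness that produced the numbers above:

	#include <stdio.h>
	#include <time.h>
	#include <unistd.h>

	int main(void)
	{
		int ab[2], ba[2];	/* parent->child, child->parent */
		char c = 0;
		long i, iters = 1000000;
		struct timespec t0, t1;
		double secs;

		if (pipe(ab) || pipe(ba))
			return 1;

		if (fork() == 0) {
			for (i = 0; i < iters; i++) {
				read(ab[0], &c, 1);	/* sleep until parent writes */
				write(ba[1], &c, 1);	/* wake parent */
			}
			_exit(0);
		}

		clock_gettime(CLOCK_MONOTONIC, &t0);
		for (i = 0; i < iters; i++) {
			write(ab[1], &c, 1);		/* wake child */
			read(ba[0], &c, 1);		/* sleep until child replies */
		}
		clock_gettime(CLOCK_MONOTONIC, &t1);

		secs = (t1.tv_sec - t0.tv_sec) +
		       (t1.tv_nsec - t0.tv_nsec) / 1e9;
		/* each round trip is roughly two context switches */
		printf("%.0f switches/sec\n", 2 * iters / secs);
		return 0;
	}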
* Re: [PATCH v2 3/5] lazy tlb: shoot lazies, a non-refcounting lazy tlb option
From: Randy Dunlap @ 2020-12-14 7:04 UTC
To: Nicholas Piggin, linux-kernel
Cc: linux-arch, linuxppc-dev, linux-mm, Anton Blanchard, Andy Lutomirski
On 12/13/20 10:53 PM, Nicholas Piggin wrote:
> diff --git a/arch/Kconfig b/arch/Kconfig
> index 84faaba66364..e69c974369cc 100644
> --- a/arch/Kconfig
> +++ b/arch/Kconfig
> @@ -443,9 +443,22 @@ config MMU_LAZY_TLB
> config MMU_LAZY_TLB_REFCOUNT
> def_bool y
> depends on MMU_LAZY_TLB
> + depends on !MMU_LAZY_TLB_SHOOTDOWN
> help
> - This must be enabled if MMU_LAZY_TLB is enabled until the next
> - patch.
> + This refcounts the mm that is used as the lazy TLB mm when switching
> + switching to a kernel thread.
duplicate "switching".
> +
> +config MMU_LAZY_TLB_SHOOTDOWN
> + bool
> + depends on MMU_LAZY_TLB
> + help
> + Instead of refcounting the "lazy tlb" mm struct, which can cause
> + contention with multi-threaded apps on large multiprocessor systems,
> + this option causes __mmdrop to IPI all CPUs in the mm_cpumask and
> + switch to init_mm if they were using the to-be-freed mm as the lazy
> + tlb. To implement this, architectures must use _lazy_tlb variants of
> + mm refcounting, and mm_cpumask must include at least all possible
> + CPUs in which mm might be lazy.
>
> config ARCH_HAVE_NMI_SAFE_CMPXCHG
> bool
--
* Re: [PATCH v2 0/5] shoot lazy tlbs
From: Nicholas Piggin @ 2021-02-04 8:21 UTC
To: linux-kernel
Cc: Anton Blanchard, linux-arch, linux-mm, linuxppc-dev, Andy Lutomirski
I'll ask Andrew to put this in -mm if no objections.
The series now doesn't touch other archs in non-trivial ways, and core
code is functionally changed little, or not at all, when the option is
not selected, so it's actually pretty simple aside from the powerpc
change.
Thanks,
Nick
Excerpts from Nicholas Piggin's message of December 14, 2020 4:53 pm:
> This is another rebase, now on top of mainline (the asm-generic tree
> is no longer needed) and without any x86 or membarrier changes. This
> makes the series far smaller, more manageable, and free of the
> controversial bits.
>
> Thanks,
> Nick
>
> Nicholas Piggin (5):
> lazy tlb: introduce lazy mm refcount helper functions
> lazy tlb: allow lazy tlb mm switching to be configurable
> lazy tlb: shoot lazies, a non-refcounting lazy tlb option
> powerpc: use lazy mm refcount helper functions
> powerpc/64s: enable MMU_LAZY_TLB_SHOOTDOWN
>
> arch/Kconfig | 30 ++++++++++
> arch/arm/mach-rpc/ecard.c | 2 +-
> arch/powerpc/Kconfig | 1 +
> arch/powerpc/kernel/smp.c | 2 +-
> arch/powerpc/mm/book3s64/radix_tlb.c | 4 +-
> fs/exec.c | 4 +-
> include/linux/sched/mm.h | 20 +++++++
> kernel/cpu.c | 2 +-
> kernel/exit.c | 2 +-
> kernel/fork.c | 52 ++++++++++++++++
> kernel/kthread.c | 11 ++--
> kernel/sched/core.c | 88 ++++++++++++++++++++--------
> kernel/sched/sched.h | 4 +-
> 13 files changed, 184 insertions(+), 38 deletions(-)
>
> --
> 2.23.0
>
>