* [patch 0/2] posix-timers: avoid nohz_full= IPIs via task cpu masks @ 2020-08-25 18:41 Marcelo Tosatti 2020-08-25 18:41 ` [patch 1/2] nohz: try to avoid IPI when configuring per-CPU posix timer Marcelo Tosatti 2020-08-25 18:41 ` [patch 2/2] nohz: try to avoid IPI when setting tick dependency for task Marcelo Tosatti 0 siblings, 2 replies; 7+ messages in thread From: Marcelo Tosatti @ 2020-08-25 18:41 UTC (permalink / raw) To: Frederic Weisbecker; +Cc: linux-kernel This patchset avoids sending IPIs to nohz_full= CPUs when the task's allowed CPU mask does not intersect the set of nohz_full= CPUs (i.e. the intersection is empty). See the individual patches for details. ^ permalink raw reply [flat|nested] 7+ messages in thread
* [patch 1/2] nohz: try to avoid IPI when configuring per-CPU posix timer 2020-08-25 18:41 [patch 0/2] posix-timers: avoid nohz_full= IPIs via task cpu masks Marcelo Tosatti @ 2020-08-25 18:41 ` Marcelo Tosatti 2020-09-01 23:38 ` Frederic Weisbecker 2020-08-25 18:41 ` [patch 2/2] nohz: try to avoid IPI when setting tick dependency for task Marcelo Tosatti 1 sibling, 1 reply; 7+ messages in thread From: Marcelo Tosatti @ 2020-08-25 18:41 UTC (permalink / raw) To: Frederic Weisbecker; +Cc: linux-kernel, Marcelo Tosatti When enabling per-CPU posix timers, an IPI to nohz_full CPUs might be performed (to re-read the dependencies and possibly not re-enter nohz_full on a given CPU). A common case is for applications that run on nohz_full= CPUs to not use POSIX timers (eg DPDK). This patch skips the IPI in case the task allowed mask does not intersect with nohz_full= CPU mask, when going through tick_nohz_dep_set_signal. This reduces interruptions to nohz_full= CPUs. Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com> --- include/linux/tick.h | 11 +++++++---- kernel/time/posix-cpu-timers.c | 4 ++-- kernel/time/tick-sched.c | 27 +++++++++++++++++++++++++-- 3 files changed, 34 insertions(+), 8 deletions(-) Index: linux-2.6/include/linux/tick.h =================================================================== --- linux-2.6.orig/include/linux/tick.h +++ linux-2.6/include/linux/tick.h @@ -207,7 +207,8 @@ extern void tick_nohz_dep_set_task(struc enum tick_dep_bits bit); extern void tick_nohz_dep_clear_task(struct task_struct *tsk, enum tick_dep_bits bit); -extern void tick_nohz_dep_set_signal(struct signal_struct *signal, +extern void tick_nohz_dep_set_signal(struct task_struct *tsk, + struct signal_struct *signal, enum tick_dep_bits bit); extern void tick_nohz_dep_clear_signal(struct signal_struct *signal, enum tick_dep_bits bit); @@ -252,11 +253,12 @@ static inline void tick_dep_clear_task(s if (tick_nohz_full_enabled()) tick_nohz_dep_clear_task(tsk, bit); } -static inline 
void tick_dep_set_signal(struct signal_struct *signal, +static inline void tick_dep_set_signal(struct task_struct *tsk, + struct signal_struct *signal, enum tick_dep_bits bit) { if (tick_nohz_full_enabled()) - tick_nohz_dep_set_signal(signal, bit); + tick_nohz_dep_set_signal(tsk, signal, bit); } static inline void tick_dep_clear_signal(struct signal_struct *signal, enum tick_dep_bits bit) @@ -284,7 +286,8 @@ static inline void tick_dep_set_task(str enum tick_dep_bits bit) { } static inline void tick_dep_clear_task(struct task_struct *tsk, enum tick_dep_bits bit) { } -static inline void tick_dep_set_signal(struct signal_struct *signal, +static inline void tick_dep_set_signal(struct task_struct *tsk, + struct signal_struct *signal, enum tick_dep_bits bit) { } static inline void tick_dep_clear_signal(struct signal_struct *signal, enum tick_dep_bits bit) { } Index: linux-2.6/kernel/time/posix-cpu-timers.c =================================================================== --- linux-2.6.orig/kernel/time/posix-cpu-timers.c +++ linux-2.6/kernel/time/posix-cpu-timers.c @@ -523,7 +523,7 @@ static void arm_timer(struct k_itimer *t if (CPUCLOCK_PERTHREAD(timer->it_clock)) tick_dep_set_task(p, TICK_DEP_BIT_POSIX_TIMER); else - tick_dep_set_signal(p->signal, TICK_DEP_BIT_POSIX_TIMER); + tick_dep_set_signal(p, p->signal, TICK_DEP_BIT_POSIX_TIMER); } /* @@ -1358,7 +1358,7 @@ void set_process_cpu_timer(struct task_s if (*newval < *nextevt) *nextevt = *newval; - tick_dep_set_signal(tsk->signal, TICK_DEP_BIT_POSIX_TIMER); + tick_dep_set_signal(tsk, tsk->signal, TICK_DEP_BIT_POSIX_TIMER); } static int do_cpu_nanosleep(const clockid_t which_clock, int flags, Index: linux-2.6/kernel/time/tick-sched.c =================================================================== --- linux-2.6.orig/kernel/time/tick-sched.c +++ linux-2.6/kernel/time/tick-sched.c @@ -302,6 +302,27 @@ static void tick_nohz_dep_set_all(atomic } /* + * Set bit on nohz full dependency, kicking all cpus + * only if task 
can run on nohz full CPUs. + */ +static void tick_nohz_dep_set_all_cond(struct task_struct *tsk, + atomic_t *dep, + enum tick_dep_bits bit) +{ + int prev; + unsigned long flags; + + prev = atomic_fetch_or(BIT(bit), dep); + if (prev) + return; + + raw_spin_lock_irqsave(&tsk->pi_lock, flags); + if (cpumask_intersects(&tsk->cpus_mask, tick_nohz_full_mask)) + tick_nohz_full_kick_all(); + raw_spin_unlock_irqrestore(&tsk->pi_lock, flags); +} + +/* * Set a global tick dependency. Used by perf events that rely on freq and * by unstable clock. */ @@ -382,9 +403,11 @@ EXPORT_SYMBOL_GPL(tick_nohz_dep_clear_ta * Set a per-taskgroup tick dependency. Posix CPU timers need this in order to elapse * per process timers. */ -void tick_nohz_dep_set_signal(struct signal_struct *sig, enum tick_dep_bits bit) +void tick_nohz_dep_set_signal(struct task_struct *tsk, + struct signal_struct *sig, + enum tick_dep_bits bit) { - tick_nohz_dep_set_all(&sig->tick_dep_mask, bit); + tick_nohz_dep_set_all_cond(tsk, &sig->tick_dep_mask, bit); } void tick_nohz_dep_clear_signal(struct signal_struct *sig, enum tick_dep_bits bit) ^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [patch 1/2] nohz: try to avoid IPI when configuring per-CPU posix timer 2020-08-25 18:41 ` [patch 1/2] nohz: try to avoid IPI when configuring per-CPU posix timer Marcelo Tosatti @ 2020-09-01 23:38 ` Frederic Weisbecker 2020-09-02 19:42 ` Marcelo Tosatti 0 siblings, 1 reply; 7+ messages in thread From: Frederic Weisbecker @ 2020-09-01 23:38 UTC (permalink / raw) To: Marcelo Tosatti; +Cc: linux-kernel On Tue, Aug 25, 2020 at 03:41:48PM -0300, Marcelo Tosatti wrote: > When enabling per-CPU posix timers, an IPI to nohz_full CPUs might be > performed (to re-read the dependencies and possibly not re-enter > nohz_full on a given CPU). > > A common case is for applications that run on nohz_full= CPUs > to not use POSIX timers (eg DPDK). This patch skips the IPI > in case the task allowed mask does not intersect with nohz_full= CPU mask, > when going through tick_nohz_dep_set_signal. > > This reduces interruptions to nohz_full= CPUs. > > Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com> [...] > /* > + * Set bit on nohz full dependency, kicking all cpus > + * only if task can run on nohz full CPUs. > + */ > +static void tick_nohz_dep_set_all_cond(struct task_struct *tsk, > + atomic_t *dep, > + enum tick_dep_bits bit) > +{ > + int prev; > + unsigned long flags; > + > + prev = atomic_fetch_or(BIT(bit), dep); > + if (prev) > + return; > + > + raw_spin_lock_irqsave(&tsk->pi_lock, flags); > + if (cpumask_intersects(&tsk->cpus_mask, tick_nohz_full_mask)) > + tick_nohz_full_kick_all(); So that's for one task but what about the other threads in that process? We are setting the tick dependency on all tasks sharing that struct signal. Thanks. > + raw_spin_unlock_irqrestore(&tsk->pi_lock, flags); > +} > + ^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [patch 1/2] nohz: try to avoid IPI when configuring per-CPU posix timer 2020-09-01 23:38 ` Frederic Weisbecker @ 2020-09-02 19:42 ` Marcelo Tosatti 0 siblings, 0 replies; 7+ messages in thread From: Marcelo Tosatti @ 2020-09-02 19:42 UTC (permalink / raw) To: Frederic Weisbecker; +Cc: linux-kernel On Wed, Sep 02, 2020 at 01:38:59AM +0200, Frederic Weisbecker wrote: > On Tue, Aug 25, 2020 at 03:41:48PM -0300, Marcelo Tosatti wrote: > > When enabling per-CPU posix timers, an IPI to nohz_full CPUs might be > > performed (to re-read the dependencies and possibly not re-enter > > nohz_full on a given CPU). > > > > A common case is for applications that run on nohz_full= CPUs > > to not use POSIX timers (eg DPDK). This patch skips the IPI > > in case the task allowed mask does not intersect with nohz_full= CPU mask, > > when going through tick_nohz_dep_set_signal. > > > > This reduces interruptions to nohz_full= CPUs. > > > > Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com> > [...] > > /* > > + * Set bit on nohz full dependency, kicking all cpus > > + * only if task can run on nohz full CPUs. > > + */ > > +static void tick_nohz_dep_set_all_cond(struct task_struct *tsk, > > + atomic_t *dep, > > + enum tick_dep_bits bit) > > +{ > > + int prev; > > + unsigned long flags; > > + > > + prev = atomic_fetch_or(BIT(bit), dep); > > + if (prev) > > + return; > > + > > + raw_spin_lock_irqsave(&tsk->pi_lock, flags); > > + if (cpumask_intersects(&tsk->cpus_mask, tick_nohz_full_mask)) > > + tick_nohz_full_kick_all(); > > So that's for one task but what about the other threads in that > process? We are setting the tick dependency on all tasks sharing that > struct signal. Hi Frederic, Yep, fixing in -v2, thanks. ^ permalink raw reply [flat|nested] 7+ messages in thread
* [patch 2/2] nohz: try to avoid IPI when setting tick dependency for task 2020-08-25 18:41 [patch 0/2] posix-timers: avoid nohz_full= IPIs via task cpu masks Marcelo Tosatti 2020-08-25 18:41 ` [patch 1/2] nohz: try to avoid IPI when configuring per-CPU posix timer Marcelo Tosatti @ 2020-08-25 18:41 ` Marcelo Tosatti 2020-09-03 15:01 ` Frederic Weisbecker 1 sibling, 1 reply; 7+ messages in thread From: Marcelo Tosatti @ 2020-08-25 18:41 UTC (permalink / raw) To: Frederic Weisbecker; +Cc: linux-kernel When enabling per-CPU posix timers, an IPI to nohz_full CPUs might be performed (to re-read the dependencies and possibly not re-enter nohz_full on a given CPU). A common case is for applications that run on nohz_full= CPUs to not use POSIX timers (eg DPDK). This patch optimizes tick_nohz_dep_set_task to avoid kicking all nohz_full= CPUs in case the task allowed mask does not intersect with nohz_full= CPU mask, when going through tick_nohz_dep_set_task. This reduces interruptions to nohz_full= CPUs. --- kernel/time/tick-sched.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) Index: linux-2.6/kernel/time/tick-sched.c =================================================================== --- linux-2.6.orig/kernel/time/tick-sched.c +++ linux-2.6/kernel/time/tick-sched.c @@ -383,11 +383,16 @@ void tick_nohz_dep_set_task(struct task_ tick_nohz_full_kick(); preempt_enable(); } else { + unsigned long flags; + /* * Some future tick_nohz_full_kick_task() - * should optimize this. + * should further optimize this. */ - tick_nohz_full_kick_all(); + raw_spin_lock_irqsave(&tsk->pi_lock, flags); + if (cpumask_intersects(&tsk->cpus_mask, tick_nohz_full_mask)) + tick_nohz_full_kick_all(); + raw_spin_unlock_irqrestore(&tsk->pi_lock, flags); } } } ^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [patch 2/2] nohz: try to avoid IPI when setting tick dependency for task 2020-08-25 18:41 ` [patch 2/2] nohz: try to avoid IPI when setting tick dependency for task Marcelo Tosatti @ 2020-09-03 15:01 ` Frederic Weisbecker 2020-09-10 18:51 ` Marcelo Tosatti 0 siblings, 1 reply; 7+ messages in thread From: Frederic Weisbecker @ 2020-09-03 15:01 UTC (permalink / raw) To: Marcelo Tosatti, Peter Zijlstra; +Cc: linux-kernel On Tue, Aug 25, 2020 at 03:41:49PM -0300, Marcelo Tosatti wrote: > When enabling per-CPU posix timers, an IPI to nohz_full CPUs might be > performed (to re-read the dependencies and possibly not re-enter > nohz_full on a given CPU). > > A common case is for applications that run on nohz_full= CPUs > to not use POSIX timers (eg DPDK). > > This patch optimizes tick_nohz_dep_set_task to avoid kicking > all nohz_full= CPUs in case the task allowed mask does not > intersect with nohz_full= CPU mask, > when going through tick_nohz_dep_set_task. > > This reduces interruptions to nohz_full= CPUs. > > --- > kernel/time/tick-sched.c | 9 +++++++-- > 1 file changed, 7 insertions(+), 2 deletions(-) > > Index: linux-2.6/kernel/time/tick-sched.c > =================================================================== > --- linux-2.6.orig/kernel/time/tick-sched.c > +++ linux-2.6/kernel/time/tick-sched.c > @@ -383,11 +383,16 @@ void tick_nohz_dep_set_task(struct task_ > tick_nohz_full_kick(); > preempt_enable(); > } else { > + unsigned long flags; > + > /* > * Some future tick_nohz_full_kick_task() > - * should optimize this. > + * should further optimize this. 
> */ > - tick_nohz_full_kick_all(); > + raw_spin_lock_irqsave(&tsk->pi_lock, flags); > + if (cpumask_intersects(&tsk->cpus_mask, tick_nohz_full_mask)) > + tick_nohz_full_kick_all(); > + raw_spin_unlock_irqrestore(&tsk->pi_lock, flags); > } > } > } > > Not long ago, Peterz suggested that we simply do: diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index f0199a4ba1ad..42ce8e458013 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -357,17 +357,26 @@ EXPORT_SYMBOL_GPL(tick_nohz_dep_clear_cpu); void tick_nohz_dep_set_task(struct task_struct *tsk, enum tick_dep_bits bit) { if (!atomic_fetch_or(BIT(bit), &tsk->tick_dep_mask)) { - if (tsk == current) { - preempt_disable(); - tick_nohz_full_kick(); - preempt_enable(); - } else { - /* - * Some future tick_nohz_full_kick_task() - * should optimize this. - */ - tick_nohz_full_kick_all(); - } + int cpu = task_cpu(tsk); + + /* + * If the task concurrently migrates to another cpu, + * we guarantee it sees the new tick dependency upon + * schedule. + * + * set_task_cpu(p, cpu); + * STORE p->cpu = @cpu + * __schedule() (switch to task 'p') + * LOCK rq->lock + * smp_mb__after_spin_lock() STORE p->tick_dep_mask + * tick_nohz_task_switch() smp_mb() (atomic_fetch_or()) + * LOAD p->tick_dep_mask LOAD p->cpu + */ + + preempt_disable(); + if (cpu_online(cpu)) + tick_nohz_full_kick_cpu(cpu); + preempt_enable(); } } EXPORT_SYMBOL_GPL(tick_nohz_dep_set_task); ^ permalink raw reply related [flat|nested] 7+ messages in thread
* Re: [patch 2/2] nohz: try to avoid IPI when setting tick dependency for task 2020-09-03 15:01 ` Frederic Weisbecker @ 2020-09-10 18:51 ` Marcelo Tosatti 0 siblings, 0 replies; 7+ messages in thread From: Marcelo Tosatti @ 2020-09-10 18:51 UTC (permalink / raw) To: Frederic Weisbecker; +Cc: Peter Zijlstra, linux-kernel On Thu, Sep 03, 2020 at 05:01:53PM +0200, Frederic Weisbecker wrote: > On Tue, Aug 25, 2020 at 03:41:49PM -0300, Marcelo Tosatti wrote: > > When enabling per-CPU posix timers, an IPI to nohz_full CPUs might be > > performed (to re-read the dependencies and possibly not re-enter > > nohz_full on a given CPU). > > > > A common case is for applications that run on nohz_full= CPUs > > to not use POSIX timers (eg DPDK). > > > > This patch optimizes tick_nohz_dep_set_task to avoid kicking > > all nohz_full= CPUs in case the task allowed mask does not > > intersect with nohz_full= CPU mask, > > when going through tick_nohz_dep_set_task. > > > > This reduces interruptions to nohz_full= CPUs. > > > > --- > > kernel/time/tick-sched.c | 9 +++++++-- > > 1 file changed, 7 insertions(+), 2 deletions(-) > > > > Index: linux-2.6/kernel/time/tick-sched.c > > =================================================================== > > --- linux-2.6.orig/kernel/time/tick-sched.c > > +++ linux-2.6/kernel/time/tick-sched.c > > @@ -383,11 +383,16 @@ void tick_nohz_dep_set_task(struct task_ > > tick_nohz_full_kick(); > > preempt_enable(); > > } else { > > + unsigned long flags; > > + > > /* > > * Some future tick_nohz_full_kick_task() > > - * should optimize this. > > + * should further optimize this. 
> > */ > > - tick_nohz_full_kick_all(); > > + raw_spin_lock_irqsave(&tsk->pi_lock, flags); > > + if (cpumask_intersects(&tsk->cpus_mask, tick_nohz_full_mask)) > > + tick_nohz_full_kick_all(); > > + raw_spin_unlock_irqrestore(&tsk->pi_lock, flags); > > } > > } > > } > > > > > > Not long ago, Peterz suggested that we simply do: > > diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c > index f0199a4ba1ad..42ce8e458013 100644 > --- a/kernel/time/tick-sched.c > +++ b/kernel/time/tick-sched.c > @@ -357,17 +357,26 @@ EXPORT_SYMBOL_GPL(tick_nohz_dep_clear_cpu); > void tick_nohz_dep_set_task(struct task_struct *tsk, enum tick_dep_bits bit) > { > if (!atomic_fetch_or(BIT(bit), &tsk->tick_dep_mask)) { > - if (tsk == current) { > - preempt_disable(); > - tick_nohz_full_kick(); > - preempt_enable(); > - } else { > - /* > - * Some future tick_nohz_full_kick_task() > - * should optimize this. > - */ > - tick_nohz_full_kick_all(); > - } > + int cpu = task_cpu(tsk); > + > + /* > + * If the task concurrently migrates to another cpu, > + * we guarantee it sees the new tick dependency upon > + * schedule. > + * > + * set_task_cpu(p, cpu); > + * STORE p->cpu = @cpu > + * __schedule() (switch to task 'p') > + * LOCK rq->lock > + * smp_mb__after_spin_lock() STORE p->tick_dep_mask > + * tick_nohz_task_switch() smp_mb() (atomic_fetch_or()) > + * LOAD p->tick_dep_mask LOAD p->cpu > + */ > + > + preempt_disable(); > + if (cpu_online(cpu)) > + tick_nohz_full_kick_cpu(cpu); > + preempt_enable(); > } > } > EXPORT_SYMBOL_GPL(tick_nohz_dep_set_task); This can also be used for the signal case... thanks. ^ permalink raw reply [flat|nested] 7+ messages in thread
end of thread, other threads:[~2020-09-10 18:54 UTC | newest] Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed) -- links below jump to the message on this page -- 2020-08-25 18:41 [patch 0/2] posix-timers: avoid nohz_full= IPIs via task cpu masks Marcelo Tosatti 2020-08-25 18:41 ` [patch 1/2] nohz: try to avoid IPI when configuring per-CPU posix timer Marcelo Tosatti 2020-09-01 23:38 ` Frederic Weisbecker 2020-09-02 19:42 ` Marcelo Tosatti 2020-08-25 18:41 ` [patch 2/2] nohz: try to avoid IPI when setting tick dependency for task Marcelo Tosatti 2020-09-03 15:01 ` Frederic Weisbecker 2020-09-10 18:51 ` Marcelo Tosatti
This is a public inbox; see the mirroring instructions for how to clone and mirror all data and code used for this inbox, as well as URLs for NNTP newsgroup(s).