linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [patch 0/2] posix-timers: avoid nohz_full= IPIs via task cpu masks
@ 2020-08-25 18:41 Marcelo Tosatti
  2020-08-25 18:41 ` [patch 1/2] nohz: try to avoid IPI when configuring per-CPU posix timer Marcelo Tosatti
  2020-08-25 18:41 ` [patch 2/2] nohz: try to avoid IPI when setting tick dependency for task Marcelo Tosatti
  0 siblings, 2 replies; 7+ messages in thread
From: Marcelo Tosatti @ 2020-08-25 18:41 UTC (permalink / raw)
  To: Frederic Weisbecker; +Cc: linux-kernel

This patchset avoids sending IPIs to nohz_full= CPUs when the set of
nohz_full= CPUs and the task's allowed CPUs do not intersect.

See individual patches for details.



^ permalink raw reply	[flat|nested] 7+ messages in thread

* [patch 1/2] nohz: try to avoid IPI when configuring per-CPU posix timer
  2020-08-25 18:41 [patch 0/2] posix-timers: avoid nohz_full= IPIs via task cpu masks Marcelo Tosatti
@ 2020-08-25 18:41 ` Marcelo Tosatti
  2020-09-01 23:38   ` Frederic Weisbecker
  2020-08-25 18:41 ` [patch 2/2] nohz: try to avoid IPI when setting tick dependency for task Marcelo Tosatti
  1 sibling, 1 reply; 7+ messages in thread
From: Marcelo Tosatti @ 2020-08-25 18:41 UTC (permalink / raw)
  To: Frederic Weisbecker; +Cc: linux-kernel, Marcelo Tosatti

When enabling per-CPU posix timers, an IPI to nohz_full CPUs might be
performed (to re-read the dependencies and possibly not re-enter
nohz_full on a given CPU).

A common case is for applications that run on nohz_full= CPUs
to not use POSIX timers (e.g. DPDK). This patch skips the IPI
when the task's allowed CPU mask does not intersect with the nohz_full=
CPU mask, for callers going through tick_nohz_dep_set_signal.

This reduces interruptions to nohz_full= CPUs.

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>

---
 include/linux/tick.h           |   11 +++++++----
 kernel/time/posix-cpu-timers.c |    4 ++--
 kernel/time/tick-sched.c       |   27 +++++++++++++++++++++++++--
 3 files changed, 34 insertions(+), 8 deletions(-)

Index: linux-2.6/include/linux/tick.h
===================================================================
--- linux-2.6.orig/include/linux/tick.h
+++ linux-2.6/include/linux/tick.h
@@ -207,7 +207,8 @@ extern void tick_nohz_dep_set_task(struc
 				   enum tick_dep_bits bit);
 extern void tick_nohz_dep_clear_task(struct task_struct *tsk,
 				     enum tick_dep_bits bit);
-extern void tick_nohz_dep_set_signal(struct signal_struct *signal,
+extern void tick_nohz_dep_set_signal(struct task_struct *tsk,
+				     struct signal_struct *signal,
 				     enum tick_dep_bits bit);
 extern void tick_nohz_dep_clear_signal(struct signal_struct *signal,
 				       enum tick_dep_bits bit);
@@ -252,11 +253,12 @@ static inline void tick_dep_clear_task(s
 	if (tick_nohz_full_enabled())
 		tick_nohz_dep_clear_task(tsk, bit);
 }
-static inline void tick_dep_set_signal(struct signal_struct *signal,
+static inline void tick_dep_set_signal(struct task_struct *tsk,
+				       struct signal_struct *signal,
 				       enum tick_dep_bits bit)
 {
 	if (tick_nohz_full_enabled())
-		tick_nohz_dep_set_signal(signal, bit);
+		tick_nohz_dep_set_signal(tsk, signal, bit);
 }
 static inline void tick_dep_clear_signal(struct signal_struct *signal,
 					 enum tick_dep_bits bit)
@@ -284,7 +286,8 @@ static inline void tick_dep_set_task(str
 				     enum tick_dep_bits bit) { }
 static inline void tick_dep_clear_task(struct task_struct *tsk,
 				       enum tick_dep_bits bit) { }
-static inline void tick_dep_set_signal(struct signal_struct *signal,
+static inline void tick_dep_set_signal(struct task_struct *tsk,
+				       struct signal_struct *signal,
 				       enum tick_dep_bits bit) { }
 static inline void tick_dep_clear_signal(struct signal_struct *signal,
 					 enum tick_dep_bits bit) { }
Index: linux-2.6/kernel/time/posix-cpu-timers.c
===================================================================
--- linux-2.6.orig/kernel/time/posix-cpu-timers.c
+++ linux-2.6/kernel/time/posix-cpu-timers.c
@@ -523,7 +523,7 @@ static void arm_timer(struct k_itimer *t
 	if (CPUCLOCK_PERTHREAD(timer->it_clock))
 		tick_dep_set_task(p, TICK_DEP_BIT_POSIX_TIMER);
 	else
-		tick_dep_set_signal(p->signal, TICK_DEP_BIT_POSIX_TIMER);
+		tick_dep_set_signal(p, p->signal, TICK_DEP_BIT_POSIX_TIMER);
 }
 
 /*
@@ -1358,7 +1358,7 @@ void set_process_cpu_timer(struct task_s
 	if (*newval < *nextevt)
 		*nextevt = *newval;
 
-	tick_dep_set_signal(tsk->signal, TICK_DEP_BIT_POSIX_TIMER);
+	tick_dep_set_signal(tsk, tsk->signal, TICK_DEP_BIT_POSIX_TIMER);
 }
 
 static int do_cpu_nanosleep(const clockid_t which_clock, int flags,
Index: linux-2.6/kernel/time/tick-sched.c
===================================================================
--- linux-2.6.orig/kernel/time/tick-sched.c
+++ linux-2.6/kernel/time/tick-sched.c
@@ -302,6 +302,27 @@ static void tick_nohz_dep_set_all(atomic
 }
 
 /*
+ * Set bit on nohz full dependency, kicking all cpus
+ * only if task can run on nohz full CPUs.
+ */
+static void tick_nohz_dep_set_all_cond(struct task_struct *tsk,
+				       atomic_t *dep,
+				       enum tick_dep_bits bit)
+{
+	int prev;
+	unsigned long flags;
+
+	prev = atomic_fetch_or(BIT(bit), dep);
+	if (prev)
+		return;
+
+	raw_spin_lock_irqsave(&tsk->pi_lock, flags);
+	if (cpumask_intersects(&tsk->cpus_mask, tick_nohz_full_mask))
+		tick_nohz_full_kick_all();
+	raw_spin_unlock_irqrestore(&tsk->pi_lock, flags);
+}
+
+/*
  * Set a global tick dependency. Used by perf events that rely on freq and
  * by unstable clock.
  */
@@ -382,9 +403,11 @@ EXPORT_SYMBOL_GPL(tick_nohz_dep_clear_ta
  * Set a per-taskgroup tick dependency. Posix CPU timers need this in order to elapse
  * per process timers.
  */
-void tick_nohz_dep_set_signal(struct signal_struct *sig, enum tick_dep_bits bit)
+void tick_nohz_dep_set_signal(struct task_struct *tsk,
+			      struct signal_struct *sig,
+			      enum tick_dep_bits bit)
 {
-	tick_nohz_dep_set_all(&sig->tick_dep_mask, bit);
+	tick_nohz_dep_set_all_cond(tsk, &sig->tick_dep_mask, bit);
 }
 
 void tick_nohz_dep_clear_signal(struct signal_struct *sig, enum tick_dep_bits bit)



^ permalink raw reply	[flat|nested] 7+ messages in thread

* [patch 2/2] nohz: try to avoid IPI when setting tick dependency for task
  2020-08-25 18:41 [patch 0/2] posix-timers: avoid nohz_full= IPIs via task cpu masks Marcelo Tosatti
  2020-08-25 18:41 ` [patch 1/2] nohz: try to avoid IPI when configuring per-CPU posix timer Marcelo Tosatti
@ 2020-08-25 18:41 ` Marcelo Tosatti
  2020-09-03 15:01   ` Frederic Weisbecker
  1 sibling, 1 reply; 7+ messages in thread
From: Marcelo Tosatti @ 2020-08-25 18:41 UTC (permalink / raw)
  To: Frederic Weisbecker; +Cc: linux-kernel

When enabling per-CPU posix timers, an IPI to nohz_full CPUs might be
performed (to re-read the dependencies and possibly not re-enter
nohz_full on a given CPU).

A common case is for applications that run on nohz_full= CPUs
to not use POSIX timers (e.g. DPDK).

This patch optimizes tick_nohz_dep_set_task to avoid kicking
all nohz_full= CPUs when the task's allowed CPU mask does not
intersect with the nohz_full= CPU mask.

This reduces interruptions to nohz_full= CPUs.

---
 kernel/time/tick-sched.c |    9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

Index: linux-2.6/kernel/time/tick-sched.c
===================================================================
--- linux-2.6.orig/kernel/time/tick-sched.c
+++ linux-2.6/kernel/time/tick-sched.c
@@ -383,11 +383,16 @@ void tick_nohz_dep_set_task(struct task_
 			tick_nohz_full_kick();
 			preempt_enable();
 		} else {
+			unsigned long flags;
+
 			/*
 			 * Some future tick_nohz_full_kick_task()
-			 * should optimize this.
+			 * should further optimize this.
 			 */
-			tick_nohz_full_kick_all();
+			raw_spin_lock_irqsave(&tsk->pi_lock, flags);
+			if (cpumask_intersects(&tsk->cpus_mask, tick_nohz_full_mask))
+				tick_nohz_full_kick_all();
+			raw_spin_unlock_irqrestore(&tsk->pi_lock, flags);
 		}
 	}
 }



^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [patch 1/2] nohz: try to avoid IPI when configuring per-CPU posix timer
  2020-08-25 18:41 ` [patch 1/2] nohz: try to avoid IPI when configuring per-CPU posix timer Marcelo Tosatti
@ 2020-09-01 23:38   ` Frederic Weisbecker
  2020-09-02 19:42     ` Marcelo Tosatti
  0 siblings, 1 reply; 7+ messages in thread
From: Frederic Weisbecker @ 2020-09-01 23:38 UTC (permalink / raw)
  To: Marcelo Tosatti; +Cc: linux-kernel

On Tue, Aug 25, 2020 at 03:41:48PM -0300, Marcelo Tosatti wrote:
> When enabling per-CPU posix timers, an IPI to nohz_full CPUs might be
> performed (to re-read the dependencies and possibly not re-enter
> nohz_full on a given CPU).
> 
> A common case is for applications that run on nohz_full= CPUs 
> to not use POSIX timers (eg DPDK). This patch skips the IPI 
> in case the task allowed mask does not intersect with nohz_full= CPU mask,
> when going through tick_nohz_dep_set_signal.
> 
> This reduces interruptions to nohz_full= CPUs.
> 
> Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
[...]
>  /*
> + * Set bit on nohz full dependency, kicking all cpus
> + * only if task can run on nohz full CPUs.
> + */
> +static void tick_nohz_dep_set_all_cond(struct task_struct *tsk,
> +				       atomic_t *dep,
> +				       enum tick_dep_bits bit)
> +{
> +	int prev;
> +	unsigned long flags;
> +
> +	prev = atomic_fetch_or(BIT(bit), dep);
> +	if (prev)
> +		return;
> +
> +	raw_spin_lock_irqsave(&tsk->pi_lock, flags);
> +	if (cpumask_intersects(&tsk->cpus_mask, tick_nohz_full_mask))
> +		tick_nohz_full_kick_all();

So that's for one task but what about the other threads in that
process? We are setting the tick dependency on all tasks sharing that
struct signal.

Thanks.

> +	raw_spin_unlock_irqrestore(&tsk->pi_lock, flags);
> +}
> +

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [patch 1/2] nohz: try to avoid IPI when configuring per-CPU posix timer
  2020-09-01 23:38   ` Frederic Weisbecker
@ 2020-09-02 19:42     ` Marcelo Tosatti
  0 siblings, 0 replies; 7+ messages in thread
From: Marcelo Tosatti @ 2020-09-02 19:42 UTC (permalink / raw)
  To: Frederic Weisbecker; +Cc: linux-kernel

On Wed, Sep 02, 2020 at 01:38:59AM +0200, Frederic Weisbecker wrote:
> On Tue, Aug 25, 2020 at 03:41:48PM -0300, Marcelo Tosatti wrote:
> > When enabling per-CPU posix timers, an IPI to nohz_full CPUs might be
> > performed (to re-read the dependencies and possibly not re-enter
> > nohz_full on a given CPU).
> > 
> > A common case is for applications that run on nohz_full= CPUs 
> > to not use POSIX timers (eg DPDK). This patch skips the IPI 
> > in case the task allowed mask does not intersect with nohz_full= CPU mask,
> > when going through tick_nohz_dep_set_signal.
> > 
> > This reduces interruptions to nohz_full= CPUs.
> > 
> > Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
> [...]
> >  /*
> > + * Set bit on nohz full dependency, kicking all cpus
> > + * only if task can run on nohz full CPUs.
> > + */
> > +static void tick_nohz_dep_set_all_cond(struct task_struct *tsk,
> > +				       atomic_t *dep,
> > +				       enum tick_dep_bits bit)
> > +{
> > +	int prev;
> > +	unsigned long flags;
> > +
> > +	prev = atomic_fetch_or(BIT(bit), dep);
> > +	if (prev)
> > +		return;
> > +
> > +	raw_spin_lock_irqsave(&tsk->pi_lock, flags);
> > +	if (cpumask_intersects(&tsk->cpus_mask, tick_nohz_full_mask))
> > +		tick_nohz_full_kick_all();
> 
> So that's for one task but what about the other threads in that
> process? We are setting the tick dependency on all tasks sharing that
> struct signal.

Hi Frederic,

Yep, fixing in -v2, thanks.



^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [patch 2/2] nohz: try to avoid IPI when setting tick dependency for task
  2020-08-25 18:41 ` [patch 2/2] nohz: try to avoid IPI when setting tick dependency for task Marcelo Tosatti
@ 2020-09-03 15:01   ` Frederic Weisbecker
  2020-09-10 18:51     ` Marcelo Tosatti
  0 siblings, 1 reply; 7+ messages in thread
From: Frederic Weisbecker @ 2020-09-03 15:01 UTC (permalink / raw)
  To: Marcelo Tosatti, Peter Zijlstra; +Cc: linux-kernel

On Tue, Aug 25, 2020 at 03:41:49PM -0300, Marcelo Tosatti wrote:
> When enabling per-CPU posix timers, an IPI to nohz_full CPUs might be
> performed (to re-read the dependencies and possibly not re-enter
> nohz_full on a given CPU).
> 
> A common case is for applications that run on nohz_full= CPUs
> to not use POSIX timers (eg DPDK).
> 
> This patch optimizes tick_nohz_dep_set_task to avoid kicking
> all nohz_full= CPUs in case the task allowed mask does not
> intersect with nohz_full= CPU mask,
> when going through tick_nohz_dep_set_task.
> 
> This reduces interruptions to nohz_full= CPUs.
> 
> ---
>  kernel/time/tick-sched.c |    9 +++++++--
>  1 file changed, 7 insertions(+), 2 deletions(-)
> 
> Index: linux-2.6/kernel/time/tick-sched.c
> ===================================================================
> --- linux-2.6.orig/kernel/time/tick-sched.c
> +++ linux-2.6/kernel/time/tick-sched.c
> @@ -383,11 +383,16 @@ void tick_nohz_dep_set_task(struct task_
>  			tick_nohz_full_kick();
>  			preempt_enable();
>  		} else {
> +			unsigned long flags;
> +
>  			/*
>  			 * Some future tick_nohz_full_kick_task()
> -			 * should optimize this.
> +			 * should further optimize this.
>  			 */
> -			tick_nohz_full_kick_all();
> +			raw_spin_lock_irqsave(&tsk->pi_lock, flags);
> +			if (cpumask_intersects(&tsk->cpus_mask, tick_nohz_full_mask))
> +				tick_nohz_full_kick_all();
> +			raw_spin_unlock_irqrestore(&tsk->pi_lock, flags);
>  		}
>  	}
>  }
> 
> 

Not long ago, Peterz suggested that we simply do:

diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index f0199a4ba1ad..42ce8e458013 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -357,17 +357,26 @@ EXPORT_SYMBOL_GPL(tick_nohz_dep_clear_cpu);
 void tick_nohz_dep_set_task(struct task_struct *tsk, enum tick_dep_bits bit)
 {
 	if (!atomic_fetch_or(BIT(bit), &tsk->tick_dep_mask)) {
-		if (tsk == current) {
-			preempt_disable();
-			tick_nohz_full_kick();
-			preempt_enable();
-		} else {
-			/*
-			 * Some future tick_nohz_full_kick_task()
-			 * should optimize this.
-			 */
-			tick_nohz_full_kick_all();
-		}
+		int cpu = task_cpu(tsk);
+
+		/*
+		 * If the task concurrently migrates to another cpu,
+		 * we guarantee it sees the new tick dependency upon
+		 * schedule.
+		 *
+		 * set_task_cpu(p, cpu);
+		 *   STORE p->cpu = @cpu
+		 * __schedule() (switch to task 'p')
+		 *   LOCK rq->lock
+		 *   smp_mb__after_spin_lock()		STORE p->tick_dep_mask
+		 *   tick_nohz_task_switch()		smp_mb() (atomic_fetch_or())
+		 *      LOAD p->tick_dep_mask		LOAD p->cpu
+		 */
+
+		preempt_disable();
+		if (cpu_online(cpu))
+			tick_nohz_full_kick_cpu(cpu);
+		preempt_enable();
 	}
 }
 EXPORT_SYMBOL_GPL(tick_nohz_dep_set_task);

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [patch 2/2] nohz: try to avoid IPI when setting tick dependency for task
  2020-09-03 15:01   ` Frederic Weisbecker
@ 2020-09-10 18:51     ` Marcelo Tosatti
  0 siblings, 0 replies; 7+ messages in thread
From: Marcelo Tosatti @ 2020-09-10 18:51 UTC (permalink / raw)
  To: Frederic Weisbecker; +Cc: Peter Zijlstra, linux-kernel

On Thu, Sep 03, 2020 at 05:01:53PM +0200, Frederic Weisbecker wrote:
> On Tue, Aug 25, 2020 at 03:41:49PM -0300, Marcelo Tosatti wrote:
> > When enabling per-CPU posix timers, an IPI to nohz_full CPUs might be
> > performed (to re-read the dependencies and possibly not re-enter
> > nohz_full on a given CPU).
> > 
> > A common case is for applications that run on nohz_full= CPUs
> > to not use POSIX timers (eg DPDK).
> > 
> > This patch optimizes tick_nohz_dep_set_task to avoid kicking
> > all nohz_full= CPUs in case the task allowed mask does not
> > intersect with nohz_full= CPU mask,
> > when going through tick_nohz_dep_set_task.
> > 
> > This reduces interruptions to nohz_full= CPUs.
> > 
> > ---
> >  kernel/time/tick-sched.c |    9 +++++++--
> >  1 file changed, 7 insertions(+), 2 deletions(-)
> > 
> > Index: linux-2.6/kernel/time/tick-sched.c
> > ===================================================================
> > --- linux-2.6.orig/kernel/time/tick-sched.c
> > +++ linux-2.6/kernel/time/tick-sched.c
> > @@ -383,11 +383,16 @@ void tick_nohz_dep_set_task(struct task_
> >  			tick_nohz_full_kick();
> >  			preempt_enable();
> >  		} else {
> > +			unsigned long flags;
> > +
> >  			/*
> >  			 * Some future tick_nohz_full_kick_task()
> > -			 * should optimize this.
> > +			 * should further optimize this.
> >  			 */
> > -			tick_nohz_full_kick_all();
> > +			raw_spin_lock_irqsave(&tsk->pi_lock, flags);
> > +			if (cpumask_intersects(&tsk->cpus_mask, tick_nohz_full_mask))
> > +				tick_nohz_full_kick_all();
> > +			raw_spin_unlock_irqrestore(&tsk->pi_lock, flags);
> >  		}
> >  	}
> >  }
> > 
> > 
> 
> Not long ago, Peterz suggested that we simply do:
> 
> diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
> index f0199a4ba1ad..42ce8e458013 100644
> --- a/kernel/time/tick-sched.c
> +++ b/kernel/time/tick-sched.c
> @@ -357,17 +357,26 @@ EXPORT_SYMBOL_GPL(tick_nohz_dep_clear_cpu);
>  void tick_nohz_dep_set_task(struct task_struct *tsk, enum tick_dep_bits bit)
>  {
>  	if (!atomic_fetch_or(BIT(bit), &tsk->tick_dep_mask)) {
> -		if (tsk == current) {
> -			preempt_disable();
> -			tick_nohz_full_kick();
> -			preempt_enable();
> -		} else {
> -			/*
> -			 * Some future tick_nohz_full_kick_task()
> -			 * should optimize this.
> -			 */
> -			tick_nohz_full_kick_all();
> -		}
> +		int cpu = task_cpu(tsk);
> +
> +		/*
> +		 * If the task concurrently migrates to another cpu,
> +		 * we guarantee it sees the new tick dependency upon
> +		 * schedule.
> +		 *
> +		 * set_task_cpu(p, cpu);
> +		 *   STORE p->cpu = @cpu
> +		 * __schedule() (switch to task 'p')
> +		 *   LOCK rq->lock
> +		 *   smp_mb__after_spin_lock()		STORE p->tick_dep_mask
> +		 *   tick_nohz_task_switch()		smp_mb() (atomic_fetch_or())
> +		 *      LOAD p->tick_dep_mask		LOAD p->cpu
> +		 */
> +
> +		preempt_disable();
> +		if (cpu_online(cpu))
> +			tick_nohz_full_kick_cpu(cpu);
> +		preempt_enable();
>  	}
>  }
>  EXPORT_SYMBOL_GPL(tick_nohz_dep_set_task);

This can also be used for the signal case... thanks.


^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2020-09-10 18:54 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-08-25 18:41 [patch 0/2] posix-timers: avoid nohz_full= IPIs via task cpu masks Marcelo Tosatti
2020-08-25 18:41 ` [patch 1/2] nohz: try to avoid IPI when configuring per-CPU posix timer Marcelo Tosatti
2020-09-01 23:38   ` Frederic Weisbecker
2020-09-02 19:42     ` Marcelo Tosatti
2020-08-25 18:41 ` [patch 2/2] nohz: try to avoid IPI when setting tick dependency for task Marcelo Tosatti
2020-09-03 15:01   ` Frederic Weisbecker
2020-09-10 18:51     ` Marcelo Tosatti

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).