All of lore.kernel.org
 help / color / mirror / Atom feed
From: Peter Zijlstra <peterz@infradead.org>
To: Sasha Levin <sasha.levin@oracle.com>
Cc: linux-kernel@vger.kernel.org, pjt@google.com, tglx@linutronix.de,
	klamm@yandex-team.ru, mingo@kernel.org, bsegall@google.com,
	hpa@zytor.com
Subject: Re: [tip:timers/core] hrtimer: Allow concurrent hrtimer_start() for self restarting timers
Date: Wed, 13 May 2015 15:43:11 +0200	[thread overview]
Message-ID: <20150513134311.GW21418@twins.programming.kicks-ass.net> (raw)
In-Reply-To: <55520589.9080401@oracle.com>

On Tue, May 12, 2015 at 09:52:09AM -0400, Sasha Levin wrote:
> Hey Peter,
> 
> I seem to be hitting this warning with the latest -next (2015-05-11):
> 
> [3344291.055800] WARNING: CPU: 0 PID: 9422 at kernel/time/hrtimer.c:802 hrtimer_forward+0x1f9/0x330()

Indeed.

So I can't seem to come up with a pretty solution :-(

The 'problem' is creating a periodic timer that can stop when there's no
work left and ensuring (re)start doesn't get lost or looses overruns.

The problem with the current code is that the re-start can come before
the callback does fwd, at which point the fwd will spew the above
warning (rightly so).

Now, conceptually its easy to detect if you're before or after the fwd
by comparing the expiration time against the current time. Of course,
that's expensive (and racy) because we don't have the current time.

Alternatively one could cache this state inside the timer, but then
everybody pays the overhead of maintaining this extra state, and that is
undesired.

The only other option that I could see is the external timer_active
variable, which I tried to kill before.

So I suppose the below (compile tested) patch should fix things, but
seeing how I've been up since 4am I might just have missed something
obvious :-)

Almost-Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 include/linux/perf_event.h |  4 ++++
 kernel/events/core.c       | 28 +++++++++++++++-------------
 kernel/sched/core.c        | 12 ------------
 kernel/sched/fair.c        | 17 +++++++++++++----
 kernel/sched/rt.c          |  8 +++++++-
 kernel/sched/sched.h       |  5 ++---
 6 files changed, 41 insertions(+), 33 deletions(-)

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index e86f85abeda7..1f5607b220e4 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -566,8 +566,12 @@ struct perf_cpu_context {
 	struct perf_event_context	*task_ctx;
 	int				active_oncpu;
 	int				exclusive;
+
+	raw_spinlock_t			hrtimer_lock;
 	struct hrtimer			hrtimer;
 	ktime_t				hrtimer_interval;
+	unsigned int			hrtimer_active;
+
 	struct pmu			*unique_pmu;
 	struct perf_cgroup		*cgrp;
 };
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 84231a146dd5..3dac1d0c6072 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -752,24 +752,21 @@ perf_cgroup_mark_enabled(struct perf_event *event,
 static enum hrtimer_restart perf_mux_hrtimer_handler(struct hrtimer *hr)
 {
 	struct perf_cpu_context *cpuctx;
-	enum hrtimer_restart ret = HRTIMER_NORESTART;
 	int rotations = 0;
 
 	WARN_ON(!irqs_disabled());
 
 	cpuctx = container_of(hr, struct perf_cpu_context, hrtimer);
-
 	rotations = perf_rotate_context(cpuctx);
 
-	/*
-	 * arm timer if needed
-	 */
-	if (rotations) {
+	raw_spin_lock(&cpuctx->hrtimer_lock);
+	if (rotations)
 		hrtimer_forward_now(hr, cpuctx->hrtimer_interval);
-		ret = HRTIMER_RESTART;
-	}
+	else
+		cpuctx->hrtimer_active = 0;
+	raw_spin_unlock(&cpuctx->hrtimer_lock);
 
-	return ret;
+	return rotations ? HRTIMER_RESTART : HRTIMER_NORESTART;
 }
 
 static void __perf_mux_hrtimer_init(struct perf_cpu_context *cpuctx, int cpu)
@@ -792,7 +789,7 @@ static void __perf_mux_hrtimer_init(struct perf_cpu_context *cpuctx, int cpu)
 
 	cpuctx->hrtimer_interval = ns_to_ktime(NSEC_PER_MSEC * interval);
 
-	hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_PINNED);
+	hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED);
 	timer->function = perf_mux_hrtimer_handler;
 }
 
@@ -800,15 +797,20 @@ static int perf_mux_hrtimer_restart(struct perf_cpu_context *cpuctx)
 {
 	struct hrtimer *timer = &cpuctx->hrtimer;
 	struct pmu *pmu = cpuctx->ctx.pmu;
+	unsigned long flags;
 
 	/* not for SW PMU */
 	if (pmu->task_ctx_nr == perf_sw_context)
 		return 0;
 
-	if (hrtimer_is_queued(timer))
-		return 0;
+	raw_spin_lock_irqsave(&cpuctx->hrtimer_lock, flags);
+	if (!cpuctx->hrtimer_active) {
+		cpuctx->hrtimer_active = 1;
+		hrtimer_forward_now(timer, cpuctx->hrtimer_interval);
+		hrtimer_start_expires(timer, HRTIMER_MODE_ABS_PINNED);
+	}
+	raw_spin_unlock_irqrestore(&cpuctx->hrtimer_lock, flags);
 
-	hrtimer_start(timer, cpuctx->hrtimer_interval, HRTIMER_MODE_REL_PINNED);
 	return 0;
 }
 
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 613b61e381ca..bd9182aa74c0 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -90,18 +90,6 @@
 #define CREATE_TRACE_POINTS
 #include <trace/events/sched.h>
 
-void start_bandwidth_timer(struct hrtimer *period_timer, ktime_t period)
-{
-	/*
-	 * Do not forward the expiration time of active timers;
-	 * we do not want to loose an overrun.
-	 */
-	if (!hrtimer_active(period_timer))
-		hrtimer_forward_now(period_timer, period);
-
-	hrtimer_start_expires(period_timer, HRTIMER_MODE_ABS_PINNED);
-}
-
 DEFINE_MUTEX(sched_domains_mutex);
 DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
 
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 8c1510abeefa..2c08783ee95c 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -3883,8 +3883,9 @@ static void start_cfs_slack_bandwidth(struct cfs_bandwidth *cfs_b)
 	if (runtime_refresh_within(cfs_b, min_left))
 		return;
 
-	start_bandwidth_timer(&cfs_b->slack_timer,
-				ns_to_ktime(cfs_bandwidth_slack_period));
+	hrtimer_start(&cfs_b->slack_timer,
+			ns_to_ktime(cfs_bandwidth_slack_period),
+			HRTIMER_MODE_REL);
 }
 
 /* we know any runtime found here is valid as update_curr() precedes return */
@@ -4025,6 +4026,8 @@ static enum hrtimer_restart sched_cfs_period_timer(struct hrtimer *timer)
 
 		idle = do_sched_cfs_period_timer(cfs_b, overrun);
 	}
+	if (idle)
+		cfs_b->period_active = 0;
 	raw_spin_unlock(&cfs_b->lock);
 
 	return idle ? HRTIMER_NORESTART : HRTIMER_RESTART;
@@ -4038,7 +4041,7 @@ void init_cfs_bandwidth(struct cfs_bandwidth *cfs_b)
 	cfs_b->period = ns_to_ktime(default_cfs_period());
 
 	INIT_LIST_HEAD(&cfs_b->throttled_cfs_rq);
-	hrtimer_init(&cfs_b->period_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	hrtimer_init(&cfs_b->period_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED);
 	cfs_b->period_timer.function = sched_cfs_period_timer;
 	hrtimer_init(&cfs_b->slack_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
 	cfs_b->slack_timer.function = sched_cfs_slack_timer;
@@ -4052,7 +4055,13 @@ static void init_cfs_rq_runtime(struct cfs_rq *cfs_rq)
 
 void start_cfs_bandwidth(struct cfs_bandwidth *cfs_b)
 {
-	start_bandwidth_timer(&cfs_b->period_timer, cfs_b->period);
+	lockdep_assert_held(&cfs_b->lock);
+
+	if (!cfs_b->period_active) {
+		cfs_b->period_active = 1;
+		hrtimer_forward_now(&cfs_b->period_timer, cfs_b->period);
+		hrtimer_start_expires(&cfs_b->period_timer, HRTIMER_MODE_ABS_PINNED);
+	}
 }
 
 static void destroy_cfs_bandwidth(struct cfs_bandwidth *cfs_b)
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 8781a38a636e..7d7093c51f8d 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -31,6 +31,8 @@ static enum hrtimer_restart sched_rt_period_timer(struct hrtimer *timer)
 		idle = do_sched_rt_period_timer(rt_b, overrun);
 		raw_spin_lock(&rt_b->rt_runtime_lock);
 	}
+	if (idle)
+		rt_b->rt_period_active = 0;
 	raw_spin_unlock(&rt_b->rt_runtime_lock);
 
 	return idle ? HRTIMER_NORESTART : HRTIMER_RESTART;
@@ -54,7 +56,11 @@ static void start_rt_bandwidth(struct rt_bandwidth *rt_b)
 		return;
 
 	raw_spin_lock(&rt_b->rt_runtime_lock);
-	start_bandwidth_timer(&rt_b->rt_period_timer, rt_b->rt_period);
+	if (!rt_b->rt_period_active) {
+		rt_b->rt_period_active = 1;
+		hrtimer_forward_now(&rt_b->rt_period_timer, rt_b->rt_period);
+		hrtimer_start_expires(&rt_b->rt_period_timer, HRTIMER_MODE_ABS_PINNED);
+	}
 	raw_spin_unlock(&rt_b->rt_runtime_lock);
 }
 
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 460f98648afd..f10a445910c9 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -137,6 +137,7 @@ struct rt_bandwidth {
 	ktime_t			rt_period;
 	u64			rt_runtime;
 	struct hrtimer		rt_period_timer;
+	unsigned int		rt_period_active;
 };
 
 void __dl_clear_params(struct task_struct *p);
@@ -221,7 +222,7 @@ struct cfs_bandwidth {
 	s64 hierarchical_quota;
 	u64 runtime_expires;
 
-	int idle;
+	int idle, period_active;
 	struct hrtimer period_timer, slack_timer;
 	struct list_head throttled_cfs_rq;
 
@@ -1410,8 +1411,6 @@ static inline void sched_rt_avg_update(struct rq *rq, u64 rt_delta) { }
 static inline void sched_avg_update(struct rq *rq) { }
 #endif
 
-extern void start_bandwidth_timer(struct hrtimer *period_timer, ktime_t period);
-
 /*
  * __task_rq_lock - lock the rq @p resides on.
  */

  reply	other threads:[~2015-05-13 13:43 UTC|newest]

Thread overview: 22+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-04-15  9:41 [PATCH 0/3] hrtimer (related) fixes Peter Zijlstra
2015-04-15  9:41 ` [PATCH 1/3] hrtimer: Fix race between hrtimer_start() and __run_hrtimer() Peter Zijlstra
2015-04-15 10:26   ` Thomas Gleixner
2015-04-15 11:31     ` Peter Zijlstra
2015-04-15 11:35       ` Thomas Gleixner
2015-04-15 11:43         ` Peter Zijlstra
2015-04-22 19:15       ` [tip:timers/core] hrtimer: Allow concurrent hrtimer_start() for self restarting timers tip-bot for Peter Zijlstra
2015-05-12 13:52         ` Sasha Levin
2015-05-13 13:43           ` Peter Zijlstra [this message]
2015-05-13 13:54             ` Ingo Molnar
2015-05-13 17:25             ` bsegall
2015-05-13 23:09             ` Sasha Levin
2015-05-14 10:23               ` Peter Zijlstra
2015-05-18 15:21                 ` [tip:timers/core] sched,perf: Fix periodic timers tip-bot for Peter Zijlstra
2015-04-15  9:41 ` [PATCH 2/3] sched: Cleanup bandwidth timers Peter Zijlstra
2015-04-16 20:03   ` bsegall
2015-04-22 19:15   ` [tip:timers/core] " tip-bot for Peter Zijlstra
2015-04-15  9:41 ` [PATCH 3/3] perf: Fix mux_interval hrtimer wreckage Peter Zijlstra
2015-04-15 13:48   ` David Ahern
2015-04-15 14:20     ` Peter Zijlstra
2015-04-22 15:12   ` Thomas Gleixner
2015-04-22 19:15   ` [tip:timers/core] " tip-bot for Peter Zijlstra

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20150513134311.GW21418@twins.programming.kicks-ass.net \
    --to=peterz@infradead.org \
    --cc=bsegall@google.com \
    --cc=hpa@zytor.com \
    --cc=klamm@yandex-team.ru \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@kernel.org \
    --cc=pjt@google.com \
    --cc=sasha.levin@oracle.com \
    --cc=tglx@linutronix.de \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.