linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] sched/vtime: Prevent unstable evaluation of WARN(vtime->state)
@ 2020-01-23 18:08 Frederic Weisbecker
  2020-02-14 16:23 ` Frederic Weisbecker
                   ` (2 more replies)
  0 siblings, 3 replies; 6+ messages in thread
From: Frederic Weisbecker @ 2020-01-23 18:08 UTC (permalink / raw)
  To: Ingo Molnar, Peter Zijlstra; +Cc: LKML, Chris Wilson, Frederic Weisbecker

From: Chris Wilson <chris@chris-wilson.co.uk>

As the vtime is sampled under loose seqcount protection by kcpustat, the
vtime fields may change as the code flows. Where logic dictates a field
has a static value, use a READ_ONCE.

Fixes: 74722bb223d0 ("sched/vtime: Bring up complete kcpustat accessor")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
---
 kernel/sched/cputime.c | 41 ++++++++++++++++++++++-------------------
 1 file changed, 22 insertions(+), 19 deletions(-)

diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index d43318a489f2..df3577149d2e 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -912,8 +912,10 @@ void task_cputime(struct task_struct *t, u64 *utime, u64 *stime)
 	} while (read_seqcount_retry(&vtime->seqcount, seq));
 }
 
-static int vtime_state_check(struct vtime *vtime, int cpu)
+static int vtime_state_fetch(struct vtime *vtime, int cpu)
 {
+	int state = READ_ONCE(vtime->state);
+
 	/*
 	 * We raced against a context switch, fetch the
 	 * kcpustat task again.
@@ -930,10 +932,10 @@ static int vtime_state_check(struct vtime *vtime, int cpu)
 	 *
 	 * Case 1) is ok but 2) is not. So wait for a safe VTIME state.
 	 */
-	if (vtime->state == VTIME_INACTIVE)
+	if (state == VTIME_INACTIVE)
 		return -EAGAIN;
 
-	return 0;
+	return state;
 }
 
 static u64 kcpustat_user_vtime(struct vtime *vtime)
@@ -952,14 +954,15 @@ static int kcpustat_field_vtime(u64 *cpustat,
 {
 	struct vtime *vtime = &tsk->vtime;
 	unsigned int seq;
-	int err;
 
 	do {
+		int state;
+
 		seq = read_seqcount_begin(&vtime->seqcount);
 
-		err = vtime_state_check(vtime, cpu);
-		if (err < 0)
-			return err;
+		state = vtime_state_fetch(vtime, cpu);
+		if (state < 0)
+			return state;
 
 		*val = cpustat[usage];
 
@@ -972,7 +975,7 @@ static int kcpustat_field_vtime(u64 *cpustat,
 		 */
 		switch (usage) {
 		case CPUTIME_SYSTEM:
-			if (vtime->state == VTIME_SYS)
+			if (state == VTIME_SYS)
 				*val += vtime->stime + vtime_delta(vtime);
 			break;
 		case CPUTIME_USER:
@@ -984,11 +987,11 @@ static int kcpustat_field_vtime(u64 *cpustat,
 				*val += kcpustat_user_vtime(vtime);
 			break;
 		case CPUTIME_GUEST:
-			if (vtime->state == VTIME_GUEST && task_nice(tsk) <= 0)
+			if (state == VTIME_GUEST && task_nice(tsk) <= 0)
 				*val += vtime->gtime + vtime_delta(vtime);
 			break;
 		case CPUTIME_GUEST_NICE:
-			if (vtime->state == VTIME_GUEST && task_nice(tsk) > 0)
+			if (state == VTIME_GUEST && task_nice(tsk) > 0)
 				*val += vtime->gtime + vtime_delta(vtime);
 			break;
 		default:
@@ -1039,23 +1042,23 @@ static int kcpustat_cpu_fetch_vtime(struct kernel_cpustat *dst,
 {
 	struct vtime *vtime = &tsk->vtime;
 	unsigned int seq;
-	int err;
 
 	do {
 		u64 *cpustat;
 		u64 delta;
+		int state;
 
 		seq = read_seqcount_begin(&vtime->seqcount);
 
-		err = vtime_state_check(vtime, cpu);
-		if (err < 0)
-			return err;
+		state = vtime_state_fetch(vtime, cpu);
+		if (state < 0)
+			return state;
 
 		*dst = *src;
 		cpustat = dst->cpustat;
 
 		/* Task is sleeping, dead or idle, nothing to add */
-		if (vtime->state < VTIME_SYS)
+		if (state < VTIME_SYS)
 			continue;
 
 		delta = vtime_delta(vtime);
@@ -1064,15 +1067,15 @@ static int kcpustat_cpu_fetch_vtime(struct kernel_cpustat *dst,
 		 * Task runs either in user (including guest) or kernel space,
 		 * add pending nohz time to the right place.
 		 */
-		if (vtime->state == VTIME_SYS) {
+		if (state == VTIME_SYS) {
 			cpustat[CPUTIME_SYSTEM] += vtime->stime + delta;
-		} else if (vtime->state == VTIME_USER) {
+		} else if (state == VTIME_USER) {
 			if (task_nice(tsk) > 0)
 				cpustat[CPUTIME_NICE] += vtime->utime + delta;
 			else
 				cpustat[CPUTIME_USER] += vtime->utime + delta;
 		} else {
-			WARN_ON_ONCE(vtime->state != VTIME_GUEST);
+			WARN_ON_ONCE(state != VTIME_GUEST);
 			if (task_nice(tsk) > 0) {
 				cpustat[CPUTIME_GUEST_NICE] += vtime->gtime + delta;
 				cpustat[CPUTIME_NICE] += vtime->gtime + delta;
@@ -1083,7 +1086,7 @@ static int kcpustat_cpu_fetch_vtime(struct kernel_cpustat *dst,
 		}
 	} while (read_seqcount_retry(&vtime->seqcount, seq));
 
-	return err;
+	return 0;
 }
 
 void kcpustat_cpu_fetch(struct kernel_cpustat *dst, int cpu)
-- 
2.25.0


^ permalink raw reply related	[flat|nested] 6+ messages in thread
* Re: [PATCH 2/6] sched/vtime: Bring up complete kcpustat accessor
@ 2019-12-30  1:08 Frederic Weisbecker
  2019-12-30  9:04 ` [PATCH] sched/vtime: Prevent unstable evaluation of WARN(vtime->state) Chris Wilson
  0 siblings, 1 reply; 6+ messages in thread
From: Frederic Weisbecker @ 2019-12-30  1:08 UTC (permalink / raw)
  To: Chris Wilson
  Cc: Ingo Molnar, Peter Zijlstra, LKML, Jacek Anaszewski, Wanpeng Li,
	Rafael J . Wysocki, Benjamin Herrenschmidt, Rik van Riel,
	Thomas Gleixner, Yauheni Kaliuta, Viresh Kumar, Pavel Machek

On Sat, Dec 28, 2019 at 08:56:19PM +0000, Chris Wilson wrote:
> I'm randomly hitting this WARN on a non-virtualised system reading
> /proc/stat.
> 
> vtime->state is updated under the write_seqcount, so the access here is
> deliberately racey, and the change in vtime->state would be picked up
> the seqcount_retry.
> 
> Quick suggestion would be something along the lines of
> 
>  static int vtime_state_check(struct vtime *vtime, int cpu)
>  {
> +	int state = READ_ONCE(vtime->state);
> +
>  	/*
>  	 * We raced against a context switch, fetch the
>  	 * kcpustat task again.
> @@ -930,10 +932,10 @@ static int vtime_state_check(struct vtime *vtime, int cpu)
>  	 *
>  	 * Case 1) is ok but 2) is not. So wait for a safe VTIME state.
>  	 */
> -	if (vtime->state == VTIME_INACTIVE)
> +	if (state == VTIME_INACTIVE)
>  		return -EAGAIN;
> 
> -	return 0;
> +	return state;
>  }
> 
>  static u64 kcpustat_user_vtime(struct vtime *vtime)
> @@ -1055,7 +1057,7 @@ static int kcpustat_cpu_fetch_vtime(struct kernel_cpustat *dst,
>  		cpustat = dst->cpustat;
> 
>  		/* Task is sleeping, dead or idle, nothing to add */
> -		if (vtime->state < VTIME_SYS)
> +		if (err < VTIME_SYS)
>  			continue;
> 
>  		delta = vtime_delta(vtime);
> @@ -1064,15 +1066,15 @@ static int kcpustat_cpu_fetch_vtime(struct kernel_cpustat *dst,
>  		 * Task runs either in user (including guest) or kernel space,
>  		 * add pending nohz time to the right place.
>  		 */
> -		if (vtime->state == VTIME_SYS) {
> +		if (err == VTIME_SYS) {
>  			cpustat[CPUTIME_SYSTEM] += vtime->stime + delta;
> -		} else if (vtime->state == VTIME_USER) {
> +		} else if (err == VTIME_USER) {
>  			if (task_nice(tsk) > 0)
>  				cpustat[CPUTIME_NICE] += vtime->utime + delta;
>  			else
>  				cpustat[CPUTIME_USER] += vtime->utime + delta;
>  		} else {
> -			WARN_ON_ONCE(vtime->state != VTIME_GUEST);
> +			WARN_ON_ONCE(err != VTIME_GUEST);
>  			if (task_nice(tsk) > 0) {
>  				cpustat[CPUTIME_GUEST_NICE] += vtime->gtime + delta;
>  				cpustat[CPUTIME_NICE] += vtime->gtime + delta;
> 
> Or drop the warn.

Good catch, can I use your Signed-off-by ?

Thanks.

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2020-03-06 14:42 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-01-23 18:08 [PATCH] sched/vtime: Prevent unstable evaluation of WARN(vtime->state) Frederic Weisbecker
2020-02-14 16:23 ` Frederic Weisbecker
2020-02-27 13:34 ` Frederic Weisbecker
2020-03-06 14:42 ` [tip: sched/core] " tip-bot2 for Chris Wilson
  -- strict thread matches above, loose matches on Subject: below --
2019-12-30  1:08 [PATCH 2/6] sched/vtime: Bring up complete kcpustat accessor Frederic Weisbecker
2019-12-30  9:04 ` [PATCH] sched/vtime: Prevent unstable evaluation of WARN(vtime->state) Chris Wilson
2019-12-30 17:39   ` Frederic Weisbecker

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).