All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH RFC] tick/nohz: fix data races in get_cpu_idle_time_us()
@ 2023-01-28  2:00 Yu Liao
  2023-01-31 14:44 ` Thomas Gleixner
                   ` (2 more replies)
  0 siblings, 3 replies; 15+ messages in thread
From: Yu Liao @ 2023-01-28  2:00 UTC (permalink / raw)
  To: fweisbec, tglx, mingo
  Cc: liaoyu15, liwei391, adobriyan, mirsad.todorovac, linux-kernel

selftest/proc/proc-uptime-001 complains:
  Euler:/mnt # while true; do ./proc-uptime-001; done
  proc-uptime-001: proc-uptime-001.c:41: main: Assertion `i1 >= i0' failed.
  proc-uptime-001: proc-uptime-001.c:41: main: Assertion `i1 >= i0' failed.

/proc/uptime should be monotonically increasing. The assertion fails because
of data races between get_cpu_idle_time_us() and
tick_nohz_stop_idle()/tick_nohz_start_idle(), for example:

CPU0                        CPU1
get_cpu_idle_time_us

                            tick_nohz_idle_exit
                              now = ktime_get();
                              tick_nohz_stop_idle
                                update_ts_time_stats
                                  delta = ktime_sub(now, ts->idle_entrytime);
                                  ts->idle_sleeptime = ktime_add(ts->idle_sleeptime, delta)
                                  ts->idle_entrytime = now

now = ktime_get();
if (ts->idle_active && !nr_iowait_cpu(cpu)) {
    ktime_t delta = ktime_sub(now, ts->idle_entrytime);
    idle = ktime_add(ts->idle_sleeptime, delta);
    //idle is slightly greater than the actual value
} else {
    idle = ts->idle_sleeptime;
}
                            ts->idle_active = 0

After this, idle = idle_sleeptime(actual idle value) + now(CPU0) - now(CPU1).
If get_cpu_idle_time_us() is called again immediately after
ts->idle_active = 0, only ts->idle_sleeptime is returned, which is smaller
than the previously returned value, so the reported idle time goes backwards.
In addition, there are other data race scenarios not listed here.

This patch introduces a lock to prevent these data races.

Fixes: a130e8fbc7de ("fs/proc/uptime.c: Fix idle time reporting in /proc/uptime")
Signed-off-by: Yu Liao <liaoyu15@huawei.com>
---
 kernel/time/tick-sched.c | 15 ++++++++++++++-
 kernel/time/tick-sched.h |  1 +
 2 files changed, 15 insertions(+), 1 deletion(-)

diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index b0e3c9205946..ad7d47098a58 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -36,7 +36,9 @@
 /*
  * Per-CPU nohz control structure
  */
-static DEFINE_PER_CPU(struct tick_sched, tick_cpu_sched);
+static DEFINE_PER_CPU(struct tick_sched, tick_cpu_sched) = {
+	.idle_time_lock = __SPIN_LOCK_UNLOCKED(tick_cpu_sched.idle_time_lock),
+};
 
 struct tick_sched *tick_get_tick_sched(int cpu)
 {
@@ -661,16 +663,24 @@ update_ts_time_stats(int cpu, struct tick_sched *ts, ktime_t now, u64 *last_upda
 
 static void tick_nohz_stop_idle(struct tick_sched *ts, ktime_t now)
 {
+	unsigned long flags;
+
+	spin_lock_irqsave(&ts->idle_time_lock, flags);
 	update_ts_time_stats(smp_processor_id(), ts, now, NULL);
 	ts->idle_active = 0;
+	spin_unlock_irqrestore(&ts->idle_time_lock, flags);
 
 	sched_clock_idle_wakeup_event();
 }
 
 static void tick_nohz_start_idle(struct tick_sched *ts)
 {
+	unsigned long flags;
+
+	spin_lock_irqsave(&ts->idle_time_lock, flags);
 	ts->idle_entrytime = ktime_get();
 	ts->idle_active = 1;
+	spin_unlock_irqrestore(&ts->idle_time_lock, flags);
 	sched_clock_idle_sleep_event();
 }
 
@@ -691,12 +701,14 @@ static void tick_nohz_start_idle(struct tick_sched *ts)
 u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time)
 {
 	struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
+	unsigned long flags;
 	ktime_t now, idle;
 
 	if (!tick_nohz_active)
 		return -1;
 
 	now = ktime_get();
+	spin_lock_irqsave(&ts->idle_time_lock, flags);
 	if (last_update_time) {
 		update_ts_time_stats(cpu, ts, now, last_update_time);
 		idle = ts->idle_sleeptime;
@@ -709,6 +721,7 @@ u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time)
 			idle = ts->idle_sleeptime;
 		}
 	}
+	spin_unlock_irqrestore(&ts->idle_time_lock, flags);
 
 	return ktime_to_us(idle);
 
diff --git a/kernel/time/tick-sched.h b/kernel/time/tick-sched.h
index 504649513399..a64d4781e7af 100644
--- a/kernel/time/tick-sched.h
+++ b/kernel/time/tick-sched.h
@@ -81,6 +81,7 @@ struct tick_sched {
 	atomic_t			tick_dep_mask;
 	unsigned long			last_tick_jiffies;
 	unsigned int			stalled_jiffies;
+	spinlock_t			idle_time_lock;
 };
 
 extern struct tick_sched *tick_get_tick_sched(int cpu);
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 15+ messages in thread

end of thread, other threads:[~2023-02-08 15:19 UTC | newest]

Thread overview: 15+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-01-28  2:00 [PATCH RFC] tick/nohz: fix data races in get_cpu_idle_time_us() Yu Liao
2023-01-31 14:44 ` Thomas Gleixner
2023-01-31 18:35   ` Alexey Dobriyan
2023-01-31 19:59     ` Peter Zijlstra
2023-01-31 19:57   ` Peter Zijlstra
2023-01-31 21:11     ` Frederic Weisbecker
2023-02-01  9:03       ` Peter Zijlstra
2023-02-01  4:53   ` Hillf Danton
2023-02-01 12:02     ` Frederic Weisbecker
2023-02-01 14:01       ` Hillf Danton
2023-02-01 14:28         ` Frederic Weisbecker
2023-02-08 15:19   ` [PATCH] timers/nohz: Restructure and reshuffle struct tick_sched Frederic Weisbecker
2023-02-06  7:03 ` [PATCH RFC] tick/nohz: fix data races in get_cpu_idle_time_us() kernel test robot
2023-02-07  5:25 ` Mirsad Goran Todorovac
2023-02-07  8:03   ` Mirsad Goran Todorovac

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.