* [RFC][PATCH 0/2] Lock and Pointer guards
@ 2023-05-26 15:05 Peter Zijlstra
  2023-05-26 15:05 ` [RFC][PATCH 1/2] locking: Introduce __cleanup__ based guards Peter Zijlstra
                   ` (2 more replies)
  0 siblings, 3 replies; 17+ messages in thread
From: Peter Zijlstra @ 2023-05-26 15:05 UTC (permalink / raw)
  To: torvalds, keescook, gregkh, pbonzini
  Cc: linux-kernel, ojeda, ndesaulniers, peterz, mingo, will, longman,
	boqun.feng, juri.lelli, vincent.guittot, dietmar.eggemann,
	rostedt, bsegall, mgorman, bristot, vschneid, paulmck, frederic,
	quic_neeraju, joel, josh, mathieu.desnoyers, jiangshanlai,
	qiang1.zhang, rcu, tj, tglx

Hi!

Yesterday I was annoyed by the lack of RAII lock guards for entirely spurious
reasons, but since you sometimes gotta do silly things, I spent today creating some.

My initial (failed) attempts tried to combine __cleanup, _Generic and
__auto_type, but the compilers refused. I've also Googled around and found
(among many others) the QEMU and GLib guards. However, I don't like them because
they end up relying on function pointers/indirect calls.

Hence the current pile of CPP hackery... no indirect calls in sight.

I really like how they end up simplifying the code, but perhaps y'all hate them
with a passion?

I'm thinking we'll at least have a good brawl over naming; esp. the void_guard
needs a better name.

Compile and boot tested with x86_64-defconfig.
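
For a flavour of how this simplifies things, here is (roughly) one of the
conversions from patch 2; the before/after shape is representative:

	rcu_read_lock();
	p = find_process_by_pid(pid);
	if (likely(p))
		get_task_struct(p);
	rcu_read_unlock();

becomes:

	void_scope(rcu) {
		p = find_process_by_pid(pid);
		if (likely(p))
			get_task_struct(p);
	}

and the rcu_read_unlock() happens automatically however the block is left.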



* [RFC][PATCH 1/2] locking: Introduce __cleanup__ based guards
  2023-05-26 15:05 [RFC][PATCH 0/2] Lock and Pointer guards Peter Zijlstra
@ 2023-05-26 15:05 ` Peter Zijlstra
  2023-05-26 17:05   ` Kees Cook
                     ` (2 more replies)
  2023-05-26 15:05 ` [RFC][PATCH 2/2] sched: Use fancy new guards Peter Zijlstra
  2023-05-29 11:29 ` [RFC][PATCH 0/2] Lock and Pointer guards Paolo Bonzini
  2 siblings, 3 replies; 17+ messages in thread
From: Peter Zijlstra @ 2023-05-26 15:05 UTC (permalink / raw)
  To: torvalds, keescook, gregkh, pbonzini
  Cc: linux-kernel, ojeda, ndesaulniers, peterz, mingo, will, longman,
	boqun.feng, juri.lelli, vincent.guittot, dietmar.eggemann,
	rostedt, bsegall, mgorman, bristot, vschneid, paulmck, frederic,
	quic_neeraju, joel, josh, mathieu.desnoyers, jiangshanlai,
	qiang1.zhang, rcu, tj, tglx

Use __attribute__((__cleanup__(func))) to build various guards:

 - ptr_guard()
 - void_guard() / void_scope()
 - lock_guard() / lock_scope()
 - double_lock_guard() / double_lock_scope()

Where the _guard thingies are variables with scope-based cleanup and
the _scope thingies are basically do-once for-loops with the same.

The CPP is rather impenetrable -- but I'll attempt to write proper
comments if/when people think this is worth pursuing.

Actual usage is in the next patch.
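
As a rough illustration of the difference, using the mutex guard defined
below ('my_mutex' and 'count' are made-up names):

	/* held from here until 'guard' goes out of scope */
	lock_guard(mutex, guard, &my_mutex);
	count++;

vs.

	/* held for the duration of the block only */
	lock_scope(mutex, &my_mutex) {
		count++;
	}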

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 include/linux/compiler_attributes.h |    2 
 include/linux/irqflags.h            |    7 ++
 include/linux/guards.h              |  118 ++++++++++++++++++++++++++++++++++++
 include/linux/mutex.h               |    5 +
 include/linux/preempt.h             |    4 +
 include/linux/rcupdate.h            |    3 
 include/linux/sched/task.h          |    2 
 include/linux/spinlock.h            |   23 +++++++
 8 files changed, 164 insertions(+)

--- a/include/linux/compiler_attributes.h
+++ b/include/linux/compiler_attributes.h
@@ -366,4 +366,6 @@
  */
 #define __fix_address noinline __noclone
 
+#define __cleanup(func)			__attribute__((__cleanup__(func)))
+
 #endif /* __LINUX_COMPILER_ATTRIBUTES_H */
--- a/include/linux/irqflags.h
+++ b/include/linux/irqflags.h
@@ -13,6 +13,7 @@
 #define _LINUX_TRACE_IRQFLAGS_H
 
 #include <linux/typecheck.h>
+#include <linux/guards.h>
 #include <asm/irqflags.h>
 #include <asm/percpu.h>
 
@@ -267,4 +268,10 @@ extern void warn_bogus_irq_restore(void)
 
 #define irqs_disabled_flags(flags) raw_irqs_disabled_flags(flags)
 
+DEFINE_VOID_GUARD(irq, local_irq_disable(), local_irq_enable())
+DEFINE_VOID_GUARD(irqsave,
+		  local_irq_save(_G->flags),
+		  local_irq_restore(_G->flags),
+		  unsigned long flags;)
+
 #endif
--- /dev/null
+++ b/include/linux/guards.h
@@ -0,0 +1,118 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __LINUX_GUARDS_H
+#define __LINUX_GUARDS_H
+
+#include <linux/compiler_attributes.h>
+
+/* Pointer Guard */
+
+#define DEFINE_PTR_GUARD(_type, _Type, _Put)				\
+typedef _Type *ptr_guard_##_type##_t;					\
+static inline void ptr_guard_##_type##_cleanup(_Type **_ptr)		\
+{									\
+	_Type *_G = *_ptr;						\
+	if (_G)								\
+		_Put(_G);						\
+}
+
+#define ptr_guard(_type, _name)						\
+	ptr_guard_##_type##_t _name __cleanup(ptr_guard_##_type##_cleanup)
+
+
+/* Void Guard */
+
+#define DEFINE_VOID_GUARD(_type, _Lock, _Unlock, ...)				\
+typedef struct {								\
+	__VA_ARGS__								\
+} void_guard_##_type##_t;							\
+										\
+static inline void void_guard_##_type##_cleanup(void *_g)			\
+{										\
+	void_guard_##_type##_t *_G __maybe_unused = _g;				\
+	_Unlock;								\
+}										\
+										\
+static inline void_guard_##_type##_t void_guard_##_type##_init(void)		\
+{										\
+	void_guard_##_type##_t _g = { }, *_G __maybe_unused = &_g;		\
+	_Lock;									\
+	return _g;								\
+}
+
+#define void_guard(_type, _name)						\
+	void_guard_##_type##_t _name __cleanup(void_guard_##_type##_cleanup) =	\
+	void_guard_##_type##_init()
+
+#define void_scope(_type)							\
+	for (struct { void_guard_##_type##_t guard; bool done; } _scope		\
+	     __cleanup(void_guard_##_type##_cleanup) =				\
+	     { .guard = void_guard_##_type##_init() }; !_scope.done;		\
+	     _scope.done = true)
+
+
+/* Lock Guard */
+
+#define DEFINE_LOCK_GUARD(_type, _Type, _Lock, _Unlock, ...)			\
+typedef struct {								\
+	_Type *lock;								\
+	__VA_ARGS__								\
+} lock_guard_##_type##_t;							\
+										\
+static inline void lock_guard_##_type##_cleanup(void *_g)			\
+{										\
+	lock_guard_##_type##_t *_G = _g;					\
+	_Unlock;								\
+}										\
+										\
+static inline lock_guard_##_type##_t lock_guard_##_type##_init(_Type *lock)	\
+{										\
+	lock_guard_##_type##_t _g = { .lock = lock }, *_G = &_g;		\
+	_Lock;									\
+	return _g;								\
+}
+
+#define lock_guard(_type, _name, _ptr)						\
+	lock_guard_##_type##_t _name __cleanup(lock_guard_##_type##_cleanup) =	\
+	lock_guard_##_type##_init(_ptr)
+
+#define lock_scope(_type, _ptr)							\
+	for (struct { lock_guard_##_type##_t guard; bool done; } _scope		\
+	     __cleanup(lock_guard_##_type##_cleanup) =				\
+	     { .guard = lock_guard_##_type##_init(_ptr) }; !_scope.done;	\
+	     _scope.done = true)
+
+
+/* Double Lock Guard */
+
+#define DEFINE_DOUBLE_LOCK_GUARD(_type, _Type, _Lock, _Unlock, ...)		\
+typedef struct {								\
+	_Type *lock;								\
+	_Type *lock2;								\
+	__VA_ARGS__								\
+} double_lock_guard_##_type##_t;						\
+										\
+static inline void double_lock_guard_##_type##_cleanup(void *_g) 		\
+{										\
+	double_lock_guard_##_type##_t *_G = _g;					\
+	_Unlock;								\
+}										\
+										\
+static inline double_lock_guard_##_type##_t double_lock_guard_##_type##_init(_Type *lock, _Type *lock2) \
+{										\
+	double_lock_guard_##_type##_t _g = { .lock = lock, .lock2 = lock2 }, *_G = &_g;\
+	_Lock;									\
+	return _g;								\
+}
+
+#define double_lock_guard(_type, _name, _ptr, _ptr2)				\
+	double_lock_guard_##_type##_t _name __cleanup(double_lock_guard_##_type##_cleanup) = \
+	double_lock_guard_##_type##_init(_ptr, _ptr2)
+
+#define double_lock_scope(_type, _ptr, _ptr2)					\
+	for (struct { double_lock_guard_##_type##_t guard; bool done; } _scope	\
+	     __cleanup(double_lock_guard_##_type##_cleanup) =			\
+	     { .guard = double_lock_guard_##_type##_init(_ptr, _ptr2) };	\
+	     !_scope.done; _scope.done = true)
+
+
+#endif /* __LINUX_GUARDS_H */
--- a/include/linux/mutex.h
+++ b/include/linux/mutex.h
@@ -19,6 +19,7 @@
 #include <asm/processor.h>
 #include <linux/osq_lock.h>
 #include <linux/debug_locks.h>
+#include <linux/guards.h>
 
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 # define __DEP_MAP_MUTEX_INITIALIZER(lockname)			\
@@ -219,4 +220,8 @@ extern void mutex_unlock(struct mutex *l
 
 extern int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock);
 
+DEFINE_LOCK_GUARD(mutex, struct mutex,
+		  mutex_lock(_G->lock),
+		  mutex_unlock(_G->lock))
+
 #endif /* __LINUX_MUTEX_H */
--- a/include/linux/preempt.h
+++ b/include/linux/preempt.h
@@ -8,6 +8,7 @@
  */
 
 #include <linux/linkage.h>
+#include <linux/guards.h>
 #include <linux/list.h>
 
 /*
@@ -463,4 +464,7 @@ static __always_inline void preempt_enab
 		preempt_enable();
 }
 
+DEFINE_VOID_GUARD(preempt, preempt_disable(), preempt_enable())
+DEFINE_VOID_GUARD(migrate, migrate_disable(), migrate_enable())
+
 #endif /* __LINUX_PREEMPT_H */
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -27,6 +27,7 @@
 #include <linux/preempt.h>
 #include <linux/bottom_half.h>
 #include <linux/lockdep.h>
+#include <linux/guards.h>
 #include <asm/processor.h>
 #include <linux/cpumask.h>
 #include <linux/context_tracking_irq.h>
@@ -1095,4 +1096,6 @@ rcu_head_after_call_rcu(struct rcu_head
 extern int rcu_expedited;
 extern int rcu_normal;
 
+DEFINE_VOID_GUARD(rcu, rcu_read_lock(), rcu_read_unlock())
+
 #endif /* __LINUX_RCUPDATE_H */
--- a/include/linux/sched/task.h
+++ b/include/linux/sched/task.h
@@ -126,6 +126,8 @@ static inline void put_task_struct(struc
 		__put_task_struct(t);
 }
 
+DEFINE_PTR_GUARD(put_task, struct task_struct, put_task_struct)
+
 static inline void put_task_struct_many(struct task_struct *t, int nr)
 {
 	if (refcount_sub_and_test(nr, &t->usage))
--- a/include/linux/spinlock.h
+++ b/include/linux/spinlock.h
@@ -61,6 +61,7 @@
 #include <linux/stringify.h>
 #include <linux/bottom_half.h>
 #include <linux/lockdep.h>
+#include <linux/guards.h>
 #include <asm/barrier.h>
 #include <asm/mmiowb.h>
 
@@ -502,5 +503,27 @@ int __alloc_bucket_spinlocks(spinlock_t
 
 void free_bucket_spinlocks(spinlock_t *locks);
 
+DEFINE_LOCK_GUARD(raw, raw_spinlock_t,
+		  raw_spin_lock(_G->lock),
+		  raw_spin_unlock(_G->lock))
+DEFINE_LOCK_GUARD(raw_irq, raw_spinlock_t,
+		  raw_spin_lock_irq(_G->lock),
+		  raw_spin_unlock_irq(_G->lock))
+DEFINE_LOCK_GUARD(raw_irqsave, raw_spinlock_t,
+		  raw_spin_lock_irqsave(_G->lock, _G->flags),
+		  raw_spin_unlock_irqrestore(_G->lock, _G->flags),
+		  unsigned long flags;)
+
+DEFINE_LOCK_GUARD(spin, spinlock_t,
+		  spin_lock(_G->lock),
+		  spin_unlock(_G->lock))
+DEFINE_LOCK_GUARD(spin_irq, spinlock_t,
+		  spin_lock_irq(_G->lock),
+		  spin_unlock_irq(_G->lock))
+DEFINE_LOCK_GUARD(spin_irqsave, spinlock_t,
+		  spin_lock_irqsave(_G->lock, _G->flags),
+		  spin_unlock_irqrestore(_G->lock, _G->flags),
+		  unsigned long flags;)
+
 #undef __LINUX_INSIDE_SPINLOCK_H
 #endif /* __LINUX_SPINLOCK_H */




* [RFC][PATCH 2/2] sched: Use fancy new guards
  2023-05-26 15:05 [RFC][PATCH 0/2] Lock and Pointer guards Peter Zijlstra
  2023-05-26 15:05 ` [RFC][PATCH 1/2] locking: Introduce __cleanup__ based guards Peter Zijlstra
@ 2023-05-26 15:05 ` Peter Zijlstra
  2023-05-26 16:25   ` Greg KH
  2023-05-29 11:29 ` [RFC][PATCH 0/2] Lock and Pointer guards Paolo Bonzini
  2 siblings, 1 reply; 17+ messages in thread
From: Peter Zijlstra @ 2023-05-26 15:05 UTC (permalink / raw)
  To: torvalds, keescook, gregkh, pbonzini
  Cc: linux-kernel, ojeda, ndesaulniers, peterz, mingo, will, longman,
	boqun.feng, juri.lelli, vincent.guittot, dietmar.eggemann,
	rostedt, bsegall, mgorman, bristot, vschneid, paulmck, frederic,
	quic_neeraju, joel, josh, mathieu.desnoyers, jiangshanlai,
	qiang1.zhang, rcu, tj, tglx

Convert kernel/sched/core.c to use the fancy new guards to simplify
the error paths.
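
The bulk of the conversions are of this shape (condition and body elided,
just to show the pattern):

	raw_spin_lock_irqsave(&p->pi_lock, flags);
	if (!condition)
		goto unlock;
	/* ... */
unlock:
	raw_spin_unlock_irqrestore(&p->pi_lock, flags);

becomes:

	lock_scope(raw_irqsave, &p->pi_lock) {
		if (!condition)
			break;
		/* ... */
	}

where the unlock also happens on break/return out of the scope.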

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 kernel/sched/core.c  | 1223 +++++++++++++++++++++++----------------------------
 kernel/sched/sched.h |   39 +
 2 files changed, 595 insertions(+), 667 deletions(-)

--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1097,24 +1097,21 @@ int get_nohz_timer_target(void)
 
 	hk_mask = housekeeping_cpumask(HK_TYPE_TIMER);
 
-	rcu_read_lock();
-	for_each_domain(cpu, sd) {
-		for_each_cpu_and(i, sched_domain_span(sd), hk_mask) {
-			if (cpu == i)
-				continue;
+	void_scope(rcu) {
+		for_each_domain(cpu, sd) {
+			for_each_cpu_and(i, sched_domain_span(sd), hk_mask) {
+				if (cpu == i)
+					continue;
 
-			if (!idle_cpu(i)) {
-				cpu = i;
-				goto unlock;
+				if (!idle_cpu(i))
+					return i;
 			}
 		}
-	}
 
-	if (default_cpu == -1)
-		default_cpu = housekeeping_any_cpu(HK_TYPE_TIMER);
-	cpu = default_cpu;
-unlock:
-	rcu_read_unlock();
+		if (default_cpu == -1)
+			default_cpu = housekeeping_any_cpu(HK_TYPE_TIMER);
+		cpu = default_cpu;
+	}
 	return cpu;
 }
 
@@ -1457,16 +1454,12 @@ static void __uclamp_update_util_min_rt_
 
 static void uclamp_update_util_min_rt_default(struct task_struct *p)
 {
-	struct rq_flags rf;
-	struct rq *rq;
-
 	if (!rt_task(p))
 		return;
 
 	/* Protect updates to p->uclamp_* */
-	rq = task_rq_lock(p, &rf);
-	__uclamp_update_util_min_rt_default(p);
-	task_rq_unlock(rq, p, &rf);
+	lock_scope(task_rq, p)
+		__uclamp_update_util_min_rt_default(p);
 }
 
 static inline struct uclamp_se
@@ -1762,9 +1755,8 @@ static void uclamp_update_root_tg(void)
 	uclamp_se_set(&tg->uclamp_req[UCLAMP_MAX],
 		      sysctl_sched_uclamp_util_max, false);
 
-	rcu_read_lock();
-	cpu_util_update_eff(&root_task_group.css);
-	rcu_read_unlock();
+	void_scope(rcu)
+		cpu_util_update_eff(&root_task_group.css);
 }
 #else
 static void uclamp_update_root_tg(void) { }
@@ -1791,10 +1783,10 @@ static void uclamp_sync_util_min_rt_defa
 	smp_mb__after_spinlock();
 	read_unlock(&tasklist_lock);
 
-	rcu_read_lock();
-	for_each_process_thread(g, p)
-		uclamp_update_util_min_rt_default(p);
-	rcu_read_unlock();
+	void_scope(rcu) {
+		for_each_process_thread(g, p)
+			uclamp_update_util_min_rt_default(p);
+	}
 }
 
 static int sysctl_sched_uclamp_handler(struct ctl_table *table, int write,
@@ -1804,7 +1796,8 @@ static int sysctl_sched_uclamp_handler(s
 	int old_min, old_max, old_min_rt;
 	int result;
 
-	mutex_lock(&uclamp_mutex);
+	lock_guard(mutex, guard, &uclamp_mutex);
+
 	old_min = sysctl_sched_uclamp_util_min;
 	old_max = sysctl_sched_uclamp_util_max;
 	old_min_rt = sysctl_sched_uclamp_util_min_rt_default;
@@ -1813,7 +1806,7 @@ static int sysctl_sched_uclamp_handler(s
 	if (result)
 		goto undo;
 	if (!write)
-		goto done;
+		return result;
 
 	if (sysctl_sched_uclamp_util_min > sysctl_sched_uclamp_util_max ||
 	    sysctl_sched_uclamp_util_max > SCHED_CAPACITY_SCALE	||
@@ -1849,16 +1842,12 @@ static int sysctl_sched_uclamp_handler(s
 	 * Otherwise, keep it simple and do just a lazy update at each next
 	 * task enqueue time.
 	 */
-
-	goto done;
+	return result;
 
 undo:
 	sysctl_sched_uclamp_util_min = old_min;
 	sysctl_sched_uclamp_util_max = old_max;
 	sysctl_sched_uclamp_util_min_rt_default = old_min_rt;
-done:
-	mutex_unlock(&uclamp_mutex);
-
 	return result;
 }
 #endif
@@ -2249,10 +2238,10 @@ void migrate_disable(void)
 		return;
 	}
 
-	preempt_disable();
-	this_rq()->nr_pinned++;
-	p->migration_disabled = 1;
-	preempt_enable();
+	void_scope(preempt) {
+		this_rq()->nr_pinned++;
+		p->migration_disabled = 1;
+	}
 }
 EXPORT_SYMBOL_GPL(migrate_disable);
 
@@ -2276,18 +2265,18 @@ void migrate_enable(void)
 	 * Ensure stop_task runs either before or after this, and that
 	 * __set_cpus_allowed_ptr(SCA_MIGRATE_ENABLE) doesn't schedule().
 	 */
-	preempt_disable();
-	if (p->cpus_ptr != &p->cpus_mask)
-		__set_cpus_allowed_ptr(p, &ac);
-	/*
-	 * Mustn't clear migration_disabled() until cpus_ptr points back at the
-	 * regular cpus_mask, otherwise things that race (eg.
-	 * select_fallback_rq) get confused.
-	 */
-	barrier();
-	p->migration_disabled = 0;
-	this_rq()->nr_pinned--;
-	preempt_enable();
+	void_scope(preempt) {
+		if (p->cpus_ptr != &p->cpus_mask)
+			__set_cpus_allowed_ptr(p, &ac);
+		/*
+		 * Mustn't clear migration_disabled() until cpus_ptr points back at the
+		 * regular cpus_mask, otherwise things that race (eg.
+		 * select_fallback_rq) get confused.
+		 */
+		barrier();
+		p->migration_disabled = 0;
+		this_rq()->nr_pinned--;
+	}
 }
 EXPORT_SYMBOL_GPL(migrate_enable);
 
@@ -3272,31 +3261,28 @@ static int migrate_swap_stop(void *data)
 	src_rq = cpu_rq(arg->src_cpu);
 	dst_rq = cpu_rq(arg->dst_cpu);
 
-	double_raw_lock(&arg->src_task->pi_lock,
-			&arg->dst_task->pi_lock);
-	double_rq_lock(src_rq, dst_rq);
-
-	if (task_cpu(arg->dst_task) != arg->dst_cpu)
-		goto unlock;
+	double_lock_scope(raw,
+			  &arg->src_task->pi_lock,
+			  &arg->dst_task->pi_lock) {
+		double_lock_guard(rq, guard, src_rq, dst_rq);
 
-	if (task_cpu(arg->src_task) != arg->src_cpu)
-		goto unlock;
+		if (task_cpu(arg->dst_task) != arg->dst_cpu)
+			break;
 
-	if (!cpumask_test_cpu(arg->dst_cpu, arg->src_task->cpus_ptr))
-		goto unlock;
+		if (task_cpu(arg->src_task) != arg->src_cpu)
+			break;
 
-	if (!cpumask_test_cpu(arg->src_cpu, arg->dst_task->cpus_ptr))
-		goto unlock;
+		if (!cpumask_test_cpu(arg->dst_cpu, arg->src_task->cpus_ptr))
+			break;
 
-	__migrate_swap_task(arg->src_task, arg->dst_cpu);
-	__migrate_swap_task(arg->dst_task, arg->src_cpu);
+		if (!cpumask_test_cpu(arg->src_cpu, arg->dst_task->cpus_ptr))
+			break;
 
-	ret = 0;
+		__migrate_swap_task(arg->src_task, arg->dst_cpu);
+		__migrate_swap_task(arg->dst_task, arg->src_cpu);
 
-unlock:
-	double_rq_unlock(src_rq, dst_rq);
-	raw_spin_unlock(&arg->dst_task->pi_lock);
-	raw_spin_unlock(&arg->src_task->pi_lock);
+		ret = 0;
+	}
 
 	return ret;
 }
@@ -3464,13 +3450,12 @@ unsigned long wait_task_inactive(struct
  */
 void kick_process(struct task_struct *p)
 {
+	void_guard(preempt, guard);
 	int cpu;
 
-	preempt_disable();
 	cpu = task_cpu(p);
 	if ((cpu != smp_processor_id()) && task_curr(p))
 		smp_send_reschedule(cpu);
-	preempt_enable();
 }
 EXPORT_SYMBOL_GPL(kick_process);
 
@@ -3678,17 +3663,16 @@ ttwu_stat(struct task_struct *p, int cpu
 		__schedstat_inc(rq->ttwu_local);
 		__schedstat_inc(p->stats.nr_wakeups_local);
 	} else {
+		void_guard(rcu, guard);
 		struct sched_domain *sd;
 
 		__schedstat_inc(p->stats.nr_wakeups_remote);
-		rcu_read_lock();
 		for_each_domain(rq->cpu, sd) {
 			if (cpumask_test_cpu(cpu, sched_domain_span(sd))) {
 				__schedstat_inc(sd->ttwu_wake_remote);
 				break;
 			}
 		}
-		rcu_read_unlock();
 	}
 
 	if (wake_flags & WF_MIGRATED)
@@ -3887,21 +3871,13 @@ static void __ttwu_queue_wakelist(struct
 void wake_up_if_idle(int cpu)
 {
 	struct rq *rq = cpu_rq(cpu);
-	struct rq_flags rf;
-
-	rcu_read_lock();
 
-	if (!is_idle_task(rcu_dereference(rq->curr)))
-		goto out;
-
-	rq_lock_irqsave(rq, &rf);
-	if (is_idle_task(rq->curr))
-		resched_curr(rq);
-	/* Else CPU is not idle, do nothing here: */
-	rq_unlock_irqrestore(rq, &rf);
-
-out:
-	rcu_read_unlock();
+	void_guard(rcu, rg);
+	if (is_idle_task(rcu_dereference(rq->curr))) {
+		lock_guard(rq, rq_guard, rq);
+		if (is_idle_task(rq->curr))
+			resched_curr(rq);
+	}
 }
 
 bool cpus_share_cache(int this_cpu, int that_cpu)
@@ -4158,10 +4134,9 @@ bool ttwu_state_match(struct task_struct
 static int
 try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
 {
-	unsigned long flags;
+	void_guard(preempt, guard);
 	int cpu, success = 0;
 
-	preempt_disable();
 	if (p == current) {
 		/*
 		 * We're waking current, this means 'p->on_rq' and 'task_cpu(p)
@@ -4188,129 +4163,127 @@ try_to_wake_up(struct task_struct *p, un
 	 * reordered with p->state check below. This pairs with smp_store_mb()
 	 * in set_current_state() that the waiting thread does.
 	 */
-	raw_spin_lock_irqsave(&p->pi_lock, flags);
-	smp_mb__after_spinlock();
-	if (!ttwu_state_match(p, state, &success))
-		goto unlock;
+	lock_scope(raw_irqsave, &p->pi_lock) {
+		smp_mb__after_spinlock();
+		if (!ttwu_state_match(p, state, &success))
+			break;
 
-	trace_sched_waking(p);
+		trace_sched_waking(p);
 
-	/*
-	 * Ensure we load p->on_rq _after_ p->state, otherwise it would
-	 * be possible to, falsely, observe p->on_rq == 0 and get stuck
-	 * in smp_cond_load_acquire() below.
-	 *
-	 * sched_ttwu_pending()			try_to_wake_up()
-	 *   STORE p->on_rq = 1			  LOAD p->state
-	 *   UNLOCK rq->lock
-	 *
-	 * __schedule() (switch to task 'p')
-	 *   LOCK rq->lock			  smp_rmb();
-	 *   smp_mb__after_spinlock();
-	 *   UNLOCK rq->lock
-	 *
-	 * [task p]
-	 *   STORE p->state = UNINTERRUPTIBLE	  LOAD p->on_rq
-	 *
-	 * Pairs with the LOCK+smp_mb__after_spinlock() on rq->lock in
-	 * __schedule().  See the comment for smp_mb__after_spinlock().
-	 *
-	 * A similar smb_rmb() lives in try_invoke_on_locked_down_task().
-	 */
-	smp_rmb();
-	if (READ_ONCE(p->on_rq) && ttwu_runnable(p, wake_flags))
-		goto unlock;
+		/*
+		 * Ensure we load p->on_rq _after_ p->state, otherwise it would
+		 * be possible to, falsely, observe p->on_rq == 0 and get stuck
+		 * in smp_cond_load_acquire() below.
+		 *
+		 * sched_ttwu_pending()			try_to_wake_up()
+		 *   STORE p->on_rq = 1			  LOAD p->state
+		 *   UNLOCK rq->lock
+		 *
+		 * __schedule() (switch to task 'p')
+		 *   LOCK rq->lock			  smp_rmb();
+		 *   smp_mb__after_spinlock();
+		 *   UNLOCK rq->lock
+		 *
+		 * [task p]
+		 *   STORE p->state = UNINTERRUPTIBLE	  LOAD p->on_rq
+		 *
+		 * Pairs with the LOCK+smp_mb__after_spinlock() on rq->lock in
+		 * __schedule().  See the comment for smp_mb__after_spinlock().
+		 *
+		 * A similar smb_rmb() lives in try_invoke_on_locked_down_task().
+		 */
+		smp_rmb();
+		if (READ_ONCE(p->on_rq) && ttwu_runnable(p, wake_flags))
+			break;
 
 #ifdef CONFIG_SMP
-	/*
-	 * Ensure we load p->on_cpu _after_ p->on_rq, otherwise it would be
-	 * possible to, falsely, observe p->on_cpu == 0.
-	 *
-	 * One must be running (->on_cpu == 1) in order to remove oneself
-	 * from the runqueue.
-	 *
-	 * __schedule() (switch to task 'p')	try_to_wake_up()
-	 *   STORE p->on_cpu = 1		  LOAD p->on_rq
-	 *   UNLOCK rq->lock
-	 *
-	 * __schedule() (put 'p' to sleep)
-	 *   LOCK rq->lock			  smp_rmb();
-	 *   smp_mb__after_spinlock();
-	 *   STORE p->on_rq = 0			  LOAD p->on_cpu
-	 *
-	 * Pairs with the LOCK+smp_mb__after_spinlock() on rq->lock in
-	 * __schedule().  See the comment for smp_mb__after_spinlock().
-	 *
-	 * Form a control-dep-acquire with p->on_rq == 0 above, to ensure
-	 * schedule()'s deactivate_task() has 'happened' and p will no longer
-	 * care about it's own p->state. See the comment in __schedule().
-	 */
-	smp_acquire__after_ctrl_dep();
+		/*
+		 * Ensure we load p->on_cpu _after_ p->on_rq, otherwise it would be
+		 * possible to, falsely, observe p->on_cpu == 0.
+		 *
+		 * One must be running (->on_cpu == 1) in order to remove oneself
+		 * from the runqueue.
+		 *
+		 * __schedule() (switch to task 'p')	try_to_wake_up()
+		 *   STORE p->on_cpu = 1		  LOAD p->on_rq
+		 *   UNLOCK rq->lock
+		 *
+		 * __schedule() (put 'p' to sleep)
+		 *   LOCK rq->lock			  smp_rmb();
+		 *   smp_mb__after_spinlock();
+		 *   STORE p->on_rq = 0			  LOAD p->on_cpu
+		 *
+		 * Pairs with the LOCK+smp_mb__after_spinlock() on rq->lock in
+		 * __schedule().  See the comment for smp_mb__after_spinlock().
+		 *
+		 * Form a control-dep-acquire with p->on_rq == 0 above, to ensure
+		 * schedule()'s deactivate_task() has 'happened' and p will no longer
+		 * care about it's own p->state. See the comment in __schedule().
+		 */
+		smp_acquire__after_ctrl_dep();
 
-	/*
-	 * We're doing the wakeup (@success == 1), they did a dequeue (p->on_rq
-	 * == 0), which means we need to do an enqueue, change p->state to
-	 * TASK_WAKING such that we can unlock p->pi_lock before doing the
-	 * enqueue, such as ttwu_queue_wakelist().
-	 */
-	WRITE_ONCE(p->__state, TASK_WAKING);
+		/*
+		 * We're doing the wakeup (@success == 1), they did a dequeue (p->on_rq
+		 * == 0), which means we need to do an enqueue, change p->state to
+		 * TASK_WAKING such that we can unlock p->pi_lock before doing the
+		 * enqueue, such as ttwu_queue_wakelist().
+		 */
+		WRITE_ONCE(p->__state, TASK_WAKING);
 
-	/*
-	 * If the owning (remote) CPU is still in the middle of schedule() with
-	 * this task as prev, considering queueing p on the remote CPUs wake_list
-	 * which potentially sends an IPI instead of spinning on p->on_cpu to
-	 * let the waker make forward progress. This is safe because IRQs are
-	 * disabled and the IPI will deliver after on_cpu is cleared.
-	 *
-	 * Ensure we load task_cpu(p) after p->on_cpu:
-	 *
-	 * set_task_cpu(p, cpu);
-	 *   STORE p->cpu = @cpu
-	 * __schedule() (switch to task 'p')
-	 *   LOCK rq->lock
-	 *   smp_mb__after_spin_lock()		smp_cond_load_acquire(&p->on_cpu)
-	 *   STORE p->on_cpu = 1		LOAD p->cpu
-	 *
-	 * to ensure we observe the correct CPU on which the task is currently
-	 * scheduling.
-	 */
-	if (smp_load_acquire(&p->on_cpu) &&
-	    ttwu_queue_wakelist(p, task_cpu(p), wake_flags))
-		goto unlock;
+		/*
+		 * If the owning (remote) CPU is still in the middle of schedule() with
+		 * this task as prev, considering queueing p on the remote CPUs wake_list
+		 * which potentially sends an IPI instead of spinning on p->on_cpu to
+		 * let the waker make forward progress. This is safe because IRQs are
+		 * disabled and the IPI will deliver after on_cpu is cleared.
+		 *
+		 * Ensure we load task_cpu(p) after p->on_cpu:
+		 *
+		 * set_task_cpu(p, cpu);
+		 *   STORE p->cpu = @cpu
+		 * __schedule() (switch to task 'p')
+		 *   LOCK rq->lock
+		 *   smp_mb__after_spin_lock()		smp_cond_load_acquire(&p->on_cpu)
+		 *   STORE p->on_cpu = 1		LOAD p->cpu
+		 *
+		 * to ensure we observe the correct CPU on which the task is currently
+		 * scheduling.
+		 */
+		if (smp_load_acquire(&p->on_cpu) &&
+		    ttwu_queue_wakelist(p, task_cpu(p), wake_flags))
+			break;
 
-	/*
-	 * If the owning (remote) CPU is still in the middle of schedule() with
-	 * this task as prev, wait until it's done referencing the task.
-	 *
-	 * Pairs with the smp_store_release() in finish_task().
-	 *
-	 * This ensures that tasks getting woken will be fully ordered against
-	 * their previous state and preserve Program Order.
-	 */
-	smp_cond_load_acquire(&p->on_cpu, !VAL);
+		/*
+		 * If the owning (remote) CPU is still in the middle of schedule() with
+		 * this task as prev, wait until it's done referencing the task.
+		 *
+		 * Pairs with the smp_store_release() in finish_task().
+		 *
+		 * This ensures that tasks getting woken will be fully ordered against
+		 * their previous state and preserve Program Order.
+		 */
+		smp_cond_load_acquire(&p->on_cpu, !VAL);
 
-	cpu = select_task_rq(p, p->wake_cpu, wake_flags | WF_TTWU);
-	if (task_cpu(p) != cpu) {
-		if (p->in_iowait) {
-			delayacct_blkio_end(p);
-			atomic_dec(&task_rq(p)->nr_iowait);
-		}
+		cpu = select_task_rq(p, p->wake_cpu, wake_flags | WF_TTWU);
+		if (task_cpu(p) != cpu) {
+			if (p->in_iowait) {
+				delayacct_blkio_end(p);
+				atomic_dec(&task_rq(p)->nr_iowait);
+			}
 
-		wake_flags |= WF_MIGRATED;
-		psi_ttwu_dequeue(p);
-		set_task_cpu(p, cpu);
-	}
+			wake_flags |= WF_MIGRATED;
+			psi_ttwu_dequeue(p);
+			set_task_cpu(p, cpu);
+		}
 #else
-	cpu = task_cpu(p);
+		cpu = task_cpu(p);
 #endif /* CONFIG_SMP */
 
-	ttwu_queue(p, cpu, wake_flags);
-unlock:
-	raw_spin_unlock_irqrestore(&p->pi_lock, flags);
+		ttwu_queue(p, cpu, wake_flags);
+	}
 out:
 	if (success)
 		ttwu_stat(p, task_cpu(p), wake_flags);
-	preempt_enable();
 
 	return success;
 }
@@ -5458,23 +5431,20 @@ unsigned int nr_iowait(void)
 void sched_exec(void)
 {
 	struct task_struct *p = current;
-	unsigned long flags;
+	struct migration_arg arg;
 	int dest_cpu;
 
-	raw_spin_lock_irqsave(&p->pi_lock, flags);
-	dest_cpu = p->sched_class->select_task_rq(p, task_cpu(p), WF_EXEC);
-	if (dest_cpu == smp_processor_id())
-		goto unlock;
+	lock_scope(raw_irqsave, &p->pi_lock) {
+		dest_cpu = p->sched_class->select_task_rq(p, task_cpu(p), WF_EXEC);
+		if (dest_cpu == smp_processor_id())
+			return;
 
-	if (likely(cpu_active(dest_cpu))) {
-		struct migration_arg arg = { p, dest_cpu };
+		if (unlikely(!cpu_active(dest_cpu)))
+			return;
 
-		raw_spin_unlock_irqrestore(&p->pi_lock, flags);
-		stop_one_cpu(task_cpu(p), migration_cpu_stop, &arg);
-		return;
+		arg = (struct migration_arg){ p, dest_cpu };
 	}
-unlock:
-	raw_spin_unlock_irqrestore(&p->pi_lock, flags);
+	stop_one_cpu(task_cpu(p), migration_cpu_stop, &arg);
 }
 
 #endif
@@ -5682,7 +5652,6 @@ static void sched_tick_remote(struct wor
 	int cpu = twork->cpu;
 	struct rq *rq = cpu_rq(cpu);
 	struct task_struct *curr;
-	struct rq_flags rf;
 	u64 delta;
 	int os;
 
@@ -5693,30 +5662,27 @@ static void sched_tick_remote(struct wor
 	 * statistics and checks timeslices in a time-independent way, regardless
 	 * of when exactly it is running.
 	 */
-	if (!tick_nohz_tick_stopped_cpu(cpu))
-		goto out_requeue;
+	if (tick_nohz_tick_stopped_cpu(cpu)) {
+		lock_guard(rq_irq, rq_guard, rq);
 
-	rq_lock_irq(rq, &rf);
-	curr = rq->curr;
-	if (cpu_is_offline(cpu))
-		goto out_unlock;
+		curr = rq->curr;
+		if (cpu_is_offline(cpu))
+			break;
 
-	update_rq_clock(rq);
+		update_rq_clock(rq);
 
-	if (!is_idle_task(curr)) {
-		/*
-		 * Make sure the next tick runs within a reasonable
-		 * amount of time.
-		 */
-		delta = rq_clock_task(rq) - curr->se.exec_start;
-		WARN_ON_ONCE(delta > (u64)NSEC_PER_SEC * 3);
-	}
-	curr->sched_class->task_tick(rq, curr, 0);
+		if (!is_idle_task(curr)) {
+			/*
+			 * Make sure the next tick runs within a reasonable
+			 * amount of time.
+			 */
+			delta = rq_clock_task(rq) - curr->se.exec_start;
+			WARN_ON_ONCE(delta > (u64)NSEC_PER_SEC * 3);
+		}
+		curr->sched_class->task_tick(rq, curr, 0);
 
-	calc_load_nohz_remote(rq);
-out_unlock:
-	rq_unlock_irq(rq, &rf);
-out_requeue:
+		calc_load_nohz_remote(rq);
+	}
 
 	/*
 	 * Run the remote tick once per second (1Hz). This arbitrary
@@ -6265,53 +6231,51 @@ static bool try_steal_cookie(int this, i
 	unsigned long cookie;
 	bool success = false;
 
-	local_irq_disable();
-	double_rq_lock(dst, src);
+	void_scope(irq) {
+		double_lock_guard(rq, guard, dst, src);
 
-	cookie = dst->core->core_cookie;
-	if (!cookie)
-		goto unlock;
+		cookie = dst->core->core_cookie;
+		if (!cookie)
+			break;
 
-	if (dst->curr != dst->idle)
-		goto unlock;
+		if (dst->curr != dst->idle)
+			break;
 
-	p = sched_core_find(src, cookie);
-	if (!p)
-		goto unlock;
+		p = sched_core_find(src, cookie);
+		if (!p)
+			break;
 
-	do {
-		if (p == src->core_pick || p == src->curr)
-			goto next;
+		do {
+			if (p == src->core_pick || p == src->curr)
+				goto next;
 
-		if (!is_cpu_allowed(p, this))
-			goto next;
+			if (!is_cpu_allowed(p, this))
+				goto next;
 
-		if (p->core_occupation > dst->idle->core_occupation)
-			goto next;
-		/*
-		 * sched_core_find() and sched_core_next() will ensure that task @p
-		 * is not throttled now, we also need to check whether the runqueue
-		 * of the destination CPU is being throttled.
-		 */
-		if (sched_task_is_throttled(p, this))
-			goto next;
+			if (p->core_occupation > dst->idle->core_occupation)
+				goto next;
+			/*
+			 * sched_core_find() and sched_core_next() will ensure
+			 * that task @p is not throttled now, we also need to
+			 * check whether the runqueue of the destination CPU is
+			 * being throttled.
+			 */
+			if (sched_task_is_throttled(p, this))
+				goto next;
 
-		deactivate_task(src, p, 0);
-		set_task_cpu(p, this);
-		activate_task(dst, p, 0);
+			deactivate_task(src, p, 0);
+			set_task_cpu(p, this);
+			activate_task(dst, p, 0);
 
-		resched_curr(dst);
+			resched_curr(dst);
 
-		success = true;
-		break;
+			success = true;
+			break;
 
 next:
-		p = sched_core_next(p, cookie);
-	} while (p);
-
-unlock:
-	double_rq_unlock(dst, src);
-	local_irq_enable();
+			p = sched_core_next(p, cookie);
+		} while (p);
+	}
 
 	return success;
 }
@@ -6339,8 +6303,9 @@ static void sched_core_balance(struct rq
 	struct sched_domain *sd;
 	int cpu = cpu_of(rq);
 
-	preempt_disable();
-	rcu_read_lock();
+	void_guard(preempt, pg);
+	void_guard(rcu, rg);
+
 	raw_spin_rq_unlock_irq(rq);
 	for_each_domain(cpu, sd) {
 		if (need_resched())
@@ -6350,8 +6315,6 @@ static void sched_core_balance(struct rq
 			break;
 	}
 	raw_spin_rq_lock_irq(rq);
-	rcu_read_unlock();
-	preempt_enable();
 }
 
 static DEFINE_PER_CPU(struct balance_callback, core_balance_head);
@@ -6370,20 +6333,24 @@ static void queue_core_balance(struct rq
 	queue_balance_callback(rq, &per_cpu(core_balance_head, rq->cpu), sched_core_balance);
 }
 
+DEFINE_LOCK_GUARD(core, int,
+		  sched_core_lock(*_G->lock, &_G->flags),
+		  sched_core_unlock(*_G->lock, &_G->flags),
+		  unsigned long flags;)
+
 static void sched_core_cpu_starting(unsigned int cpu)
 {
 	const struct cpumask *smt_mask = cpu_smt_mask(cpu);
 	struct rq *rq = cpu_rq(cpu), *core_rq = NULL;
-	unsigned long flags;
 	int t;
 
-	sched_core_lock(cpu, &flags);
+	lock_guard(core, guard, &cpu);
 
 	WARN_ON_ONCE(rq->core != rq);
 
 	/* if we're the first, we'll be our own leader */
 	if (cpumask_weight(smt_mask) == 1)
-		goto unlock;
+		return;
 
 	/* find the leader */
 	for_each_cpu(t, smt_mask) {
@@ -6397,7 +6364,7 @@ static void sched_core_cpu_starting(unsi
 	}
 
 	if (WARN_ON_ONCE(!core_rq)) /* whoopsie */
-		goto unlock;
+		return;
 
 	/* install and validate core_rq */
 	for_each_cpu(t, smt_mask) {
@@ -6408,29 +6375,25 @@ static void sched_core_cpu_starting(unsi
 
 		WARN_ON_ONCE(rq->core != core_rq);
 	}
-
-unlock:
-	sched_core_unlock(cpu, &flags);
 }
 
 static void sched_core_cpu_deactivate(unsigned int cpu)
 {
 	const struct cpumask *smt_mask = cpu_smt_mask(cpu);
 	struct rq *rq = cpu_rq(cpu), *core_rq = NULL;
-	unsigned long flags;
 	int t;
 
-	sched_core_lock(cpu, &flags);
+	lock_guard(core, guard, &cpu);
 
 	/* if we're the last man standing, nothing to do */
 	if (cpumask_weight(smt_mask) == 1) {
 		WARN_ON_ONCE(rq->core != rq);
-		goto unlock;
+		return;
 	}
 
 	/* if we're not the leader, nothing to do */
 	if (rq->core != rq)
-		goto unlock;
+		return;
 
 	/* find a new leader */
 	for_each_cpu(t, smt_mask) {
@@ -6441,7 +6404,7 @@ static void sched_core_cpu_deactivate(un
 	}
 
 	if (WARN_ON_ONCE(!core_rq)) /* impossible */
-		goto unlock;
+		return;
 
 	/* copy the shared state to the new leader */
 	core_rq->core_task_seq             = rq->core_task_seq;
@@ -6463,9 +6426,6 @@ static void sched_core_cpu_deactivate(un
 		rq = cpu_rq(t);
 		rq->core = core_rq;
 	}
-
-unlock:
-	sched_core_unlock(cpu, &flags);
 }
 
 static inline void sched_core_cpu_dying(unsigned int cpu)
@@ -7162,8 +7122,6 @@ void set_user_nice(struct task_struct *p
 {
 	bool queued, running;
 	int old_prio;
-	struct rq_flags rf;
-	struct rq *rq;
 
 	if (task_nice(p) == nice || nice < MIN_NICE || nice > MAX_NICE)
 		return;
@@ -7171,44 +7129,45 @@ void set_user_nice(struct task_struct *p
 	 * We have to be careful, if called from sys_setpriority(),
 	 * the task might be in the middle of scheduling on another CPU.
 	 */
-	rq = task_rq_lock(p, &rf);
-	update_rq_clock(rq);
+	lock_scope(task_rq, p) {
+		struct rq *rq = _scope.guard.rq;
 
-	/*
-	 * The RT priorities are set via sched_setscheduler(), but we still
-	 * allow the 'normal' nice value to be set - but as expected
-	 * it won't have any effect on scheduling until the task is
-	 * SCHED_DEADLINE, SCHED_FIFO or SCHED_RR:
-	 */
-	if (task_has_dl_policy(p) || task_has_rt_policy(p)) {
-		p->static_prio = NICE_TO_PRIO(nice);
-		goto out_unlock;
-	}
-	queued = task_on_rq_queued(p);
-	running = task_current(rq, p);
-	if (queued)
-		dequeue_task(rq, p, DEQUEUE_SAVE | DEQUEUE_NOCLOCK);
-	if (running)
-		put_prev_task(rq, p);
+		update_rq_clock(rq);
 
-	p->static_prio = NICE_TO_PRIO(nice);
-	set_load_weight(p, true);
-	old_prio = p->prio;
-	p->prio = effective_prio(p);
+		/*
+		 * The RT priorities are set via sched_setscheduler(), but we still
+		 * allow the 'normal' nice value to be set - but as expected
+		 * it won't have any effect on scheduling until the task is
+		 * SCHED_DEADLINE, SCHED_FIFO or SCHED_RR:
+		 */
+		if (task_has_dl_policy(p) || task_has_rt_policy(p)) {
+			p->static_prio = NICE_TO_PRIO(nice);
+			return;
+		}
 
-	if (queued)
-		enqueue_task(rq, p, ENQUEUE_RESTORE | ENQUEUE_NOCLOCK);
-	if (running)
-		set_next_task(rq, p);
+		queued = task_on_rq_queued(p);
+		running = task_current(rq, p);
+		if (queued)
+			dequeue_task(rq, p, DEQUEUE_SAVE | DEQUEUE_NOCLOCK);
+		if (running)
+			put_prev_task(rq, p);
 
-	/*
-	 * If the task increased its priority or is running and
-	 * lowered its priority, then reschedule its CPU:
-	 */
-	p->sched_class->prio_changed(rq, p, old_prio);
+		p->static_prio = NICE_TO_PRIO(nice);
+		set_load_weight(p, true);
+		old_prio = p->prio;
+		p->prio = effective_prio(p);
 
-out_unlock:
-	task_rq_unlock(rq, p, &rf);
+		if (queued)
+			enqueue_task(rq, p, ENQUEUE_RESTORE | ENQUEUE_NOCLOCK);
+		if (running)
+			set_next_task(rq, p);
+
+		/*
+		 * If the task increased its priority or is running and
+		 * lowered its priority, then reschedule its CPU:
+		 */
+		p->sched_class->prio_changed(rq, p, old_prio);
+	}
 }
 EXPORT_SYMBOL(set_user_nice);
 
@@ -7468,6 +7427,19 @@ static struct task_struct *find_process_
 	return pid ? find_task_by_vpid(pid) : current;
 }
 
+static struct task_struct *find_get_task(pid_t pid)
+{
+	struct task_struct *p;
+
+	void_scope(rcu) {
+		p = find_process_by_pid(pid);
+		if (likely(p))
+			get_task_struct(p);
+	}
+
+	return p;
+}
+
 /*
  * sched_setparam() passes in -1 for its policy, to let the functions
  * it calls know not to change it.
@@ -7505,14 +7477,11 @@ static void __setscheduler_params(struct
 static bool check_same_owner(struct task_struct *p)
 {
 	const struct cred *cred = current_cred(), *pcred;
-	bool match;
+	void_guard(rcu, guard);
 
-	rcu_read_lock();
 	pcred = __task_cred(p);
-	match = (uid_eq(cred->euid, pcred->euid) ||
-		 uid_eq(cred->euid, pcred->uid));
-	rcu_read_unlock();
-	return match;
+	return (uid_eq(cred->euid, pcred->euid) ||
+		uid_eq(cred->euid, pcred->uid));
 }
 
 /*
@@ -7915,28 +7884,19 @@ EXPORT_SYMBOL_GPL(sched_set_normal);
 static int
 do_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param)
 {
+	ptr_guard(put_task, p) = NULL;
 	struct sched_param lparam;
-	struct task_struct *p;
-	int retval;
 
 	if (!param || pid < 0)
 		return -EINVAL;
 	if (copy_from_user(&lparam, param, sizeof(struct sched_param)))
 		return -EFAULT;
 
-	rcu_read_lock();
-	retval = -ESRCH;
-	p = find_process_by_pid(pid);
-	if (likely(p))
-		get_task_struct(p);
-	rcu_read_unlock();
-
-	if (likely(p)) {
-		retval = sched_setscheduler(p, policy, &lparam);
-		put_task_struct(p);
-	}
+	p = find_get_task(pid);
+	if (!p)
+		return -ESRCH;
 
-	return retval;
+	return sched_setscheduler(p, policy, &lparam);
 }
 
 /*
@@ -8031,8 +7991,8 @@ SYSCALL_DEFINE2(sched_setparam, pid_t, p
 SYSCALL_DEFINE3(sched_setattr, pid_t, pid, struct sched_attr __user *, uattr,
 			       unsigned int, flags)
 {
+	ptr_guard(put_task, p) = NULL;
 	struct sched_attr attr;
-	struct task_struct *p;
 	int retval;
 
 	if (!uattr || pid < 0 || flags)
@@ -8047,21 +8007,14 @@ SYSCALL_DEFINE3(sched_setattr, pid_t, pi
 	if (attr.sched_flags & SCHED_FLAG_KEEP_POLICY)
 		attr.sched_policy = SETPARAM_POLICY;
 
-	rcu_read_lock();
-	retval = -ESRCH;
-	p = find_process_by_pid(pid);
-	if (likely(p))
-		get_task_struct(p);
-	rcu_read_unlock();
+	p = find_get_task(pid);
+	if (!p)
+		return -ESRCH;
 
-	if (likely(p)) {
-		if (attr.sched_flags & SCHED_FLAG_KEEP_PARAMS)
-			get_params(p, &attr);
-		retval = sched_setattr(p, &attr);
-		put_task_struct(p);
-	}
+	if (attr.sched_flags & SCHED_FLAG_KEEP_PARAMS)
+		get_params(p, &attr);
 
-	return retval;
+	return sched_setattr(p, &attr);
 }
 
 /**
@@ -8079,16 +8032,19 @@ SYSCALL_DEFINE1(sched_getscheduler, pid_
 	if (pid < 0)
 		return -EINVAL;
 
-	retval = -ESRCH;
-	rcu_read_lock();
-	p = find_process_by_pid(pid);
-	if (p) {
+	void_scope(rcu) {
+		p = find_process_by_pid(pid);
+		if (!p)
+			return -ESRCH;
+
 		retval = security_task_getscheduler(p);
-		if (!retval)
-			retval = p->policy
-				| (p->sched_reset_on_fork ? SCHED_RESET_ON_FORK : 0);
+		if (!retval) {
+			retval = p->policy;
+			if (p->sched_reset_on_fork)
+				retval |= SCHED_RESET_ON_FORK;
+		}
 	}
-	rcu_read_unlock();
+
 	return retval;
 }
 
@@ -8109,30 +8065,23 @@ SYSCALL_DEFINE2(sched_getparam, pid_t, p
 	if (!param || pid < 0)
 		return -EINVAL;
 
-	rcu_read_lock();
-	p = find_process_by_pid(pid);
-	retval = -ESRCH;
-	if (!p)
-		goto out_unlock;
+	void_scope(rcu) {
+		p = find_process_by_pid(pid);
+		if (!p)
+			return -ESRCH;
 
-	retval = security_task_getscheduler(p);
-	if (retval)
-		goto out_unlock;
+		retval = security_task_getscheduler(p);
+		if (retval)
+			return retval;
 
-	if (task_has_rt_policy(p))
-		lp.sched_priority = p->rt_priority;
-	rcu_read_unlock();
+		if (task_has_rt_policy(p))
+			lp.sched_priority = p->rt_priority;
+	}
 
 	/*
 	 * This one might sleep, we cannot do it with a spinlock held ...
 	 */
-	retval = copy_to_user(param, &lp, sizeof(*param)) ? -EFAULT : 0;
-
-	return retval;
-
-out_unlock:
-	rcu_read_unlock();
-	return retval;
+	return copy_to_user(param, &lp, sizeof(*param)) ? -EFAULT : 0;
 }
 
 /*
@@ -8192,39 +8141,33 @@ SYSCALL_DEFINE4(sched_getattr, pid_t, pi
 	    usize < SCHED_ATTR_SIZE_VER0 || flags)
 		return -EINVAL;
 
-	rcu_read_lock();
-	p = find_process_by_pid(pid);
-	retval = -ESRCH;
-	if (!p)
-		goto out_unlock;
+	void_scope(rcu) {
+		p = find_process_by_pid(pid);
+		if (!p)
+			return -ESRCH;
 
-	retval = security_task_getscheduler(p);
-	if (retval)
-		goto out_unlock;
+		retval = security_task_getscheduler(p);
+		if (retval)
+			return retval;
 
-	kattr.sched_policy = p->policy;
-	if (p->sched_reset_on_fork)
-		kattr.sched_flags |= SCHED_FLAG_RESET_ON_FORK;
-	get_params(p, &kattr);
-	kattr.sched_flags &= SCHED_FLAG_ALL;
+		kattr.sched_policy = p->policy;
+		if (p->sched_reset_on_fork)
+			kattr.sched_flags |= SCHED_FLAG_RESET_ON_FORK;
+		get_params(p, &kattr);
+		kattr.sched_flags &= SCHED_FLAG_ALL;
 
 #ifdef CONFIG_UCLAMP_TASK
-	/*
-	 * This could race with another potential updater, but this is fine
-	 * because it'll correctly read the old or the new value. We don't need
-	 * to guarantee who wins the race as long as it doesn't return garbage.
-	 */
-	kattr.sched_util_min = p->uclamp_req[UCLAMP_MIN].value;
-	kattr.sched_util_max = p->uclamp_req[UCLAMP_MAX].value;
+		/*
+		 * This could race with another potential updater, but this is fine
+		 * because it'll correctly read the old or the new value. We don't need
+		 * to guarantee who wins the race as long as it doesn't return garbage.
+		 */
+		kattr.sched_util_min = p->uclamp_req[UCLAMP_MIN].value;
+		kattr.sched_util_max = p->uclamp_req[UCLAMP_MAX].value;
 #endif
-
-	rcu_read_unlock();
+	}
 
 	return sched_attr_copy_to_user(uattr, &kattr, usize);
-
-out_unlock:
-	rcu_read_unlock();
-	return retval;
 }
 
 #ifdef CONFIG_SMP
@@ -8245,10 +8188,10 @@ int dl_task_check_affinity(struct task_s
 	 * tasks allowed to run on all the CPUs in the task's
 	 * root_domain.
 	 */
-	rcu_read_lock();
-	if (!cpumask_subset(task_rq(p)->rd->span, mask))
-		ret = -EBUSY;
-	rcu_read_unlock();
+	void_scope(rcu) {
+		if (!cpumask_subset(task_rq(p)->rd->span, mask))
+			ret = -EBUSY;
+	}
 	return ret;
 }
 #endif
@@ -8317,41 +8260,27 @@ __sched_setaffinity(struct task_struct *
 
 long sched_setaffinity(pid_t pid, const struct cpumask *in_mask)
 {
+	ptr_guard(put_task, p) = NULL;
 	struct affinity_context ac;
 	struct cpumask *user_mask;
-	struct task_struct *p;
 	int retval;
 
-	rcu_read_lock();
-
-	p = find_process_by_pid(pid);
-	if (!p) {
-		rcu_read_unlock();
+	p = find_get_task(pid);
+	if (!p)
 		return -ESRCH;
-	}
-
-	/* Prevent p going away */
-	get_task_struct(p);
-	rcu_read_unlock();
 
-	if (p->flags & PF_NO_SETAFFINITY) {
-		retval = -EINVAL;
-		goto out_put_task;
-	}
+	if (p->flags & PF_NO_SETAFFINITY)
+		return -EINVAL;
 
 	if (!check_same_owner(p)) {
-		rcu_read_lock();
-		if (!ns_capable(__task_cred(p)->user_ns, CAP_SYS_NICE)) {
-			rcu_read_unlock();
-			retval = -EPERM;
-			goto out_put_task;
-		}
-		rcu_read_unlock();
+		void_guard(rcu, rg);
+		if (!ns_capable(__task_cred(p)->user_ns, CAP_SYS_NICE))
+			return -EPERM;
 	}
 
 	retval = security_task_setscheduler(p);
 	if (retval)
-		goto out_put_task;
+		return retval;
 
 	/*
 	 * With non-SMP configs, user_cpus_ptr/user_mask isn't used and
@@ -8361,8 +8290,7 @@ long sched_setaffinity(pid_t pid, const
 	if (user_mask) {
 		cpumask_copy(user_mask, in_mask);
 	} else if (IS_ENABLED(CONFIG_SMP)) {
-		retval = -ENOMEM;
-		goto out_put_task;
+		return -ENOMEM;
 	}
 
 	ac = (struct affinity_context){
@@ -8374,8 +8302,6 @@ long sched_setaffinity(pid_t pid, const
 	retval = __sched_setaffinity(p, &ac);
 	kfree(ac.user_mask);
 
-out_put_task:
-	put_task_struct(p);
 	return retval;
 }
 
@@ -8417,28 +8343,22 @@ SYSCALL_DEFINE3(sched_setaffinity, pid_t
 long sched_getaffinity(pid_t pid, struct cpumask *mask)
 {
 	struct task_struct *p;
-	unsigned long flags;
 	int retval;
 
-	rcu_read_lock();
+	void_scope(rcu) {
+		p = find_process_by_pid(pid);
+		if (!p)
+			return -ESRCH;
 
-	retval = -ESRCH;
-	p = find_process_by_pid(pid);
-	if (!p)
-		goto out_unlock;
-
-	retval = security_task_getscheduler(p);
-	if (retval)
-		goto out_unlock;
-
-	raw_spin_lock_irqsave(&p->pi_lock, flags);
-	cpumask_and(mask, &p->cpus_mask, cpu_active_mask);
-	raw_spin_unlock_irqrestore(&p->pi_lock, flags);
+		retval = security_task_getscheduler(p);
+		if (retval)
+			return retval;
 
-out_unlock:
-	rcu_read_unlock();
+		lock_scope(raw_irqsave, &p->pi_lock)
+			cpumask_and(mask, &p->cpus_mask, cpu_active_mask);
+	}
 
-	return retval;
+	return 0;
 }
 
 /**
@@ -8885,55 +8805,47 @@ int __sched yield_to(struct task_struct
 {
 	struct task_struct *curr = current;
 	struct rq *rq, *p_rq;
-	unsigned long flags;
 	int yielded = 0;
 
-	local_irq_save(flags);
-	rq = this_rq();
+	void_scope(irqsave) {
+		rq = this_rq();
 
 again:
-	p_rq = task_rq(p);
-	/*
-	 * If we're the only runnable task on the rq and target rq also
-	 * has only one task, there's absolutely no point in yielding.
-	 */
-	if (rq->nr_running == 1 && p_rq->nr_running == 1) {
-		yielded = -ESRCH;
-		goto out_irq;
-	}
+		p_rq = task_rq(p);
+		/*
+		 * If we're the only runnable task on the rq and target rq also
+		 * has only one task, there's absolutely no point in yielding.
+		 */
+		if (rq->nr_running == 1 && p_rq->nr_running == 1)
+			return -ESRCH;
 
-	double_rq_lock(rq, p_rq);
-	if (task_rq(p) != p_rq) {
-		double_rq_unlock(rq, p_rq);
-		goto again;
-	}
+		double_lock_scope(rq, rq, p_rq) {
+			if (task_rq(p) != p_rq)
+				goto again;
 
-	if (!curr->sched_class->yield_to_task)
-		goto out_unlock;
+			if (!curr->sched_class->yield_to_task)
+				return 0;
 
-	if (curr->sched_class != p->sched_class)
-		goto out_unlock;
+			if (curr->sched_class != p->sched_class)
+				return 0;
 
-	if (task_on_cpu(p_rq, p) || !task_is_running(p))
-		goto out_unlock;
+			if (task_on_cpu(p_rq, p) || !task_is_running(p))
+				return 0;
 
-	yielded = curr->sched_class->yield_to_task(rq, p);
-	if (yielded) {
-		schedstat_inc(rq->yld_count);
-		/*
-		 * Make p's CPU reschedule; pick_next_entity takes care of
-		 * fairness.
-		 */
-		if (preempt && rq != p_rq)
-			resched_curr(p_rq);
+			yielded = curr->sched_class->yield_to_task(rq, p);
+			if (yielded) {
+				schedstat_inc(rq->yld_count);
+				/*
+				 * Make p's CPU reschedule; pick_next_entity
+				 * takes care of fairness.
+				 */
+				if (preempt && rq != p_rq)
+					resched_curr(p_rq);
+			}
+		}
 	}
 
-out_unlock:
-	double_rq_unlock(rq, p_rq);
-out_irq:
-	local_irq_restore(flags);
-
-	if (yielded > 0)
+	if (yielded)
 		schedule();
 
 	return yielded;
@@ -9036,38 +8948,30 @@ SYSCALL_DEFINE1(sched_get_priority_min,
 
 static int sched_rr_get_interval(pid_t pid, struct timespec64 *t)
 {
-	struct task_struct *p;
-	unsigned int time_slice;
-	struct rq_flags rf;
-	struct rq *rq;
+	unsigned int time_slice = 0;
 	int retval;
 
 	if (pid < 0)
 		return -EINVAL;
 
-	retval = -ESRCH;
-	rcu_read_lock();
-	p = find_process_by_pid(pid);
-	if (!p)
-		goto out_unlock;
+	void_scope(rcu) {
+		struct task_struct *p = find_process_by_pid(pid);
+		if (!p)
+			return -ESRCH;
 
-	retval = security_task_getscheduler(p);
-	if (retval)
-		goto out_unlock;
+		retval = security_task_getscheduler(p);
+		if (retval)
+			return retval;
 
-	rq = task_rq_lock(p, &rf);
-	time_slice = 0;
-	if (p->sched_class->get_rr_interval)
-		time_slice = p->sched_class->get_rr_interval(rq, p);
-	task_rq_unlock(rq, p, &rf);
+		lock_scope(task_rq, p) {
+			struct rq *rq = _scope.guard.rq;
+			if (p->sched_class->get_rr_interval)
+				time_slice = p->sched_class->get_rr_interval(rq, p);
+		}
+	}
 
-	rcu_read_unlock();
 	jiffies_to_timespec64(time_slice, t);
 	return 0;
-
-out_unlock:
-	rcu_read_unlock();
-	return retval;
 }
 
 /**
@@ -9300,10 +9204,8 @@ int task_can_attach(struct task_struct *
 	 * success of set_cpus_allowed_ptr() on all attached tasks
 	 * before cpus_mask may be changed.
 	 */
-	if (p->flags & PF_NO_SETAFFINITY) {
-		ret = -EINVAL;
-		goto out;
-	}
+	if (p->flags & PF_NO_SETAFFINITY)
+		return -EINVAL;
 
 	if (dl_task(p) && !cpumask_intersects(task_rq(p)->rd->span,
 					      cs_effective_cpus)) {
@@ -9314,7 +9216,6 @@ int task_can_attach(struct task_struct *
 		ret = dl_cpu_busy(cpu, p);
 	}
 
-out:
 	return ret;
 }
 
@@ -10464,17 +10365,18 @@ void sched_move_task(struct task_struct
 	int queued, running, queue_flags =
 		DEQUEUE_SAVE | DEQUEUE_MOVE | DEQUEUE_NOCLOCK;
 	struct task_group *group;
-	struct rq_flags rf;
 	struct rq *rq;
 
-	rq = task_rq_lock(tsk, &rf);
+	lock_guard(task_rq, guard, tsk);
+	rq = guard.rq;
+
 	/*
 	 * Esp. with SCHED_AUTOGROUP enabled it is possible to get superfluous
 	 * group changes.
 	 */
 	group = sched_get_task_group(tsk);
 	if (group == tsk->sched_task_group)
-		goto unlock;
+		return;
 
 	update_rq_clock(rq);
 
@@ -10499,9 +10401,6 @@ void sched_move_task(struct task_struct
 		 */
 		resched_curr(rq);
 	}
-
-unlock:
-	task_rq_unlock(rq, tsk, &rf);
 }
 
 static inline struct task_group *css_tg(struct cgroup_subsys_state *css)
@@ -10538,11 +10437,10 @@ static int cpu_cgroup_css_online(struct
 
 #ifdef CONFIG_UCLAMP_TASK_GROUP
 	/* Propagate the effective uclamp value for the new group */
-	mutex_lock(&uclamp_mutex);
-	rcu_read_lock();
-	cpu_util_update_eff(css);
-	rcu_read_unlock();
-	mutex_unlock(&uclamp_mutex);
+	lock_scope(mutex, &uclamp_mutex) {
+		void_guard(rcu, guard);
+		cpu_util_update_eff(css);
+	}
 #endif
 
 	return 0;
@@ -10693,24 +10591,22 @@ static ssize_t cpu_uclamp_write(struct k
 
 	static_branch_enable(&sched_uclamp_used);
 
-	mutex_lock(&uclamp_mutex);
-	rcu_read_lock();
-
-	tg = css_tg(of_css(of));
-	if (tg->uclamp_req[clamp_id].value != req.util)
-		uclamp_se_set(&tg->uclamp_req[clamp_id], req.util, false);
+	lock_scope(mutex, &uclamp_mutex) {
+		void_guard(rcu, guard);
 
-	/*
-	 * Because of not recoverable conversion rounding we keep track of the
-	 * exact requested value
-	 */
-	tg->uclamp_pct[clamp_id] = req.percent;
+		tg = css_tg(of_css(of));
+		if (tg->uclamp_req[clamp_id].value != req.util)
+			uclamp_se_set(&tg->uclamp_req[clamp_id], req.util, false);
 
-	/* Update effective clamps to track the most restrictive value */
-	cpu_util_update_eff(of_css(of));
+		/*
+		 * Because of not recoverable conversion rounding we keep track of the
+		 * exact requested value
+		 */
+		tg->uclamp_pct[clamp_id] = req.percent;
 
-	rcu_read_unlock();
-	mutex_unlock(&uclamp_mutex);
+		/* Update effective clamps to track the most restrictive value */
+		cpu_util_update_eff(of_css(of));
+	}
 
 	return nbytes;
 }
@@ -10737,10 +10633,10 @@ static inline void cpu_uclamp_print(stru
 	u64 percent;
 	u32 rem;
 
-	rcu_read_lock();
-	tg = css_tg(seq_css(sf));
-	util_clamp = tg->uclamp_req[clamp_id].value;
-	rcu_read_unlock();
+	void_scope(rcu) {
+		tg = css_tg(seq_css(sf));
+		util_clamp = tg->uclamp_req[clamp_id].value;
+	}
 
 	if (util_clamp == SCHED_CAPACITY_SCALE) {
 		seq_puts(sf, "max\n");
@@ -10792,6 +10688,8 @@ static const u64 max_cfs_runtime = MAX_B
 
 static int __cfs_schedulable(struct task_group *tg, u64 period, u64 runtime);
 
+DEFINE_VOID_GUARD(cpus_read, cpus_read_lock(), cpus_read_unlock())
+
 static int tg_set_cfs_bandwidth(struct task_group *tg, u64 period, u64 quota,
 				u64 burst)
 {
@@ -10831,53 +10729,54 @@ static int tg_set_cfs_bandwidth(struct t
 	 * Prevent race between setting of cfs_rq->runtime_enabled and
 	 * unthrottle_offline_cfs_rqs().
 	 */
-	cpus_read_lock();
-	mutex_lock(&cfs_constraints_mutex);
-	ret = __cfs_schedulable(tg, period, quota);
-	if (ret)
-		goto out_unlock;
+	void_scope(cpus_read) {
+		lock_guard(mutex, guard, &cfs_constraints_mutex);
 
-	runtime_enabled = quota != RUNTIME_INF;
-	runtime_was_enabled = cfs_b->quota != RUNTIME_INF;
-	/*
-	 * If we need to toggle cfs_bandwidth_used, off->on must occur
-	 * before making related changes, and on->off must occur afterwards
-	 */
-	if (runtime_enabled && !runtime_was_enabled)
-		cfs_bandwidth_usage_inc();
-	raw_spin_lock_irq(&cfs_b->lock);
-	cfs_b->period = ns_to_ktime(period);
-	cfs_b->quota = quota;
-	cfs_b->burst = burst;
-
-	__refill_cfs_bandwidth_runtime(cfs_b);
-
-	/* Restart the period timer (if active) to handle new period expiry: */
-	if (runtime_enabled)
-		start_cfs_bandwidth(cfs_b);
-
-	raw_spin_unlock_irq(&cfs_b->lock);
-
-	for_each_online_cpu(i) {
-		struct cfs_rq *cfs_rq = tg->cfs_rq[i];
-		struct rq *rq = cfs_rq->rq;
-		struct rq_flags rf;
-
-		rq_lock_irq(rq, &rf);
-		cfs_rq->runtime_enabled = runtime_enabled;
-		cfs_rq->runtime_remaining = 0;
-
-		if (cfs_rq->throttled)
-			unthrottle_cfs_rq(cfs_rq);
-		rq_unlock_irq(rq, &rf);
+		ret = __cfs_schedulable(tg, period, quota);
+		if (ret)
+			return ret;
+
+		runtime_enabled = quota != RUNTIME_INF;
+		runtime_was_enabled = cfs_b->quota != RUNTIME_INF;
+		/*
+		 * If we need to toggle cfs_bandwidth_used, off->on must occur
+		 * before making related changes, and on->off must occur afterwards
+		 */
+		if (runtime_enabled && !runtime_was_enabled)
+			cfs_bandwidth_usage_inc();
+
+		lock_scope(raw_irq, &cfs_b->lock) {
+			cfs_b->period = ns_to_ktime(period);
+			cfs_b->quota = quota;
+			cfs_b->burst = burst;
+
+			__refill_cfs_bandwidth_runtime(cfs_b);
+
+			/*
+			 * Restart the period timer (if active) to handle new
+			 * period expiry:
+			 */
+			if (runtime_enabled)
+				start_cfs_bandwidth(cfs_b);
+		}
+
+		for_each_online_cpu(i) {
+			struct cfs_rq *cfs_rq = tg->cfs_rq[i];
+			struct rq *rq = cfs_rq->rq;
+
+			lock_scope(rq_irq, rq) {
+				cfs_rq->runtime_enabled = runtime_enabled;
+				cfs_rq->runtime_remaining = 0;
+
+				if (cfs_rq->throttled)
+					unthrottle_cfs_rq(cfs_rq);
+			}
+		}
+		if (runtime_was_enabled && !runtime_enabled)
+			cfs_bandwidth_usage_dec();
 	}
-	if (runtime_was_enabled && !runtime_enabled)
-		cfs_bandwidth_usage_dec();
-out_unlock:
-	mutex_unlock(&cfs_constraints_mutex);
-	cpus_read_unlock();
 
-	return ret;
+	return 0;
 }
 
 static int tg_set_cfs_quota(struct task_group *tg, long cfs_quota_us)
@@ -11069,9 +10968,8 @@ static int __cfs_schedulable(struct task
 		do_div(data.quota, NSEC_PER_USEC);
 	}
 
-	rcu_read_lock();
-	ret = walk_tg_tree(tg_cfs_schedulable_down, tg_nop, &data);
-	rcu_read_unlock();
+	void_scope(rcu)
+		ret = walk_tg_tree(tg_cfs_schedulable_down, tg_nop, &data);
 
 	return ret;
 }
@@ -11634,14 +11532,13 @@ int __sched_mm_cid_migrate_from_fetch_ci
 	 * are not the last task to be migrated from this cpu for this mm, so
 	 * there is no need to move src_cid to the destination cpu.
 	 */
-	rcu_read_lock();
-	src_task = rcu_dereference(src_rq->curr);
-	if (READ_ONCE(src_task->mm_cid_active) && src_task->mm == mm) {
-		rcu_read_unlock();
-		t->last_mm_cid = -1;
-		return -1;
+	void_scope(rcu) {
+		src_task = rcu_dereference(src_rq->curr);
+		if (READ_ONCE(src_task->mm_cid_active) && src_task->mm == mm) {
+			t->last_mm_cid = -1;
+			return -1;
+		}
 	}
-	rcu_read_unlock();
 
 	return src_cid;
 }
@@ -11685,18 +11582,17 @@ int __sched_mm_cid_migrate_from_try_stea
 	 * the lazy-put flag, this task will be responsible for transitioning
 	 * from lazy-put flag set to MM_CID_UNSET.
 	 */
-	rcu_read_lock();
-	src_task = rcu_dereference(src_rq->curr);
-	if (READ_ONCE(src_task->mm_cid_active) && src_task->mm == mm) {
-		rcu_read_unlock();
-		/*
-		 * We observed an active task for this mm, there is therefore
-		 * no point in moving this cid to the destination cpu.
-		 */
-		t->last_mm_cid = -1;
-		return -1;
+	void_scope(rcu) {
+		src_task = rcu_dereference(src_rq->curr);
+		if (READ_ONCE(src_task->mm_cid_active) && src_task->mm == mm) {
+			/*
+			 * We observed an active task for this mm, there is therefore
+			 * no point in moving this cid to the destination cpu.
+			 */
+			t->last_mm_cid = -1;
+			return -1;
+		}
 	}
-	rcu_read_unlock();
 
 	/*
 	 * The src_cid is unused, so it can be unset.
@@ -11769,7 +11665,6 @@ static void sched_mm_cid_remote_clear(st
 {
 	struct rq *rq = cpu_rq(cpu);
 	struct task_struct *t;
-	unsigned long flags;
 	int cid, lazy_cid;
 
 	cid = READ_ONCE(pcpu_cid->cid);
@@ -11804,23 +11699,21 @@ static void sched_mm_cid_remote_clear(st
 	 * the lazy-put flag, that task will be responsible for transitioning
 	 * from lazy-put flag set to MM_CID_UNSET.
 	 */
-	rcu_read_lock();
-	t = rcu_dereference(rq->curr);
-	if (READ_ONCE(t->mm_cid_active) && t->mm == mm) {
-		rcu_read_unlock();
-		return;
+	void_scope(rcu) {
+		t = rcu_dereference(rq->curr);
+		if (READ_ONCE(t->mm_cid_active) && t->mm == mm)
+			return;
 	}
-	rcu_read_unlock();
 
 	/*
 	 * The cid is unused, so it can be unset.
 	 * Disable interrupts to keep the window of cid ownership without rq
 	 * lock small.
 	 */
-	local_irq_save(flags);
-	if (try_cmpxchg(&pcpu_cid->cid, &lazy_cid, MM_CID_UNSET))
-		__mm_cid_put(mm, cid);
-	local_irq_restore(flags);
+	void_scope(irqsave) {
+		if (try_cmpxchg(&pcpu_cid->cid, &lazy_cid, MM_CID_UNSET))
+			__mm_cid_put(mm, cid);
+	}
 }
 
 static void sched_mm_cid_remote_clear_old(struct mm_struct *mm, int cpu)
@@ -11842,14 +11735,13 @@ static void sched_mm_cid_remote_clear_ol
 	 * snapshot associated with this cid if an active task using the mm is
 	 * observed on this rq.
 	 */
-	rcu_read_lock();
-	curr = rcu_dereference(rq->curr);
-	if (READ_ONCE(curr->mm_cid_active) && curr->mm == mm) {
-		WRITE_ONCE(pcpu_cid->time, rq_clock);
-		rcu_read_unlock();
-		return;
+	void_scope(rcu) {
+		curr = rcu_dereference(rq->curr);
+		if (READ_ONCE(curr->mm_cid_active) && curr->mm == mm) {
+			WRITE_ONCE(pcpu_cid->time, rq_clock);
+			return;
+		}
 	}
-	rcu_read_unlock();
 
 	if (rq_clock < pcpu_cid->time + SCHED_MM_CID_PERIOD_NS)
 		return;
@@ -11943,7 +11835,6 @@ void task_tick_mm_cid(struct rq *rq, str
 void sched_mm_cid_exit_signals(struct task_struct *t)
 {
 	struct mm_struct *mm = t->mm;
-	struct rq_flags rf;
 	struct rq *rq;
 
 	if (!mm)
@@ -11951,23 +11842,22 @@ void sched_mm_cid_exit_signals(struct ta
 
 	preempt_disable();
 	rq = this_rq();
-	rq_lock_irqsave(rq, &rf);
-	preempt_enable_no_resched();	/* holding spinlock */
-	WRITE_ONCE(t->mm_cid_active, 0);
-	/*
-	 * Store t->mm_cid_active before loading per-mm/cpu cid.
-	 * Matches barrier in sched_mm_cid_remote_clear_old().
-	 */
-	smp_mb();
-	mm_cid_put(mm);
-	t->last_mm_cid = t->mm_cid = -1;
-	rq_unlock_irqrestore(rq, &rf);
+	lock_scope(rq_irqsave, rq) {
+		preempt_enable_no_resched();	/* holding spinlock */
+		WRITE_ONCE(t->mm_cid_active, 0);
+		/*
+		 * Store t->mm_cid_active before loading per-mm/cpu cid.
+		 * Matches barrier in sched_mm_cid_remote_clear_old().
+		 */
+		smp_mb();
+		mm_cid_put(mm);
+		t->last_mm_cid = t->mm_cid = -1;
+	}
 }
 
 void sched_mm_cid_before_execve(struct task_struct *t)
 {
 	struct mm_struct *mm = t->mm;
-	struct rq_flags rf;
 	struct rq *rq;
 
 	if (!mm)
@@ -11975,23 +11865,22 @@ void sched_mm_cid_before_execve(struct t
 
 	preempt_disable();
 	rq = this_rq();
-	rq_lock_irqsave(rq, &rf);
-	preempt_enable_no_resched();	/* holding spinlock */
-	WRITE_ONCE(t->mm_cid_active, 0);
-	/*
-	 * Store t->mm_cid_active before loading per-mm/cpu cid.
-	 * Matches barrier in sched_mm_cid_remote_clear_old().
-	 */
-	smp_mb();
-	mm_cid_put(mm);
-	t->last_mm_cid = t->mm_cid = -1;
-	rq_unlock_irqrestore(rq, &rf);
+	lock_scope(rq_irqsave, rq) {
+		preempt_enable_no_resched();	/* holding spinlock */
+		WRITE_ONCE(t->mm_cid_active, 0);
+		/*
+		 * Store t->mm_cid_active before loading per-mm/cpu cid.
+		 * Matches barrier in sched_mm_cid_remote_clear_old().
+		 */
+		smp_mb();
+		mm_cid_put(mm);
+		t->last_mm_cid = t->mm_cid = -1;
+	}
 }
 
 void sched_mm_cid_after_execve(struct task_struct *t)
 {
 	struct mm_struct *mm = t->mm;
-	struct rq_flags rf;
 	struct rq *rq;
 
 	if (!mm)
@@ -11999,16 +11888,16 @@ void sched_mm_cid_after_execve(struct ta
 
 	preempt_disable();
 	rq = this_rq();
-	rq_lock_irqsave(rq, &rf);
-	preempt_enable_no_resched();	/* holding spinlock */
-	WRITE_ONCE(t->mm_cid_active, 1);
-	/*
-	 * Store t->mm_cid_active before loading per-mm/cpu cid.
-	 * Matches barrier in sched_mm_cid_remote_clear_old().
-	 */
-	smp_mb();
-	t->last_mm_cid = t->mm_cid = mm_cid_get(rq, mm);
-	rq_unlock_irqrestore(rq, &rf);
+	lock_scope(rq_irqsave, rq) {
+		preempt_enable_no_resched();	/* holding spinlock */
+		WRITE_ONCE(t->mm_cid_active, 1);
+		/*
+		 * Store t->mm_cid_active before loading per-mm/cpu cid.
+		 * Matches barrier in sched_mm_cid_remote_clear_old().
+		 */
+		smp_mb();
+		t->last_mm_cid = t->mm_cid = mm_cid_get(rq, mm);
+	}
 	rseq_set_notify_resume(t);
 }
 
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1397,6 +1397,10 @@ do {						\
 	flags = _raw_spin_rq_lock_irqsave(rq);	\
 } while (0)
 
+DEFINE_LOCK_GUARD(raw_rq, struct rq,
+		  raw_spin_rq_lock(_G->lock),
+		  raw_spin_rq_unlock(_G->lock))
+
 #ifdef CONFIG_SCHED_SMT
 extern void __update_idle_core(struct rq *rq);
 
@@ -1630,6 +1634,12 @@ task_rq_unlock(struct rq *rq, struct tas
 	raw_spin_unlock_irqrestore(&p->pi_lock, rf->flags);
 }
 
+DEFINE_LOCK_GUARD(task_rq, struct task_struct,
+		  _G->rq = task_rq_lock(_G->lock, &_G->rf),
+		  task_rq_unlock(_G->rq, _G->lock, &_G->rf),
+		  struct rq *rq;
+		  struct rq_flags rf;)
+
 static inline void
 rq_lock_irqsave(struct rq *rq, struct rq_flags *rf)
 	__acquires(rq->lock)
@@ -1678,6 +1688,21 @@ rq_unlock(struct rq *rq, struct rq_flags
 	raw_spin_rq_unlock(rq);
 }
 
+DEFINE_LOCK_GUARD(rq, struct rq,
+		  rq_lock(_G->lock, &_G->rf),
+		  rq_unlock(_G->lock, &_G->rf),
+		  struct rq_flags rf;)
+
+DEFINE_LOCK_GUARD(rq_irq, struct rq,
+		  rq_lock_irq(_G->lock, &_G->rf),
+		  rq_unlock_irq(_G->lock, &_G->rf),
+		  struct rq_flags rf;)
+
+DEFINE_LOCK_GUARD(rq_irqsave, struct rq,
+		  rq_lock_irqsave(_G->lock, &_G->rf),
+		  rq_unlock_irqrestore(_G->lock, &_G->rf),
+		  struct rq_flags rf;)
+
 static inline struct rq *
 this_rq_lock_irq(struct rq_flags *rf)
 	__acquires(rq->lock)
@@ -2701,6 +2726,16 @@ static inline void double_raw_lock(raw_s
 	raw_spin_lock_nested(l2, SINGLE_DEPTH_NESTING);
 }
 
+static inline void double_raw_unlock(raw_spinlock_t *l1, raw_spinlock_t *l2)
+{
+	raw_spin_unlock(l1);
+	raw_spin_unlock(l2);
+}
+
+DEFINE_DOUBLE_LOCK_GUARD(raw, raw_spinlock_t,
+			 double_raw_lock(_G->lock, _G->lock2),
+			 double_raw_unlock(_G->lock, _G->lock2))
+
 /*
  * double_rq_unlock - safely unlock two runqueues
  *
@@ -2758,6 +2793,10 @@ static inline void double_rq_unlock(stru
 
 #endif
 
+DEFINE_DOUBLE_LOCK_GUARD(rq, struct rq,
+			 double_rq_lock(_G->lock, _G->lock2),
+			 double_rq_unlock(_G->lock, _G->lock2))
+
 extern struct sched_entity *__pick_first_entity(struct cfs_rq *cfs_rq);
 extern struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq);
 



^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [RFC][PATCH 2/2] sched: Use fancy new guards
  2023-05-26 15:05 ` [RFC][PATCH 2/2] sched: Use fancy new guards Peter Zijlstra
@ 2023-05-26 16:25   ` Greg KH
  2023-05-26 16:27     ` Mathieu Desnoyers
  2023-05-26 16:41     ` Peter Zijlstra
  0 siblings, 2 replies; 17+ messages in thread
From: Greg KH @ 2023-05-26 16:25 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: torvalds, keescook, pbonzini, linux-kernel, ojeda, ndesaulniers,
	mingo, will, longman, boqun.feng, juri.lelli, vincent.guittot,
	dietmar.eggemann, rostedt, bsegall, mgorman, bristot, vschneid,
	paulmck, frederic, quic_neeraju, joel, josh, mathieu.desnoyers,
	jiangshanlai, qiang1.zhang, rcu, tj, tglx

On Fri, May 26, 2023 at 05:05:51PM +0200, Peter Zijlstra wrote:
> Convert kernel/sched/core.c to use the fancy new guards to simplify
> the error paths.

That's slightly crazy...

I like the idea, but is this really correct:


> 
> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
> ---
>  kernel/sched/core.c  | 1223 +++++++++++++++++++++++----------------------------
>  kernel/sched/sched.h |   39 +
>  2 files changed, 595 insertions(+), 667 deletions(-)
> 
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -1097,24 +1097,21 @@ int get_nohz_timer_target(void)
>  
>  	hk_mask = housekeeping_cpumask(HK_TYPE_TIMER);
>  
> -	rcu_read_lock();
> -	for_each_domain(cpu, sd) {
> -		for_each_cpu_and(i, sched_domain_span(sd), hk_mask) {
> -			if (cpu == i)
> -				continue;
> +	void_scope(rcu) {
> +		for_each_domain(cpu, sd) {
> +			for_each_cpu_and(i, sched_domain_span(sd), hk_mask) {
> +				if (cpu == i)
> +					continue;
>  
> -			if (!idle_cpu(i)) {
> -				cpu = i;
> -				goto unlock;
> +				if (!idle_cpu(i))
> +					return i;

You can call return from within a "scope" and it will clean up properly?

I tried to read the cpp "mess" but couldn't figure out how to validate
this at all -- do you have a set of tests for this somewhere?

Anyway, the naming is whack, but I don't have a proposed better name,
except you might want to put "scope_" as the prefix not the suffix, but
then that might look odd too, so who knows.

But again, the idea is good, it might save us lots of "you forgot to
clean this up on the error path" mess that we are getting constant churn
for these days...

thanks,

greg k-h

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [RFC][PATCH 2/2] sched: Use fancy new guards
  2023-05-26 16:25   ` Greg KH
@ 2023-05-26 16:27     ` Mathieu Desnoyers
  2023-05-26 16:43       ` Peter Zijlstra
  2023-05-26 17:08       ` Kees Cook
  2023-05-26 16:41     ` Peter Zijlstra
  1 sibling, 2 replies; 17+ messages in thread
From: Mathieu Desnoyers @ 2023-05-26 16:27 UTC (permalink / raw)
  To: Greg KH, Peter Zijlstra
  Cc: torvalds, keescook, pbonzini, linux-kernel, ojeda, ndesaulniers,
	mingo, will, longman, boqun.feng, juri.lelli, vincent.guittot,
	dietmar.eggemann, rostedt, bsegall, mgorman, bristot, vschneid,
	paulmck, frederic, quic_neeraju, joel, josh, jiangshanlai,
	qiang1.zhang, rcu, tj, tglx

On 5/26/23 12:25, Greg KH wrote:
> On Fri, May 26, 2023 at 05:05:51PM +0200, Peter Zijlstra wrote:
>> Convert kernel/sched/core.c to use the fancy new guards to simplify
>> the error paths.
> 
> That's slightly crazy...
> 
> I like the idea, but is this really correct:
> 
> 
>>
>> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
>> ---
>>   kernel/sched/core.c  | 1223 +++++++++++++++++++++++----------------------------
>>   kernel/sched/sched.h |   39 +
>>   2 files changed, 595 insertions(+), 667 deletions(-)
>>
>> --- a/kernel/sched/core.c
>> +++ b/kernel/sched/core.c
>> @@ -1097,24 +1097,21 @@ int get_nohz_timer_target(void)
>>   
>>   	hk_mask = housekeeping_cpumask(HK_TYPE_TIMER);
>>   
>> -	rcu_read_lock();
>> -	for_each_domain(cpu, sd) {
>> -		for_each_cpu_and(i, sched_domain_span(sd), hk_mask) {
>> -			if (cpu == i)
>> -				continue;
>> +	void_scope(rcu) {
>> +		for_each_domain(cpu, sd) {
>> +			for_each_cpu_and(i, sched_domain_span(sd), hk_mask) {
>> +				if (cpu == i)
>> +					continue;
>>   
>> -			if (!idle_cpu(i)) {
>> -				cpu = i;
>> -				goto unlock;
>> +				if (!idle_cpu(i))
>> +					return i;
> 
> You can call return from within a "scope" and it will clean up properly?
> 
> I tried to read the cpp "mess" but couldn't figure out how to validate
> this at all, have a set of tests for this somewhere?
> 
> Anyway, the naming is whack, but I don't have a proposed better name,
> except you might want to put "scope_" as the prefix not the suffix, but
> then that might look odd to, so who knows.

FWIW C++ has std::scoped_lock. So perhaps using a similar wording may help?

Thanks,

Mathieu

> 
> But again, the idea is good, it might save us lots of "you forgot to
> clean this up on the error path" mess that we are getting constant churn
> for these days...
> 
> thanks,
> 
> greg k-h

-- 
Mathieu Desnoyers
EfficiOS Inc.
https://www.efficios.com


^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [RFC][PATCH 2/2] sched: Use fancy new guards
  2023-05-26 16:25   ` Greg KH
  2023-05-26 16:27     ` Mathieu Desnoyers
@ 2023-05-26 16:41     ` Peter Zijlstra
  1 sibling, 0 replies; 17+ messages in thread
From: Peter Zijlstra @ 2023-05-26 16:41 UTC (permalink / raw)
  To: Greg KH
  Cc: torvalds, keescook, pbonzini, linux-kernel, ojeda, ndesaulniers,
	mingo, will, longman, boqun.feng, juri.lelli, vincent.guittot,
	dietmar.eggemann, rostedt, bsegall, mgorman, bristot, vschneid,
	paulmck, frederic, quic_neeraju, joel, josh, mathieu.desnoyers,
	jiangshanlai, qiang1.zhang, rcu, tj, tglx

On Fri, May 26, 2023 at 05:25:58PM +0100, Greg KH wrote:
> On Fri, May 26, 2023 at 05:05:51PM +0200, Peter Zijlstra wrote:
> > Convert kernel/sched/core.c to use the fancy new guards to simplify
> > the error paths.
> 
> That's slightly crazy...
> 
> I like the idea, but is this really correct:
> 
> 
> > 
> > Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
> > ---
> >  kernel/sched/core.c  | 1223 +++++++++++++++++++++++----------------------------
> >  kernel/sched/sched.h |   39 +
> >  2 files changed, 595 insertions(+), 667 deletions(-)
> > 
> > --- a/kernel/sched/core.c
> > +++ b/kernel/sched/core.c
> > @@ -1097,24 +1097,21 @@ int get_nohz_timer_target(void)
> >  
> >  	hk_mask = housekeeping_cpumask(HK_TYPE_TIMER);
> >  
> > -	rcu_read_lock();
> > -	for_each_domain(cpu, sd) {
> > -		for_each_cpu_and(i, sched_domain_span(sd), hk_mask) {
> > -			if (cpu == i)
> > -				continue;
> > +	void_scope(rcu) {
> > +		for_each_domain(cpu, sd) {
> > +			for_each_cpu_and(i, sched_domain_span(sd), hk_mask) {
> > +				if (cpu == i)
> > +					continue;
> >  
> > -			if (!idle_cpu(i)) {
> > -				cpu = i;
> > -				goto unlock;
> > +				if (!idle_cpu(i))
> > +					return i;
> 
> You can call return from within a "scope" and it will clean up properly?

Yep, that's the main feature here.

> I tried to read the cpp "mess" but couldn't figure out how to validate
> this at all, have a set of tests for this somewhere?

I have it in userspace with printf, but yeah, I'll go make a selftest
somewhere.
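
Something like this (a minimal userspace sketch only, with made-up names --
not the actual selftest), just to show that an early return still runs the
__cleanup__ handler:

#include <stdio.h>

struct scope { int guard; int done; };

static void scope_cleanup(struct scope *s)
{
	printf("cleanup ran, guard=%d\n", s->guard);
}

static int lookup(int key)
{
	for (struct scope _scope __attribute__((__cleanup__(scope_cleanup))) =
	     { .guard = key }; !_scope.done; _scope.done = 1) {
		if (key > 3)
			return -1;	/* early return: cleanup still runs */
		printf("in scope, key=%d\n", key);
	}
	return 0;
}

int main(void)
{
	lookup(1);	/* prints "in scope", then the cleanup message */
	lookup(5);	/* returns early, cleanup message still printed */
	return 0;
}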

One advantage of using the scheduler locks as testbed is that if you get
it wrong it burns *real* fast -- been there done that etc.

> Anyway, the naming is whack, but I don't have a proposed better name,
> except you might want to put "scope_" as the prefix not the suffix, but
> then that might look odd to, so who knows.

Yeah, naming is certainly crazy, but I figured I should get it all
working before spending too much time on that.

I can certainly do 's/lock_scope/scope_lock/g' on it all.

> But again, the idea is good, it might save us lots of "you forgot to
> clean this up on the error path" mess that we are getting constant churn
> for these days...

That's the goal...

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [RFC][PATCH 2/2] sched: Use fancy new guards
  2023-05-26 16:27     ` Mathieu Desnoyers
@ 2023-05-26 16:43       ` Peter Zijlstra
  2023-05-26 17:08       ` Kees Cook
  1 sibling, 0 replies; 17+ messages in thread
From: Peter Zijlstra @ 2023-05-26 16:43 UTC (permalink / raw)
  To: Mathieu Desnoyers
  Cc: Greg KH, torvalds, keescook, pbonzini, linux-kernel, ojeda,
	ndesaulniers, mingo, will, longman, boqun.feng, juri.lelli,
	vincent.guittot, dietmar.eggemann, rostedt, bsegall, mgorman,
	bristot, vschneid, paulmck, frederic, quic_neeraju, joel, josh,
	jiangshanlai, qiang1.zhang, rcu, tj, tglx

On Fri, May 26, 2023 at 12:27:51PM -0400, Mathieu Desnoyers wrote:

> > Anyway, the naming is whack, but I don't have a proposed better name,
> > except you might want to put "scope_" as the prefix not the suffix, but
> > then that might look odd to, so who knows.
> 
> FWIW C++ has std::scoped_lock. So perhaps using a similar wording may help ?

Yeah, C++ is a lot more flexible than CPP crazies. But yeah, happy to
change it that way.

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [RFC][PATCH 1/2] locking: Introduce __cleanup__ based guards
  2023-05-26 15:05 ` [RFC][PATCH 1/2] locking: Introduce __cleanup__ based guards Peter Zijlstra
@ 2023-05-26 17:05   ` Kees Cook
  2023-05-26 18:39     ` Miguel Ojeda
  2023-05-26 18:22   ` Linus Torvalds
  2023-05-26 18:49   ` Waiman Long
  2 siblings, 1 reply; 17+ messages in thread
From: Kees Cook @ 2023-05-26 17:05 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: torvalds, gregkh, pbonzini, linux-kernel, ojeda, ndesaulniers,
	mingo, will, longman, boqun.feng, juri.lelli, vincent.guittot,
	dietmar.eggemann, rostedt, bsegall, mgorman, bristot, vschneid,
	paulmck, frederic, quic_neeraju, joel, josh, mathieu.desnoyers,
	jiangshanlai, qiang1.zhang, rcu, tj, tglx

On Fri, May 26, 2023 at 05:05:50PM +0200, Peter Zijlstra wrote:
> Use __attribute__((__cleanup__(func))) to buid various guards:
> 
>  - ptr_guard()
>  - void_guard() / void_scope()
>  - lock_guard() / lock_scope()
>  - double_lock_guard() / double_lock_scope()
> 
> Where the _guard thingies are variables with scope-based cleanup and
> the _scope thingies are basically do-once for-loops with the same.

This makes things much easier to deal with, rather than forcing loops
into separate functions, etc, and hoping to get the cleanup right.

> 
> The CPP is rather impenetrable -- but I'll attempt to write proper
> comments if/when people think this is worth pursuing.

Yes please. Comments would help a lot. I was scratching my head over _G
for a bit before I realized what was happening. :)

> 
> Actual usage in the next patch
> 
> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
> ---
>  include/linux/compiler_attributes.h |    2 
>  include/linux/irqflags.h            |    7 ++
>  include/linux/guards.h          |  118 ++++++++++++++++++++++++++++++++++++
>  include/linux/mutex.h               |    5 +
>  include/linux/preempt.h             |    4 +
>  include/linux/rcupdate.h            |    3 
>  include/linux/sched/task.h          |    2 
>  include/linux/spinlock.h            |   23 +++++++
>  8 files changed, 164 insertions(+)
> 
> --- a/include/linux/compiler_attributes.h
> +++ b/include/linux/compiler_attributes.h
> @@ -366,4 +366,6 @@
>   */
>  #define __fix_address noinline __noclone
>  
> +#define __cleanup(func)			__attribute__((__cleanup__(func)))
> +
>  #endif /* __LINUX_COMPILER_ATTRIBUTES_H */

nitpick: sorting. This needs to be moved up alphabetically; the comment
at the start of the file says:

 ...
 * This file is meant to be sorted (by actual attribute name,
 * not by #define identifier). ...

> [...]
> +#define DEFINE_VOID_GUARD(_type, _Lock, _Unlock, ...)				\
> +typedef struct {								\
> +	__VA_ARGS__								\
> +} void_guard_##_type##_t;							\
> +										\
> [...]
> +DEFINE_VOID_GUARD(irq, local_irq_disable(), local_irq_enable())
> +DEFINE_VOID_GUARD(irqsave,
> +		  local_irq_save(_G->flags),
> +		  local_irq_restore(_G->flags),
> +		  unsigned long flags;)

Yeah, good trick for defining 0-or-more members to the guard struct. I
expect the common cases to be 0 or 1, so perhaps move the final ";" to
after __VA_ARGS__ to avoid needing it in the DEFINEs? (And even in this
initial patch, there's only 1 non-empty argument...)

> [...]
> --- /dev/null
> +++ b/include/linux/guards.h
> @@ -0,0 +1,118 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +#ifndef __LINUX_GUARDS_H
> +#define __LINUX_GUARDS_H
> +
> +#include <linux/compiler_attributes.h>
> +
> +/* Pointer Guard */
> +
> +#define DEFINE_PTR_GUARD(_type, _Type, _Put)				\
> +typedef _Type *ptr_guard_##_type##_t;					\
> +static inline void ptr_guard_##_type##_cleanup(_Type **_ptr)		\
> +{									\
> +	_Type *_G = *_ptr;						\
> +	if (_G)								\
> +		_Put(_G);						\
> +}

*loud forehead-smacking noise* __cleanup with inlines! I love it!

> [...]
> +#define void_scope(_type)							\
> +	for (struct { void_guard_##_type##_t guard; bool done; } _scope		\
> +	     __cleanup(void_guard_##_type##_cleanup) =				\
> +	     { .guard = void_guard_##_type##_init() }; !_scope.done;		\
> +	     _scope.done = true)

Heh, yes, that'll work for a forced scope, and I bet compiler
optimizations will collapse a bunch of this into a very clean execution
path.
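
For reference, hand-expanding void_scope(rcu) from the above gives roughly
this (assuming the init/cleanup helpers come from the
DEFINE_VOID_GUARD(rcu, ...) line, i.e. they wrap rcu_read_lock() and
rcu_read_unlock()):

	for (struct { void_guard_rcu_t guard; bool done; } _scope
	     __cleanup(void_guard_rcu_cleanup) =
	     { .guard = void_guard_rcu_init() }; !_scope.done;
	     _scope.done = true)
		/* body runs once, with the read-side lock held */;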

> [...]
> +DEFINE_VOID_GUARD(preempt, preempt_disable(), preempt_enable())
> +DEFINE_VOID_GUARD(migrate, migrate_disable(), migrate_enable())
> [...]
> +DEFINE_VOID_GUARD(rcu, rcu_read_lock(), rcu_read_unlock())
> [...]
> +DEFINE_PTR_GUARD(put_task, struct task_struct, put_task_struct)
> [...]

It seems like there are some _really_ common code patterns you're
targeting here, and I bet we could do some mechanical treewide changes
with Coccinelle to remove a ton of boilerplate code.

I like this API, and the CPP isn't very obfuscated at all, compared to
some stuff we've already got in the tree. :)

-Kees

-- 
Kees Cook

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [RFC][PATCH 2/2] sched: Use fancy new guards
  2023-05-26 16:27     ` Mathieu Desnoyers
  2023-05-26 16:43       ` Peter Zijlstra
@ 2023-05-26 17:08       ` Kees Cook
  2023-05-26 17:41         ` Miguel Ojeda
  1 sibling, 1 reply; 17+ messages in thread
From: Kees Cook @ 2023-05-26 17:08 UTC (permalink / raw)
  To: Mathieu Desnoyers
  Cc: Greg KH, Peter Zijlstra, torvalds, pbonzini, linux-kernel, ojeda,
	ndesaulniers, mingo, will, longman, boqun.feng, juri.lelli,
	vincent.guittot, dietmar.eggemann, rostedt, bsegall, mgorman,
	bristot, vschneid, paulmck, frederic, quic_neeraju, joel, josh,
	jiangshanlai, qiang1.zhang, rcu, tj, tglx

On Fri, May 26, 2023 at 12:27:51PM -0400, Mathieu Desnoyers wrote:
> On 5/26/23 12:25, Greg KH wrote:
> > On Fri, May 26, 2023 at 05:05:51PM +0200, Peter Zijlstra wrote:
> > > Convert kernel/sched/core.c to use the fancy new guards to simplify
> > > the error paths.
> > 
> > That's slightly crazy...
> > 
> > I like the idea, but is this really correct:
> > 
> > 
> > > 
> > > Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
> > > ---
> > >   kernel/sched/core.c  | 1223 +++++++++++++++++++++++----------------------------
> > >   kernel/sched/sched.h |   39 +
> > >   2 files changed, 595 insertions(+), 667 deletions(-)
> > > 
> > > --- a/kernel/sched/core.c
> > > +++ b/kernel/sched/core.c
> > > @@ -1097,24 +1097,21 @@ int get_nohz_timer_target(void)
> > >   	hk_mask = housekeeping_cpumask(HK_TYPE_TIMER);
> > > -	rcu_read_lock();
> > > -	for_each_domain(cpu, sd) {
> > > -		for_each_cpu_and(i, sched_domain_span(sd), hk_mask) {
> > > -			if (cpu == i)
> > > -				continue;
> > > +	void_scope(rcu) {
> > > +		for_each_domain(cpu, sd) {
> > > +			for_each_cpu_and(i, sched_domain_span(sd), hk_mask) {
> > > +				if (cpu == i)
> > > +					continue;
> > > -			if (!idle_cpu(i)) {
> > > -				cpu = i;
> > > -				goto unlock;
> > > +				if (!idle_cpu(i))
> > > +					return i;
> > 
> > You can call return from within a "scope" and it will clean up properly?
> > 
> > I tried to read the cpp "mess" but couldn't figure out how to validate
> > this at all, have a set of tests for this somewhere?
> > 
> > Anyway, the naming is whack, but I don't have a proposed better name,
> > except you might want to put "scope_" as the prefix not the suffix, but
> > then that might look odd to, so who knows.
> 
> FWIW C++ has std::scoped_lock. So perhaps using a similar wording may help ?

Yeah, I like "scoped_*" and "guarded_*" for naming. IMO, it reads better.

-- 
Kees Cook

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [RFC][PATCH 2/2] sched: Use fancy new guards
  2023-05-26 17:08       ` Kees Cook
@ 2023-05-26 17:41         ` Miguel Ojeda
  0 siblings, 0 replies; 17+ messages in thread
From: Miguel Ojeda @ 2023-05-26 17:41 UTC (permalink / raw)
  To: Kees Cook
  Cc: Mathieu Desnoyers, Greg KH, Peter Zijlstra, torvalds, pbonzini,
	linux-kernel, ojeda, ndesaulniers, mingo, will, longman,
	boqun.feng, juri.lelli, vincent.guittot, dietmar.eggemann,
	rostedt, bsegall, mgorman, bristot, vschneid, paulmck, frederic,
	quic_neeraju, joel, josh, jiangshanlai, qiang1.zhang, rcu, tj,
	tglx

On Fri, May 26, 2023 at 7:08 PM Kees Cook <keescook@chromium.org> wrote:
>
> > FWIW C++ has std::scoped_lock. So perhaps using a similar wording may help ?
>
> Yeah, I like "scoped_*" and "guarded_*" for naming. IMO, it reads better.

FWIW in the Rust side we called the general facility `ScopeGuard`,
then we also have lock `Guard`s and Rust stdlib has things like
`MutexGuard`.

Cheers,
Miguel

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [RFC][PATCH 1/2] locking: Introduce __cleanup__ based guards
  2023-05-26 15:05 ` [RFC][PATCH 1/2] locking: Introduce __cleanup__ based guards Peter Zijlstra
  2023-05-26 17:05   ` Kees Cook
@ 2023-05-26 18:22   ` Linus Torvalds
  2023-05-26 19:10     ` Peter Zijlstra
  2023-05-26 18:49   ` Waiman Long
  2 siblings, 1 reply; 17+ messages in thread
From: Linus Torvalds @ 2023-05-26 18:22 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: keescook, gregkh, pbonzini, linux-kernel, ojeda, ndesaulniers,
	mingo, will, longman, boqun.feng, juri.lelli, vincent.guittot,
	dietmar.eggemann, rostedt, bsegall, mgorman, bristot, vschneid,
	paulmck, frederic, quic_neeraju, joel, josh, mathieu.desnoyers,
	jiangshanlai, qiang1.zhang, rcu, tj, tglx

On Fri, May 26, 2023 at 8:23 AM Peter Zijlstra <peterz@infradead.org> wrote:
>
> The CPP is rather impenetrable -- but I'll attempt to write proper
> comments if/when people think this is worth pursuing.

Ugh.

It's not only impenetrable, it seems _unnecessarily_ so.

Yes, yes, 'for()' loops can only declare one type, and if you want
multiple typed variables you declare a struct that contains all the
types.

But you don't actually *need* multiple types.

Yes, you think you do, because you want to use that 'bool done' to
make the for-loop only execute once. Nasty limitation of the for
syntax.

But you can actually do the 'bool done' using the exact same type you
have for the guard - just make it a pointer instead, and use NULL for
"not done" and non-NULL for "done". It ends up acting exactly like a
boolean.

But that extra structure is only a detail. The real ugliness comes
from using different scoping macros.

And I think you don't actually need to have those different forms of
"scoped()" macros for different cases. I think you can just use
variable macro arguments.

IOW, something like this:

  #define variable_scope(type, enter, exit) \
        for (type *_done = NULL, _scope __cleanup(exit) = enter; \
             !_done; _done = (void *)8)

  #define scoped(type, init...) \
        variable_scope(scope_##type##_t, scope_##type##_init(init), \
                       scope_##type##_cleanup)

and then you can do

        scoped (rcu) {
                ...
        }

and it will call "scope_rcu_init()" on entry, and
"scope_rcu_exit(_scope)" on exit.

And just doing

        scoped (mutex, mymutex) { ... }

will call "scope_mytex_init(mymutex)" on entry, and
"scope_mytex_exit(_scope)" on exit.

And if you just make the scope_##type##_init() functions return the
right values, it all works very naturally.
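
E.g. for a mutex that would be something like (untested, just filling in
the scheme above; the cleanup name follows the macro, not the prose):

  typedef struct mutex *scope_mutex_t;

  static inline scope_mutex_t scope_mutex_init(struct mutex *m)
  {
          mutex_lock(m);
          return m;
  }

  static inline void scope_mutex_cleanup(scope_mutex_t *m)
  {
          mutex_unlock(*m);
  }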

I think you can also do things like

        scoped(irqsave) { ... }
        scoped(irqoff) { ... }
        scoped(preempt) { ... }

very naturally. No need for that odd "one scope for 'void', one scope
for 'lock'" nonsense.

I dunno. I didn't *test* the above. Maybe you already tried something
like the above, and there's a reason why it doesn't work.

             Linus

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [RFC][PATCH 1/2] locking: Introduce __cleanup__ based guards
  2023-05-26 17:05   ` Kees Cook
@ 2023-05-26 18:39     ` Miguel Ojeda
  0 siblings, 0 replies; 17+ messages in thread
From: Miguel Ojeda @ 2023-05-26 18:39 UTC (permalink / raw)
  To: Kees Cook
  Cc: Peter Zijlstra, torvalds, gregkh, pbonzini, linux-kernel, ojeda,
	ndesaulniers, mingo, will, longman, boqun.feng, juri.lelli,
	vincent.guittot, dietmar.eggemann, rostedt, bsegall, mgorman,
	bristot, vschneid, paulmck, frederic, quic_neeraju, joel, josh,
	mathieu.desnoyers, jiangshanlai, qiang1.zhang, rcu, tj, tglx

On Fri, May 26, 2023 at 7:05 PM Kees Cook <keescook@chromium.org> wrote:
>
> > --- a/include/linux/compiler_attributes.h
> > +++ b/include/linux/compiler_attributes.h
> > @@ -366,4 +366,6 @@
> >   */
> >  #define __fix_address noinline __noclone
> >
> > +#define __cleanup(func)                      __attribute__((__cleanup__(func)))
> > +
> >  #endif /* __LINUX_COMPILER_ATTRIBUTES_H */
>
> nitpick: sorting. This needs to be moved up alphabetically; the comment
> at the start of the file says:
>
>  ...
>  * This file is meant to be sorted (by actual attribute name,
>  * not by #define identifier). ...

+1, also please add:

    /*
     *   gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Variable-Attributes.html#index-cleanup-variable-attribute
     * clang: https://clang.llvm.org/docs/AttributeReference.html#cleanup
     */

Cheers,
Miguel

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [RFC][PATCH 1/2] locking: Introduce __cleanup__ based guards
  2023-05-26 15:05 ` [RFC][PATCH 1/2] locking: Introduce __cleanup__ based guards Peter Zijlstra
  2023-05-26 17:05   ` Kees Cook
  2023-05-26 18:22   ` Linus Torvalds
@ 2023-05-26 18:49   ` Waiman Long
  2023-05-26 18:58     ` Mathieu Desnoyers
  2 siblings, 1 reply; 17+ messages in thread
From: Waiman Long @ 2023-05-26 18:49 UTC (permalink / raw)
  To: Peter Zijlstra, torvalds, keescook, gregkh, pbonzini
  Cc: linux-kernel, ojeda, ndesaulniers, mingo, will, boqun.feng,
	juri.lelli, vincent.guittot, dietmar.eggemann, rostedt, bsegall,
	mgorman, bristot, vschneid, paulmck, frederic, quic_neeraju,
	joel, josh, mathieu.desnoyers, jiangshanlai, qiang1.zhang, rcu,
	tj, tglx

On 5/26/23 11:05, Peter Zijlstra wrote:
> Use __attribute__((__cleanup__(func))) to buid various guards:
>
>   - ptr_guard()
>   - void_guard() / void_scope()
>   - lock_guard() / lock_scope()
>   - double_lock_guard() / double_lock_scope()
>
> Where the _guard thingies are variables with scope-based cleanup and
> the _scope thingies are basically do-once for-loops with the same.
>
> The CPP is rather impenetrable -- but I'll attempt to write proper
> comments if/when people think this is worth pursuing.
>
> Actual usage in the next patch
>
> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
> ---
>   include/linux/compiler_attributes.h |    2
>   include/linux/irqflags.h            |    7 ++
>   include/linux/guards.h          |  118 ++++++++++++++++++++++++++++++++++++
>   include/linux/mutex.h               |    5 +
>   include/linux/preempt.h             |    4 +
>   include/linux/rcupdate.h            |    3
>   include/linux/sched/task.h          |    2
>   include/linux/spinlock.h            |   23 +++++++
>   8 files changed, 164 insertions(+)

That is an interesting idea and may help to simplify some of the common 
code patterns that we have in the kernel. The macros are a bit hard to 
read and understand though I thought I got a rough idea of what they are 
trying to do.

BTW, do we have a use case for double_lock_guard/double_lock_scope? I 
can envision a nested lock_scope inside a lock_scope, but taking 2 auto 
locks of the same type at init time and then unlocking them at exit just 
doesn't make sense to me.

Cheers,
Longman


^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [RFC][PATCH 1/2] locking: Introduce __cleanup__ based guards
  2023-05-26 18:49   ` Waiman Long
@ 2023-05-26 18:58     ` Mathieu Desnoyers
  2023-05-26 19:04       ` Waiman Long
  0 siblings, 1 reply; 17+ messages in thread
From: Mathieu Desnoyers @ 2023-05-26 18:58 UTC (permalink / raw)
  To: Waiman Long, Peter Zijlstra, torvalds, keescook, gregkh, pbonzini
  Cc: linux-kernel, ojeda, ndesaulniers, mingo, will, boqun.feng,
	juri.lelli, vincent.guittot, dietmar.eggemann, rostedt, bsegall,
	mgorman, bristot, vschneid, paulmck, frederic, quic_neeraju,
	joel, josh, jiangshanlai, qiang1.zhang, rcu, tj, tglx

On 5/26/23 14:49, Waiman Long wrote:
[...]
> 
> BTW, do we have a use case for double_lock_guard/double_lock_scope? I 
> can envision a nested lock_scope inside a lock_scope, but taking 2 auto 
> locks of the same type at init time and then unlock them at exit just 
> doesn't make sense to me.

AFAIU taking both runqueue locks for source and destination runqueues on 
migration is one use-case for double_lock_guard/scope.
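
Assuming double_lock_scope() follows the lock_scope() calling convention
(the guard name plus the two locks -- I have not checked the exact macro),
the migration path would then look something like:

	double_lock_scope(rq, src_rq, dst_rq) {
		/* both runqueue locks held here; move the task across */
		...
	}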

Thanks,

Mathieu


-- 
Mathieu Desnoyers
EfficiOS Inc.
https://www.efficios.com


^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [RFC][PATCH 1/2] locking: Introduce __cleanup__ based guards
  2023-05-26 18:58     ` Mathieu Desnoyers
@ 2023-05-26 19:04       ` Waiman Long
  0 siblings, 0 replies; 17+ messages in thread
From: Waiman Long @ 2023-05-26 19:04 UTC (permalink / raw)
  To: Mathieu Desnoyers, Peter Zijlstra, torvalds, keescook, gregkh, pbonzini
  Cc: linux-kernel, ojeda, ndesaulniers, mingo, will, boqun.feng,
	juri.lelli, vincent.guittot, dietmar.eggemann, rostedt, bsegall,
	mgorman, bristot, vschneid, paulmck, frederic, quic_neeraju,
	joel, josh, jiangshanlai, qiang1.zhang, rcu, tj, tglx

On 5/26/23 14:58, Mathieu Desnoyers wrote:
> On 5/26/23 14:49, Waiman Long wrote:
> [...]
>>
>> BTW, do we have a use case for double_lock_guard/double_lock_scope? I 
>> can envision a nested lock_scope inside a lock_scope, but taking 2 
>> auto locks of the same type at init time and then unlock them at exit 
>> just doesn't make sense to me.
>
> AFAIU taking both runqueue locks for source and destination runqueues 
> on migration is one use-case for double_lock_guard/scope.
>
I see. Thanks for the clarification. I forgot about that special case.

Cheers,
Longman


^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [RFC][PATCH 1/2] locking: Introduce __cleanup__ based guards
  2023-05-26 18:22   ` Linus Torvalds
@ 2023-05-26 19:10     ` Peter Zijlstra
  0 siblings, 0 replies; 17+ messages in thread
From: Peter Zijlstra @ 2023-05-26 19:10 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: keescook, gregkh, pbonzini, linux-kernel, ojeda, ndesaulniers,
	mingo, will, longman, boqun.feng, juri.lelli, vincent.guittot,
	dietmar.eggemann, rostedt, bsegall, mgorman, bristot, vschneid,
	paulmck, frederic, quic_neeraju, joel, josh, mathieu.desnoyers,
	jiangshanlai, rcu, tj, tglx

On Fri, May 26, 2023 at 11:22:36AM -0700, Linus Torvalds wrote:

> But you can actually do the 'bool done' using the exact same type you
> have for the guard - just make it a pointer instead, and use NULL for
> "not done" and non-NULL for "done". It ends up acting exactly like a
> boolean.

Damn; I've actually seen that and should've thought of it.

> IOW, something like this:
> 
>   #define variable_scope(type, enter, exit) \
>         for (type *_done = NULL, _scope __cleanup(exit) = enter; \
>              !_done; _done = (void *)8)
> 
>   #define scoped(type, init...) \
>         variable_scope(scope_##type##_t, scope_##type##_init(init), \
>                        scope_##type##_cleanup)
> 

> I dunno. I didn't *test* the above. Maybe you already tried something
> like the above, and there's a reason why it doesn't work.

I have not; let me go try that. That does look *much* nicer.

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [RFC][PATCH 0/2] Lock and Pointer guards
  2023-05-26 15:05 [RFC][PATCH 0/2] Lock and Pointer guards Peter Zijlstra
  2023-05-26 15:05 ` [RFC][PATCH 1/2] locking: Introduce __cleanup__ based guards Peter Zijlstra
  2023-05-26 15:05 ` [RFC][PATCH 2/2] sched: Use fancy new guards Peter Zijlstra
@ 2023-05-29 11:29 ` Paolo Bonzini
  2 siblings, 0 replies; 17+ messages in thread
From: Paolo Bonzini @ 2023-05-29 11:29 UTC (permalink / raw)
  To: Peter Zijlstra, torvalds, keescook, gregkh
  Cc: linux-kernel, ojeda, ndesaulniers, mingo, will, longman,
	boqun.feng, juri.lelli, vincent.guittot, dietmar.eggemann,
	rostedt, bsegall, mgorman, bristot, vschneid, paulmck, frederic,
	quic_neeraju, joel, josh, mathieu.desnoyers, jiangshanlai,
	qiang1.zhang, rcu, tj, tglx

On 5/26/23 17:05, Peter Zijlstra wrote:
> My initial (failed) attempts tried to combine __cleanup, _Generic and
> __auto_type, but the compilers refused. I've also Googled around and found
> (among many others) the QEMU and Glib guards. However I don't like them because
> they end up relying on function pointers/indirect calls.
> 
> Hence the current pile of CPP hackery.. no indirect calls in sight.

QEMU's guards in practice also compile down to direct calls, but yes 
that's only after the compiler inlines everything in the vtable.  I did 
it that way because the polymorphic locks already existed before; 
otherwise your solution with Linus's tweak for "bool" is as nice as it 
can be.  I like that it extends to irqoff and irqsave sections.

Paolo


^ permalink raw reply	[flat|nested] 17+ messages in thread
