* [PATCH 1/4] rcu/nocb: Pass a cpumask instead of a single CPU to offload/deoffload
2022-05-25 22:10 [PATCH 0/4] rcu/cpuset: Control RCU_NOCB offloading through cpusets Frederic Weisbecker
@ 2022-05-25 22:10 ` Frederic Weisbecker
2022-05-25 22:19 ` Frederic Weisbecker
2022-05-25 22:10 ` [PATCH 2/4] rcu/nocb: Prepare to change nocb cpumask from CPU-hotplug protected cpuset caller Frederic Weisbecker
` (2 subsequent siblings)
3 siblings, 1 reply; 28+ messages in thread
From: Frederic Weisbecker @ 2022-05-25 22:10 UTC (permalink / raw)
To: LKML
Cc: Frederic Weisbecker, Tejun Heo, Peter Zijlstra,
Paul E . McKenney, Paul Gortmaker, Johannes Weiner,
Marcelo Tosatti, Phil Auld, Zefan Li, Waiman Long,
Daniel Bristot de Oliveira, Nicolas Saenz Julienne, rcu
Currently the interface to toggle the callbacks offloading state only takes
a single CPU per call. Driving RCU NOCB through cpusets now requires
the ability to change the offloading state of a whole set of CPUs.
To make it easier, extend the (de-)offloading interface to support a
cpumask.
Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
Cc: Zefan Li <lizefan.x@bytedance.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Paul E. McKenney <paulmck@kernel.org>
Cc: Phil Auld <pauld@redhat.com>
Cc: Nicolas Saenz Julienne <nsaenz@kernel.org>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Paul Gortmaker <paul.gortmaker@windriver.com>
Cc: Waiman Long <longman@redhat.com>
Cc: Daniel Bristot de Oliveira <bristot@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
---
include/linux/rcupdate.h | 9 ++--
kernel/rcu/rcutorture.c | 4 +-
kernel/rcu/tree_nocb.h | 102 ++++++++++++++++++++++++++-------------
3 files changed, 76 insertions(+), 39 deletions(-)
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index f9f75a3cfeb8..dc8bb7cc893a 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -114,13 +114,14 @@ static inline void rcu_user_exit(void) { }
#ifdef CONFIG_RCU_NOCB_CPU
void rcu_init_nohz(void);
-int rcu_nocb_cpu_offload(int cpu);
-int rcu_nocb_cpu_deoffload(int cpu);
+int rcu_nocb_cpumask_update(struct cpumask *cpumask, bool offload);
void rcu_nocb_flush_deferred_wakeup(void);
#else /* #ifdef CONFIG_RCU_NOCB_CPU */
static inline void rcu_init_nohz(void) { }
-static inline int rcu_nocb_cpu_offload(int cpu) { return -EINVAL; }
-static inline int rcu_nocb_cpu_deoffload(int cpu) { return 0; }
+static inline int rcu_nocb_cpumask_update(struct cpumask *cpumask, bool offload)
+{
+ return -EINVAL;
+}
static inline void rcu_nocb_flush_deferred_wakeup(void) { }
#endif /* #else #ifdef CONFIG_RCU_NOCB_CPU */
diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
index faf6b4c7a757..f912ff4869b3 100644
--- a/kernel/rcu/rcutorture.c
+++ b/kernel/rcu/rcutorture.c
@@ -1887,10 +1887,10 @@ static int rcu_nocb_toggle(void *arg)
r = torture_random(&rand);
cpu = (r >> 4) % (maxcpu + 1);
if (r & 0x1) {
- rcu_nocb_cpu_offload(cpu);
+ rcu_nocb_cpumask_update(cpumask_of(cpu), true);
atomic_long_inc(&n_nocb_offload);
} else {
- rcu_nocb_cpu_deoffload(cpu);
+ rcu_nocb_cpumask_update(cpumask_of(cpu), false);
atomic_long_inc(&n_nocb_deoffload);
}
toggle_delay = torture_random(&rand) % toggle_fuzz + toggle_interval;
diff --git a/kernel/rcu/tree_nocb.h b/kernel/rcu/tree_nocb.h
index fa8e4f82e60c..428571ad11e3 100644
--- a/kernel/rcu/tree_nocb.h
+++ b/kernel/rcu/tree_nocb.h
@@ -1084,29 +1084,23 @@ static long rcu_nocb_rdp_deoffload(void *arg)
return 0;
}
-int rcu_nocb_cpu_deoffload(int cpu)
+static int rcu_nocb_cpu_deoffload(int cpu)
{
struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
int ret = 0;
- cpus_read_lock();
- mutex_lock(&rcu_state.barrier_mutex);
- if (rcu_rdp_is_offloaded(rdp)) {
- if (cpu_online(cpu)) {
- ret = work_on_cpu(cpu, rcu_nocb_rdp_deoffload, rdp);
- if (!ret)
- cpumask_clear_cpu(cpu, rcu_nocb_mask);
- } else {
- pr_info("NOCB: Can't CB-deoffload an offline CPU\n");
- ret = -EINVAL;
- }
- }
- mutex_unlock(&rcu_state.barrier_mutex);
- cpus_read_unlock();
+ if (cpu_is_offline(cpu))
+ return -EINVAL;
+
+ if (!rcu_rdp_is_offloaded(rdp))
+ return 0;
+
+ ret = work_on_cpu(cpu, rcu_nocb_rdp_deoffload, rdp);
+ if (!ret)
+ cpumask_clear_cpu(cpu, rcu_nocb_mask);
return ret;
}
-EXPORT_SYMBOL_GPL(rcu_nocb_cpu_deoffload);
static long rcu_nocb_rdp_offload(void *arg)
{
@@ -1117,12 +1111,6 @@ static long rcu_nocb_rdp_offload(void *arg)
struct rcu_data *rdp_gp = rdp->nocb_gp_rdp;
WARN_ON_ONCE(rdp->cpu != raw_smp_processor_id());
- /*
- * For now we only support re-offload, ie: the rdp must have been
- * offloaded on boot first.
- */
- if (!rdp->nocb_gp_rdp)
- return -EINVAL;
if (WARN_ON_ONCE(!rdp_gp->nocb_gp_kthread))
return -EINVAL;
@@ -1169,29 +1157,77 @@ static long rcu_nocb_rdp_offload(void *arg)
return 0;
}
-int rcu_nocb_cpu_offload(int cpu)
+static int rcu_nocb_cpu_offload(int cpu)
{
struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
- int ret = 0;
+ int ret;
+
+ if (cpu_is_offline(cpu))
+ return -EINVAL;
+
+ if (rcu_rdp_is_offloaded(rdp))
+ return 0;
+
+ ret = work_on_cpu(cpu, rcu_nocb_rdp_offload, rdp);
+ if (!ret)
+ cpumask_set_cpu(cpu, rcu_nocb_mask);
+
+ return ret;
+}
+
+int rcu_nocb_cpumask_update(struct cpumask *cpumask, bool offload)
+{
+ int cpu;
+ int err = 0;
+ int err_cpu;
+ cpumask_var_t saved_nocb_mask;
+
+ if (!alloc_cpumask_var(&saved_nocb_mask, GFP_KERNEL))
+ return -ENOMEM;
+
+ cpumask_copy(saved_nocb_mask, rcu_nocb_mask);
cpus_read_lock();
mutex_lock(&rcu_state.barrier_mutex);
- if (!rcu_rdp_is_offloaded(rdp)) {
- if (cpu_online(cpu)) {
- ret = work_on_cpu(cpu, rcu_nocb_rdp_offload, rdp);
- if (!ret)
- cpumask_set_cpu(cpu, rcu_nocb_mask);
+ for_each_cpu(cpu, cpumask) {
+ if (offload) {
+ err = rcu_nocb_cpu_offload(cpu);
+ if (err < 0) {
+ err_cpu = cpu;
+ pr_err("NOCB: offload cpu %d failed (%d)\n", cpu, err);
+ break;
+ }
} else {
- pr_info("NOCB: Can't CB-offload an offline CPU\n");
- ret = -EINVAL;
+ err = rcu_nocb_cpu_deoffload(cpu);
+ if (err < 0) {
+ err_cpu = cpu;
+ pr_err("NOCB: deoffload cpu %d failed (%d)\n", cpu, err);
+ break;
+ }
}
}
+
+ /* Rollback in case of error */
+ if (err < 0) {
+ err_cpu = cpu;
+ for_each_cpu(cpu, cpumask) {
+ if (err_cpu == cpu)
+ break;
+ if (cpumask_test_cpu(cpu, saved_nocb_mask))
+ WARN_ON_ONCE(rcu_nocb_cpu_offload(cpu));
+ else
+ WARN_ON_ONCE(rcu_nocb_cpu_deoffload(cpu));
+ }
+ }
+
mutex_unlock(&rcu_state.barrier_mutex);
cpus_read_unlock();
- return ret;
+ free_cpumask_var(saved_nocb_mask);
+
+ return err;
}
-EXPORT_SYMBOL_GPL(rcu_nocb_cpu_offload);
+EXPORT_SYMBOL_GPL(rcu_nocb_cpumask_update);
void __init rcu_init_nohz(void)
{
--
2.25.1
^ permalink raw reply related [flat|nested] 28+ messages in thread
* Re: [PATCH 1/4] rcu/nocb: Pass a cpumask instead of a single CPU to offload/deoffload
2022-05-25 22:10 ` [PATCH 1/4] rcu/nocb: Pass a cpumask instead of a single CPU to offload/deoffload Frederic Weisbecker
@ 2022-05-25 22:19 ` Frederic Weisbecker
2022-05-25 22:42 ` Paul E. McKenney
0 siblings, 1 reply; 28+ messages in thread
From: Frederic Weisbecker @ 2022-05-25 22:19 UTC (permalink / raw)
To: LKML
Cc: Tejun Heo, Peter Zijlstra, Paul E . McKenney, Paul Gortmaker,
Johannes Weiner, Marcelo Tosatti, Phil Auld, Zefan Li,
Waiman Long, Daniel Bristot de Oliveira, Nicolas Saenz Julienne,
rcu
On Thu, May 26, 2022 at 12:10:52AM +0200, Frederic Weisbecker wrote:
> @@ -1117,12 +1111,6 @@ static long rcu_nocb_rdp_offload(void *arg)
> struct rcu_data *rdp_gp = rdp->nocb_gp_rdp;
>
> WARN_ON_ONCE(rdp->cpu != raw_smp_processor_id());
> - /*
> - * For now we only support re-offload, ie: the rdp must have been
> - * offloaded on boot first.
> - */
> - if (!rdp->nocb_gp_rdp)
> - return -EINVAL;
>
> if (WARN_ON_ONCE(!rdp_gp->nocb_gp_kthread))
> return -EINVAL;
And why did I remove this critical check? I have no answer...
^ permalink raw reply [flat|nested] 28+ messages in thread
* Re: [PATCH 1/4] rcu/nocb: Pass a cpumask instead of a single CPU to offload/deoffload
2022-05-25 22:19 ` Frederic Weisbecker
@ 2022-05-25 22:42 ` Paul E. McKenney
0 siblings, 0 replies; 28+ messages in thread
From: Paul E. McKenney @ 2022-05-25 22:42 UTC (permalink / raw)
To: Frederic Weisbecker
Cc: LKML, Tejun Heo, Peter Zijlstra, Paul Gortmaker, Johannes Weiner,
Marcelo Tosatti, Phil Auld, Zefan Li, Waiman Long,
Daniel Bristot de Oliveira, Nicolas Saenz Julienne, rcu
On Thu, May 26, 2022 at 12:19:17AM +0200, Frederic Weisbecker wrote:
> On Thu, May 26, 2022 at 12:10:52AM +0200, Frederic Weisbecker wrote:
> > @@ -1117,12 +1111,6 @@ static long rcu_nocb_rdp_offload(void *arg)
> > struct rcu_data *rdp_gp = rdp->nocb_gp_rdp;
> >
> > WARN_ON_ONCE(rdp->cpu != raw_smp_processor_id());
> > - /*
> > - * For now we only support re-offload, ie: the rdp must have been
> > - * offloaded on boot first.
> > - */
> > - if (!rdp->nocb_gp_rdp)
> > - return -EINVAL;
> >
> > if (WARN_ON_ONCE(!rdp_gp->nocb_gp_kthread))
> > return -EINVAL;
>
> And why did I remove this critical check? I have no answer...
Me, I was going to ask if rcutorture should (de)offload multiple
CPUs in one go... ;-)
Thanx, Paul
^ permalink raw reply [flat|nested] 28+ messages in thread
* [PATCH 2/4] rcu/nocb: Prepare to change nocb cpumask from CPU-hotplug protected cpuset caller
2022-05-25 22:10 [PATCH 0/4] rcu/cpuset: Control RCU_NOCB offloading through cpusets Frederic Weisbecker
2022-05-25 22:10 ` [PATCH 1/4] rcu/nocb: Pass a cpumask instead of a single CPU to offload/deoffload Frederic Weisbecker
@ 2022-05-25 22:10 ` Frederic Weisbecker
2022-05-25 22:10 ` [PATCH 3/4] sched/isolation: Infrastructure to support rcu nocb cpumask changes Frederic Weisbecker
2022-05-25 22:10 ` [RFC PATCH 4/4] cpuset: Support RCU-NOCB toggle on v2 root partitions Frederic Weisbecker
3 siblings, 0 replies; 28+ messages in thread
From: Frederic Weisbecker @ 2022-05-25 22:10 UTC (permalink / raw)
To: LKML
Cc: Frederic Weisbecker, Tejun Heo, Peter Zijlstra,
Paul E . McKenney, Paul Gortmaker, Johannes Weiner,
Marcelo Tosatti, Phil Auld, Zefan Li, Waiman Long,
Daniel Bristot de Oliveira, Nicolas Saenz Julienne, rcu
cpusets is going to use the NOCB (de-)offloading interface while
holding the hotplug lock. Therefore move the responsibility of protecting
against concurrent CPU-hotplug changes out to the callers of
rcu_nocb_cpumask_update().
Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
Cc: Zefan Li <lizefan.x@bytedance.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Paul E. McKenney <paulmck@kernel.org>
Cc: Phil Auld <pauld@redhat.com>
Cc: Nicolas Saenz Julienne <nsaenz@kernel.org>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Paul Gortmaker <paul.gortmaker@windriver.com>
Cc: Waiman Long <longman@redhat.com>
Cc: Daniel Bristot de Oliveira <bristot@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
---
kernel/rcu/rcutorture.c | 2 ++
kernel/rcu/tree_nocb.h | 4 ++--
2 files changed, 4 insertions(+), 2 deletions(-)
diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
index f912ff4869b3..5a3029550e83 100644
--- a/kernel/rcu/rcutorture.c
+++ b/kernel/rcu/rcutorture.c
@@ -1886,6 +1886,7 @@ static int rcu_nocb_toggle(void *arg)
do {
r = torture_random(&rand);
cpu = (r >> 4) % (maxcpu + 1);
+ cpus_read_lock();
if (r & 0x1) {
rcu_nocb_cpumask_update(cpumask_of(cpu), true);
atomic_long_inc(&n_nocb_offload);
@@ -1893,6 +1894,7 @@ static int rcu_nocb_toggle(void *arg)
rcu_nocb_cpumask_update(cpumask_of(cpu), false);
atomic_long_inc(&n_nocb_deoffload);
}
+ cpus_read_unlock();
toggle_delay = torture_random(&rand) % toggle_fuzz + toggle_interval;
set_current_state(TASK_INTERRUPTIBLE);
schedule_hrtimeout(&toggle_delay, HRTIMER_MODE_REL);
diff --git a/kernel/rcu/tree_nocb.h b/kernel/rcu/tree_nocb.h
index 428571ad11e3..6396af6c765a 100644
--- a/kernel/rcu/tree_nocb.h
+++ b/kernel/rcu/tree_nocb.h
@@ -1182,12 +1182,13 @@ int rcu_nocb_cpumask_update(struct cpumask *cpumask, bool offload)
int err_cpu;
cpumask_var_t saved_nocb_mask;
+ lockdep_assert_cpus_held();
+
if (!alloc_cpumask_var(&saved_nocb_mask, GFP_KERNEL))
return -ENOMEM;
cpumask_copy(saved_nocb_mask, rcu_nocb_mask);
- cpus_read_lock();
mutex_lock(&rcu_state.barrier_mutex);
for_each_cpu(cpu, cpumask) {
if (offload) {
@@ -1221,7 +1222,6 @@ int rcu_nocb_cpumask_update(struct cpumask *cpumask, bool offload)
}
mutex_unlock(&rcu_state.barrier_mutex);
- cpus_read_unlock();
free_cpumask_var(saved_nocb_mask);
--
2.25.1
^ permalink raw reply related [flat|nested] 28+ messages in thread
* [PATCH 3/4] sched/isolation: Infrastructure to support rcu nocb cpumask changes
2022-05-25 22:10 [PATCH 0/4] rcu/cpuset: Control RCU_NOCB offloading through cpusets Frederic Weisbecker
2022-05-25 22:10 ` [PATCH 1/4] rcu/nocb: Pass a cpumask instead of a single CPU to offload/deoffload Frederic Weisbecker
2022-05-25 22:10 ` [PATCH 2/4] rcu/nocb: Prepare to change nocb cpumask from CPU-hotplug protected cpuset caller Frederic Weisbecker
@ 2022-05-25 22:10 ` Frederic Weisbecker
2022-08-19 7:12 ` Tobias Huschle
2022-05-25 22:10 ` [RFC PATCH 4/4] cpuset: Support RCU-NOCB toggle on v2 root partitions Frederic Weisbecker
3 siblings, 1 reply; 28+ messages in thread
From: Frederic Weisbecker @ 2022-05-25 22:10 UTC (permalink / raw)
To: LKML
Cc: Frederic Weisbecker, Tejun Heo, Peter Zijlstra,
Paul E . McKenney, Paul Gortmaker, Johannes Weiner,
Marcelo Tosatti, Phil Auld, Zefan Li, Waiman Long,
Daniel Bristot de Oliveira, Nicolas Saenz Julienne, rcu
Provide a minimal infrastructure to change the housekeeping cpumasks.
For now only RCU NOCB cpumask is handled.
Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
Cc: Zefan Li <lizefan.x@bytedance.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Paul E. McKenney <paulmck@kernel.org>
Cc: Phil Auld <pauld@redhat.com>
Cc: Nicolas Saenz Julienne <nsaenz@kernel.org>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Paul Gortmaker <paul.gortmaker@windriver.com>
Cc: Waiman Long <longman@redhat.com>
Cc: Daniel Bristot de Oliveira <bristot@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
---
include/linux/sched/isolation.h | 13 +++++++++++
kernel/sched/isolation.c | 38 +++++++++++++++++++++++++++++++++
2 files changed, 51 insertions(+)
diff --git a/include/linux/sched/isolation.h b/include/linux/sched/isolation.h
index 8c15abd67aed..c6d0e3f83a20 100644
--- a/include/linux/sched/isolation.h
+++ b/include/linux/sched/isolation.h
@@ -25,6 +25,8 @@ extern const struct cpumask *housekeeping_cpumask(enum hk_type type);
extern bool housekeeping_enabled(enum hk_type type);
extern void housekeeping_affine(struct task_struct *t, enum hk_type type);
extern bool housekeeping_test_cpu(int cpu, enum hk_type type);
+extern int housekeeping_cpumask_set(struct cpumask *cpumask, enum hk_type type);
+extern int housekeeping_cpumask_clear(struct cpumask *cpumask, enum hk_type type);
extern void __init housekeeping_init(void);
#else
@@ -46,6 +48,17 @@ static inline bool housekeeping_enabled(enum hk_type type)
static inline void housekeeping_affine(struct task_struct *t,
enum hk_type type) { }
+
+static inline int housekeeping_cpumask_set(struct cpumask *cpumask, enum hk_type type)
+{
+ return -EINVAL;
+}
+
+static inline int housekeeping_cpumask_clear(struct cpumask *cpumask, enum hk_type type)
+{
+ return -EINVAL;
+}
+
static inline void housekeeping_init(void) { }
#endif /* CONFIG_CPU_ISOLATION */
diff --git a/kernel/sched/isolation.c b/kernel/sched/isolation.c
index 373d42c707bc..ab4aba795c01 100644
--- a/kernel/sched/isolation.c
+++ b/kernel/sched/isolation.c
@@ -79,6 +79,44 @@ bool housekeeping_test_cpu(int cpu, enum hk_type type)
}
EXPORT_SYMBOL_GPL(housekeeping_test_cpu);
+static int housekeeping_cpumask_update(struct cpumask *cpumask,
+ enum hk_type type, bool on)
+{
+ int err;
+
+ switch (type) {
+ case HK_TYPE_RCU:
+ err = rcu_nocb_cpumask_update(cpumask, on);
+ break;
+ default:
+ err = -EINVAL;
+ }
+
+ if (err >= 0) {
+ if (on) {
+ cpumask_or(housekeeping.cpumasks[type],
+ housekeeping.cpumasks[type],
+ cpumask);
+ } else {
+ cpumask_andnot(housekeeping.cpumasks[type],
+ housekeeping.cpumasks[type],
+ cpumask);
+ }
+ }
+
+ return err;
+}
+
+int housekeeping_cpumask_set(struct cpumask *cpumask, enum hk_type type)
+{
+ return housekeeping_cpumask_update(cpumask, type, true);
+}
+
+int housekeeping_cpumask_clear(struct cpumask *cpumask, enum hk_type type)
+{
+ return housekeeping_cpumask_update(cpumask, type, false);
+}
+
void __init housekeeping_init(void)
{
enum hk_type type;
--
2.25.1
^ permalink raw reply related [flat|nested] 28+ messages in thread
* Re: [PATCH 3/4] sched/isolation: Infrastructure to support rcu nocb cpumask changes
2022-05-25 22:10 ` [PATCH 3/4] sched/isolation: Infrastructure to support rcu nocb cpumask changes Frederic Weisbecker
@ 2022-08-19 7:12 ` Tobias Huschle
0 siblings, 0 replies; 28+ messages in thread
From: Tobias Huschle @ 2022-08-19 7:12 UTC (permalink / raw)
To: Frederic Weisbecker
Cc: LKML, Tejun Heo, Peter Zijlstra, Paul E . McKenney,
Paul Gortmaker, Johannes Weiner, Marcelo Tosatti, Phil Auld,
Zefan Li, Waiman Long, Daniel Bristot de Oliveira,
Nicolas Saenz Julienne, rcu
On 2022-05-26 00:10, Frederic Weisbecker wrote:
> Provide a minimal infrastructure to change the housekeeping cpumasks.
> For now only RCU NOCB cpumask is handled.
>
> Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
> Cc: Zefan Li <lizefan.x@bytedance.com>
> Cc: Tejun Heo <tj@kernel.org>
> Cc: Johannes Weiner <hannes@cmpxchg.org>
> Cc: Paul E. McKenney <paulmck@kernel.org>
> Cc: Phil Auld <pauld@redhat.com>
> Cc: Nicolas Saenz Julienne <nsaenz@kernel.org>
> Cc: Marcelo Tosatti <mtosatti@redhat.com>
> Cc: Paul Gortmaker <paul.gortmaker@windriver.com>
> Cc: Waiman Long <longman@redhat.com>
> Cc: Daniel Bristot de Oliveira <bristot@kernel.org>
> Cc: Peter Zijlstra <peterz@infradead.org>
> ---
> include/linux/sched/isolation.h | 13 +++++++++++
> kernel/sched/isolation.c | 38 +++++++++++++++++++++++++++++++++
> 2 files changed, 51 insertions(+)
>
> diff --git a/include/linux/sched/isolation.h
> b/include/linux/sched/isolation.h
> index 8c15abd67aed..c6d0e3f83a20 100644
> --- a/include/linux/sched/isolation.h
> +++ b/include/linux/sched/isolation.h
> @@ -25,6 +25,8 @@ extern const struct cpumask
> *housekeeping_cpumask(enum hk_type type);
> extern bool housekeeping_enabled(enum hk_type type);
> extern void housekeeping_affine(struct task_struct *t, enum hk_type
> type);
> extern bool housekeeping_test_cpu(int cpu, enum hk_type type);
> +extern int housekeeping_cpumask_set(struct cpumask *cpumask, enum
> hk_type type);
> +extern int housekeeping_cpumask_clear(struct cpumask *cpumask, enum
> hk_type type);
> extern void __init housekeeping_init(void);
>
> #else
> @@ -46,6 +48,17 @@ static inline bool housekeeping_enabled(enum hk_type
> type)
>
> static inline void housekeeping_affine(struct task_struct *t,
> enum hk_type type) { }
> +
> +static inline int housekeeping_cpumask_set(struct cpumask *cpumask,
> enum hk_type type)
> +{
> + return -EINVAL;
> +}
> +
> +static inline int housekeeping_cpumask_clear(struct cpumask *cpumask,
> enum hk_type type)
> +{
> + return -EINVAL;
> +}
> +
> static inline void housekeeping_init(void) { }
> #endif /* CONFIG_CPU_ISOLATION */
>
> diff --git a/kernel/sched/isolation.c b/kernel/sched/isolation.c
> index 373d42c707bc..ab4aba795c01 100644
> --- a/kernel/sched/isolation.c
> +++ b/kernel/sched/isolation.c
> @@ -79,6 +79,44 @@ bool housekeeping_test_cpu(int cpu, enum hk_type
> type)
> }
> EXPORT_SYMBOL_GPL(housekeeping_test_cpu);
>
> +static int housekeeping_cpumask_update(struct cpumask *cpumask,
> + enum hk_type type, bool on)
> +{
> + int err;
> +
> + switch (type) {
> + case HK_TYPE_RCU:
> + err = rcu_nocb_cpumask_update(cpumask, on);
> + break;
> + default:
> + err = -EINVAL;
> + }
> +
> + if (err >= 0) {
> + if (on) {
> + cpumask_or(housekeeping.cpumasks[type],
> + housekeeping.cpumasks[type],
> + cpumask);
> + } else {
> + cpumask_andnot(housekeeping.cpumasks[type],
> + housekeeping.cpumasks[type],
> + cpumask);
> + }
> + }
> +
> + return err;
> +}
> +
> +int housekeeping_cpumask_set(struct cpumask *cpumask, enum hk_type
> type)
> +{
> + return housekeeping_cpumask_update(cpumask, type, true);
> +}
> +
> +int housekeeping_cpumask_clear(struct cpumask *cpumask, enum hk_type
> type)
> +{
> + return housekeeping_cpumask_update(cpumask, type, false);
> +}
> +
> void __init housekeeping_init(void)
> {
> enum hk_type type;
Just stumbled upon this patch.
I would be interested to have a way to fully isolate CPUs during
runtime.
I tried some things similar to the patch above and the results looked
promising (removing certain CPUs from the housekeeping cpumasks during
runtime).
Offlining them might be too expensive and also go a bit too far, as I
might want to
be able to reactivate these CPUs quickly.
What kind of problems would you expect when making the housekeeping
masks editable?
Not just rcu_nocb, but all of them.
--
Tobias
^ permalink raw reply [flat|nested] 28+ messages in thread
* [RFC PATCH 4/4] cpuset: Support RCU-NOCB toggle on v2 root partitions
2022-05-25 22:10 [PATCH 0/4] rcu/cpuset: Control RCU_NOCB offloading through cpusets Frederic Weisbecker
` (2 preceding siblings ...)
2022-05-25 22:10 ` [PATCH 3/4] sched/isolation: Infrastructure to support rcu nocb cpumask changes Frederic Weisbecker
@ 2022-05-25 22:10 ` Frederic Weisbecker
2022-05-26 18:21 ` Tejun Heo
3 siblings, 1 reply; 28+ messages in thread
From: Frederic Weisbecker @ 2022-05-25 22:10 UTC (permalink / raw)
To: LKML
Cc: Frederic Weisbecker, Tejun Heo, Peter Zijlstra,
Paul E . McKenney, Paul Gortmaker, Johannes Weiner,
Marcelo Tosatti, Phil Auld, Zefan Li, Waiman Long,
Daniel Bristot de Oliveira, Nicolas Saenz Julienne, rcu
Introduce a new "isolation.rcu_nocb" file within a cgroup2/cpuset
directory which provides support for a set of CPUs to either enable ("1")
or disable ("0") RCU callbacks offloading (aka. RCU NOCB). This can
override previous boot settings made through the "rcu_nocbs=" kernel parameter.
The file is only writable on "root" type partitions to exclude any
overlap. The deepest root type partition has the highest priority.
This means that given the following setting:
Top cpuset (CPUs: 0-7)
cpuset.isolation.rcu_nocb = 0
|
|
Subdirectory A (CPUs: 5-7)
cpuset.cpus.partition = root
cpuset.isolation.rcu_nocb = 0
|
|
Subdirectory B (CPUs: 7)
cpuset.cpus.partition = root
cpuset.isolation.rcu_nocb = 1
the result is that only CPU 7 is in rcu_nocb mode.
Note that "rcu_nocbs" kernel parameter must be passed on boot, even
without a cpulist, so that nocb support is enabled.
Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
Cc: Zefan Li <lizefan.x@bytedance.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Paul E. McKenney <paulmck@kernel.org>
Cc: Phil Auld <pauld@redhat.com>
Cc: Nicolas Saenz Julienne <nsaenz@kernel.org>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Paul Gortmaker <paul.gortmaker@windriver.com>
Cc: Waiman Long <longman@redhat.com>
Cc: Daniel Bristot de Oliveira <bristot@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
---
kernel/cgroup/cpuset.c | 95 ++++++++++++++++++++++++++++++++++++++++--
1 file changed, 92 insertions(+), 3 deletions(-)
diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
index 9390bfd9f1cd..2d9f019bb590 100644
--- a/kernel/cgroup/cpuset.c
+++ b/kernel/cgroup/cpuset.c
@@ -225,6 +225,7 @@ typedef enum {
CS_SCHED_LOAD_BALANCE,
CS_SPREAD_PAGE,
CS_SPREAD_SLAB,
+ CS_RCU_NOCB,
} cpuset_flagbits_t;
/* convenient tests for these bits */
@@ -268,6 +269,11 @@ static inline int is_spread_slab(const struct cpuset *cs)
return test_bit(CS_SPREAD_SLAB, &cs->flags);
}
+static inline int is_rcu_nocb(const struct cpuset *cs)
+{
+ return test_bit(CS_RCU_NOCB, &cs->flags);
+}
+
static inline int is_partition_root(const struct cpuset *cs)
{
return cs->partition_root_state > 0;
@@ -590,6 +596,62 @@ static inline void free_cpuset(struct cpuset *cs)
kfree(cs);
}
+#ifdef CONFIG_RCU_NOCB_CPU
+static int cpuset_rcu_nocb_apply(struct cpuset *root)
+{
+ int err;
+
+ if (is_rcu_nocb(root))
+ err = housekeeping_cpumask_set(root->effective_cpus, HK_TYPE_RCU);
+ else
+ err = housekeeping_cpumask_clear(root->effective_cpus, HK_TYPE_RCU);
+
+ return err;
+}
+
+static int cpuset_rcu_nocb_update(struct cpuset *cur, struct cpuset *trialcs)
+{
+ struct cgroup_subsys_state *des_css;
+ struct cpuset *des;
+ int err;
+
+ if (cur->partition_root_state != PRS_ENABLED)
+ return -EINVAL;
+
+ err = cpuset_rcu_nocb_apply(trialcs);
+ if (err < 0)
+ return err;
+
+ rcu_read_lock();
+ cpuset_for_each_descendant_pre(des, des_css, cur) {
+ if (des == cur)
+ continue;
+ if (des->partition_root_state == PRS_ENABLED)
+ break;
+ spin_lock_irq(&callback_lock);
+ if (is_rcu_nocb(trialcs))
+ set_bit(CS_RCU_NOCB, &des->flags);
+ else
+ clear_bit(CS_RCU_NOCB, &des->flags);
+ spin_unlock_irq(&callback_lock);
+ }
+ rcu_read_unlock();
+
+ return 0;
+}
+#else
+static inline int cpuset_rcu_nocb_apply(struct cpuset *root)
+{
+ return 0;
+}
+
+static inline int cpuset_rcu_nocb_update(struct cpuset *cur,
+ struct cpuset *trialcs)
+{
+ return 0;
+}
+#endif /* #ifdef CONFIG_RCU_NOCB_CPU */
+
/*
* validate_change_legacy() - Validate conditions specific to legacy (v1)
* behavior.
@@ -1655,6 +1717,9 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs,
if (cs->partition_root_state) {
struct cpuset *parent = parent_cs(cs);
+ WARN_ON_ONCE(cpuset_rcu_nocb_apply(parent) < 0);
+ WARN_ON_ONCE(cpuset_rcu_nocb_apply(cs) < 0);
+
/*
* For partition root, update the cpumasks of sibling
* cpusets if they use parent's effective_cpus.
@@ -2012,6 +2077,12 @@ static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs,
spread_flag_changed = ((is_spread_slab(cs) != is_spread_slab(trialcs))
|| (is_spread_page(cs) != is_spread_page(trialcs)));
+ if (is_rcu_nocb(cs) != is_rcu_nocb(trialcs)) {
+ err = cpuset_rcu_nocb_update(cs, trialcs);
+ if (err < 0)
+ goto out;
+ }
+
spin_lock_irq(&callback_lock);
cs->flags = trialcs->flags;
spin_unlock_irq(&callback_lock);
@@ -2365,6 +2436,7 @@ typedef enum {
FILE_MEMORY_PRESSURE,
FILE_SPREAD_PAGE,
FILE_SPREAD_SLAB,
+ FILE_RCU_NOCB,
} cpuset_filetype_t;
static int cpuset_write_u64(struct cgroup_subsys_state *css, struct cftype *cft,
@@ -2406,6 +2478,9 @@ static int cpuset_write_u64(struct cgroup_subsys_state *css, struct cftype *cft,
case FILE_SPREAD_SLAB:
retval = update_flag(CS_SPREAD_SLAB, cs, val);
break;
+ case FILE_RCU_NOCB:
+ retval = update_flag(CS_RCU_NOCB, cs, val);
+ break;
default:
retval = -EINVAL;
break;
@@ -2573,6 +2648,8 @@ static u64 cpuset_read_u64(struct cgroup_subsys_state *css, struct cftype *cft)
return is_spread_page(cs);
case FILE_SPREAD_SLAB:
return is_spread_slab(cs);
+ case FILE_RCU_NOCB:
+ return is_rcu_nocb(cs);
default:
BUG();
}
@@ -2803,7 +2880,14 @@ static struct cftype dfl_files[] = {
.private = FILE_SUBPARTS_CPULIST,
.flags = CFTYPE_DEBUG,
},
-
+#ifdef CONFIG_RCU_NOCB_CPU
+ {
+ .name = "isolation.rcu_nocb",
+ .read_u64 = cpuset_read_u64,
+ .write_u64 = cpuset_write_u64,
+ .private = FILE_RCU_NOCB,
+ },
+#endif
{ } /* terminate */
};
@@ -2861,6 +2945,8 @@ static int cpuset_css_online(struct cgroup_subsys_state *css)
set_bit(CS_SPREAD_PAGE, &cs->flags);
if (is_spread_slab(parent))
set_bit(CS_SPREAD_SLAB, &cs->flags);
+ if (is_rcu_nocb(parent))
+ set_bit(CS_RCU_NOCB, &cs->flags);
cpuset_inc();
@@ -3227,12 +3313,15 @@ static void cpuset_hotplug_update_tasks(struct cpuset *cs, struct tmpmasks *tmp)
if (mems_updated)
check_insane_mems_config(&new_mems);
- if (is_in_v2_mode())
+ if (is_in_v2_mode()) {
hotplug_update_tasks(cs, &new_cpus, &new_mems,
cpus_updated, mems_updated);
- else
+ if (cpus_updated)
+ WARN_ON_ONCE(cpuset_rcu_nocb_apply(cs) < 0);
+ } else {
hotplug_update_tasks_legacy(cs, &new_cpus, &new_mems,
cpus_updated, mems_updated);
+ }
percpu_up_write(&cpuset_rwsem);
}
--
2.25.1
^ permalink raw reply related [flat|nested] 28+ messages in thread
* Re: [RFC PATCH 4/4] cpuset: Support RCU-NOCB toggle on v2 root partitions
2022-05-25 22:10 ` [RFC PATCH 4/4] cpuset: Support RCU-NOCB toggle on v2 root partitions Frederic Weisbecker
@ 2022-05-26 18:21 ` Tejun Heo
2022-05-26 22:51 ` Frederic Weisbecker
0 siblings, 1 reply; 28+ messages in thread
From: Tejun Heo @ 2022-05-26 18:21 UTC (permalink / raw)
To: Frederic Weisbecker
Cc: LKML, Peter Zijlstra, Paul E . McKenney, Paul Gortmaker,
Johannes Weiner, Marcelo Tosatti, Phil Auld, Zefan Li,
Waiman Long, Daniel Bristot de Oliveira, Nicolas Saenz Julienne,
rcu
On Thu, May 26, 2022 at 12:10:55AM +0200, Frederic Weisbecker wrote:
> Introduce a new "isolation.rcu_nocb" file within a cgroup2/cpuset
> directory which provides support for a set of CPUs to either enable ("1")
> or disable ("0") RCU callbacks offloading (aka. RCU NOCB). This can
> overwrite previous boot settings towards "rcu_nocbs=" kernel parameter.
>
> The file is only writeable on "root" type partitions to exclude any
> overlap. The deepest root type partition has the highest priority.
> This means that given the following setting:
>
> Top cpuset (CPUs: 0-7)
> cpuset.isolation.rcu_nocb = 0
> |
> |
> Subdirectory A (CPUs: 5-7)
> cpuset.cpus.partition = root
> cpuset.isolation.rcu_nocb = 0
> |
> |
> Subdirectory B (CPUs: 7)
> cpuset.cpus.partition = root
> cpuset.isolation.rcu_nocb = 1
>
> the result is that only CPU 7 is in rcu_nocb mode.
>
> Note that "rcu_nocbs" kernel parameter must be passed on boot, even
> without a cpulist, so that nocb support is enabled.
Does it even make sense to make this hierarchical? What's wrong with a
cpumask under sys/ or proc/?
Thanks.
--
tejun
^ permalink raw reply [flat|nested] 28+ messages in thread
* Re: [RFC PATCH 4/4] cpuset: Support RCU-NOCB toggle on v2 root partitions
2022-05-26 18:21 ` Tejun Heo
@ 2022-05-26 22:51 ` Frederic Weisbecker
2022-05-26 23:02 ` Tejun Heo
0 siblings, 1 reply; 28+ messages in thread
From: Frederic Weisbecker @ 2022-05-26 22:51 UTC (permalink / raw)
To: Tejun Heo
Cc: LKML, Peter Zijlstra, Paul E . McKenney, Paul Gortmaker,
Johannes Weiner, Marcelo Tosatti, Phil Auld, Zefan Li,
Waiman Long, Daniel Bristot de Oliveira, Nicolas Saenz Julienne,
rcu
On Thu, May 26, 2022 at 08:21:13AM -1000, Tejun Heo wrote:
> On Thu, May 26, 2022 at 12:10:55AM +0200, Frederic Weisbecker wrote:
> > Introduce a new "isolation.rcu_nocb" file within a cgroup2/cpuset
> > directory which provides support for a set of CPUs to either enable ("1")
> > or disable ("0") RCU callbacks offloading (aka. RCU NOCB). This can
> > overwrite previous boot settings towards "rcu_nocbs=" kernel parameter.
> >
> > The file is only writeable on "root" type partitions to exclude any
> > overlap. The deepest root type partition has the highest priority.
> > This means that given the following setting:
> >
> > Top cpuset (CPUs: 0-7)
> > cpuset.isolation.rcu_nocb = 0
> > |
> > |
> > Subdirectory A (CPUs: 5-7)
> > cpuset.cpus.partition = root
> > cpuset.isolation.rcu_nocb = 0
> > |
> > |
> > Subdirectory B (CPUs: 7)
> > cpuset.cpus.partition = root
> > cpuset.isolation.rcu_nocb = 1
> >
> > the result is that only CPU 7 is in rcu_nocb mode.
> >
> > Note that "rcu_nocbs" kernel parameter must be passed on boot, even
> > without a cpulist, so that nocb support is enabled.
>
> Does it even make sense to make this hierarchical? What's wrong with a
> cpumask under sys/ or proc/?
I'm usually told that cpusets is the current place where CPU attributes are
supposed to go. I personally don't mind much /sys either even though cpusets
looks like a more flexible way to partition CPUs with properties and tasks
placement altogether...
^ permalink raw reply [flat|nested] 28+ messages in thread
* Re: [RFC PATCH 4/4] cpuset: Support RCU-NOCB toggle on v2 root partitions
2022-05-26 22:51 ` Frederic Weisbecker
@ 2022-05-26 23:02 ` Tejun Heo
2022-05-27 0:28 ` Waiman Long
0 siblings, 1 reply; 28+ messages in thread
From: Tejun Heo @ 2022-05-26 23:02 UTC (permalink / raw)
To: Frederic Weisbecker
Cc: LKML, Peter Zijlstra, Paul E . McKenney, Paul Gortmaker,
Johannes Weiner, Marcelo Tosatti, Phil Auld, Zefan Li,
Waiman Long, Daniel Bristot de Oliveira, Nicolas Saenz Julienne,
rcu
On Fri, May 27, 2022 at 12:51:41AM +0200, Frederic Weisbecker wrote:
> > Does it even make sense to make this hierarchical? What's wrong with a
> > cpumask under sys/ or proc/?
>
> I'm usually told that cpusets is the current place where CPU attributes are
> supposed to go. I personally don't mind much /sys either even though cpusets
> looks like a more flexible way to partition CPUs with properties and tasks
> placement altogether...
Yeah, I mean, if it's hierarchical, it's the right place but I have a hard
time seeing anything hierarchical with this one. Somebody just has to know
which cpus are up for rcu processing and which aren't. Waiman, what do you
think?
Thanks.
--
tejun
^ permalink raw reply [flat|nested] 28+ messages in thread
* Re: [RFC PATCH 4/4] cpuset: Support RCU-NOCB toggle on v2 root partitions
2022-05-26 23:02 ` Tejun Heo
@ 2022-05-27 0:28 ` Waiman Long
2022-05-27 0:37 ` Tejun Heo
0 siblings, 1 reply; 28+ messages in thread
From: Waiman Long @ 2022-05-27 0:28 UTC (permalink / raw)
To: Tejun Heo, Frederic Weisbecker
Cc: LKML, Peter Zijlstra, Paul E . McKenney, Paul Gortmaker,
Johannes Weiner, Marcelo Tosatti, Phil Auld, Zefan Li,
Daniel Bristot de Oliveira, Nicolas Saenz Julienne, rcu
On 5/26/22 19:02, Tejun Heo wrote:
> On Fri, May 27, 2022 at 12:51:41AM +0200, Frederic Weisbecker wrote:
>>> Does it even make sense to make this hierarchical? What's wrong with a
>>> cpumask under sys/ or proc/?
>> I'm usually told that cpusets is the current place where CPU attributes are
>> supposed to go. I personally don't mind much /sys either even though cpusets
>> looks like a more flexible way to partition CPUs with properties and tasks
>> placement altogether...
> Yeah, I mean, if it's hierarchical, it's the right place but I have a hard
> time seeing anything hierarchical with this one. Somebody just has to know
> which cpus are up for rcu processing and which aren't. Waiman, what do you
> think?
I am thinking along the line that it will not be hierarchical. However,
cpuset can be useful if we want to have multiple isolated partitions
underneath the top cpuset with different isolation attributes, but no
more sub-isolated partition with sub-attributes underneath them. IOW, we
can only set them at the first level under top_cpuset. Will that be useful?
Cheers,
Longman
^ permalink raw reply [flat|nested] 28+ messages in thread
* Re: [RFC PATCH 4/4] cpuset: Support RCU-NOCB toggle on v2 root partitions
2022-05-27 0:28 ` Waiman Long
@ 2022-05-27 0:37 ` Tejun Heo
2022-05-27 8:30 ` Juri Lelli
0 siblings, 1 reply; 28+ messages in thread
From: Tejun Heo @ 2022-05-27 0:37 UTC (permalink / raw)
To: Waiman Long
Cc: Frederic Weisbecker, LKML, Peter Zijlstra, Paul E . McKenney,
Paul Gortmaker, Johannes Weiner, Marcelo Tosatti, Phil Auld,
Zefan Li, Daniel Bristot de Oliveira, Nicolas Saenz Julienne,
rcu
On Thu, May 26, 2022 at 08:28:43PM -0400, Waiman Long wrote:
> I am thinking along the line that it will not be hierarchical. However,
> cpuset can be useful if we want to have multiple isolated partitions
> underneath the top cpuset with different isolation attributes, but no more
> sub-isolated partition with sub-attributes underneath them. IOW, we can only
> set them at the first level under top_cpuset. Will that be useful?
At that point, I'd just prefer to have it under /proc or /sys.
Thanks.
--
tejun
^ permalink raw reply [flat|nested] 28+ messages in thread
* Re: [RFC PATCH 4/4] cpuset: Support RCU-NOCB toggle on v2 root partitions
2022-05-27 0:37 ` Tejun Heo
@ 2022-05-27 8:30 ` Juri Lelli
2022-05-27 8:45 ` Tejun Heo
2022-05-28 14:24 ` Peter Zijlstra
0 siblings, 2 replies; 28+ messages in thread
From: Juri Lelli @ 2022-05-27 8:30 UTC (permalink / raw)
To: Tejun Heo
Cc: Waiman Long, Frederic Weisbecker, LKML, Peter Zijlstra,
Paul E . McKenney, Paul Gortmaker, Johannes Weiner,
Marcelo Tosatti, Phil Auld, Zefan Li, Daniel Bristot de Oliveira,
Nicolas Saenz Julienne, rcu
Hi,
On 26/05/22 14:37, Tejun Heo wrote:
> On Thu, May 26, 2022 at 08:28:43PM -0400, Waiman Long wrote:
> > I am thinking along the line that it will not be hierarchical. However,
> > cpuset can be useful if we want to have multiple isolated partitions
> > underneath the top cpuset with different isolation attributes, but no more
> > sub-isolated partition with sub-attributes underneath them. IOW, we can only
> > set them at the first level under top_cpuset. Will that be useful?
>
> At that point, I'd just prefer to have it under /proc or /sys.
FWIW, I was under the impression that this would nicely fit alongside
other features towards implementing dynamic isolation of CPUs (say
https://lore.kernel.org/lkml/20220510153413.400020-1-longman@redhat.com/
for example). Wouldn't it be awkward to have to poke different places to
achieve isolation at runtime?
Also, I wonder if a proc/sys interface might be problematic for certain
middleware that is substantially based on using cgroups. I'll try to ask
around. :)
Best,
Juri
^ permalink raw reply [flat|nested] 28+ messages in thread
* Re: [RFC PATCH 4/4] cpuset: Support RCU-NOCB toggle on v2 root partitions
2022-05-27 8:30 ` Juri Lelli
@ 2022-05-27 8:45 ` Tejun Heo
2022-05-27 12:58 ` Phil Auld
2022-05-28 14:24 ` Peter Zijlstra
1 sibling, 1 reply; 28+ messages in thread
From: Tejun Heo @ 2022-05-27 8:45 UTC (permalink / raw)
To: Juri Lelli
Cc: Waiman Long, Frederic Weisbecker, LKML, Peter Zijlstra,
Paul E . McKenney, Paul Gortmaker, Johannes Weiner,
Marcelo Tosatti, Phil Auld, Zefan Li, Daniel Bristot de Oliveira,
Nicolas Saenz Julienne, rcu
Hello,
On Fri, May 27, 2022 at 10:30:18AM +0200, Juri Lelli wrote:
> FWIW, I was under the impression that this would nicely fit along the
> side of other feaures towards implenting dynamic isolation of CPUs (say
> https://lore.kernel.org/lkml/20220510153413.400020-1-longman@redhat.com/
> for example). Wouldn't be awkward to have to poke different places to
> achieve isolation at runtime?
So, if it were just part of the isolated domain thing, it would make
sense, but as a separate flag which isn't hierarchical, it's weird to put it
there.
> Also, I wonder if a proc/sys interface might be problematic for certain
> middleware that is substantially based on using cgroups. I'll try to ask
> around. :)
There is a downside to making a feature a part of cpuset in that it makes
cgroup usage mandatory. This is fine for something which benefits from
hierarchical organization but it is weird to require building cgroup
hierarchy for straight-forward system-wide features.
Thanks.
--
tejun
^ permalink raw reply [flat|nested] 28+ messages in thread
* Re: [RFC PATCH 4/4] cpuset: Support RCU-NOCB toggle on v2 root partitions
2022-05-27 8:45 ` Tejun Heo
@ 2022-05-27 12:58 ` Phil Auld
0 siblings, 0 replies; 28+ messages in thread
From: Phil Auld @ 2022-05-27 12:58 UTC (permalink / raw)
To: Tejun Heo
Cc: Juri Lelli, Waiman Long, Frederic Weisbecker, LKML,
Peter Zijlstra, Paul E . McKenney, Paul Gortmaker,
Johannes Weiner, Marcelo Tosatti, Zefan Li,
Daniel Bristot de Oliveira, Nicolas Saenz Julienne, rcu
Hi,
On Thu, May 26, 2022 at 10:45:00PM -1000 Tejun Heo wrote:
> Hello,
>
> On Fri, May 27, 2022 at 10:30:18AM +0200, Juri Lelli wrote:
> > FWIW, I was under the impression that this would nicely fit along the
> > side of other feaures towards implenting dynamic isolation of CPUs (say
> > https://lore.kernel.org/lkml/20220510153413.400020-1-longman@redhat.com/
> > for example). Wouldn't be awkward to have to poke different places to
> > achieve isolation at runtime?
>
> So, it were just being part of the isolated domain thing, it would make
> sense, but as a separate flag which isn't hierarchical, it's weird to put it
> there.
The way I see it is more that the "isolated domain thing" is one part of
this whole dynamic isolation thing and is just one flag among many (most
still on the drawing board, but planned). It may be that Waiman's "isolated"
should be renamed "no_load_balance" or something.
Part of this is making cpu isolation more granular.
>
> > Also, I wonder if a proc/sys interface might be problematic for certain
> > middleware that is substantially based on using cgroups. I'll try to ask
> > around. :)
>
> There is a downside to making a feature a part of cpuset in that it makes
> cgroup usage mandatory. This is fine for something which benefits from
> hierarchical organization but it is weird to require building cgroup
> hierarchy for straight-forward system-wide features.
>
That ship may have sailed when SD_LOAD_BALANCE was removed, which is part
of what Waiman's feature addresses. That is, now in order to get control
over the system-wide feature of which CPUs get scheduler load balanced you
need to use cpusets.
My 3 cents anyway (inflation ;)
Cheers,
Phil
> Thanks.
>
> --
> tejun
>
--
^ permalink raw reply [flat|nested] 28+ messages in thread
* Re: [RFC PATCH 4/4] cpuset: Support RCU-NOCB toggle on v2 root partitions
2022-05-27 8:30 ` Juri Lelli
2022-05-27 8:45 ` Tejun Heo
@ 2022-05-28 14:24 ` Peter Zijlstra
2022-05-30 0:40 ` Frederic Weisbecker
1 sibling, 1 reply; 28+ messages in thread
From: Peter Zijlstra @ 2022-05-28 14:24 UTC (permalink / raw)
To: Juri Lelli
Cc: Tejun Heo, Waiman Long, Frederic Weisbecker, LKML,
Paul E . McKenney, Paul Gortmaker, Johannes Weiner,
Marcelo Tosatti, Phil Auld, Zefan Li, Daniel Bristot de Oliveira,
Nicolas Saenz Julienne, rcu
On Fri, May 27, 2022 at 10:30:18AM +0200, Juri Lelli wrote:
> Hi,
>
> On 26/05/22 14:37, Tejun Heo wrote:
> > On Thu, May 26, 2022 at 08:28:43PM -0400, Waiman Long wrote:
> > > I am thinking along the line that it will not be hierarchical. However,
> > > cpuset can be useful if we want to have multiple isolated partitions
> > > underneath the top cpuset with different isolation attributes, but no more
> > > sub-isolated partition with sub-attributes underneath them. IOW, we can only
> > > set them at the first level under top_cpuset. Will that be useful?
> >
> > At that point, I'd just prefer to have it under /proc or /sys.
>
> FWIW, I was under the impression that this would nicely fit along the
> side of other feaures towards implenting dynamic isolation of CPUs (say
> https://lore.kernel.org/lkml/20220510153413.400020-1-longman@redhat.com/
> for example). Wouldn't be awkward to have to poke different places to
> achieve isolation at runtime?
This, that's what I was thinking.
My main objection to the whole thing is that it's an RCU_NOCB specific
interface. *That* I think is daft.
I was thinking a partition would be able to designate a house-keeping
sub-partition/mask, but who cares about all the various different
housekeeping parties.
^ permalink raw reply [flat|nested] 28+ messages in thread
* Re: [RFC PATCH 4/4] cpuset: Support RCU-NOCB toggle on v2 root partitions
2022-05-28 14:24 ` Peter Zijlstra
@ 2022-05-30 0:40 ` Frederic Weisbecker
2022-05-30 8:11 ` Peter Zijlstra
2022-05-30 14:29 ` nicolas saenz julienne
0 siblings, 2 replies; 28+ messages in thread
From: Frederic Weisbecker @ 2022-05-30 0:40 UTC (permalink / raw)
To: Peter Zijlstra
Cc: Juri Lelli, Tejun Heo, Waiman Long, LKML, Paul E . McKenney,
Paul Gortmaker, Johannes Weiner, Marcelo Tosatti, Phil Auld,
Zefan Li, Daniel Bristot de Oliveira, Nicolas Saenz Julienne,
rcu
On Sat, May 28, 2022 at 04:24:50PM +0200, Peter Zijlstra wrote:
> On Fri, May 27, 2022 at 10:30:18AM +0200, Juri Lelli wrote:
> > Hi,
> >
> > On 26/05/22 14:37, Tejun Heo wrote:
> > > On Thu, May 26, 2022 at 08:28:43PM -0400, Waiman Long wrote:
> > > > I am thinking along the line that it will not be hierarchical. However,
> > > > cpuset can be useful if we want to have multiple isolated partitions
> > > > underneath the top cpuset with different isolation attributes, but no more
> > > > sub-isolated partition with sub-attributes underneath them. IOW, we can only
> > > > set them at the first level under top_cpuset. Will that be useful?
> > >
> > > At that point, I'd just prefer to have it under /proc or /sys.
> >
> > FWIW, I was under the impression that this would nicely fit along the
> > side of other feaures towards implenting dynamic isolation of CPUs (say
> > https://lore.kernel.org/lkml/20220510153413.400020-1-longman@redhat.com/
> > for example). Wouldn't be awkward to have to poke different places to
> > achieve isolation at runtime?
>
> This, that's what I was thinking.
>
> My main objection to the whole thing is that it's an RCU_NOCB specific
> interface. *That* I think is daft.
>
> I was thinking a partition would be able to designate a house-keeping
> sub-partition/mask, but who cares about all the various different
> housekeeping parties.
It's time for the isolation users to step up here! I very rarely hear from them
and I just can't figure out by myself all the variants of uses for each of the
isolation features. Maybe some people are only interested in nocb for some
specific uses, or maybe it never makes sense without nohz full and all the rest
of the isolation features. So for now I take the very cautious path to split the
interface.
Thanks.
^ permalink raw reply [flat|nested] 28+ messages in thread
* Re: [RFC PATCH 4/4] cpuset: Support RCU-NOCB toggle on v2 root partitions
2022-05-30 0:40 ` Frederic Weisbecker
@ 2022-05-30 8:11 ` Peter Zijlstra
2022-05-30 10:56 ` Frederic Weisbecker
2022-05-30 14:29 ` nicolas saenz julienne
1 sibling, 1 reply; 28+ messages in thread
From: Peter Zijlstra @ 2022-05-30 8:11 UTC (permalink / raw)
To: Frederic Weisbecker
Cc: Juri Lelli, Tejun Heo, Waiman Long, LKML, Paul E . McKenney,
Paul Gortmaker, Johannes Weiner, Marcelo Tosatti, Phil Auld,
Zefan Li, Daniel Bristot de Oliveira, Nicolas Saenz Julienne,
rcu
On Mon, May 30, 2022 at 02:40:49AM +0200, Frederic Weisbecker wrote:
> On Sat, May 28, 2022 at 04:24:50PM +0200, Peter Zijlstra wrote:
> > On Fri, May 27, 2022 at 10:30:18AM +0200, Juri Lelli wrote:
> > > Hi,
> > >
> > > On 26/05/22 14:37, Tejun Heo wrote:
> > > > On Thu, May 26, 2022 at 08:28:43PM -0400, Waiman Long wrote:
> > > > > I am thinking along the line that it will not be hierarchical. However,
> > > > > cpuset can be useful if we want to have multiple isolated partitions
> > > > > underneath the top cpuset with different isolation attributes, but no more
> > > > > sub-isolated partition with sub-attributes underneath them. IOW, we can only
> > > > > set them at the first level under top_cpuset. Will that be useful?
> > > >
> > > > At that point, I'd just prefer to have it under /proc or /sys.
> > >
> > > FWIW, I was under the impression that this would nicely fit along the
> > > side of other feaures towards implenting dynamic isolation of CPUs (say
> > > https://lore.kernel.org/lkml/20220510153413.400020-1-longman@redhat.com/
> > > for example). Wouldn't be awkward to have to poke different places to
> > > achieve isolation at runtime?
> >
> > This, that's what I was thinking.
> >
> > My main objection to the whole thing is that it's an RCU_NOCB specific
> > interface. *That* I think is daft.
> >
> > I was thinking a partition would be able to designate a house-keeping
> > sub-partition/mask, but who cares about all the various different
> > housekeeping parties.
>
> It's time for the isolation users to step up here! I very rarely hear from them
> and I just can't figure out by myself all the variants of uses for each of the
> isolation features. May be some people are only interested in nocb for some
> specific uses, or may be it never makes sense without nohz full and all the rest
> of the isolation features. So for now I take the very cautious path to split the
> interface.
This is ABI, you can't walk back on it. I would suggest starting with an
'all feature' isolation. Only if there's real demand for something more
fine-grained add that on top. Simple first etc.
^ permalink raw reply [flat|nested] 28+ messages in thread
* Re: [RFC PATCH 4/4] cpuset: Support RCU-NOCB toggle on v2 root partitions
2022-05-30 8:11 ` Peter Zijlstra
@ 2022-05-30 10:56 ` Frederic Weisbecker
2022-05-30 13:16 ` Peter Zijlstra
0 siblings, 1 reply; 28+ messages in thread
From: Frederic Weisbecker @ 2022-05-30 10:56 UTC (permalink / raw)
To: Peter Zijlstra
Cc: Juri Lelli, Tejun Heo, Waiman Long, LKML, Paul E . McKenney,
Paul Gortmaker, Johannes Weiner, Marcelo Tosatti, Phil Auld,
Zefan Li, Daniel Bristot de Oliveira, Nicolas Saenz Julienne,
rcu
On Mon, May 30, 2022 at 10:11:41AM +0200, Peter Zijlstra wrote:
> On Mon, May 30, 2022 at 02:40:49AM +0200, Frederic Weisbecker wrote:
> > On Sat, May 28, 2022 at 04:24:50PM +0200, Peter Zijlstra wrote:
> > > On Fri, May 27, 2022 at 10:30:18AM +0200, Juri Lelli wrote:
> > > > Hi,
> > > >
> > > > On 26/05/22 14:37, Tejun Heo wrote:
> > > > > On Thu, May 26, 2022 at 08:28:43PM -0400, Waiman Long wrote:
> > > > > > I am thinking along the line that it will not be hierarchical. However,
> > > > > > cpuset can be useful if we want to have multiple isolated partitions
> > > > > > underneath the top cpuset with different isolation attributes, but no more
> > > > > > sub-isolated partition with sub-attributes underneath them. IOW, we can only
> > > > > > set them at the first level under top_cpuset. Will that be useful?
> > > > >
> > > > > At that point, I'd just prefer to have it under /proc or /sys.
> > > >
> > > > FWIW, I was under the impression that this would nicely fit along the
> > > > side of other feaures towards implenting dynamic isolation of CPUs (say
> > > > https://lore.kernel.org/lkml/20220510153413.400020-1-longman@redhat.com/
> > > > for example). Wouldn't be awkward to have to poke different places to
> > > > achieve isolation at runtime?
> > >
> > > This, that's what I was thinking.
> > >
> > > My main objection to the whole thing is that it's an RCU_NOCB specific
> > > interface. *That* I think is daft.
> > >
> > > I was thinking a partition would be able to designate a house-keeping
> > > sub-partition/mask, but who cares about all the various different
> > > housekeeping parties.
> >
> > It's time for the isolation users to step up here! I very rarely hear from them
> > and I just can't figure out by myself all the variants of uses for each of the
> > isolation features. May be some people are only interested in nocb for some
> > specific uses, or may be it never makes sense without nohz full and all the rest
> > of the isolation features. So for now I take the very cautious path to split the
> > interface.
>
> This is ABI, you can't walk back on it. I would suggest starting with an
> 'all feature' isolation. Only if there's real demand for something more
> fine-grained add that on top. Simple first etc.
That's actually my worry. If we start with an all in one ABI, how do we later
mix that up with more finegrained features? Like what will be the behaviour of:
cpuset.isolation.rcu_nocb = 0
cpuset.isolation.all = 1
^ permalink raw reply [flat|nested] 28+ messages in thread
* Re: [RFC PATCH 4/4] cpuset: Support RCU-NOCB toggle on v2 root partitions
2022-05-30 10:56 ` Frederic Weisbecker
@ 2022-05-30 13:16 ` Peter Zijlstra
2022-05-30 14:13 ` Juri Lelli
` (2 more replies)
0 siblings, 3 replies; 28+ messages in thread
From: Peter Zijlstra @ 2022-05-30 13:16 UTC (permalink / raw)
To: Frederic Weisbecker
Cc: Juri Lelli, Tejun Heo, Waiman Long, LKML, Paul E . McKenney,
Paul Gortmaker, Johannes Weiner, Marcelo Tosatti, Phil Auld,
Zefan Li, Daniel Bristot de Oliveira, Nicolas Saenz Julienne,
rcu
On Mon, May 30, 2022 at 12:56:50PM +0200, Frederic Weisbecker wrote:
> > This is ABI, you can't walk back on it. I would suggest starting with an
> > 'all feature' isolation. Only if there's real demand for something more
> > fine-grained add that on top. Simple first etc.
>
> That's actually my worry. If we start with an all in one ABI, how do we later
> mix that up with more finegrained features? Like what will be the behaviour of:
>
> cpuset.isolation.rcu_nocb = 0
> cpuset.isolation.all = 1
Well clearly that doesn't make sense. I was more thinking along the
lines of cgroup.subtree_control, where instead all features are enabled
by default.
But only if there's a real usecase, otherwise there's no point in
providing such knobs.
^ permalink raw reply [flat|nested] 28+ messages in thread
* Re: [RFC PATCH 4/4] cpuset: Support RCU-NOCB toggle on v2 root partitions
2022-05-30 13:16 ` Peter Zijlstra
@ 2022-05-30 14:13 ` Juri Lelli
2022-05-30 21:35 ` Frederic Weisbecker
2022-05-31 14:21 ` Waiman Long
2 siblings, 0 replies; 28+ messages in thread
From: Juri Lelli @ 2022-05-30 14:13 UTC (permalink / raw)
To: Peter Zijlstra
Cc: Frederic Weisbecker, Tejun Heo, Waiman Long, LKML,
Paul E . McKenney, Paul Gortmaker, Johannes Weiner,
Marcelo Tosatti, Phil Auld, Zefan Li, Daniel Bristot de Oliveira,
Nicolas Saenz Julienne, rcu
On 30/05/22 15:16, Peter Zijlstra wrote:
> On Mon, May 30, 2022 at 12:56:50PM +0200, Frederic Weisbecker wrote:
>
> > > This is ABI, you can't walk back on it. I would suggest starting with an
> > > 'all feature' isolation. Only if there's real demand for something more
> > > fine-grained add that on top. Simple first etc.
> >
> > That's actually my worry. If we start with an all in one ABI, how do we later
> > mix that up with more finegrained features? Like what will be the behaviour of:
> >
> > cpuset.isolation.rcu_nocb = 0
> > cpuset.isolation.all = 1
>
> Well clearly that doesn't make sense. I was more thinking along the
> lines of cgroup.subtree_control, where instead all features are enabled
> by default.
>
> But only if there's a real usecase, otherwise there's no point in
> providing such knobs.
All features on/off knob + house-keeping sub-partition/mask seem to be
what isolation users I could reach so far (OCP mostly) would indeed like
to have in the future.
^ permalink raw reply [flat|nested] 28+ messages in thread
* Re: [RFC PATCH 4/4] cpuset: Support RCU-NOCB toggle on v2 root partitions
2022-05-30 13:16 ` Peter Zijlstra
2022-05-30 14:13 ` Juri Lelli
@ 2022-05-30 21:35 ` Frederic Weisbecker
2022-05-31 0:57 ` Tejun Heo
2022-05-31 14:21 ` Waiman Long
2 siblings, 1 reply; 28+ messages in thread
From: Frederic Weisbecker @ 2022-05-30 21:35 UTC (permalink / raw)
To: Peter Zijlstra
Cc: Juri Lelli, Tejun Heo, Waiman Long, LKML, Paul E . McKenney,
Paul Gortmaker, Johannes Weiner, Marcelo Tosatti, Phil Auld,
Zefan Li, Daniel Bristot de Oliveira, Nicolas Saenz Julienne,
rcu
On Mon, May 30, 2022 at 03:16:27PM +0200, Peter Zijlstra wrote:
> On Mon, May 30, 2022 at 12:56:50PM +0200, Frederic Weisbecker wrote:
>
> > > This is ABI, you can't walk back on it. I would suggest starting with an
> > > 'all feature' isolation. Only if there's real demand for something more
> > > fine-grained add that on top. Simple first etc.
> >
> > That's actually my worry. If we start with an all in one ABI, how do we later
> > mix that up with more finegrained features? Like what will be the behaviour of:
> >
> > cpuset.isolation.rcu_nocb = 0
> > cpuset.isolation.all = 1
>
> Well clearly that doesn't make sense. I was more thinking along the
> lines of cgroup.subtree_control, where instead all features are enabled
> by default.
>
> But only if there's a real usecase, otherwise there's no point in
> providing such knobs.
That makes sense. So there would be a simple cpuset.isolation that can
be either 1 or 0 where 1 has all possible isolation stuff on. Then
if the need arises we can provide more tuning through a new specific
cgroup controller, right?
If so that sounds good to me.
^ permalink raw reply [flat|nested] 28+ messages in thread
* Re: [RFC PATCH 4/4] cpuset: Support RCU-NOCB toggle on v2 root partitions
2022-05-30 21:35 ` Frederic Weisbecker
@ 2022-05-31 0:57 ` Tejun Heo
0 siblings, 0 replies; 28+ messages in thread
From: Tejun Heo @ 2022-05-31 0:57 UTC (permalink / raw)
To: Frederic Weisbecker
Cc: Peter Zijlstra, Juri Lelli, Waiman Long, LKML, Paul E . McKenney,
Paul Gortmaker, Johannes Weiner, Marcelo Tosatti, Phil Auld,
Zefan Li, Daniel Bristot de Oliveira, Nicolas Saenz Julienne,
rcu
On Mon, May 30, 2022 at 11:35:56PM +0200, Frederic Weisbecker wrote:
> That makes sense. So there would be a simple cpuset.isolation that can
> be either 1 or 0 where 1 has all possible isolation stuff on. Then
> if the need arises we can provide more tuning through a new specific
> cgroup controller, right?
Given that there isn't much that is hierarchical about them, I'm pretty
skeptical about introducing a new controller or fancy hierarchical interface
for it. If isolation is intertwined with cpuset partitioning and a simple
knob for it fits well with the rest of configuration, yeah, but let's please
try to avoid maximizing the interface. We want the interface to encode
users' intentions (e.g., here, I want these cpus isolated) not the
implementation details to make that happen. Of course, there are gradients
but it becomes really ugly when you try to expose low level details on
cgroups because of the implied flexibility (I can organize however I want
hierarchically and the controls must nest and be delegatable properly).
So, if you think the isolation feature will need lots of low-level knobs
exposed, cgroup isn't the right place. It should be something simpler and
lower level. This probably is a good time to spend some time thinking how
it'd look like, say, five years down the line. If it's gonna be the "I want
isolation" knob + maybe some obscure system wide knobs that most people
don't need to think about, it's gonna be fine. Otherwise, we shouldn't put
this in cgroup until we have better ideas on what the interface should look
like.
Thanks.
--
tejun
^ permalink raw reply [flat|nested] 28+ messages in thread
* Re: [RFC PATCH 4/4] cpuset: Support RCU-NOCB toggle on v2 root partitions
2022-05-30 13:16 ` Peter Zijlstra
2022-05-30 14:13 ` Juri Lelli
2022-05-30 21:35 ` Frederic Weisbecker
@ 2022-05-31 14:21 ` Waiman Long
2 siblings, 0 replies; 28+ messages in thread
From: Waiman Long @ 2022-05-31 14:21 UTC (permalink / raw)
To: Peter Zijlstra, Frederic Weisbecker
Cc: Juri Lelli, Tejun Heo, LKML, Paul E . McKenney, Paul Gortmaker,
Johannes Weiner, Marcelo Tosatti, Phil Auld, Zefan Li,
Daniel Bristot de Oliveira, Nicolas Saenz Julienne, rcu
On 5/30/22 09:16, Peter Zijlstra wrote:
> On Mon, May 30, 2022 at 12:56:50PM +0200, Frederic Weisbecker wrote:
>
>>> This is ABI, you can't walk back on it. I would suggest starting with an
>>> 'all feature' isolation. Only if there's real demand for something more
>>> fine-grained add that on top. Simple first etc.
>> That's actually my worry. If we start with an all in one ABI, how do we later
>> mix that up with more finegrained features? Like what will be the behaviour of:
>>
>> cpuset.isolation.rcu_nocb = 0
>> cpuset.isolation.all = 1
> Well clearly that doesn't make sense. I was more thinking along the
> lines of cgroup.subtree_control, where instead all features are enabled
> by default.
>
> But only if there's a real usecase, otherwise there's no point in
> providing such knobs.
I am actually thinking about extending the cpuset partition interface
for isolation. Right now, I have an outstanding patch [1] to add an
"isolated" state to partition which disables load balancing, somewhat
similar to the isolcpus command line option. In the future, we can add an
attribute to the isolation state like "isolated:full", similar to
nohz_full currently. If the need arises, we can even extend the
attribute to allow a list like "isolated:rcu_nocbs". I don't think it is
a good idea to keep on adding new cpuset control files extensively. I
would prefer extending the existing ones.
[1] https://lore.kernel.org/lkml/20220510153413.400020-1-longman@redhat.com/
Cheers,
Longman
^ permalink raw reply [flat|nested] 28+ messages in thread
* Re: [RFC PATCH 4/4] cpuset: Support RCU-NOCB toggle on v2 root partitions
2022-05-30 0:40 ` Frederic Weisbecker
2022-05-30 8:11 ` Peter Zijlstra
@ 2022-05-30 14:29 ` nicolas saenz julienne
2022-05-30 14:49 ` Paul E. McKenney
1 sibling, 1 reply; 28+ messages in thread
From: nicolas saenz julienne @ 2022-05-30 14:29 UTC (permalink / raw)
To: Frederic Weisbecker, Peter Zijlstra
Cc: Juri Lelli, Tejun Heo, Waiman Long, LKML, Paul E . McKenney,
Paul Gortmaker, Johannes Weiner, Marcelo Tosatti, Phil Auld,
Zefan Li, Daniel Bristot de Oliveira, rcu
On Mon, 2022-05-30 at 02:40 +0200, Frederic Weisbecker wrote:
> On Sat, May 28, 2022 at 04:24:50PM +0200, Peter Zijlstra wrote:
> > On Fri, May 27, 2022 at 10:30:18AM +0200, Juri Lelli wrote:
> > > Hi,
> > >
> > > On 26/05/22 14:37, Tejun Heo wrote:
> > > > On Thu, May 26, 2022 at 08:28:43PM -0400, Waiman Long wrote:
> > > > > I am thinking along the line that it will not be hierarchical. However,
> > > > > cpuset can be useful if we want to have multiple isolated partitions
> > > > > underneath the top cpuset with different isolation attributes, but no more
> > > > > sub-isolated partition with sub-attributes underneath them. IOW, we can only
> > > > > set them at the first level under top_cpuset. Will that be useful?
> > > >
> > > > At that point, I'd just prefer to have it under /proc or /sys.
> > >
> > > FWIW, I was under the impression that this would nicely fit along the
> > > side of other feaures towards implenting dynamic isolation of CPUs (say
> > > https://lore.kernel.org/lkml/20220510153413.400020-1-longman@redhat.com/
> > > for example). Wouldn't be awkward to have to poke different places to
> > > achieve isolation at runtime?
> >
> > This, that's what I was thinking.
> >
> > My main objection to the whole thing is that it's an RCU_NOCB specific
> > interface. *That* I think is daft.
> >
> > I was thinking a partition would be able to designate a house-keeping
> > sub-partition/mask, but who cares about all the various different
> > housekeeping parties.
>
> It's time for the isolation users to step up here! I very rarely hear from them
> and I just can't figure out by myself all the variants of uses for each of the
> isolation features. May be some people are only interested in nocb for some
> specific uses, or may be it never makes sense without nohz full and all the rest
> of the isolation features. So for now I take the very cautious path to split the
> interface.
OK, my 2 cents. I personally deal with virtualisation setups that involve RT
and CPU isolation on both host and guests.
The main use-case ATM is running DPDK-like workloads. We want to achieve
latencies in the order of tens of microseconds, so it's essential to avoid
entering the kernel at all cost. So, no HW interrupts, sched tick, RCU
callbacks, clocksource watchdogs, softlockup, intel_pstate, timers, etc...
Everything is deferred onto housekeeping CPUs or disabled.
Then we have setups that need to deal with HW on the host, exposed to the guest
through emulation or VirtIO. The same rules apply really, except for some IRQ
affinity tweaks and sched priority magic.
I find it hard to see how running RCU callback locally could be useful to any
latency sensitive workload.
Frederic, out of curiosity, do you have a use-case in mind that might benefit
from nohz_full but not rcu_nocb? Maybe HPC?
Regards,
Nicolas
^ permalink raw reply [flat|nested] 28+ messages in thread
* Re: [RFC PATCH 4/4] cpuset: Support RCU-NOCB toggle on v2 root partitions
2022-05-30 14:29 ` nicolas saenz julienne
@ 2022-05-30 14:49 ` Paul E. McKenney
2022-05-30 22:36 ` Alison Chaiken
0 siblings, 1 reply; 28+ messages in thread
From: Paul E. McKenney @ 2022-05-30 14:49 UTC (permalink / raw)
To: nicolas saenz julienne
Cc: Frederic Weisbecker, Peter Zijlstra, Juri Lelli, Tejun Heo,
Waiman Long, LKML, Paul Gortmaker, Johannes Weiner,
Marcelo Tosatti, Phil Auld, Zefan Li, Daniel Bristot de Oliveira,
rcu
On Mon, May 30, 2022 at 04:29:56PM +0200, nicolas saenz julienne wrote:
> On Mon, 2022-05-30 at 02:40 +0200, Frederic Weisbecker wrote:
> > On Sat, May 28, 2022 at 04:24:50PM +0200, Peter Zijlstra wrote:
> > > On Fri, May 27, 2022 at 10:30:18AM +0200, Juri Lelli wrote:
> > > > Hi,
> > > >
> > > > On 26/05/22 14:37, Tejun Heo wrote:
> > > > > On Thu, May 26, 2022 at 08:28:43PM -0400, Waiman Long wrote:
> > > > > > I am thinking along the line that it will not be hierarchical. However,
> > > > > > cpuset can be useful if we want to have multiple isolated partitions
> > > > > > underneath the top cpuset with different isolation attributes, but no more
> > > > > > sub-isolated partition with sub-attributes underneath them. IOW, we can only
> > > > > > set them at the first level under top_cpuset. Will that be useful?
> > > > >
> > > > > At that point, I'd just prefer to have it under /proc or /sys.
> > > >
> > > > FWIW, I was under the impression that this would nicely fit along the
> > > > side of other features towards implementing dynamic isolation of CPUs (say
> > > > https://lore.kernel.org/lkml/20220510153413.400020-1-longman@redhat.com/
> > > > for example). Wouldn't it be awkward to have to poke different places to
> > > > achieve isolation at runtime?
> > >
> > > This, that's what I was thinking.
> > >
> > > My main objection to the whole thing is that it's an RCU_NOCB specific
> > > interface. *That* I think is daft.
> > >
> > > I was thinking a partition would be able to designate a house-keeping
> > > sub-partition/mask, but who cares about all the various different
> > > housekeeping parties.
> >
> > It's time for the isolation users to step up here! I very rarely hear from them
> > and I just can't figure out by myself all the variants of uses for each of the
> > isolation features. Maybe some people are only interested in nocb for some
> > specific uses, or maybe it never makes sense without nohz full and all the rest
> > of the isolation features. So for now I take the very cautious path to split the
> > interface.
>
> OK, my 2 cents. I personally deal with virtualisation setups that involve RT
> and CPU isolation on both host and guests.
>
> The main use-case ATM is running DPDK-like workloads. We want to achieve
> latencies in the order of tens of microseconds, so it's essential to avoid
> entering the kernel at all cost. So, no HW interrupts, sched tick, RCU
> callbacks, clocksource watchdogs, softlockup, intel_pstate, timers, etc...
> Everything is deferred onto housekeeping CPUs or disabled.
>
> Then we have setups that need to deal with HW on the host, exposed to the guest
> through emulation or VirtIO. The same rules apply really, except for some IRQ
> affinity tweaks and sched priority magic.
>
> I find it hard to see how running RCU callbacks locally could be useful to any
> latency sensitive workload.
>
> Frederic, out of curiosity, do you have a use-case in mind that might benefit
> from nohz_full but not rcu_nocb? Maybe HPC?
Would users looking for millisecond-scale latencies want rcu_nocbs but
not nohz_full, that is, the other way around?
Thanx, Paul
^ permalink raw reply [flat|nested] 28+ messages in thread
* Re: [RFC PATCH 4/4] cpuset: Support RCU-NOCB toggle on v2 root partitions
2022-05-30 14:49 ` Paul E. McKenney
@ 2022-05-30 22:36 ` Alison Chaiken
0 siblings, 0 replies; 28+ messages in thread
From: Alison Chaiken @ 2022-05-30 22:36 UTC (permalink / raw)
To: paulmck
Cc: nicolas saenz julienne, Frederic Weisbecker, Peter Zijlstra,
Juri Lelli, Tejun Heo, Waiman Long, LKML, Paul Gortmaker,
Johannes Weiner, Marcelo Tosatti, Phil Auld, Zefan Li,
Daniel Bristot de Oliveira, rcu
> On Mon, May 30, 2022 at 04:29:56PM +0200, nicolas saenz julienne wrote:
> > On Mon, 2022-05-30 at 02:40 +0200, Frederic Weisbecker wrote:
> > > On Sat, May 28, 2022 at 04:24:50PM +0200, Peter Zijlstra wrote:
> > > > On Fri, May 27, 2022 at 10:30:18AM +0200, Juri Lelli wrote:
> > > > > Hi,
> > > > >
> > > > > On 26/05/22 14:37, Tejun Heo wrote:
> > > > > > On Thu, May 26, 2022 at 08:28:43PM -0400, Waiman Long wrote:
> > > > > > > I am thinking along the line that it will not be hierarchical. However,
> > > > > > > cpuset can be useful if we want to have multiple isolated partitions
> > > > > > > underneath the top cpuset with different isolation attributes, but no more
> > > > > > > sub-isolated partition with sub-attributes underneath them. IOW, we can only
> > > > > > > set them at the first level under top_cpuset. Will that be useful?
> > > > > >
> > > > > > At that point, I'd just prefer to have it under /proc or /sys.
> > > > >
> > > > > FWIW, I was under the impression that this would nicely fit along the
> > > > > side of other features towards implementing dynamic isolation of CPUs (say
> > > > > https://lore.kernel.org/lkml/20220510153413.400020-1-longman@redhat.com/
> > > > > for example). Wouldn't it be awkward to have to poke different places to
> > > > > achieve isolation at runtime?
> > > >
> > > > This, that's what I was thinking.
> > > >
> > > > My main objection to the whole thing is that it's an RCU_NOCB specific
> > > > interface. *That* I think is daft.
> > > >
> > > > I was thinking a partition would be able to designate a house-keeping
> > > > sub-partition/mask, but who cares about all the various different
> > > > housekeeping parties.
> > >
> > > It's time for the isolation users to step up here! I very rarely hear from them
> > > and I just can't figure out by myself all the variants of uses for each of the
> > > isolation features. Maybe some people are only interested in nocb for some
> > > specific uses, or maybe it never makes sense without nohz full and all the rest
> > > of the isolation features. So for now I take the very cautious path to split the
> > > interface.
> >
> > OK, my 2 cents. I personally deal with virtualisation setups that involve RT
> > and CPU isolation on both host and guests.
> >
> > The main use-case ATM is running DPDK-like workloads. We want to achieve
> > latencies in the order of tens of microseconds, so it's essential to avoid
> > entering the kernel at all cost. So, no HW interrupts, sched tick, RCU
> > callbacks, clocksource watchdogs, softlockup, intel_pstate, timers, etc...
> > Everything is deferred onto housekeeping CPUs or disabled.
> >
> > Then we have setups that need to deal with HW on the host, exposed to the guest
> > through emulation or VirtIO. The same rules apply really, except for some IRQ
> > affinity tweaks and sched priority magic.
> >
> > I find it hard to see how running RCU callbacks locally could be useful to any
> > latency sensitive workload.
> >
> > Frederic, out of curiosity, do you have a use-case in mind that might benefit
> > from nohz_full but not rcu_nocb? Maybe HPC?
On Mon, May 30, 2022 at 8:42 AM Paul E. McKenney <paulmck@kernel.org> wrote:
> Would users looking for millisecond-scale latencies want rcu_nocbs but
> not nohz_full, that is, the other way around?
On Intel processors running 5.15 with the timersd patches from Siewior
backported and Weisbecker's bug-fix, choosing CONFIG_HZ_PERIODIC
prevents cores from entering deeper C-states when hrtimers are
pending. With CONFIG_NO_HZ_COMMON, the cores do not service
non-blocking timer callbacks until another thread wakes them. Since
low latency is critical, this is a use case where NO_HZ_FULL will not
work but rcu_nocbs is needed.
-- Alison Chaiken, Aurora Innovation
^ permalink raw reply [flat|nested] 28+ messages in thread