* [Xen-devel] [PATCH v5 1/4] xen/rcu: don't use stop_machine_run() for rcu_barrier()
2020-03-12 8:28 [Xen-devel] [PATCH v5 0/4] xen/rcu: let rcu work better with core scheduling Juergen Gross
@ 2020-03-12 8:28 ` Juergen Gross
2020-03-13 11:02 ` Julien Grall
2020-03-12 8:28 ` [Xen-devel] [PATCH v5 2/4] xen: don't process rcu callbacks when holding a rcu_read_lock() Juergen Gross
` (2 subsequent siblings)
3 siblings, 1 reply; 9+ messages in thread
From: Juergen Gross @ 2020-03-12 8:28 UTC (permalink / raw)
To: xen-devel
Cc: Juergen Gross, Stefano Stabellini, Julien Grall, Wei Liu,
Andrew Cooper, Ian Jackson, George Dunlap, Jan Beulich
Today rcu_barrier() is calling stop_machine_run() to synchronize all
physical cpus in order to ensure all pending rcu calls have finished
when returning.
As stop_machine_run() is using tasklets this requires scheduling of
idle vcpus on all cpus imposing the need to call rcu_barrier() on idle
cpus only in case of core scheduling being active, as otherwise a
scheduling deadlock would occur.
There is no need at all to do the syncing of the cpus in tasklets, as
rcu activity is started in __do_softirq() called whenever softirq
activity is allowed. So rcu_barrier() can easily be modified to use
softirq for synchronization of the cpus no longer requiring any
scheduling activity.
As there already is an RCU softirq, reuse that for the synchronization.
Remove the barrier element from struct rcu_data as it isn't used.
Finally switch rcu_barrier() to return void as it now can never fail.
Partially-based-on-patch-by: Igor Druzhinin <igor.druzhinin@citrix.com>
Signed-off-by: Juergen Gross <jgross@suse.com>
---
V2:
- add recursion detection
V3:
- fix races (Igor Druzhinin)
V5:
- rename done_count to pending_count (Jan Beulich)
- fix race (Jan Beulich)
---
xen/common/rcupdate.c | 92 ++++++++++++++++++++++++++++++++--------------
xen/include/xen/rcupdate.h | 2 +-
2 files changed, 66 insertions(+), 28 deletions(-)
diff --git a/xen/common/rcupdate.c b/xen/common/rcupdate.c
index 03d84764d2..c5ef6acb1e 100644
--- a/xen/common/rcupdate.c
+++ b/xen/common/rcupdate.c
@@ -83,7 +83,6 @@ struct rcu_data {
struct rcu_head **donetail;
long blimit; /* Upper limit on a processed batch */
int cpu;
- struct rcu_head barrier;
long last_rs_qlen; /* qlen during the last resched */
/* 3) idle CPUs handling */
@@ -91,6 +90,7 @@ struct rcu_data {
bool idle_timer_active;
bool process_callbacks;
+ bool barrier_active;
};
/*
@@ -143,51 +143,82 @@ static int qhimark = 10000;
static int qlowmark = 100;
static int rsinterval = 1000;
-struct rcu_barrier_data {
- struct rcu_head head;
- atomic_t *cpu_count;
-};
+/*
+ * rcu_barrier() handling:
+ * cpu_count holds the number of cpus required to finish barrier handling.
+ * pending_count is initialized to nr_cpus + 1.
+ * Cpus are synchronized via softirq mechanism. rcu_barrier() is regarded to
+ * be active if pending_count is not zero. In case rcu_barrier() is called on
+ * multiple cpus it is enough to check for pending_count being not zero on entry
+ * and to call process_pending_softirqs() in a loop until pending_count drops to
+ * zero, before starting the new rcu_barrier() processing.
+ * In order to avoid hangs when rcu_barrier() is called multiple times on the
+ * same cpu in fast sequence and a slave cpu couldn't drop out of the
+ * barrier handling fast enough a second counter pending_count is needed.
+ * The rcu_barrier() invoking cpu will wait until pending_count reaches 1
+ * (meaning that all cpus have finished processing the barrier) and then will
+ * reset pending_count to 0 to enable entering rcu_barrier() again.
+ */
+static atomic_t cpu_count = ATOMIC_INIT(0);
+static atomic_t pending_count = ATOMIC_INIT(0);
static void rcu_barrier_callback(struct rcu_head *head)
{
- struct rcu_barrier_data *data = container_of(
- head, struct rcu_barrier_data, head);
- atomic_inc(data->cpu_count);
+ atomic_dec(&cpu_count);
}
-static int rcu_barrier_action(void *_cpu_count)
+static void rcu_barrier_action(void)
{
- struct rcu_barrier_data data = { .cpu_count = _cpu_count };
-
- ASSERT(!local_irq_is_enabled());
- local_irq_enable();
+ struct rcu_head head;
/*
* When callback is executed, all previously-queued RCU work on this CPU
- * is completed. When all CPUs have executed their callback, data.cpu_count
- * will have been incremented to include every online CPU.
+ * is completed. When all CPUs have executed their callback, cpu_count
+ * will have been decremented to 0.
*/
- call_rcu(&data.head, rcu_barrier_callback);
+ call_rcu(&head, rcu_barrier_callback);
- while ( atomic_read(data.cpu_count) != num_online_cpus() )
+ while ( atomic_read(&cpu_count) )
{
process_pending_softirqs();
cpu_relax();
}
- local_irq_disable();
-
- return 0;
+ atomic_dec(&pending_count);
}
-/*
- * As rcu_barrier() is using stop_machine_run() it is allowed to be used in
- * idle context only (see comment for stop_machine_run()).
- */
-int rcu_barrier(void)
+void rcu_barrier(void)
{
- atomic_t cpu_count = ATOMIC_INIT(0);
- return stop_machine_run(rcu_barrier_action, &cpu_count, NR_CPUS);
+ unsigned int n_cpus;
+
+ for ( ;; )
+ {
+ if ( !atomic_read(&pending_count) && get_cpu_maps() )
+ {
+ n_cpus = num_online_cpus();
+
+ if ( atomic_cmpxchg(&pending_count, 0, n_cpus + 1) == 0 )
+ break;
+
+ put_cpu_maps();
+ }
+
+ process_pending_softirqs();
+ cpu_relax();
+ }
+
+ atomic_set(&cpu_count, n_cpus);
+ cpumask_raise_softirq(&cpu_online_map, RCU_SOFTIRQ);
+
+ put_cpu_maps();
+
+ while ( atomic_read(&pending_count) != 1 )
+ {
+ process_pending_softirqs();
+ cpu_relax();
+ }
+
+ atomic_set(&pending_count, 0);
}
/* Is batch a before batch b ? */
@@ -426,6 +457,13 @@ static void rcu_process_callbacks(void)
rdp->process_callbacks = false;
__rcu_process_callbacks(&rcu_ctrlblk, rdp);
}
+
+ if ( atomic_read(&cpu_count) && !rdp->barrier_active )
+ {
+ rdp->barrier_active = true;
+ rcu_barrier_action();
+ rdp->barrier_active = false;
+ }
}
static int __rcu_pending(struct rcu_ctrlblk *rcp, struct rcu_data *rdp)
diff --git a/xen/include/xen/rcupdate.h b/xen/include/xen/rcupdate.h
index eb9b60df07..31c8b86d13 100644
--- a/xen/include/xen/rcupdate.h
+++ b/xen/include/xen/rcupdate.h
@@ -144,7 +144,7 @@ void rcu_check_callbacks(int cpu);
void call_rcu(struct rcu_head *head,
void (*func)(struct rcu_head *head));
-int rcu_barrier(void);
+void rcu_barrier(void);
void rcu_idle_enter(unsigned int cpu);
void rcu_idle_exit(unsigned int cpu);
--
2.16.4
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel
^ permalink raw reply related [flat|nested] 9+ messages in thread
* Re: [Xen-devel] [PATCH v5 1/4] xen/rcu: don't use stop_machine_run() for rcu_barrier()
2020-03-12 8:28 ` [Xen-devel] [PATCH v5 1/4] xen/rcu: don't use stop_machine_run() for rcu_barrier() Juergen Gross
@ 2020-03-13 11:02 ` Julien Grall
2020-03-13 11:18 ` Jürgen Groß
0 siblings, 1 reply; 9+ messages in thread
From: Julien Grall @ 2020-03-13 11:02 UTC (permalink / raw)
To: Juergen Gross, xen-devel
Cc: Stefano Stabellini, Wei Liu, Andrew Cooper, Ian Jackson,
George Dunlap, Jan Beulich
On 12/03/2020 08:28, Juergen Gross wrote:
> Today rcu_barrier() is calling stop_machine_run() to synchronize all
> physical cpus in order to ensure all pending rcu calls have finished
> when returning.
>
> As stop_machine_run() is using tasklets this requires scheduling of
> idle vcpus on all cpus imposing the need to call rcu_barrier() on idle
> cpus only in case of core scheduling being active, as otherwise a
> scheduling deadlock would occur.
>
> There is no need at all to do the syncing of the cpus in tasklets, as
> rcu activity is started in __do_softirq() called whenever softirq
> activity is allowed. So rcu_barrier() can easily be modified to use
> softirq for synchronization of the cpus no longer requiring any
> scheduling activity.
>
> As there already is a rcu softirq reuse that for the synchronization.
>
> Remove the barrier element from struct rcu_data as it isn't used.
>
> Finally switch rcu_barrier() to return void as it now can never fail.
>
> Partially-based-on-patch-by: Igor Druzhinin <igor.druzhinin@citrix.com>
> Signed-off-by: Juergen Gross <jgross@suse.com>
> ---
> V2:
> - add recursion detection
>
> V3:
> - fix races (Igor Druzhinin)
>
> V5:
> - rename done_count to pending_count (Jan Beulich)
> - fix race (Jan Beulich)
> ---
> xen/common/rcupdate.c | 92 ++++++++++++++++++++++++++++++++--------------
> xen/include/xen/rcupdate.h | 2 +-
> 2 files changed, 66 insertions(+), 28 deletions(-)
>
> diff --git a/xen/common/rcupdate.c b/xen/common/rcupdate.c
> index 03d84764d2..c5ef6acb1e 100644
> --- a/xen/common/rcupdate.c
> +++ b/xen/common/rcupdate.c
> @@ -83,7 +83,6 @@ struct rcu_data {
> struct rcu_head **donetail;
> long blimit; /* Upper limit on a processed batch */
> int cpu;
> - struct rcu_head barrier;
> long last_rs_qlen; /* qlen during the last resched */
>
> /* 3) idle CPUs handling */
> @@ -91,6 +90,7 @@ struct rcu_data {
> bool idle_timer_active;
>
> bool process_callbacks;
> + bool barrier_active;
> };
>
> /*
> @@ -143,51 +143,82 @@ static int qhimark = 10000;
> static int qlowmark = 100;
> static int rsinterval = 1000;
>
> -struct rcu_barrier_data {
> - struct rcu_head head;
> - atomic_t *cpu_count;
> -};
> +/*
> + * rcu_barrier() handling:
> + * cpu_count holds the number of cpu required to finish barrier handling.
NIT: the number of cpus (I think)
> + * pending_count is initialized to nr_cpus + 1.
> + * Cpus are synchronized via softirq mechanism. rcu_barrier() is regarded to
> + * be active if pending_count is not zero. In case rcu_barrier() is called on
> + * multiple cpus it is enough to check for pending_count being not zero on entry
> + * and to call process_pending_softirqs() in a loop until pending_count drops to
> + * zero, before starting the new rcu_barrier() processing.
> + * In order to avoid hangs when rcu_barrier() is called multiple times on the
> + * same cpu in fast sequence and a slave cpu couldn't drop out of the
> + * barrier handling fast enough a second counter pending_count is needed.
As an aside question, don't we miss a memory barrier in
rcu_barrier_callback or rcu_barrier_action()? This barrier would ensure
that the RCU changes have been seen before we tell the "master" CPU we
are done.
> + * The rcu_barrier() invoking cpu will wait until pending_count reaches 1
> + * (meaning that all cpus have finished processing the barrier) and then will
> + * reset pending_count to 0 to enable entering rcu_barrier() again.
> + */
> +static atomic_t cpu_count = ATOMIC_INIT(0);
> +static atomic_t pending_count = ATOMIC_INIT(0);
>
> static void rcu_barrier_callback(struct rcu_head *head)
> {
> - struct rcu_barrier_data *data = container_of(
> - head, struct rcu_barrier_data, head);
> - atomic_inc(data->cpu_count);
> + atomic_dec(&cpu_count);
> }
>
> -static int rcu_barrier_action(void *_cpu_count)
> +static void rcu_barrier_action(void)
> {
> - struct rcu_barrier_data data = { .cpu_count = _cpu_count };
> -
> - ASSERT(!local_irq_is_enabled());
> - local_irq_enable();
> + struct rcu_head head;
>
> /*
> * When callback is executed, all previously-queued RCU work on this CPU
> - * is completed. When all CPUs have executed their callback, data.cpu_count
> - * will have been incremented to include every online CPU.
> + * is completed. When all CPUs have executed their callback, cpu_count
> + * will have been decremented to 0.
> */
> - call_rcu(&data.head, rcu_barrier_callback);
> + call_rcu(&head, rcu_barrier_callback);
>
> - while ( atomic_read(data.cpu_count) != num_online_cpus() )
> + while ( atomic_read(&cpu_count) )
> {
> process_pending_softirqs();
> cpu_relax();
> }
>
> - local_irq_disable();
> -
> - return 0;
> + atomic_dec(&pending_count);
> }
>
> -/*
> - * As rcu_barrier() is using stop_machine_run() it is allowed to be used in
> - * idle context only (see comment for stop_machine_run()).
> - */
> -int rcu_barrier(void)
> +void rcu_barrier(void)
> {
> - atomic_t cpu_count = ATOMIC_INIT(0);
> - return stop_machine_run(rcu_barrier_action, &cpu_count, NR_CPUS);
> + unsigned int n_cpus;
> +
It would be good to spell out this code has to be called with interrupt
enabled and not in an interrupt context.
> + for ( ;; )
> + {
> + if ( !atomic_read(&pending_count) && get_cpu_maps() )
> + {
> + n_cpus = num_online_cpus();
> +
> + if ( atomic_cmpxchg(&pending_count, 0, n_cpus + 1) == 0 )
> + break;
> +
> + put_cpu_maps();
> + }
> +
> + process_pending_softirqs();
> + cpu_relax();
> + }
> +
> + atomic_set(&cpu_count, n_cpus);
> + cpumask_raise_softirq(&cpu_online_map, RCU_SOFTIRQ);
> +
> + put_cpu_maps();
If you put the CPU maps, wouldn't it be possible to have a CPU turned
off? If not, can you add a comment in the code why this is safe?
> +
> + while ( atomic_read(&pending_count) != 1 )
> + {
> + process_pending_softirqs();
> + cpu_relax();
> + }
> +
> + atomic_set(&pending_count, 0);
> }
>
> /* Is batch a before batch b ? */
> @@ -426,6 +457,13 @@ static void rcu_process_callbacks(void)
> rdp->process_callbacks = false;
> __rcu_process_callbacks(&rcu_ctrlblk, rdp);
> }
> +
> + if ( atomic_read(&cpu_count) && !rdp->barrier_active )
> + {
> + rdp->barrier_active = true;
> + rcu_barrier_action();
> + rdp->barrier_active = false;
> + }
> }
>
> static int __rcu_pending(struct rcu_ctrlblk *rcp, struct rcu_data *rdp)
> diff --git a/xen/include/xen/rcupdate.h b/xen/include/xen/rcupdate.h
> index eb9b60df07..31c8b86d13 100644
> --- a/xen/include/xen/rcupdate.h
> +++ b/xen/include/xen/rcupdate.h
> @@ -144,7 +144,7 @@ void rcu_check_callbacks(int cpu);
> void call_rcu(struct rcu_head *head,
> void (*func)(struct rcu_head *head));
>
> -int rcu_barrier(void);
> +void rcu_barrier(void);
>
> void rcu_idle_enter(unsigned int cpu);
> void rcu_idle_exit(unsigned int cpu);
>
Cheers,
--
Julien Grall
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [Xen-devel] [PATCH v5 1/4] xen/rcu: don't use stop_machine_run() for rcu_barrier()
2020-03-13 11:02 ` Julien Grall
@ 2020-03-13 11:18 ` Jürgen Groß
2020-03-13 11:22 ` Jan Beulich
0 siblings, 1 reply; 9+ messages in thread
From: Jürgen Groß @ 2020-03-13 11:18 UTC (permalink / raw)
To: Julien Grall, xen-devel
Cc: Stefano Stabellini, Wei Liu, Andrew Cooper, Ian Jackson,
George Dunlap, Jan Beulich
On 13.03.20 12:02, Julien Grall wrote:
>
>
> On 12/03/2020 08:28, Juergen Gross wrote:
>> Today rcu_barrier() is calling stop_machine_run() to synchronize all
>> physical cpus in order to ensure all pending rcu calls have finished
>> when returning.
>>
>> As stop_machine_run() is using tasklets this requires scheduling of
>> idle vcpus on all cpus imposing the need to call rcu_barrier() on idle
>> cpus only in case of core scheduling being active, as otherwise a
>> scheduling deadlock would occur.
>>
>> There is no need at all to do the syncing of the cpus in tasklets, as
>> rcu activity is started in __do_softirq() called whenever softirq
>> activity is allowed. So rcu_barrier() can easily be modified to use
>> softirq for synchronization of the cpus no longer requiring any
>> scheduling activity.
>>
>> As there already is a rcu softirq reuse that for the synchronization.
>>
>> Remove the barrier element from struct rcu_data as it isn't used.
>>
>> Finally switch rcu_barrier() to return void as it now can never fail.
>>
>> Partially-based-on-patch-by: Igor Druzhinin <igor.druzhinin@citrix.com>
>> Signed-off-by: Juergen Gross <jgross@suse.com>
>> ---
>> V2:
>> - add recursion detection
>>
>> V3:
>> - fix races (Igor Druzhinin)
>>
>> V5:
>> - rename done_count to pending_count (Jan Beulich)
>> - fix race (Jan Beulich)
>> ---
>> xen/common/rcupdate.c | 92
>> ++++++++++++++++++++++++++++++++--------------
>> xen/include/xen/rcupdate.h | 2 +-
>> 2 files changed, 66 insertions(+), 28 deletions(-)
>>
>> diff --git a/xen/common/rcupdate.c b/xen/common/rcupdate.c
>> index 03d84764d2..c5ef6acb1e 100644
>> --- a/xen/common/rcupdate.c
>> +++ b/xen/common/rcupdate.c
>> @@ -83,7 +83,6 @@ struct rcu_data {
>> struct rcu_head **donetail;
>> long blimit; /* Upper limit on a processed
>> batch */
>> int cpu;
>> - struct rcu_head barrier;
>> long last_rs_qlen; /* qlen during the last
>> resched */
>> /* 3) idle CPUs handling */
>> @@ -91,6 +90,7 @@ struct rcu_data {
>> bool idle_timer_active;
>> bool process_callbacks;
>> + bool barrier_active;
>> };
>> /*
>> @@ -143,51 +143,82 @@ static int qhimark = 10000;
>> static int qlowmark = 100;
>> static int rsinterval = 1000;
>> -struct rcu_barrier_data {
>> - struct rcu_head head;
>> - atomic_t *cpu_count;
>> -};
>> +/*
>> + * rcu_barrier() handling:
>> + * cpu_count holds the number of cpu required to finish barrier
>> handling.
>
> NIT: the number of cpus (I think)
>
>> + * pending_count is initialized to nr_cpus + 1.
>> + * Cpus are synchronized via softirq mechanism. rcu_barrier() is
>> regarded to
>> + * be active if pending_count is not zero. In case rcu_barrier() is
>> called on
>> + * multiple cpus it is enough to check for pending_count being not
>> zero on entry
>> + * and to call process_pending_softirqs() in a loop until
>> pending_count drops to
>> + * zero, before starting the new rcu_barrier() processing.
>> + * In order to avoid hangs when rcu_barrier() is called multiple
>> times on the
>> + * same cpu in fast sequence and a slave cpu couldn't drop out of the
>> + * barrier handling fast enough a second counter pending_count is
>> needed.
>
> As an aside question, don't we miss a memory barrier in
> rcu_barrier_callback or rcu_barrier_action()? This barrier would ensure
> that the RCU changes have been seen before we tell the "master" CPU we
> are done.
Sounds like a sensible idea.
>
>> + * The rcu_barrier() invoking cpu will wait until pending_count
>> reaches 1
>> + * (meaning that all cpus have finished processing the barrier) and
>> then will
>> + * reset pending_count to 0 to enable entering rcu_barrier() again.
>> + */
>> +static atomic_t cpu_count = ATOMIC_INIT(0);
>> +static atomic_t pending_count = ATOMIC_INIT(0);
>> static void rcu_barrier_callback(struct rcu_head *head)
>> {
>> - struct rcu_barrier_data *data = container_of(
>> - head, struct rcu_barrier_data, head);
>> - atomic_inc(data->cpu_count);
>> + atomic_dec(&cpu_count);
>> }
>> -static int rcu_barrier_action(void *_cpu_count)
>> +static void rcu_barrier_action(void)
>> {
>> - struct rcu_barrier_data data = { .cpu_count = _cpu_count };
>> -
>> - ASSERT(!local_irq_is_enabled());
>> - local_irq_enable();
>> + struct rcu_head head;
>> /*
>> * When callback is executed, all previously-queued RCU work on
>> this CPU
>> - * is completed. When all CPUs have executed their callback,
>> data.cpu_count
>> - * will have been incremented to include every online CPU.
>> + * is completed. When all CPUs have executed their callback,
>> cpu_count
>> + * will have been decremented to 0.
>> */
>> - call_rcu(&data.head, rcu_barrier_callback);
>> + call_rcu(&head, rcu_barrier_callback);
>> - while ( atomic_read(data.cpu_count) != num_online_cpus() )
>> + while ( atomic_read(&cpu_count) )
>> {
>> process_pending_softirqs();
>> cpu_relax();
>> }
>> - local_irq_disable();
>> -
>> - return 0;
>> + atomic_dec(&pending_count);
>> }
>> -/*
>> - * As rcu_barrier() is using stop_machine_run() it is allowed to be
>> used in
>> - * idle context only (see comment for stop_machine_run()).
>> - */
>> -int rcu_barrier(void)
>> +void rcu_barrier(void)
>> {
>> - atomic_t cpu_count = ATOMIC_INIT(0);
>> - return stop_machine_run(rcu_barrier_action, &cpu_count, NR_CPUS);
>> + unsigned int n_cpus;
>> +
>
> It would be good to spell out this code has to be called with interrupt
> enabled and not in an interrupt context.
I'll add an ASSERT().
>
>> + for ( ;; )
>> + {
>> + if ( !atomic_read(&pending_count) && get_cpu_maps() )
>> + {
>> + n_cpus = num_online_cpus();
>> +
>> + if ( atomic_cmpxchg(&pending_count, 0, n_cpus + 1) == 0 )
>> + break;
>> +
>> + put_cpu_maps();
>> + }
>> +
>> + process_pending_softirqs();
>> + cpu_relax();
>> + }
>> +
>> + atomic_set(&cpu_count, n_cpus);
>> + cpumask_raise_softirq(&cpu_online_map, RCU_SOFTIRQ);
>> +
>> + put_cpu_maps();
>
> If you put the CPU maps, wouldn't it be possible to have a CPU turned
> off? If not, can you add a comment in the code why this is safe?
Yes, you are right. This might be possible, even if rather
unlikely as a cpu being removed has to be in idle already, so
the pending softirq should be picked up rather fast.
Juergen
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [Xen-devel] [PATCH v5 1/4] xen/rcu: don't use stop_machine_run() for rcu_barrier()
2020-03-13 11:18 ` Jürgen Groß
@ 2020-03-13 11:22 ` Jan Beulich
0 siblings, 0 replies; 9+ messages in thread
From: Jan Beulich @ 2020-03-13 11:22 UTC (permalink / raw)
To: Jürgen Groß
Cc: Stefano Stabellini, Julien Grall, Wei Liu, Andrew Cooper,
Ian Jackson, George Dunlap, xen-devel
On 13.03.2020 12:18, Jürgen Groß wrote:
> On 13.03.20 12:02, Julien Grall wrote:
>> On 12/03/2020 08:28, Juergen Gross wrote:
>>> + for ( ;; )
>>> + {
>>> + if ( !atomic_read(&pending_count) && get_cpu_maps() )
>>> + {
>>> + n_cpus = num_online_cpus();
>>> +
>>> + if ( atomic_cmpxchg(&pending_count, 0, n_cpus + 1) == 0 )
>>> + break;
>>> +
>>> + put_cpu_maps();
>>> + }
>>> +
>>> + process_pending_softirqs();
>>> + cpu_relax();
>>> + }
>>> +
>>> + atomic_set(&cpu_count, n_cpus);
>>> + cpumask_raise_softirq(&cpu_online_map, RCU_SOFTIRQ);
>>> +
>>> + put_cpu_maps();
>>
>> If you put the CPU maps, wouldn't it be possible to have a CPU turned
>> off? If not, can you add a comment in the code why this is safe?
>
> Yes, you are right. This might be possible, even if rather
> unlikely as a cpu being removed has to be in idle already, so
> the pending softirq should be picked up rather fast.
I think that's not the main aspect. The CPU to be removed may
already be spinning in cpu_hotplug_begin() (and may in particular
also already be past the rcu_barrier() that Igor's patch is going
to put there).
Jan
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel
^ permalink raw reply [flat|nested] 9+ messages in thread
* [Xen-devel] [PATCH v5 2/4] xen: don't process rcu callbacks when holding a rcu_read_lock()
2020-03-12 8:28 [Xen-devel] [PATCH v5 0/4] xen/rcu: let rcu work better with core scheduling Juergen Gross
2020-03-12 8:28 ` [Xen-devel] [PATCH v5 1/4] xen/rcu: don't use stop_machine_run() for rcu_barrier() Juergen Gross
@ 2020-03-12 8:28 ` Juergen Gross
2020-03-12 8:28 ` [Xen-devel] [PATCH v5 3/4] xen/rcu: add assertions to debug build Juergen Gross
2020-03-12 8:28 ` [Xen-devel] [PATCH v5 4/4] xen/rcu: add per-lock counter in debug builds Juergen Gross
3 siblings, 0 replies; 9+ messages in thread
From: Juergen Gross @ 2020-03-12 8:28 UTC (permalink / raw)
To: xen-devel
Cc: Juergen Gross, Stefano Stabellini, Julien Grall, Wei Liu,
Andrew Cooper, Ian Jackson, George Dunlap, Jan Beulich
Some keyhandlers are calling process_pending_softirqs() while holding
a rcu_read_lock(). This is wrong, as process_pending_softirqs() might
activate rcu calls which should not happen inside a rcu_read_lock().
For that purpose modify process_pending_softirqs() to not allow rcu
callback processing when a rcu_read_lock() is being held.
Signed-off-by: Juergen Gross <jgross@suse.com>
---
V3:
- add RCU_SOFTIRQ to ignore in process_pending_softirqs_norcu()
(Roger Pau Monné)
V5:
- block rcu processing depending on rcu_read_lock() being held or not
(Jan Beulich)
---
xen/common/softirq.c | 12 ++++++++++--
1 file changed, 10 insertions(+), 2 deletions(-)
diff --git a/xen/common/softirq.c b/xen/common/softirq.c
index b83ad96d6c..00d676b62c 100644
--- a/xen/common/softirq.c
+++ b/xen/common/softirq.c
@@ -29,6 +29,7 @@ static void __do_softirq(unsigned long ignore_mask)
{
unsigned int i, cpu;
unsigned long pending;
+ bool rcu_allowed = !(ignore_mask & (1ul << RCU_SOFTIRQ));
for ( ; ; )
{
@@ -38,7 +39,7 @@ static void __do_softirq(unsigned long ignore_mask)
*/
cpu = smp_processor_id();
- if ( rcu_pending(cpu) )
+ if ( rcu_allowed && rcu_pending(cpu) )
rcu_check_callbacks(cpu);
if ( ((pending = (softirq_pending(cpu) & ~ignore_mask)) == 0)
@@ -53,9 +54,16 @@ static void __do_softirq(unsigned long ignore_mask)
void process_pending_softirqs(void)
{
+ unsigned long ignore_mask = (1ul << SCHEDULE_SOFTIRQ) |
+ (1ul << SCHED_SLAVE_SOFTIRQ);
+
+ /* Block RCU processing in case of rcu_read_lock() held. */
+ if ( preempt_count() )
+ ignore_mask |= 1ul << RCU_SOFTIRQ;
+
ASSERT(!in_irq() && local_irq_is_enabled());
/* Do not enter scheduler as it can preempt the calling context. */
- __do_softirq((1ul << SCHEDULE_SOFTIRQ) | (1ul << SCHED_SLAVE_SOFTIRQ));
+ __do_softirq(ignore_mask);
}
void do_softirq(void)
--
2.16.4
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel
^ permalink raw reply related [flat|nested] 9+ messages in thread
* [Xen-devel] [PATCH v5 3/4] xen/rcu: add assertions to debug build
2020-03-12 8:28 [Xen-devel] [PATCH v5 0/4] xen/rcu: let rcu work better with core scheduling Juergen Gross
2020-03-12 8:28 ` [Xen-devel] [PATCH v5 1/4] xen/rcu: don't use stop_machine_run() for rcu_barrier() Juergen Gross
2020-03-12 8:28 ` [Xen-devel] [PATCH v5 2/4] xen: don't process rcu callbacks when holding a rcu_read_lock() Juergen Gross
@ 2020-03-12 8:28 ` Juergen Gross
2020-03-13 8:09 ` Jürgen Groß
2020-03-12 8:28 ` [Xen-devel] [PATCH v5 4/4] xen/rcu: add per-lock counter in debug builds Juergen Gross
3 siblings, 1 reply; 9+ messages in thread
From: Juergen Gross @ 2020-03-12 8:28 UTC (permalink / raw)
To: xen-devel
Cc: Juergen Gross, Stefano Stabellini, Julien Grall, Wei Liu,
Andrew Cooper, Ian Jackson, George Dunlap, Jan Beulich
Xen's RCU implementation relies on no softirq handling taking place
while being in a RCU critical section. Add ASSERT()s in debug builds
in order to catch any violations.
For that purpose modify rcu_read_[un]lock() to use a dedicated percpu
counter instead of preempt_[en|dis]able() as this enables to test
that condition in __do_softirq() (ASSERT_NOT_IN_ATOMIC() is not
usable there due to __cpu_up() calling process_pending_softirqs()
while holding the cpu hotplug lock).
Dropping the now no longer needed #include of preempt.h in rcupdate.h
requires adding it in some sources.
While at it switch the rcu_read_[un]lock() implementation to static
inline functions instead of macros.
Signed-off-by: Juergen Gross <jgross@suse.com>
---
V3:
- add barriers to rcu_[en|dis]able() (Roger Pau Monné)
- add rcu_quiesce_allowed() to ASSERT_NOT_IN_ATOMIC (Roger Pau Monné)
- convert macros to static inline functions
- add sanity check in rcu_read_unlock()
V4:
- use barrier() in rcu_[en|dis]able() (Julien Grall)
V5:
- use rcu counter even if not using a debug build
---
xen/common/multicall.c | 1 +
xen/common/preempt.c | 5 ++++-
xen/common/rcupdate.c | 2 ++
xen/common/softirq.c | 4 +++-
xen/common/wait.c | 1 +
xen/include/xen/rcupdate.h | 35 +++++++++++++++++++++++++++++++----
6 files changed, 42 insertions(+), 6 deletions(-)
diff --git a/xen/common/multicall.c b/xen/common/multicall.c
index 5a199ebf8f..67f1a23485 100644
--- a/xen/common/multicall.c
+++ b/xen/common/multicall.c
@@ -10,6 +10,7 @@
#include <xen/multicall.h>
#include <xen/guest_access.h>
#include <xen/perfc.h>
+#include <xen/preempt.h>
#include <xen/trace.h>
#include <asm/current.h>
#include <asm/hardirq.h>
diff --git a/xen/common/preempt.c b/xen/common/preempt.c
index 3b4178fd44..8a351e644b 100644
--- a/xen/common/preempt.c
+++ b/xen/common/preempt.c
@@ -21,13 +21,15 @@
#include <xen/preempt.h>
#include <xen/irq.h>
+#include <xen/rcupdate.h>
#include <asm/system.h>
DEFINE_PER_CPU(unsigned int, __preempt_count);
bool_t in_atomic(void)
{
- return preempt_count() || in_irq() || !local_irq_is_enabled();
+ return preempt_count() || in_irq() || !local_irq_is_enabled() ||
+ !rcu_quiesce_allowed();
}
#ifndef NDEBUG
@@ -36,5 +38,6 @@ void ASSERT_NOT_IN_ATOMIC(void)
ASSERT(!preempt_count());
ASSERT(!in_irq());
ASSERT(local_irq_is_enabled());
+ ASSERT(rcu_quiesce_allowed());
}
#endif
diff --git a/xen/common/rcupdate.c b/xen/common/rcupdate.c
index c5ef6acb1e..d73735235d 100644
--- a/xen/common/rcupdate.c
+++ b/xen/common/rcupdate.c
@@ -46,6 +46,8 @@
#include <xen/cpu.h>
#include <xen/stop_machine.h>
+DEFINE_PER_CPU(unsigned int, rcu_lock_cnt);
+
/* Global control variables for rcupdate callback mechanism. */
static struct rcu_ctrlblk {
long cur; /* Current batch number. */
diff --git a/xen/common/softirq.c b/xen/common/softirq.c
index 00d676b62c..eba65c5fc0 100644
--- a/xen/common/softirq.c
+++ b/xen/common/softirq.c
@@ -31,6 +31,8 @@ static void __do_softirq(unsigned long ignore_mask)
unsigned long pending;
bool rcu_allowed = !(ignore_mask & (1ul << RCU_SOFTIRQ));
+ ASSERT(!rcu_allowed || rcu_quiesce_allowed());
+
for ( ; ; )
{
/*
@@ -58,7 +60,7 @@ void process_pending_softirqs(void)
(1ul << SCHED_SLAVE_SOFTIRQ);
/* Block RCU processing in case of rcu_read_lock() held. */
- if ( preempt_count() )
+ if ( !rcu_quiesce_allowed() )
ignore_mask |= 1ul << RCU_SOFTIRQ;
ASSERT(!in_irq() && local_irq_is_enabled());
diff --git a/xen/common/wait.c b/xen/common/wait.c
index 24716e7676..9cdb174036 100644
--- a/xen/common/wait.c
+++ b/xen/common/wait.c
@@ -19,6 +19,7 @@
* along with this program; If not, see <http://www.gnu.org/licenses/>.
*/
+#include <xen/preempt.h>
#include <xen/sched.h>
#include <xen/softirq.h>
#include <xen/wait.h>
diff --git a/xen/include/xen/rcupdate.h b/xen/include/xen/rcupdate.h
index 31c8b86d13..be807694e7 100644
--- a/xen/include/xen/rcupdate.h
+++ b/xen/include/xen/rcupdate.h
@@ -32,12 +32,32 @@
#define __XEN_RCUPDATE_H
#include <xen/cache.h>
+#include <xen/compiler.h>
#include <xen/spinlock.h>
#include <xen/cpumask.h>
-#include <xen/preempt.h>
+#include <xen/percpu.h>
#define __rcu
+DECLARE_PER_CPU(unsigned int, rcu_lock_cnt);
+
+static inline void rcu_quiesce_disable(void)
+{
+ this_cpu(rcu_lock_cnt)++;
+ barrier();
+}
+
+static inline void rcu_quiesce_enable(void)
+{
+ barrier();
+ this_cpu(rcu_lock_cnt)--;
+}
+
+static inline bool rcu_quiesce_allowed(void)
+{
+ return !this_cpu(rcu_lock_cnt);
+}
+
/**
* struct rcu_head - callback structure for use with RCU
* @next: next update requests in a list
@@ -91,16 +111,23 @@ typedef struct _rcu_read_lock rcu_read_lock_t;
* will be deferred until the outermost RCU read-side critical section
* completes.
*
- * It is illegal to block while in an RCU read-side critical section.
+ * It is illegal to process softirqs while in an RCU read-side critical section.
*/
-#define rcu_read_lock(x) ({ ((void)(x)); preempt_disable(); })
+static inline void rcu_read_lock(rcu_read_lock_t *lock)
+{
+ rcu_quiesce_disable();
+}
/**
* rcu_read_unlock - marks the end of an RCU read-side critical section.
*
* See rcu_read_lock() for more information.
*/
-#define rcu_read_unlock(x) ({ ((void)(x)); preempt_enable(); })
+static inline void rcu_read_unlock(rcu_read_lock_t *lock)
+{
+ ASSERT(!rcu_quiesce_allowed());
+ rcu_quiesce_enable();
+}
/*
* So where is rcu_write_lock()? It does not exist, as there is no
--
2.16.4
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel
^ permalink raw reply related [flat|nested] 9+ messages in thread
* Re: [Xen-devel] [PATCH v5 3/4] xen/rcu: add assertions to debug build
2020-03-12 8:28 ` [Xen-devel] [PATCH v5 3/4] xen/rcu: add assertions to debug build Juergen Gross
@ 2020-03-13 8:09 ` Jürgen Groß
0 siblings, 0 replies; 9+ messages in thread
From: Jürgen Groß @ 2020-03-13 8:09 UTC (permalink / raw)
To: xen-devel
Cc: Stefano Stabellini, Julien Grall, Wei Liu, Andrew Cooper,
Ian Jackson, George Dunlap, Jan Beulich
On 12.03.20 09:28, Juergen Gross wrote:
> Xen's RCU implementation relies on no softirq handling taking place
> while being in an RCU critical section. Add ASSERT()s in debug builds
> in order to catch any violations.
>
> For that purpose modify rcu_read_[un]lock() to use a dedicated percpu
> counter instead of preempt_[en|dis]able() as this enables to test
> that condition in __do_softirq() (ASSERT_NOT_IN_ATOMIC() is not
> usable there due to __cpu_up() calling process_pending_softirqs()
> while holding the cpu hotplug lock).
>
> Dropping the now no longer needed #include of preempt.h in rcupdate.h
> requires adding it in some sources.
>
> While at it switch the rcu_read_[un]lock() implementation to static
> inline functions instead of macros.
>
> Signed-off-by: Juergen Gross <jgross@suse.com>
Depending on the acceptance of my just sent series for fixing
preemption disabling in locks I might send a fixup to this patch, too,
re-adding preempt_disable() to rcu_read_lock().
Juergen
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel
^ permalink raw reply [flat|nested] 9+ messages in thread
* [Xen-devel] [PATCH v5 4/4] xen/rcu: add per-lock counter in debug builds
2020-03-12 8:28 [Xen-devel] [PATCH v5 0/4] xen/rcu: let rcu work better with core scheduling Juergen Gross
` (2 preceding siblings ...)
2020-03-12 8:28 ` [Xen-devel] [PATCH v5 3/4] xen/rcu: add assertions to debug build Juergen Gross
@ 2020-03-12 8:28 ` Juergen Gross
3 siblings, 0 replies; 9+ messages in thread
From: Juergen Gross @ 2020-03-12 8:28 UTC (permalink / raw)
To: xen-devel
Cc: Juergen Gross, Stefano Stabellini, Julien Grall, Wei Liu,
Andrew Cooper, Ian Jackson, George Dunlap, Jan Beulich
Add a lock-specific counter to rcu read locks in debug builds. This
allows testing for matching lock/unlock calls.
This will help to avoid cases like the one fixed by commit
98ed1f43cc2c89 where different rcu read locks were referenced in the
lock and unlock calls.
Signed-off-by: Juergen Gross <jgross@suse.com>
---
V5:
- updated commit message (Jan Beulich)
---
xen/include/xen/rcupdate.h | 46 +++++++++++++++++++++++++++++++++-------------
1 file changed, 33 insertions(+), 13 deletions(-)
diff --git a/xen/include/xen/rcupdate.h b/xen/include/xen/rcupdate.h
index be807694e7..3e9b0b180e 100644
--- a/xen/include/xen/rcupdate.h
+++ b/xen/include/xen/rcupdate.h
@@ -36,20 +36,49 @@
#include <xen/spinlock.h>
#include <xen/cpumask.h>
#include <xen/percpu.h>
+#include <asm/atomic.h>
#define __rcu
+#ifndef NDEBUG
+/* Lock type for passing to rcu_read_{lock,unlock}. */
+struct _rcu_read_lock {
+ atomic_t cnt;
+};
+typedef struct _rcu_read_lock rcu_read_lock_t;
+#define DEFINE_RCU_READ_LOCK(x) rcu_read_lock_t x = { .cnt = ATOMIC_INIT(0) }
+#define RCU_READ_LOCK_INIT(x) atomic_set(&(x)->cnt, 0)
+
+#else
+/*
+ * Dummy lock type for passing to rcu_read_{lock,unlock}. Currently exists
+ * only to document the reason for rcu_read_lock() critical sections.
+ */
+struct _rcu_read_lock {};
+typedef struct _rcu_read_lock rcu_read_lock_t;
+#define DEFINE_RCU_READ_LOCK(x) rcu_read_lock_t x
+#define RCU_READ_LOCK_INIT(x)
+
+#endif
+
DECLARE_PER_CPU(unsigned int, rcu_lock_cnt);
-static inline void rcu_quiesce_disable(void)
+static inline void rcu_quiesce_disable(rcu_read_lock_t *lock)
{
this_cpu(rcu_lock_cnt)++;
+#ifndef NDEBUG
+ atomic_inc(&lock->cnt);
+#endif
barrier();
}
-static inline void rcu_quiesce_enable(void)
+static inline void rcu_quiesce_enable(rcu_read_lock_t *lock)
{
barrier();
+#ifndef NDEBUG
+ ASSERT(atomic_read(&lock->cnt));
+ atomic_dec(&lock->cnt);
+#endif
this_cpu(rcu_lock_cnt)--;
}
@@ -78,15 +107,6 @@ struct rcu_head {
int rcu_pending(int cpu);
int rcu_needs_cpu(int cpu);
-/*
- * Dummy lock type for passing to rcu_read_{lock,unlock}. Currently exists
- * only to document the reason for rcu_read_lock() critical sections.
- */
-struct _rcu_read_lock {};
-typedef struct _rcu_read_lock rcu_read_lock_t;
-#define DEFINE_RCU_READ_LOCK(x) rcu_read_lock_t x
-#define RCU_READ_LOCK_INIT(x)
-
/**
* rcu_read_lock - mark the beginning of an RCU read-side critical section.
*
@@ -115,7 +135,7 @@ typedef struct _rcu_read_lock rcu_read_lock_t;
*/
static inline void rcu_read_lock(rcu_read_lock_t *lock)
{
- rcu_quiesce_disable();
+ rcu_quiesce_disable(lock);
}
/**
@@ -126,7 +146,7 @@ static inline void rcu_read_lock(rcu_read_lock_t *lock)
static inline void rcu_read_unlock(rcu_read_lock_t *lock)
{
ASSERT(!rcu_quiesce_allowed());
- rcu_quiesce_enable();
+ rcu_quiesce_enable(lock);
}
/*
--
2.16.4
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel
^ permalink raw reply related [flat|nested] 9+ messages in thread