* [PATCH v2 0/2] accel/tcg: Fix monitor deadlock
@ 2021-10-19  5:56 Greg Kurz
  2021-10-19  5:56 ` [PATCH v2 1/2] rcu: Introduce force_rcu notifier Greg Kurz
  2021-10-19  5:56 ` [PATCH v2 2/2] accel/tcg: Register a " Greg Kurz
  0 siblings, 2 replies; 5+ messages in thread
From: Greg Kurz @ 2021-10-19  5:56 UTC
  To: qemu-devel
  Cc: Eduardo Habkost, Richard Henderson, Greg Kurz, qemu-stable,
	Paolo Bonzini

Commit 7bed89958bfb ("device_core: use drain_call_rcu in in qmp_device_add")
introduced a regression in QEMU 6.0: passing device_add without arguments
hangs the monitor. This was reported against qemu-system-mips64 with TCG,
but I could consistently reproduce it with other targets (x86 and ppc64).

See https://gitlab.com/qemu-project/qemu/-/issues/650 for details.

The problem is that an emulated busy-looping vCPU can stay forever in
its RCU read-side critical section and prevent drain_call_rcu() from
returning.
This series fixes the issue by letting RCU kick vCPUs out of the read-side
critical section when drain_call_rcu() is in progress. This is achieved
through notifiers, as suggested by Paolo Bonzini.
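
For illustration, a minimal sketch of how a vCPU thread uses the new
API (this mirrors what patch 2 actually does in the MTTCG and RR
thread functions):

static void *vcpu_thread_fn(void *arg)
{
    CPUState *cpu = arg;
    Notifier force_rcu = { .notify = tcg_cpus_force_rcu };

    rcu_register_thread();
    /* tell RCU how to kick this reader out of its critical section */
    rcu_add_force_rcu_notifier(&force_rcu, cpu);

    /* ... execution loop, which runs under rcu_read_lock() ... */

    rcu_remove_force_rcu_notifier(&force_rcu);
    rcu_unregister_thread();
    return NULL;
}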

v2:
- moved notifier list to RCU reader data
- separate API for notifier registration
- CPUState passed as an opaque pointer

Greg Kurz (2):
  rcu: Introduce force_rcu notifier
  accel/tcg: Register a force_rcu notifier

 accel/tcg/tcg-accel-ops-mttcg.c |  3 +++
 accel/tcg/tcg-accel-ops-rr.c    |  3 +++
 accel/tcg/tcg-accel-ops.c       | 15 +++++++++++++++
 accel/tcg/tcg-accel-ops.h       |  2 ++
 include/qemu/rcu.h              | 16 ++++++++++++++++
 util/rcu.c                      | 22 +++++++++++++++++++++-
 6 files changed, 60 insertions(+), 1 deletion(-)

-- 
2.31.1





* [PATCH v2 1/2] rcu: Introduce force_rcu notifier
  2021-10-19  5:56 [PATCH v2 0/2] accel/tcg: Fix monitor deadlock Greg Kurz
@ 2021-10-19  5:56 ` Greg Kurz
  2021-10-19 11:26   ` Paolo Bonzini
  2021-10-19  5:56 ` [PATCH v2 2/2] accel/tcg: Register a " Greg Kurz
  1 sibling, 1 reply; 5+ messages in thread
From: Greg Kurz @ 2021-10-19  5:56 UTC
  To: qemu-devel
  Cc: Eduardo Habkost, Richard Henderson, Greg Kurz, qemu-stable,
	Paolo Bonzini

The drain_call_rcu() function can be blocked as long as an RCU reader
stays in a read-side critical section. This is typically what happens
when a TCG vCPU is executing a busy loop. It can deadlock the QEMU
monitor as reported in https://gitlab.com/qemu-project/qemu/-/issues/650 .

This can be avoided by allowing drain_call_rcu() to enforce an RCU grace
period. Since each reader might need to take specific actions to end its
read-side critical section, do it with notifiers.

Prepare the ground for this by adding a notifier list to the RCU reader
struct and using it in wait_for_readers() when drain_call_rcu() is in
progress. An API is added for readers to register their notifiers.

This is largely based on a draft from Paolo Bonzini.

Suggested-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Greg Kurz <groug@kaod.org>
---
 include/qemu/rcu.h | 16 ++++++++++++++++
 util/rcu.c         | 22 +++++++++++++++++++++-
 2 files changed, 37 insertions(+), 1 deletion(-)

diff --git a/include/qemu/rcu.h b/include/qemu/rcu.h
index 515d327cf11c..d8c4fd8686b4 100644
--- a/include/qemu/rcu.h
+++ b/include/qemu/rcu.h
@@ -27,6 +27,7 @@
 #include "qemu/thread.h"
 #include "qemu/queue.h"
 #include "qemu/atomic.h"
+#include "qemu/notify.h"
 #include "qemu/sys_membarrier.h"
 
 #ifdef __cplusplus
@@ -66,6 +67,14 @@ struct rcu_reader_data {
 
     /* Data used for registry, protected by rcu_registry_lock */
     QLIST_ENTRY(rcu_reader_data) node;
+
+    /*
+     * NotifierList used to force an RCU grace period.  Accessed under
+     * rcu_registry_lock.  Note that the notifier is called _outside_
+     * the thread!
+     */
+    NotifierList force_rcu;
+    void *force_rcu_data;
 };
 
 extern __thread struct rcu_reader_data rcu_reader;
@@ -180,6 +189,13 @@ G_DEFINE_AUTOPTR_CLEANUP_FUNC(RCUReadAuto, rcu_read_auto_unlock)
 #define RCU_READ_LOCK_GUARD() \
     g_autoptr(RCUReadAuto) _rcu_read_auto __attribute__((unused)) = rcu_read_auto_lock()
 
+/*
+ * Force-RCU notifiers tell readers that they should exit their
+ * read-side critical section.
+ */
+void rcu_add_force_rcu_notifier(Notifier *n, void *data);
+void rcu_remove_force_rcu_notifier(Notifier *n);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/util/rcu.c b/util/rcu.c
index 13ac0f75cb2a..0c68f068e23d 100644
--- a/util/rcu.c
+++ b/util/rcu.c
@@ -46,6 +46,7 @@
 unsigned long rcu_gp_ctr = RCU_GP_LOCKED;
 
 QemuEvent rcu_gp_event;
+static int in_drain_call_rcu;
 static QemuMutex rcu_registry_lock;
 static QemuMutex rcu_sync_lock;
 
@@ -107,6 +108,8 @@ static void wait_for_readers(void)
                  * get some extra futex wakeups.
                  */
                 qatomic_set(&index->waiting, false);
+            } else if (qatomic_read(&in_drain_call_rcu)) {
+                notifier_list_notify(&index->force_rcu, index->force_rcu_data);
             }
         }
 
@@ -293,7 +296,6 @@ void call_rcu1(struct rcu_head *node, void (*func)(struct rcu_head *node))
     qemu_event_set(&rcu_call_ready_event);
 }
 
-
 struct rcu_drain {
     struct rcu_head rcu;
     QemuEvent drain_complete_event;
@@ -339,8 +341,10 @@ void drain_call_rcu(void)
      * assumed.
      */
 
+    qatomic_inc(&in_drain_call_rcu);
     call_rcu1(&rcu_drain.rcu, drain_rcu_callback);
     qemu_event_wait(&rcu_drain.drain_complete_event);
+    qatomic_dec(&in_drain_call_rcu);
 
     if (locked) {
         qemu_mutex_lock_iothread();
@@ -363,6 +367,22 @@ void rcu_unregister_thread(void)
     qemu_mutex_unlock(&rcu_registry_lock);
 }
 
+void rcu_add_force_rcu_notifier(Notifier *n, void *data)
+{
+    qemu_mutex_lock(&rcu_registry_lock);
+    notifier_list_add(&rcu_reader.force_rcu, n);
+    rcu_reader.force_rcu_data = data;
+    qemu_mutex_unlock(&rcu_registry_lock);
+}
+
+void rcu_remove_force_rcu_notifier(Notifier *n)
+{
+    qemu_mutex_lock(&rcu_registry_lock);
+    rcu_reader.force_rcu_data = NULL;
+    notifier_remove(n);
+    qemu_mutex_unlock(&rcu_registry_lock);
+}
+
 static void rcu_init_complete(void)
 {
     QemuThread thread;
-- 
2.31.1




* [PATCH v2 2/2] accel/tcg: Register a force_rcu notifier
  2021-10-19  5:56 [PATCH v2 0/2] accel/tcg: Fix monitor deadlock Greg Kurz
  2021-10-19  5:56 ` [PATCH v2 1/2] rcu: Introduce force_rcu notifier Greg Kurz
@ 2021-10-19  5:56 ` Greg Kurz
  1 sibling, 0 replies; 5+ messages in thread
From: Greg Kurz @ 2021-10-19  5:56 UTC
  To: qemu-devel
  Cc: Eduardo Habkost, Richard Henderson, Greg Kurz, qemu-stable,
	Paolo Bonzini

A TCG vCPU doing a busy loop systematically hangs the QEMU monitor
if the user passes 'device_add' without arguments. This is because
drain_call_rcu(), which is called from qmp_device_add(), cannot return
if readers don't exit their read-side critical sections. That is
typically what busy-looping TCG vCPUs do, both in MTTCG and RR modes:

int cpu_exec(CPUState *cpu)
{
[...]
    rcu_read_lock();
[...]
    while (!cpu_handle_exception(cpu, &ret)) {
        // Busy loop keeps vCPU here
    }
[...]
    rcu_read_unlock();

    return ret;
}

Have all vCPUs register a force_rcu notifier that will kick them out
of the loop using async_run_on_cpu(): queuing even a no-op work item
kicks the vCPU, so cpu_exec() returns and the read-side critical
section ends. The notifier is called with the rcu_registry_lock mutex
held; using async_run_on_cpu() ensures there are no deadlocks.

The notifier implementation is shared by MTTCG and RR since both are
affected.

Suggested-by: Paolo Bonzini <pbonzini@redhat.com>
Fixes: 7bed89958bfb ("device_core: use drain_call_rcu in in qmp_device_add")
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/650
Signed-off-by: Greg Kurz <groug@kaod.org>
---
 accel/tcg/tcg-accel-ops-mttcg.c |  3 +++
 accel/tcg/tcg-accel-ops-rr.c    |  3 +++
 accel/tcg/tcg-accel-ops.c       | 15 +++++++++++++++
 accel/tcg/tcg-accel-ops.h       |  2 ++
 4 files changed, 23 insertions(+)

diff --git a/accel/tcg/tcg-accel-ops-mttcg.c b/accel/tcg/tcg-accel-ops-mttcg.c
index 847d2079d21f..ea4a3217ce3f 100644
--- a/accel/tcg/tcg-accel-ops-mttcg.c
+++ b/accel/tcg/tcg-accel-ops-mttcg.c
@@ -44,11 +44,13 @@
 static void *mttcg_cpu_thread_fn(void *arg)
 {
     CPUState *cpu = arg;
+    Notifier force_rcu = { .notify = tcg_cpus_force_rcu };
 
     assert(tcg_enabled());
     g_assert(!icount_enabled());
 
     rcu_register_thread();
+    rcu_add_force_rcu_notifier(&force_rcu, cpu);
     tcg_register_thread();
 
     qemu_mutex_lock_iothread();
@@ -100,6 +102,7 @@ static void *mttcg_cpu_thread_fn(void *arg)
 
     tcg_cpus_destroy(cpu);
     qemu_mutex_unlock_iothread();
+    rcu_remove_force_rcu_notifier(&force_rcu);
     rcu_unregister_thread();
     return NULL;
 }
diff --git a/accel/tcg/tcg-accel-ops-rr.c b/accel/tcg/tcg-accel-ops-rr.c
index a5fd26190e20..fc0c4905caf5 100644
--- a/accel/tcg/tcg-accel-ops-rr.c
+++ b/accel/tcg/tcg-accel-ops-rr.c
@@ -144,9 +144,11 @@ static void rr_deal_with_unplugged_cpus(void)
 static void *rr_cpu_thread_fn(void *arg)
 {
     CPUState *cpu = arg;
+    Notifier force_rcu = { .notify = tcg_cpus_force_rcu };
 
     assert(tcg_enabled());
     rcu_register_thread();
+    rcu_add_force_rcu_notifier(&force_rcu, cpu);
     tcg_register_thread();
 
     qemu_mutex_lock_iothread();
@@ -255,6 +257,7 @@ static void *rr_cpu_thread_fn(void *arg)
         rr_deal_with_unplugged_cpus();
     }
 
+    rcu_remove_force_rcu_notifier(&force_rcu);
     rcu_unregister_thread();
     return NULL;
 }
diff --git a/accel/tcg/tcg-accel-ops.c b/accel/tcg/tcg-accel-ops.c
index 1a8e8390bd60..7f0d2b06044a 100644
--- a/accel/tcg/tcg-accel-ops.c
+++ b/accel/tcg/tcg-accel-ops.c
@@ -91,6 +91,21 @@ void tcg_handle_interrupt(CPUState *cpu, int mask)
     }
 }
 
+static void do_nothing(CPUState *cpu, run_on_cpu_data d)
+{
+}
+
+void tcg_cpus_force_rcu(Notifier *notify, void *data)
+{
+    CPUState *cpu = data;
+
+    /*
+     * Called with rcu_registry_lock held; using async_run_on_cpu() ensures
+     * that there are no deadlocks.
+     */
+    async_run_on_cpu(cpu, do_nothing, RUN_ON_CPU_NULL);
+}
+
 static void tcg_accel_ops_init(AccelOpsClass *ops)
 {
     if (qemu_tcg_mttcg_enabled()) {
diff --git a/accel/tcg/tcg-accel-ops.h b/accel/tcg/tcg-accel-ops.h
index 6a5fcef88980..8742041c8aea 100644
--- a/accel/tcg/tcg-accel-ops.h
+++ b/accel/tcg/tcg-accel-ops.h
@@ -18,5 +18,7 @@ void tcg_cpus_destroy(CPUState *cpu);
 int tcg_cpus_exec(CPUState *cpu);
 void tcg_handle_interrupt(CPUState *cpu, int mask);
 void tcg_cpu_init_cflags(CPUState *cpu, bool parallel);
+/* Common force_rcu notifier for MTTCG and RR */
+void tcg_cpus_force_rcu(Notifier *notify, void *data);
 
 #endif /* TCG_CPUS_H */
-- 
2.31.1




* Re: [PATCH v2 1/2] rcu: Introduce force_rcu notifier
  2021-10-19  5:56 ` [PATCH v2 1/2] rcu: Introduce force_rcu notifier Greg Kurz
@ 2021-10-19 11:26   ` Paolo Bonzini
  2021-10-20  7:59     ` Greg Kurz
  0 siblings, 1 reply; 5+ messages in thread
From: Paolo Bonzini @ 2021-10-19 11:26 UTC
  To: Greg Kurz, qemu-devel; +Cc: Richard Henderson, Eduardo Habkost, qemu-stable

On 19/10/21 07:56, Greg Kurz wrote:
> The drain_call_rcu() function can be blocked as long as an RCU reader
> stays in a read-side critical section. This is typically what happens
> when a TCG vCPU is executing a busy loop. It can deadlock the QEMU
> monitor as reported in https://gitlab.com/qemu-project/qemu/-/issues/650 .
> 
> This can be avoided by allowing drain_call_rcu() to enforce an RCU grace
> period. Since each reader might need to take specific actions to end its
> read-side critical section, do it with notifiers.
> 
> Prepare the ground for this by adding a notifier list to the RCU reader
> struct and using it in wait_for_readers() when drain_call_rcu() is in
> progress. An API is added for readers to register their notifiers.
> 
> This is largely based on a draft from Paolo Bonzini.
> 
> Suggested-by: Paolo Bonzini <pbonzini@redhat.com>
> Signed-off-by: Greg Kurz <groug@kaod.org>
> ---
>   include/qemu/rcu.h | 16 ++++++++++++++++
>   util/rcu.c         | 22 +++++++++++++++++++++-
>   2 files changed, 37 insertions(+), 1 deletion(-)
> 
> diff --git a/include/qemu/rcu.h b/include/qemu/rcu.h
> index 515d327cf11c..d8c4fd8686b4 100644
> --- a/include/qemu/rcu.h
> +++ b/include/qemu/rcu.h
> @@ -27,6 +27,7 @@
>   #include "qemu/thread.h"
>   #include "qemu/queue.h"
>   #include "qemu/atomic.h"
> +#include "qemu/notify.h"
>   #include "qemu/sys_membarrier.h"
>   
>   #ifdef __cplusplus
> @@ -66,6 +67,14 @@ struct rcu_reader_data {
>   
>       /* Data used for registry, protected by rcu_registry_lock */
>       QLIST_ENTRY(rcu_reader_data) node;
> +
> +    /*
> +     * NotifierList used to force an RCU grace period.  Accessed under
> +     * rcu_registry_lock.  Note that the notifier is called _outside_
> +     * the thread!
> +     */
> +    NotifierList force_rcu;
> +    void *force_rcu_data;

This is a bit ugly because the force_rcu_data is shared across all
notifiers.  Sure, right now we have only one, but still the data argument
should be passed to rcu_register_thread() rather than
rcu_add_force_rcu_notifier().

It's a pity because I liked the Notifier local variable...  But after 
thinking about it more and deleting some suggestions that won't work, 
it's just easiest to have the notifier in CPUState.

Maybe even move the unregistration to the existing function 
tcg_cpus_destroy, and add tcg_cpus_init that calls tcg_register_thread() 
and rcu_add_force_rcu_notifier().  This way you don't have to export 
tcg_cpus_force_rcu, and the tcg-accel-ops.h APIs are a bit more tidy.
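
Roughly like this (just a sketch, names illustrative):

/* data tied to the thread, not to each notifier */
void rcu_register_thread(void *force_rcu_data);
void rcu_add_force_rcu_notifier(Notifier *n);

/* the Notifier lives in CPUState instead of on the thread's stack */
struct CPUState {
    ...
    Notifier force_rcu;
};

void tcg_cpus_init(CPUState *cpu)
{
    tcg_register_thread();
    rcu_add_force_rcu_notifier(&cpu->force_rcu);
}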

Paolo

> +void rcu_add_force_rcu_notifier(Notifier *n, void *data)
> +{
> +    qemu_mutex_lock(&rcu_registry_lock);
> +    notifier_list_add(&rcu_reader.force_rcu, n);
> +    rcu_reader.force_rcu_data = data;
> +    qemu_mutex_unlock(&rcu_registry_lock);
> +}
> +
> +void rcu_remove_force_rcu_notifier(Notifier *n)
> +{
> +    qemu_mutex_lock(&rcu_registry_lock);
> +    rcu_reader.force_rcu_data = NULL;
> +    notifier_remove(n);
> +    qemu_mutex_unlock(&rcu_registry_lock);
> +}
> +
>   static void rcu_init_complete(void)
>   {
>       QemuThread thread;
> 




* Re: [PATCH v2 1/2] rcu: Introduce force_rcu notifier
  2021-10-19 11:26   ` Paolo Bonzini
@ 2021-10-20  7:59     ` Greg Kurz
  0 siblings, 0 replies; 5+ messages in thread
From: Greg Kurz @ 2021-10-20  7:59 UTC
  To: Paolo Bonzini; +Cc: qemu-stable, Richard Henderson, qemu-devel, Eduardo Habkost

On Tue, 19 Oct 2021 13:26:25 +0200
Paolo Bonzini <pbonzini@redhat.com> wrote:

> On 19/10/21 07:56, Greg Kurz wrote:
> > The drain_call_rcu() function can be blocked as long as an RCU reader
> > stays in a read-side critical section. This is typically what happens
> > when a TCG vCPU is executing a busy loop. It can deadlock the QEMU
> > monitor as reported in https://gitlab.com/qemu-project/qemu/-/issues/650 .
> > 
> > This can be avoided by allowing drain_call_rcu() to enforce an RCU grace
> > period. Since each reader might need to take specific actions to end its
> > read-side critical section, do it with notifiers.
> > 
> > Prepare the ground for this by adding a notifier list to the RCU reader
> > struct and using it in wait_for_readers() when drain_call_rcu() is in
> > progress. An API is added for readers to register their notifiers.
> > 
> > This is largely based on a draft from Paolo Bonzini.
> > 
> > Suggested-by: Paolo Bonzini <pbonzini@redhat.com>
> > Signed-off-by: Greg Kurz <groug@kaod.org>
> > ---
> >   include/qemu/rcu.h | 16 ++++++++++++++++
> >   util/rcu.c         | 22 +++++++++++++++++++++-
> >   2 files changed, 37 insertions(+), 1 deletion(-)
> > 
> > diff --git a/include/qemu/rcu.h b/include/qemu/rcu.h
> > index 515d327cf11c..d8c4fd8686b4 100644
> > --- a/include/qemu/rcu.h
> > +++ b/include/qemu/rcu.h
> > @@ -27,6 +27,7 @@
> >   #include "qemu/thread.h"
> >   #include "qemu/queue.h"
> >   #include "qemu/atomic.h"
> > +#include "qemu/notify.h"
> >   #include "qemu/sys_membarrier.h"
> >   
> >   #ifdef __cplusplus
> > @@ -66,6 +67,14 @@ struct rcu_reader_data {
> >   
> >       /* Data used for registry, protected by rcu_registry_lock */
> >       QLIST_ENTRY(rcu_reader_data) node;
> > +
> > +    /*
> > +     * NotifierList used to force an RCU grace period.  Accessed under
> > +     * rcu_registry_lock.  Note that the notifier is called _outside_
> > +     * the thread!
> > +     */
> > +    NotifierList force_rcu;
> > +    void *force_rcu_data;
> 
> This is a bit ugly because the force_rcu_data is shared across all
> notifiers.  Sure, right now we have only one, but still the data argument
> should be passed to rcu_register_thread() rather than
> rcu_add_force_rcu_notifier().
> 

I don't quite see why we'd need more than one notifier, but indeed
the current arrangement isn't conceptually correct.

> It's a pity because I liked the Notifier local variable...  But after 
> thinking about it more and deleting some suggestions that won't work, 
> it's just easiest to have the notifier in CPUState.
> 

Agreed.

> Maybe even move the unregistration to the existing function 
> tcg_cpus_destroy, and add tcg_cpus_init that calls tcg_register_thread() 
> and rcu_add_force_rcu_notifier().  This way you don't have to export 
> tcg_cpus_force_rcu, and the tcg-accel-ops.h APIs are a bit more tidy.
> 

I don't think we can do that because of round-robin: we only have one
thread in this case, but tcg_cpus_destroy() must still be called for all
vCPUs. Also, if I understand correctly how round-robin works, a single
notifier will work just fine no matter which vCPU is running when
wait_for_readers() is called.

> Paolo
> 
> > +void rcu_add_force_rcu_notifier(Notifier *n, void *data)
> > +{
> > +    qemu_mutex_lock(&rcu_registry_lock);
> > +    notifier_list_add(&rcu_reader.force_rcu, n);
> > +    rcu_reader.force_rcu_data = data;
> > +    qemu_mutex_unlock(&rcu_registry_lock);
> > +}
> > +
> > +void rcu_remove_force_rcu_notifier(Notifier *n)
> > +{
> > +    qemu_mutex_lock(&rcu_registry_lock);
> > +    rcu_reader.force_rcu_data = NULL;
> > +    notifier_remove(n);
> > +    qemu_mutex_unlock(&rcu_registry_lock);
> > +}
> > +
> >   static void rcu_init_complete(void)
> >   {
> >       QemuThread thread;
> > 
> 



