All of lore.kernel.org
 help / color / mirror / Atom feed
* [Qemu-devel] [PATCH] [RFC] Convert Qemu Timer List and Active Timers to RCU
@ 2014-02-12 19:09 Mike Day
  2014-02-13  9:11 ` Alex Bligh
  2014-02-15 10:23 ` Alex Bligh
  0 siblings, 2 replies; 10+ messages in thread
From: Mike Day @ 2014-02-12 19:09 UTC (permalink / raw)
  To: qemu-devel; +Cc: pbonzini, alex, peter.maydell


Allow readers to use RCU when reading Qemu timer lists. Applies to
Paolo Bonzini's RCU branch, https://github.com/bonzini/qemu/tree/rcu.

This patch is for comment and review only.  The rcu branch needs to be
rebased on upstream.

Signed-off-by: Mike Day <ncmike@ncultra.org>
---
 include/qemu/timer.h |  9 +++++-
 qemu-timer.c         | 88 +++++++++++++++++++++++++++++++++++-----------------
 2 files changed, 67 insertions(+), 30 deletions(-)

diff --git a/include/qemu/timer.h b/include/qemu/timer.h
index b58903b..5aaa213 100644
--- a/include/qemu/timer.h
+++ b/include/qemu/timer.h
@@ -4,7 +4,13 @@
 #include "qemu/typedefs.h"
 #include "qemu-common.h"
 #include "qemu/notify.h"
-
+#ifdef __GNUC__
+#ifndef __ATOMIC_RELEASE
+#define __ATOMIC_RELEASE
+#endif
+#endif
+#include "qemu/atomic.h"
+#include "qemu/rcu.h"
 /* timers */
 
 #define SCALE_MS 1000000
@@ -61,6 +67,7 @@ struct QEMUTimer {
     void *opaque;
     QEMUTimer *next;
     int scale;
+    struct rcu_head rcu;
 };
 
 extern QEMUTimerListGroup main_loop_tlg;
diff --git a/qemu-timer.c b/qemu-timer.c
index 6b62e88..27285ca 100644
--- a/qemu-timer.c
+++ b/qemu-timer.c
@@ -29,6 +29,7 @@
 #include "hw/hw.h"
 
 #include "qemu/timer.h"
+#include "qemu/rcu_queue.h"
 #ifdef CONFIG_POSIX
 #include <pthread.h>
 #endif
@@ -49,7 +50,6 @@ typedef struct QEMUClock {
 
     NotifierList reset_notifiers;
     int64_t last;
-
     QEMUClockType type;
     bool enabled;
 } QEMUClock;
@@ -71,6 +71,7 @@ struct QEMUTimerList {
     QLIST_ENTRY(QEMUTimerList) list;
     QEMUTimerListNotifyCB *notify_cb;
     void *notify_opaque;
+    struct rcu_head rcu;
 };
 
 /**
@@ -107,6 +108,12 @@ QEMUTimerList *timerlist_new(QEMUClockType type,
     return timer_list;
 }
 
+static void reclaim_timer_list(struct rcu_head *rcu)
+{
+    QEMUTimerList *timer_list = container_of(rcu, QEMUTimerList, rcu);
+    g_free(timer_list);
+}
+
 void timerlist_free(QEMUTimerList *timer_list)
 {
     assert(!timerlist_has_timers(timer_list));
@@ -114,7 +121,7 @@ void timerlist_free(QEMUTimerList *timer_list)
         QLIST_REMOVE(timer_list, list);
     }
     qemu_mutex_destroy(&timer_list->active_timers_lock);
-    g_free(timer_list);
+    call_rcu1(&timer_list->rcu, reclaim_timer_list);
 }
 
 static void qemu_clock_init(QEMUClockType type)
@@ -138,9 +145,11 @@ void qemu_clock_notify(QEMUClockType type)
 {
     QEMUTimerList *timer_list;
     QEMUClock *clock = qemu_clock_ptr(type);
-    QLIST_FOREACH(timer_list, &clock->timerlists, list) {
+    rcu_read_lock();
+    QLIST_FOREACH_RCU(timer_list, &clock->timerlists, list) {
         timerlist_notify(timer_list);
     }
+    rcu_read_unlock();
 }
 
 void qemu_clock_enable(QEMUClockType type, bool enabled)
@@ -155,7 +164,7 @@ void qemu_clock_enable(QEMUClockType type, bool enabled)
 
 bool timerlist_has_timers(QEMUTimerList *timer_list)
 {
-    return !!timer_list->active_timers;
+    return !!atomic_rcu_read(&timer_list->active_timers);
 }
 
 bool qemu_clock_has_timers(QEMUClockType type)
@@ -168,15 +177,15 @@ bool timerlist_expired(QEMUTimerList *timer_list)
 {
     int64_t expire_time;
 
-    qemu_mutex_lock(&timer_list->active_timers_lock);
-    if (!timer_list->active_timers) {
-        qemu_mutex_unlock(&timer_list->active_timers_lock);
+    rcu_read_lock();
+    if (atomic_rcu_read(&timer_list->active_timers)) {
+        rcu_read_unlock();
         return false;
     }
-    expire_time = timer_list->active_timers->expire_time;
-    qemu_mutex_unlock(&timer_list->active_timers_lock);
 
+    expire_time = timer_list->active_timers->expire_time;
     return expire_time < qemu_clock_get_ns(timer_list->clock->type);
+    rcu_read_unlock();
 }
 
 bool qemu_clock_expired(QEMUClockType type)
@@ -194,8 +203,10 @@ int64_t timerlist_deadline_ns(QEMUTimerList *timer_list)
 {
     int64_t delta;
     int64_t expire_time;
-
-    if (!timer_list->clock->enabled) {
+    bool enabled;
+    rcu_read_lock();
+    enabled = atomic_rcu_read(&timer_list->clock->enabled);
+    if (!enabled) {
         return -1;
     }
 
@@ -203,16 +214,13 @@ int64_t timerlist_deadline_ns(QEMUTimerList *timer_list)
      * value but ->notify_cb() is called when the deadline changes.  Therefore
      * the caller should notice the change and there is no race condition.
      */
-    qemu_mutex_lock(&timer_list->active_timers_lock);
     if (!timer_list->active_timers) {
-        qemu_mutex_unlock(&timer_list->active_timers_lock);
+        rcu_read_unlock();
         return -1;
     }
     expire_time = timer_list->active_timers->expire_time;
-    qemu_mutex_unlock(&timer_list->active_timers_lock);
-
     delta = expire_time - qemu_clock_get_ns(timer_list->clock->type);
-
+    rcu_read_unlock();
     if (delta <= 0) {
         return 0;
     }
@@ -230,16 +238,22 @@ int64_t qemu_clock_deadline_ns_all(QEMUClockType type)
     int64_t deadline = -1;
     QEMUTimerList *timer_list;
     QEMUClock *clock = qemu_clock_ptr(type);
-    QLIST_FOREACH(timer_list, &clock->timerlists, list) {
+    rcu_read_lock();
+    QLIST_FOREACH_RCU(timer_list, &clock->timerlists, list) {
         deadline = qemu_soonest_timeout(deadline,
                                         timerlist_deadline_ns(timer_list));
     }
+    rcu_read_unlock();
     return deadline;
 }
 
 QEMUClockType timerlist_get_clock(QEMUTimerList *timer_list)
 {
-    return timer_list->clock->type;
+    QEMUClockType type;
+    rcu_read_lock();
+    type = atomic_rcu_read(&timer_list->clock->type);
+    rcu_read_unlock();
+    return type;
 }
 
 QEMUTimerList *qemu_clock_get_main_loop_timerlist(QEMUClockType type)
@@ -249,11 +263,13 @@ QEMUTimerList *qemu_clock_get_main_loop_timerlist(QEMUClockType type)
 
 void timerlist_notify(QEMUTimerList *timer_list)
 {
-    if (timer_list->notify_cb) {
+    rcu_read_lock();
+    if (atomic_rcu_read(&timer_list->notify_cb)) {
         timer_list->notify_cb(timer_list->notify_opaque);
     } else {
         qemu_notify_event();
     }
+    rcu_read_unlock();
 }
 
 /* Transition function to convert a nanosecond timeout to ms
@@ -308,18 +324,24 @@ void timer_init(QEMUTimer *ts,
                 QEMUTimerList *timer_list, int scale,
                 QEMUTimerCB *cb, void *opaque)
 {
-    ts->timer_list = timer_list;
     ts->cb = cb;
     ts->opaque = opaque;
     ts->scale = scale;
     ts->expire_time = -1;
+    ts->timer_list = timer_list;
 }
 
-void timer_free(QEMUTimer *ts)
+static void reclaim_timer(struct rcu_head *rcu)
 {
+    QEMUTimer *ts = container_of(rcu, QEMUTimer, rcu);
     g_free(ts);
 }
 
+void timer_free(QEMUTimer *ts)
+{
+    call_rcu1(&ts->rcu, reclaim_timer);
+}
+
 static void timer_del_locked(QEMUTimerList *timer_list, QEMUTimer *ts)
 {
     QEMUTimer **pt, *t;
@@ -331,7 +353,7 @@ static void timer_del_locked(QEMUTimerList *timer_list, QEMUTimer *ts)
         if (!t)
             break;
         if (t == ts) {
-            *pt = t->next;
+            atomic_rcu_set(pt, t->next);
             break;
         }
         pt = &t->next;
@@ -369,15 +391,17 @@ void timer_mod_ns(QEMUTimer *ts, int64_t expire_time)
     }
     ts->expire_time = MAX(expire_time, 0);
     ts->next = *pt;
-    *pt = ts;
+    atomic_rcu_set(pt, ts);
     qemu_mutex_unlock(&timer_list->active_timers_lock);
-
     /* Rearm if necessary  */
+    rcu_read_unlock();
     if (pt == &timer_list->active_timers) {
         /* Interrupt execution to force deadline recalculation.  */
         qemu_clock_warp(timer_list->clock->type);
         timerlist_notify(timer_list);
     }
+    rcu_read_unlock();
+
 }
 
 void timer_mod(QEMUTimer *ts, int64_t expire_time)
@@ -402,30 +426,35 @@ bool timerlist_run_timers(QEMUTimerList *timer_list)
     bool progress = false;
     QEMUTimerCB *cb;
     void *opaque;
+    bool enabled;
 
-    if (!timer_list->clock->enabled) {
+    enabled = atomic_rcu_read(&timer_list->clock->enabled);
+    if (!enabled) {
         return progress;
     }
-
+    rcu_read_lock();
     current_time = qemu_clock_get_ns(timer_list->clock->type);
     for(;;) {
-        qemu_mutex_lock(&timer_list->active_timers_lock);
         ts = timer_list->active_timers;
         if (!timer_expired_ns(ts, current_time)) {
-            qemu_mutex_unlock(&timer_list->active_timers_lock);
+            rcu_read_unlock();
             break;
         }
+        rcu_read_unlock();
 
+        qemu_mutex_lock(&timer_list->active_timers_lock);
         /* remove timer from the list before calling the callback */
-        timer_list->active_timers = ts->next;
         ts->next = NULL;
         ts->expire_time = -1;
         cb = ts->cb;
         opaque = ts->opaque;
+        timer_list->active_timers = ts->next;
         qemu_mutex_unlock(&timer_list->active_timers_lock);
 
         /* run the callback (the timer list can be modified) */
+        rcu_read_lock();
         cb(opaque);
+        rcu_read_unlock();
         progress = true;
     }
     return progress;
@@ -477,6 +506,7 @@ int64_t timerlistgroup_deadline_ns(QEMUTimerListGroup *tlg)
     return deadline;
 }
 
+/* caller holds an rcu read lock */
 int64_t qemu_clock_get_ns(QEMUClockType type)
 {
     int64_t now, last;

-- 
Mike Day | "Endurance is a Virtue"

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [Qemu-devel] [PATCH] [RFC] Convert Qemu Timer List and Active Timers to RCU
  2014-02-12 19:09 [Qemu-devel] [PATCH] [RFC] Convert Qemu Timer List and Active Timers to RCU Mike Day
@ 2014-02-13  9:11 ` Alex Bligh
  2014-02-13  9:25   ` Paolo Bonzini
  2014-02-15 10:23 ` Alex Bligh
  1 sibling, 1 reply; 10+ messages in thread
From: Alex Bligh @ 2014-02-13  9:11 UTC (permalink / raw)
  To: Mike Day; +Cc: pbonzini, qemu-devel, Alex Bligh, peter.maydell

Mike,

On 12 Feb 2014, at 19:09, Mike Day wrote:

> Allow readers to use RCU when reading Qemu timer lists. Applies to
> Paolo Bonzini's RCU branch, https://github.com/bonzini/qemu/tree/rcu.
> 
> This patch is for comment and review only.  The rcu branch needs to be
> rebased on upstream.

I'll certainly have a look through this. However before I do, what
problem is this trying to solve? Do we think there is possibility
of contention on the active timers lock? I used to think this was
taken (let alone contented) relatively infrequently, but Rob Herring's
recent email suggests to me the list is being modified in some
circumstances rather more frequently than I thought.

-- 
Alex Bligh

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [Qemu-devel] [PATCH] [RFC] Convert Qemu Timer List and Active Timers to RCU
  2014-02-13  9:11 ` Alex Bligh
@ 2014-02-13  9:25   ` Paolo Bonzini
  2014-02-13 12:06     ` Mike Day
  0 siblings, 1 reply; 10+ messages in thread
From: Paolo Bonzini @ 2014-02-13  9:25 UTC (permalink / raw)
  To: Alex Bligh, Mike Day; +Cc: peter.maydell, qemu-devel

Il 13/02/2014 10:11, Alex Bligh ha scritto:
> Mike,
>
> On 12 Feb 2014, at 19:09, Mike Day wrote:
>
>> Allow readers to use RCU when reading Qemu timer lists. Applies to
>> Paolo Bonzini's RCU branch, https://github.com/bonzini/qemu/tree/rcu.
>>
>> This patch is for comment and review only.  The rcu branch needs to be
>> rebased on upstream.
>
> I'll certainly have a look through this. However before I do, what
> problem is this trying to solve? Do we think there is possibility
> of contention on the active timers lock? I used to think this was
> taken (let alone contented) relatively infrequently, but Rob Herring's
> recent email suggests to me the list is being modified in some
> circumstances rather more frequently than I thought.

I think that, more than contention, it tries to reduce the cost of 
synchronization primitives, especially the locking and unlocking of the 
list around the invocation of timer callbacks.

Paolo

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [Qemu-devel] [PATCH] [RFC] Convert Qemu Timer List and Active Timers to RCU
  2014-02-13  9:25   ` Paolo Bonzini
@ 2014-02-13 12:06     ` Mike Day
  2014-02-13 12:57       ` Alex Bligh
  0 siblings, 1 reply; 10+ messages in thread
From: Mike Day @ 2014-02-13 12:06 UTC (permalink / raw)
  To: Paolo Bonzini; +Cc: Peter Maydell, qemu-devel, Alex Bligh

On Thu, Feb 13, 2014 at 4:25 AM, Paolo Bonzini <pbonzini@redhat.com> wrote:
> Il 13/02/2014 10:11, Alex Bligh ha scritto:
>>
>> I'll certainly have a look through this. However before I do, what
>> problem is this trying to solve? Do we think there is possibility
>> of contention on the active timers lock? I used to think this was
>> taken (let alone contented) relatively infrequently, but Rob Herring's
>> recent email suggests to me the list is being modified in some
>> circumstances rather more frequently than I thought.
>
> I think that, more than contention, it tries to reduce the cost of
> synchronization primitives, especially the locking and unlocking of the list
> around the invocation of timer callbacks.

Yes, the assumption is that the active timers are a read-mostly list,
so rcu is a win.

Mike

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [Qemu-devel] [PATCH] [RFC] Convert Qemu Timer List and Active Timers to RCU
  2014-02-13 12:06     ` Mike Day
@ 2014-02-13 12:57       ` Alex Bligh
  0 siblings, 0 replies; 10+ messages in thread
From: Alex Bligh @ 2014-02-13 12:57 UTC (permalink / raw)
  To: Mike Day; +Cc: Paolo Bonzini, qemu-devel, Alex Bligh, Peter Maydell


On 13 Feb 2014, at 12:06, Mike Day wrote:

>> I think that, more than contention, it tries to reduce the cost of
>> synchronization primitives, especially the locking and unlocking of the list
>> around the invocation of timer callbacks.
> 
> Yes, the assumption is that the active timers are a read-mostly list,
> so rcu is a win.

Thanks - I'll have a look through.

-- 
Alex Bligh

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [Qemu-devel] [PATCH] [RFC] Convert Qemu Timer List and Active Timers to RCU
  2014-02-12 19:09 [Qemu-devel] [PATCH] [RFC] Convert Qemu Timer List and Active Timers to RCU Mike Day
  2014-02-13  9:11 ` Alex Bligh
@ 2014-02-15 10:23 ` Alex Bligh
  2014-02-15 20:33   ` Mike Day
  2014-02-17 16:13   ` Mike Day
  1 sibling, 2 replies; 10+ messages in thread
From: Alex Bligh @ 2014-02-15 10:23 UTC (permalink / raw)
  To: Mike Day; +Cc: pbonzini, qemu-devel, Alex Bligh, peter.maydell

Mike,

Some comments:

1. You seem to be removing the use of the active_timers_lock and replacing it by
   rcu (fine). However, you seem to have left the qemu_mutex_destroy in
   timerlist_free, and left the mutex in QEMUTimerList. Any reason why we need
   both?

2. You have introduced rcu not only to protect active_timers, the list of
   active timers within one timerlist, but also to protect (I think)
   the list of timerlists, as evidenced by the fact you have
   reclaim_timer_list as well as reclaim_timer. We currently have no
   locking in the creation/deletion of timerlists as these only happen on init
   or on when an AioContext is created/destroyed (as these are the only things
   that create or delete TimerListGroups); both these operations are
   required to happen within the main thread and are protected by the
   BQL. Indeed currently nothing every destroys an AioContext. I suspect
   if there is really a need to destroy timerlists in threads other than
   the main thread we are going to have more issues than locking of the list of
   timerlists. I suggest you remove these changes and solely look at protecting
   the list of active timers (as opposed to the list of timerlists) by RCU.

   I wrote something describing the new structure here:
     http://blog.alex.org.uk/2013/08/24/changes-to-qemus-timer-system/

   I suspect simplifying this such that you are *only* protecting the active
   timer list would make the patch far less intrusive.

3. If you are going to protect the list of timerlists as well, do you
   not need to at least take the rcu read lock in timerlist_new?

4. In timerlist_notify(), you are holding the rcu_read_lock across
   the callback or qemu_notify_event. Why is this necessary? EG can't you
   do something like:

    void timerlist_notify(QEMUTimerList *timer_list)
    {
         rcu_read_lock();
         if (atomic_rcu_read(&timer_list->notify_cb)) {
            QEMUTimerListNotifyCB *notify_cb = timer_list->notify_cb;
            void *notify_opaque = timer_list->notify_opaque;
            rcu_read_unlock();
            notify_cb(notify_opaque);
        } else {
            rcu_read_unlock();
            qemu_notify_event();
        }
    }

   as if the callback modifies the timer it will take its own rcu read lock.
   My concern here is that the qemu_notify_event stuff could in theory be
   quite long running. Perhaps we don't care about how often reclaim occurs
   though.

5. Similarly to the above, in qemu_clock_notify, you take the
   rcu_read_lock, and then do a QLIST_FOREACH_RCU. That will mean
   the rcu_read_lock is taken anyway in 99% of calls to timerlist_notify.
   This is going through the list of timerlists, and that list is in
   practice static (protected by the BQL). Similarly in
   qemu_clock_deadline_ns_all, timerlist_get_clock, etc.

6. In timerlist_expired, rcu_read_unlock() appears to get called
   at the end after the return statement (last line of the function).

7. May be my reading of the diff, but in timer_mod_ns it seems to be
   introducing two rcu_read_unlock()s at the end of the function
   (after 'Rearm if necessary') - should the first not be a
   rcu_read_lock()?

8. Again, may be my reading of the diff, but you appear to have
   introduced a rcu_read_lock() outside the loop, but then
   unlock it within the loop. If the loop iterates more than once,
   won't it do more unlocks than locks?

Alex




On 12 Feb 2014, at 19:09, Mike Day wrote:

> 
> Allow readers to use RCU when reading Qemu timer lists. Applies to
> Paolo Bonzini's RCU branch, https://github.com/bonzini/qemu/tree/rcu.
> 
> This patch is for comment and review only.  The rcu branch needs to be
> rebased on upstream.
> 
> Signed-off-by: Mike Day <ncmike@ncultra.org>
> ---
> include/qemu/timer.h |  9 +++++-
> qemu-timer.c         | 88 +++++++++++++++++++++++++++++++++++-----------------
> 2 files changed, 67 insertions(+), 30 deletions(-)
> 
> diff --git a/include/qemu/timer.h b/include/qemu/timer.h
> index b58903b..5aaa213 100644
> --- a/include/qemu/timer.h
> +++ b/include/qemu/timer.h
> @@ -4,7 +4,13 @@
> #include "qemu/typedefs.h"
> #include "qemu-common.h"
> #include "qemu/notify.h"
> -
> +#ifdef __GNUC__
> +#ifndef __ATOMIC_RELEASE
> +#define __ATOMIC_RELEASE
> +#endif
> +#endif
> +#include "qemu/atomic.h"
> +#include "qemu/rcu.h"
> /* timers */
> 
> #define SCALE_MS 1000000
> @@ -61,6 +67,7 @@ struct QEMUTimer {
>     void *opaque;
>     QEMUTimer *next;
>     int scale;
> +    struct rcu_head rcu;
> };
> 
> extern QEMUTimerListGroup main_loop_tlg;
> diff --git a/qemu-timer.c b/qemu-timer.c
> index 6b62e88..27285ca 100644
> --- a/qemu-timer.c
> +++ b/qemu-timer.c
> @@ -29,6 +29,7 @@
> #include "hw/hw.h"
> 
> #include "qemu/timer.h"
> +#include "qemu/rcu_queue.h"
> #ifdef CONFIG_POSIX
> #include <pthread.h>
> #endif
> @@ -49,7 +50,6 @@ typedef struct QEMUClock {
> 
>     NotifierList reset_notifiers;
>     int64_t last;
> -
>     QEMUClockType type;
>     bool enabled;
> } QEMUClock;
> @@ -71,6 +71,7 @@ struct QEMUTimerList {
>     QLIST_ENTRY(QEMUTimerList) list;
>     QEMUTimerListNotifyCB *notify_cb;
>     void *notify_opaque;
> +    struct rcu_head rcu;
> };
> 
> /**
> @@ -107,6 +108,12 @@ QEMUTimerList *timerlist_new(QEMUClockType type,
>     return timer_list;
> }
> 
> +static void reclaim_timer_list(struct rcu_head *rcu)
> +{
> +    QEMUTimerList *timer_list = container_of(rcu, QEMUTimerList, rcu);
> +    g_free(timer_list);
> +}
> +
> void timerlist_free(QEMUTimerList *timer_list)
> {
>     assert(!timerlist_has_timers(timer_list));
> @@ -114,7 +121,7 @@ void timerlist_free(QEMUTimerList *timer_list)
>         QLIST_REMOVE(timer_list, list);
>     }
>     qemu_mutex_destroy(&timer_list->active_timers_lock);
> -    g_free(timer_list);
> +    call_rcu1(&timer_list->rcu, reclaim_timer_list);
> }
> 
> static void qemu_clock_init(QEMUClockType type)
> @@ -138,9 +145,11 @@ void qemu_clock_notify(QEMUClockType type)
> {
>     QEMUTimerList *timer_list;
>     QEMUClock *clock = qemu_clock_ptr(type);
> -    QLIST_FOREACH(timer_list, &clock->timerlists, list) {
> +    rcu_read_lock();
> +    QLIST_FOREACH_RCU(timer_list, &clock->timerlists, list) {
>         timerlist_notify(timer_list);
>     }
> +    rcu_read_unlock();
> }
> 
> void qemu_clock_enable(QEMUClockType type, bool enabled)
> @@ -155,7 +164,7 @@ void qemu_clock_enable(QEMUClockType type, bool enabled)
> 
> bool timerlist_has_timers(QEMUTimerList *timer_list)
> {
> -    return !!timer_list->active_timers;
> +    return !!atomic_rcu_read(&timer_list->active_timers);
> }
> 
> bool qemu_clock_has_timers(QEMUClockType type)
> @@ -168,15 +177,15 @@ bool timerlist_expired(QEMUTimerList *timer_list)
> {
>     int64_t expire_time;
> 
> -    qemu_mutex_lock(&timer_list->active_timers_lock);
> -    if (!timer_list->active_timers) {
> -        qemu_mutex_unlock(&timer_list->active_timers_lock);
> +    rcu_read_lock();
> +    if (atomic_rcu_read(&timer_list->active_timers)) {
> +        rcu_read_unlock();
>         return false;
>     }
> -    expire_time = timer_list->active_timers->expire_time;
> -    qemu_mutex_unlock(&timer_list->active_timers_lock);
> 
> +    expire_time = timer_list->active_timers->expire_time;
>     return expire_time < qemu_clock_get_ns(timer_list->clock->type);
> +    rcu_read_unlock();
> }
> 
> bool qemu_clock_expired(QEMUClockType type)
> @@ -194,8 +203,10 @@ int64_t timerlist_deadline_ns(QEMUTimerList *timer_list)
> {
>     int64_t delta;
>     int64_t expire_time;
> -
> -    if (!timer_list->clock->enabled) {
> +    bool enabled;
> +    rcu_read_lock();
> +    enabled = atomic_rcu_read(&timer_list->clock->enabled);
> +    if (!enabled) {
>         return -1;
>     }
> 
> @@ -203,16 +214,13 @@ int64_t timerlist_deadline_ns(QEMUTimerList *timer_list)
>      * value but ->notify_cb() is called when the deadline changes.  Therefore
>      * the caller should notice the change and there is no race condition.
>      */
> -    qemu_mutex_lock(&timer_list->active_timers_lock);
>     if (!timer_list->active_timers) {
> -        qemu_mutex_unlock(&timer_list->active_timers_lock);
> +        rcu_read_unlock();
>         return -1;
>     }
>     expire_time = timer_list->active_timers->expire_time;
> -    qemu_mutex_unlock(&timer_list->active_timers_lock);
> -
>     delta = expire_time - qemu_clock_get_ns(timer_list->clock->type);
> -
> +    rcu_read_unlock();
>     if (delta <= 0) {
>         return 0;
>     }
> @@ -230,16 +238,22 @@ int64_t qemu_clock_deadline_ns_all(QEMUClockType type)
>     int64_t deadline = -1;
>     QEMUTimerList *timer_list;
>     QEMUClock *clock = qemu_clock_ptr(type);
> -    QLIST_FOREACH(timer_list, &clock->timerlists, list) {
> +    rcu_read_lock();
> +    QLIST_FOREACH_RCU(timer_list, &clock->timerlists, list) {
>         deadline = qemu_soonest_timeout(deadline,
>                                         timerlist_deadline_ns(timer_list));
>     }
> +    rcu_read_unlock();
>     return deadline;
> }
> 
> QEMUClockType timerlist_get_clock(QEMUTimerList *timer_list)
> {
> -    return timer_list->clock->type;
> +    QEMUClockType type;
> +    rcu_read_lock();
> +    type = atomic_rcu_read(&timer_list->clock->type);
> +    rcu_read_unlock();
> +    return type;
> }
> 
> QEMUTimerList *qemu_clock_get_main_loop_timerlist(QEMUClockType type)
> @@ -249,11 +263,13 @@ QEMUTimerList *qemu_clock_get_main_loop_timerlist(QEMUClockType type)
> 
> void timerlist_notify(QEMUTimerList *timer_list)
> {
> -    if (timer_list->notify_cb) {
> +    rcu_read_lock();
> +    if (atomic_rcu_read(&timer_list->notify_cb)) {
>         timer_list->notify_cb(timer_list->notify_opaque);
>     } else {
>         qemu_notify_event();
>     }
> +    rcu_read_unlock();
> }
> 
> /* Transition function to convert a nanosecond timeout to ms
> @@ -308,18 +324,24 @@ void timer_init(QEMUTimer *ts,
>                 QEMUTimerList *timer_list, int scale,
>                 QEMUTimerCB *cb, void *opaque)
> {
> -    ts->timer_list = timer_list;
>     ts->cb = cb;
>     ts->opaque = opaque;
>     ts->scale = scale;
>     ts->expire_time = -1;
> +    ts->timer_list = timer_list;
> }
> 
> -void timer_free(QEMUTimer *ts)
> +static void reclaim_timer(struct rcu_head *rcu)
> {
> +    QEMUTimer *ts = container_of(rcu, QEMUTimer, rcu);
>     g_free(ts);
> }
> 
> +void timer_free(QEMUTimer *ts)
> +{
> +    call_rcu1(&ts->rcu, reclaim_timer);
> +}
> +
> static void timer_del_locked(QEMUTimerList *timer_list, QEMUTimer *ts)
> {
>     QEMUTimer **pt, *t;
> @@ -331,7 +353,7 @@ static void timer_del_locked(QEMUTimerList *timer_list, QEMUTimer *ts)
>         if (!t)
>             break;
>         if (t == ts) {
> -            *pt = t->next;
> +            atomic_rcu_set(pt, t->next);
>             break;
>         }
>         pt = &t->next;
> @@ -369,15 +391,17 @@ void timer_mod_ns(QEMUTimer *ts, int64_t expire_time)
>     }
>     ts->expire_time = MAX(expire_time, 0);
>     ts->next = *pt;
> -    *pt = ts;
> +    atomic_rcu_set(pt, ts);
>     qemu_mutex_unlock(&timer_list->active_timers_lock);
> -
>     /* Rearm if necessary  */
> +    rcu_read_unlock();
>     if (pt == &timer_list->active_timers) {
>         /* Interrupt execution to force deadline recalculation.  */
>         qemu_clock_warp(timer_list->clock->type);
>         timerlist_notify(timer_list);
>     }
> +    rcu_read_unlock();
> +
> }
> 
> void timer_mod(QEMUTimer *ts, int64_t expire_time)
> @@ -402,30 +426,35 @@ bool timerlist_run_timers(QEMUTimerList *timer_list)
>     bool progress = false;
>     QEMUTimerCB *cb;
>     void *opaque;
> +    bool enabled;
> 
> -    if (!timer_list->clock->enabled) {
> +    enabled = atomic_rcu_read(&timer_list->clock->enabled);
> +    if (!enabled) {
>         return progress;
>     }
> -
> +    rcu_read_lock();
>     current_time = qemu_clock_get_ns(timer_list->clock->type);
>     for(;;) {
> -        qemu_mutex_lock(&timer_list->active_timers_lock);
>         ts = timer_list->active_timers;
>         if (!timer_expired_ns(ts, current_time)) {
> -            qemu_mutex_unlock(&timer_list->active_timers_lock);
> +            rcu_read_unlock();
>             break;
>         }
> +        rcu_read_unlock();
> 
> +        qemu_mutex_lock(&timer_list->active_timers_lock);
>         /* remove timer from the list before calling the callback */
> -        timer_list->active_timers = ts->next;
>         ts->next = NULL;
>         ts->expire_time = -1;
>         cb = ts->cb;
>         opaque = ts->opaque;
> +        timer_list->active_timers = ts->next;
>         qemu_mutex_unlock(&timer_list->active_timers_lock);
> 
>         /* run the callback (the timer list can be modified) */
> +        rcu_read_lock();
>         cb(opaque);
> +        rcu_read_unlock();
>         progress = true;
>     }
>     return progress;
> @@ -477,6 +506,7 @@ int64_t timerlistgroup_deadline_ns(QEMUTimerListGroup *tlg)
>     return deadline;
> }
> 
> +/* caller holds an rcu read lock */
> int64_t qemu_clock_get_ns(QEMUClockType type)
> {
>     int64_t now, last;
> 
> -- 
> Mike Day | "Endurance is a Virtue"
> 
> 

-- 
Alex Bligh

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [Qemu-devel] [PATCH] [RFC] Convert Qemu Timer List and Active Timers to RCU
  2014-02-15 10:23 ` Alex Bligh
@ 2014-02-15 20:33   ` Mike Day
  2014-02-15 22:00     ` Alex Bligh
  2014-02-17 16:13   ` Mike Day
  1 sibling, 1 reply; 10+ messages in thread
From: Mike Day @ 2014-02-15 20:33 UTC (permalink / raw)
  To: Alex Bligh; +Cc: pbonzini, qemu-devel, peter.maydell


Alex Bligh <alex@alex.org.uk> writes:

> Some comments:
Thanks for the thorough review!
> 1. You seem to be removing the use of the active_timers_lock and replacing it by
>    rcu (fine). However, you seem to have left the qemu_mutex_destroy in
>    timerlist_free, and left the mutex in QEMUTimerList. Any reason why we need
>    both?
This was an oversight on my part - which I will correct.

> 2. You have introduced rcu not only to protect active_timers, the list of
>    active timers within one timerlist, but also to protect (I think)
>    the list of timerlists, as evidenced by the fact you have
>    reclaim_timer_list as well as reclaim_timer. We currently have no
>    locking in the creation/deletion of timerlists as these only happen on init
>    or on when an AioContext is created/destroyed (as these are the only things
>    that create or delete TimerListGroups); both these operations are
>    required to happen within the main thread and are protected by the
>    BQL. Indeed currently nothing every destroys an AioContext. I suspect
>    if there is really a need to destroy timerlists in threads other than
>    the main thread we are going to have more issues than locking of the list of
>    timerlists. I suggest you remove these changes and solely look at protecting
>    the list of active timers (as opposed to the list of timerlists) by RCU.

I thought about this a lot and and I'm glad you are bringing up the
timerlist here. As you said, I couldn't find anywhere in the code base
that manipulates this list. But I also noticed that the timer and clock
structures are accessed by unrelated code modules. I don't know enough
about the timer usage to predict how timers will be used by other code
modules so I decided to overprotect and see what you thought about it.

Removing the protection of the timerlist will vastly simplify this
patch. Eventually, though, when we remove the BQL, I'm assuming we will
need to protect the timerlist somehow. I'm happy now re-factoring toward
a much simpler patch that just protects the active timer list.

>
> 3. If you are going to protect the list of timerlists as well, do you
>    not need to at least take the rcu read lock in timerlist_new?

Thanks for catching this. It probably needs a proper mutex (which is the
BQL now as you stated).

> 4. In timerlist_notify(), you are holding the rcu_read_lock across
>    the callback or qemu_notify_event. Why is this necessary? EG can't you
>    do something like:
>
>     void timerlist_notify(QEMUTimerList *timer_list)
>     {
>          rcu_read_lock();
>          if (atomic_rcu_read(&timer_list->notify_cb)) {
>             QEMUTimerListNotifyCB *notify_cb = timer_list->notify_cb;
>             void *notify_opaque = timer_list->notify_opaque;
>             rcu_read_unlock();
>             notify_cb(notify_opaque);
>         } else {
>             rcu_read_unlock();
>             qemu_notify_event();
>         }
>     }

I thought a lot about this one and went back and forth a couple of
times, so I'm glad you are looking at it. My concern is that the timer
could be reclaimed during execution ofthe callback. And, the callback
may be referincing the timer itself. Even if the callback takes an
rcu_read_lock, there is a window between when the timer code leaves the
rcu critical section and calls the notification code, and when the
notification callback enters the rcu critical section via
rcu_read_lock. It may be possible though unlikely that a reclaim could
occur during this small window.

The downside, as far as I know, is that the notification callback will
take a long time to execute. This is not good, but it won't cause an
error by itself.

> 5. Similarly to the above, in qemu_clock_notify, you take the
>    rcu_read_lock, and then do a QLIST_FOREACH_RCU. That will mean
>    the rcu_read_lock is taken anyway in 99% of calls to timerlist_notify.
>    This is going through the list of timerlists, and that list is in
>    practice static (protected by the BQL). Similarly in
>    qemu_clock_deadline_ns_all, timerlist_get_clock, etc.

My thinking here is that rcu_read_lock can be called recursively with no
bad effect other than possibly taking up cache space. So when you have a
function like this that is usually called as an internal function but
occasionally called directly, take the rcu read lock in both
functions. I think this will be removed anyway with the timerlist
change.

> 6. In timerlist_expired, rcu_read_unlock() appears to get called
>    at the end after the return statement (last line of the function).
It does indeed, thanks. 

> 7. May be my reading of the diff, but in timer_mod_ns it seems to be
>    introducing two rcu_read_unlock()s at the end of the function
>    (after 'Rearm if necessary') - should the first not be a
>    rcu_read_lock()?
Yes, thank you.

> 8. Again, may be my reading of the diff, but you appear to have
>    introduced a rcu_read_lock() outside the loop, but then
>    unlock it within the loop. If the loop iterates more than once,
>    won't it do more unlocks than locks?

Yes, I think the right fix is to omit the last rcu_read_unlock.

-- 
Mike Day | "Endurance is a Virtue"

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [Qemu-devel] [PATCH] [RFC] Convert Qemu Timer List and Active Timers to RCU
  2014-02-15 20:33   ` Mike Day
@ 2014-02-15 22:00     ` Alex Bligh
  0 siblings, 0 replies; 10+ messages in thread
From: Alex Bligh @ 2014-02-15 22:00 UTC (permalink / raw)
  To: Mike Day; +Cc: pbonzini, qemu-devel, Alex Bligh, peter.maydell

Mike,

On 15 Feb 2014, at 20:33, Mike Day wrote:
>> 
>> 2. You have introduced rcu not only to protect active_timers, the list of
>>   active timers within one timerlist, but also to protect (I think)
>>   the list of timerlists, as evidenced by the fact you have
>>   reclaim_timer_list as well as reclaim_timer. We currently have no
>>   locking in the creation/deletion of timerlists as these only happen on init
>>   or on when an AioContext is created/destroyed (as these are the only things
>>   that create or delete TimerListGroups); both these operations are
>>   required to happen within the main thread and are protected by the
>>   BQL. Indeed currently nothing every destroys an AioContext. I suspect
>>   if there is really a need to destroy timerlists in threads other than
>>   the main thread we are going to have more issues than locking of the list of
>>   timerlists. I suggest you remove these changes and solely look at protecting
>>   the list of active timers (as opposed to the list of timerlists) by RCU.
> 
> I thought about this a lot and and I'm glad you are bringing up the
> timerlist here. As you said, I couldn't find anywhere in the code base
> that manipulates this list. But I also noticed that the timer and clock
> structures are accessed by unrelated code modules. I don't know enough
> about the timer usage to predict how timers will be used by other code
> modules so I decided to overprotect and see what you thought about it.
> 
> Removing the protection of the timerlist will vastly simplify this
> patch. Eventually, though, when we remove the BQL, I'm assuming we will
> need to protect the timerlist somehow. I'm happy now re-factoring toward
> a much simpler patch that just protects the active timer list.

I think you mean "protect the list of timerlists somehow". I agree.

I suppose RCU would ultimately be better than a mutex as it's the
archetypal read lots write almost never list. I suppose I somewhat
grudgingly see the point! If you want to do that this time round, I'd
suggest you break it into two patches.

>> 4. In timerlist_notify(), you are holding the rcu_read_lock across
>>   the callback or qemu_notify_event. Why is this necessary? EG can't you
>>   do something like:
>> 
>>    void timerlist_notify(QEMUTimerList *timer_list)
>>    {
>>         rcu_read_lock();
>>         if (atomic_rcu_read(&timer_list->notify_cb)) {
>>            QEMUTimerListNotifyCB *notify_cb = timer_list->notify_cb;
>>            void *notify_opaque = timer_list->notify_opaque;
>>            rcu_read_unlock();
>>            notify_cb(notify_opaque);
>>        } else {
>>            rcu_read_unlock();
>>            qemu_notify_event();
>>        }
>>    }
> 
> I thought a lot about this one and went back and forth a couple of
> times, so I'm glad you are looking at it. My concern is that the timer
> could be reclaimed during execution ofthe callback. And, the callback
> may be referincing the timer itself. Even if the callback takes an
> rcu_read_lock, there is a window between when the timer code leaves the
> rcu critical section and calls the notification code, and when the
> notification callback enters the rcu critical section via
> rcu_read_lock. It may be possible though unlikely that a reclaim could
> occur during this small window.

Fair enough.

> The downside, as far as I know, is that the notification callback will
> take a long time to execute. This is not good, but it won't cause an
> error by itself.

Agree.

>> 5. Similarly to the above, in qemu_clock_notify, you take the
>>   rcu_read_lock, and then do a QLIST_FOREACH_RCU. That will mean
>>   the rcu_read_lock is taken anyway in 99% of calls to timerlist_notify.
>>   This is going through the list of timerlists, and that list is in
>>   practice static (protected by the BQL). Similarly in
>>   qemu_clock_deadline_ns_all, timerlist_get_clock, etc.
> 
> My thinking here is that rcu_read_lock can be called recursively with no
> bad effect other than possibly taking up cache space. So when you have a
> function like this that is usually called as an internal function but
> occasionally called directly, take the rcu read lock in both
> functions. I think this will be removed anyway with the timerlist
> change.

It will. But I suppose it isn't a great concern.

-- 
Alex Bligh

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [Qemu-devel] [PATCH] [RFC] Convert Qemu Timer List and Active Timers to RCU
  2014-02-15 10:23 ` Alex Bligh
  2014-02-15 20:33   ` Mike Day
@ 2014-02-17 16:13   ` Mike Day
  2014-02-17 16:37     ` Alex Bligh
  1 sibling, 1 reply; 10+ messages in thread
From: Mike Day @ 2014-02-17 16:13 UTC (permalink / raw)
  To: Alex Bligh; +Cc: Paolo Bonzini, qemu-devel, Peter Maydell

> 1. You seem to be removing the use of the active_timers_lock and replacing it by
>    rcu (fine). However, you seem to have left the qemu_mutex_destroy in
>    timerlist_free, and left the mutex in QEMUTimerList. Any reason why we need  both?
>

I responded incorrectly to this yesterday. We still need the mutex
here (active_timers_lock) to provide synchronization for list updates.
The difference now is that we don't need to hold the mutex for
traversing the list. But to update the list we still need to hold the
mutex.

Mike

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [Qemu-devel] [PATCH] [RFC] Convert Qemu Timer List and Active Timers to RCU
  2014-02-17 16:13   ` Mike Day
@ 2014-02-17 16:37     ` Alex Bligh
  0 siblings, 0 replies; 10+ messages in thread
From: Alex Bligh @ 2014-02-17 16:37 UTC (permalink / raw)
  To: Mike Day; +Cc: Paolo Bonzini, qemu-devel, Alex Bligh, Peter Maydell


On 17 Feb 2014, at 16:13, Mike Day wrote:

>> 1. You seem to be removing the use of the active_timers_lock and replacing it by
>>   rcu (fine). However, you seem to have left the qemu_mutex_destroy in
>>   timerlist_free, and left the mutex in QEMUTimerList. Any reason why we need  both?
>> 
> 
> I responded incorrectly to this yesterday. We still need the mutex
> here (active_timers_lock) to provide synchronization for list updates.
> The difference now is that we don't need to hold the mutex for
> traversing the list. But to update the list we still need to hold the
> mutex.

Of course, my bad.

-- 
Alex Bligh

^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2014-02-17 16:37 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-02-12 19:09 [Qemu-devel] [PATCH] [RFC] Convert Qemu Timer List and Active Timers to RCU Mike Day
2014-02-13  9:11 ` Alex Bligh
2014-02-13  9:25   ` Paolo Bonzini
2014-02-13 12:06     ` Mike Day
2014-02-13 12:57       ` Alex Bligh
2014-02-15 10:23 ` Alex Bligh
2014-02-15 20:33   ` Mike Day
2014-02-15 22:00     ` Alex Bligh
2014-02-17 16:13   ` Mike Day
2014-02-17 16:37     ` Alex Bligh

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.