All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/2] mirror: Fix hang (operation waiting for itself)
@ 2020-01-28 15:17 Kevin Wolf
  2020-01-28 15:17 ` [PATCH 1/2] mirror: Store MirrorOp.co for debuggability Kevin Wolf
  2020-01-28 15:17 ` [PATCH 2/2] mirror: Don't let an operation wait for itself Kevin Wolf
  0 siblings, 2 replies; 5+ messages in thread
From: Kevin Wolf @ 2020-01-28 15:17 UTC (permalink / raw)
  To: qemu-block; +Cc: kwolf, jsnow, qemu-devel, mreitz

Kevin Wolf (2):
  mirror: Store MirrorOp.co for debuggability
  mirror: Don't let an operation wait for itself

 block/mirror.c | 23 ++++++++++++++---------
 1 file changed, 14 insertions(+), 9 deletions(-)

-- 
2.20.1



^ permalink raw reply	[flat|nested] 5+ messages in thread

* [PATCH 1/2] mirror: Store MirrorOp.co for debuggability
  2020-01-28 15:17 [PATCH 0/2] mirror: Fix hang (operation waiting for itself) Kevin Wolf
@ 2020-01-28 15:17 ` Kevin Wolf
  2020-01-28 16:26   ` Eric Blake
  2020-01-28 15:17 ` [PATCH 2/2] mirror: Don't let an operation wait for itself Kevin Wolf
  1 sibling, 1 reply; 5+ messages in thread
From: Kevin Wolf @ 2020-01-28 15:17 UTC (permalink / raw)
  To: qemu-block; +Cc: kwolf, jsnow, qemu-devel, mreitz

If a coroutine is launched, but the coroutine pointer isn't stored
anywhere, debugging any problems inside the coroutine is quite hard.
Let's store the coroutine pointer of a mirror operation in MirrorOp to
have it available in the debugger.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 block/mirror.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/block/mirror.c b/block/mirror.c
index f0f2d9dff1..8959e4255f 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -103,6 +103,7 @@ struct MirrorOp {
     bool is_pseudo_op;
     bool is_active_write;
     CoQueue waiting_requests;
+    Coroutine *co;
 
     QTAILQ_ENTRY(MirrorOp) next;
 };
@@ -429,6 +430,7 @@ static unsigned mirror_perform(MirrorBlockJob *s, int64_t offset,
     default:
         abort();
     }
+    op->co = co;
 
     QTAILQ_INSERT_TAIL(&s->ops_in_flight, op, next);
     qemu_coroutine_enter(co);
-- 
2.20.1



^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [PATCH 2/2] mirror: Don't let an operation wait for itself
  2020-01-28 15:17 [PATCH 0/2] mirror: Fix hang (operation waiting for itself) Kevin Wolf
  2020-01-28 15:17 ` [PATCH 1/2] mirror: Store MirrorOp.co for debuggability Kevin Wolf
@ 2020-01-28 15:17 ` Kevin Wolf
  2020-01-28 16:27   ` Eric Blake
  1 sibling, 1 reply; 5+ messages in thread
From: Kevin Wolf @ 2020-01-28 15:17 UTC (permalink / raw)
  To: qemu-block; +Cc: kwolf, jsnow, qemu-devel, mreitz

mirror_wait_for_free_in_flight_slot() just picks a random operation to
wait for. However, when mirror_co_read() waits for free slots, its
MirrorOp is already in s->ops_in_flight, so if not enough slots are
immediately available, an operation can end up waiting for itself to
complete, which results in a hang.

Fix this by passing the current MirrorOp and skipping this operation
when picking an operation to wait for.

Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=1794692
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 block/mirror.c | 21 ++++++++++++---------
 1 file changed, 12 insertions(+), 9 deletions(-)

diff --git a/block/mirror.c b/block/mirror.c
index 8959e4255f..cacbc70014 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -283,11 +283,14 @@ static int mirror_cow_align(MirrorBlockJob *s, int64_t *offset,
 }
 
 static inline void coroutine_fn
-mirror_wait_for_any_operation(MirrorBlockJob *s, bool active)
+mirror_wait_for_any_operation(MirrorBlockJob *s, MirrorOp *self, bool active)
 {
     MirrorOp *op;
 
     QTAILQ_FOREACH(op, &s->ops_in_flight, next) {
+        if (self == op) {
+            continue;
+        }
         /* Do not wait on pseudo ops, because it may in turn wait on
          * some other operation to start, which may in fact be the
          * caller of this function.  Since there is only one pseudo op
@@ -302,10 +305,10 @@ mirror_wait_for_any_operation(MirrorBlockJob *s, bool active)
 }
 
 static inline void coroutine_fn
-mirror_wait_for_free_in_flight_slot(MirrorBlockJob *s)
+mirror_wait_for_free_in_flight_slot(MirrorBlockJob *s, MirrorOp *self)
 {
     /* Only non-active operations use up in-flight slots */
-    mirror_wait_for_any_operation(s, false);
+    mirror_wait_for_any_operation(s, self, false);
 }
 
 /* Perform a mirror copy operation.
@@ -348,7 +351,7 @@ static void coroutine_fn mirror_co_read(void *opaque)
 
     while (s->buf_free_count < nb_chunks) {
         trace_mirror_yield_in_flight(s, op->offset, s->in_flight);
-        mirror_wait_for_free_in_flight_slot(s);
+        mirror_wait_for_free_in_flight_slot(s, op);
     }
 
     /* Now make a QEMUIOVector taking enough granularity-sized chunks
@@ -555,7 +558,7 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
 
         while (s->in_flight >= MAX_IN_FLIGHT) {
             trace_mirror_yield_in_flight(s, offset, s->in_flight);
-            mirror_wait_for_free_in_flight_slot(s);
+            mirror_wait_for_free_in_flight_slot(s, pseudo_op);
         }
 
         if (s->ret < 0) {
@@ -609,7 +612,7 @@ static void mirror_free_init(MirrorBlockJob *s)
 static void coroutine_fn mirror_wait_for_all_io(MirrorBlockJob *s)
 {
     while (s->in_flight > 0) {
-        mirror_wait_for_free_in_flight_slot(s);
+        mirror_wait_for_free_in_flight_slot(s, NULL);
     }
 }
 
@@ -794,7 +797,7 @@ static int coroutine_fn mirror_dirty_init(MirrorBlockJob *s)
             if (s->in_flight >= MAX_IN_FLIGHT) {
                 trace_mirror_yield(s, UINT64_MAX, s->buf_free_count,
                                    s->in_flight);
-                mirror_wait_for_free_in_flight_slot(s);
+                mirror_wait_for_free_in_flight_slot(s, NULL);
                 continue;
             }
 
@@ -947,7 +950,7 @@ static int coroutine_fn mirror_run(Job *job, Error **errp)
         /* Do not start passive operations while there are active
          * writes in progress */
         while (s->in_active_write_counter) {
-            mirror_wait_for_any_operation(s, true);
+            mirror_wait_for_any_operation(s, NULL, true);
         }
 
         if (s->ret < 0) {
@@ -973,7 +976,7 @@ static int coroutine_fn mirror_run(Job *job, Error **errp)
             if (s->in_flight >= MAX_IN_FLIGHT || s->buf_free_count == 0 ||
                 (cnt == 0 && s->in_flight > 0)) {
                 trace_mirror_yield(s, cnt, s->buf_free_count, s->in_flight);
-                mirror_wait_for_free_in_flight_slot(s);
+                mirror_wait_for_free_in_flight_slot(s, NULL);
                 continue;
             } else if (cnt != 0) {
                 delay_ns = mirror_iteration(s);
-- 
2.20.1



^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH 1/2] mirror: Store MirrorOp.co for debuggability
  2020-01-28 15:17 ` [PATCH 1/2] mirror: Store MirrorOp.co for debuggability Kevin Wolf
@ 2020-01-28 16:26   ` Eric Blake
  0 siblings, 0 replies; 5+ messages in thread
From: Eric Blake @ 2020-01-28 16:26 UTC (permalink / raw)
  To: Kevin Wolf, qemu-block; +Cc: jsnow, qemu-devel, mreitz

On 1/28/20 9:17 AM, Kevin Wolf wrote:
> If a coroutine is launched, but the coroutine pointer isn't stored
> anywhere, debugging any problems inside the coroutine is quite hard.
> Let's store the coroutine pointer of a mirror operation in MirrorOp to
> have it available in the debugger.
> 
> Signed-off-by: Kevin Wolf <kwolf@redhat.com>
> ---
>   block/mirror.c | 2 ++
>   1 file changed, 2 insertions(+)

Doesn't change semantics other than an unobservable performance delay,
but the rewards in improved debuggability warrant it.

Reviewed-by: Eric Blake <eblake@redhat.com>

-- 
Eric Blake, Principal Software Engineer
Red Hat, Inc.           +1-919-301-3226
Virtualization:  qemu.org | libvirt.org



^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH 2/2] mirror: Don't let an operation wait for itself
  2020-01-28 15:17 ` [PATCH 2/2] mirror: Don't let an operation wait for itself Kevin Wolf
@ 2020-01-28 16:27   ` Eric Blake
  0 siblings, 0 replies; 5+ messages in thread
From: Eric Blake @ 2020-01-28 16:27 UTC (permalink / raw)
  To: Kevin Wolf, qemu-block; +Cc: jsnow, qemu-devel, mreitz

On 1/28/20 9:17 AM, Kevin Wolf wrote:
> mirror_wait_for_free_in_flight_slot() just picks a random operation to
> wait for. However, when mirror_co_read() waits for free slots, its
> MirrorOp is already in s->ops_in_flight, so if not enough slots are
> immediately available, an operation can end up waiting for itself to
> complete, which results in a hang.

Eww. That can't have been fun to debug.

> 
> Fix this by passing the current MirrorOp and skipping this operation
> when picking an operation to wait for.
> 
> Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=1794692
> Signed-off-by: Kevin Wolf <kwolf@redhat.com>
> ---
>   block/mirror.c | 21 ++++++++++++---------
>   1 file changed, 12 insertions(+), 9 deletions(-)
> 
> diff --git a/block/mirror.c b/block/mirror.c
> index 8959e4255f..cacbc70014 100644
> --- a/block/mirror.c
> +++ b/block/mirror.c
> @@ -283,11 +283,14 @@ static int mirror_cow_align(MirrorBlockJob *s, int64_t *offset,
>   }
>   
>   static inline void coroutine_fn
> -mirror_wait_for_any_operation(MirrorBlockJob *s, bool active)
> +mirror_wait_for_any_operation(MirrorBlockJob *s, MirrorOp *self, bool active)
>   {
>       MirrorOp *op;
>   
>       QTAILQ_FOREACH(op, &s->ops_in_flight, next) {
> +        if (self == op) {
> +            continue;
> +        }

But this is the obvious fix.

Reviewed-by: Eric Blake <eblake@redhat.com>

-- 
Eric Blake, Principal Software Engineer
Red Hat, Inc.           +1-919-301-3226
Virtualization:  qemu.org | libvirt.org



^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2020-01-28 16:29 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-01-28 15:17 [PATCH 0/2] mirror: Fix hang (operation waiting for itself) Kevin Wolf
2020-01-28 15:17 ` [PATCH 1/2] mirror: Store MirrorOp.co for debuggability Kevin Wolf
2020-01-28 16:26   ` Eric Blake
2020-01-28 15:17 ` [PATCH 2/2] mirror: Don't let an operation wait for itself Kevin Wolf
2020-01-28 16:27   ` Eric Blake

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.