All of lore.kernel.org
 help / color / mirror / Atom feed
From: Jeff Cody <jcody@redhat.com>
To: qemu-devel@nongnu.org
Cc: qemu-block@nongnu.org, mreitz@redhat.com, stefanha@redhat.com,
	famz@redhat.com, pbonzini@redhat.com, kwolf@redhat.com
Subject: [Qemu-devel] [PATCH v2 for-2.11 2/4] coroutine: abort if we try to schedule or enter a pending coroutine
Date: Mon, 20 Nov 2017 21:23:24 -0500	[thread overview]
Message-ID: <1f0fd95c2096688add2c7b3cfcd7016756ef19fb.1511230683.git.jcody@redhat.com> (raw)
In-Reply-To: <cover.1511230683.git.jcody@redhat.com>
In-Reply-To: <cover.1511230683.git.jcody@redhat.com>

The previous patch fixed a race condition in which coroutines were
being executed twice, or after coroutine deletion.

We can detect common scenarios when this happens, and print an error
message and abort before we corrupt memory / data, or segfault.

This patch will abort if an attempt to enter a coroutine is made while
it is currently pending execution, either in a specific AioContext bh,
or pending execution via a timer.  It will also abort if a coroutine
is scheduled before a prior scheduled run has occurred.

We cannot rely on the existing co->caller check for recursive re-entry
to catch this, as the coroutine may run and exit with
COROUTINE_TERMINATE before the scheduled coroutine executes.

(This is the scenario that was occurring and was fixed in the previous
patch).

Signed-off-by: Jeff Cody <jcody@redhat.com>
---
 include/qemu/coroutine_int.h |  6 ++++++
 util/async.c                 | 11 +++++++++++
 util/qemu-coroutine-sleep.c  | 11 +++++++++++
 util/qemu-coroutine.c        | 11 +++++++++++
 4 files changed, 39 insertions(+)

diff --git a/include/qemu/coroutine_int.h b/include/qemu/coroutine_int.h
index cb98892..56e4c48 100644
--- a/include/qemu/coroutine_int.h
+++ b/include/qemu/coroutine_int.h
@@ -53,6 +53,12 @@ struct Coroutine {
 
     /* Only used when the coroutine has yielded.  */
     AioContext *ctx;
+
+    /* Used to catch and abort on illegal co-routine entry.
+     * Will contain the name of the function that had first
+     * scheduled the coroutine. */
+    const char *scheduled;
+
     QSIMPLEQ_ENTRY(Coroutine) co_queue_next;
     QSLIST_ENTRY(Coroutine) co_scheduled_next;
 };
diff --git a/util/async.c b/util/async.c
index 0e1bd87..49174b3 100644
--- a/util/async.c
+++ b/util/async.c
@@ -388,6 +388,7 @@ static void co_schedule_bh_cb(void *opaque)
         QSLIST_REMOVE_HEAD(&straight, co_scheduled_next);
         trace_aio_co_schedule_bh_cb(ctx, co);
         aio_context_acquire(ctx);
+        atomic_set(&co->scheduled, NULL);
         qemu_coroutine_enter(co);
         aio_context_release(ctx);
     }
@@ -438,6 +439,16 @@ fail:
 void aio_co_schedule(AioContext *ctx, Coroutine *co)
 {
     trace_aio_co_schedule(ctx, co);
+    const char *scheduled = atomic_read(&co->scheduled);
+
+    if (scheduled) {
+        fprintf(stderr,
+                "%s: Co-routine was already scheduled in '%s'\n",
+                __func__, scheduled);
+        abort();
+    }
+    atomic_set(&co->scheduled, __func__);
+
     QSLIST_INSERT_HEAD_ATOMIC(&ctx->scheduled_coroutines,
                               co, co_scheduled_next);
     qemu_bh_schedule(ctx->co_schedule_bh);
diff --git a/util/qemu-coroutine-sleep.c b/util/qemu-coroutine-sleep.c
index 9c56550..38dc4c8 100644
--- a/util/qemu-coroutine-sleep.c
+++ b/util/qemu-coroutine-sleep.c
@@ -13,6 +13,7 @@
 
 #include "qemu/osdep.h"
 #include "qemu/coroutine.h"
+#include "qemu/coroutine_int.h"
 #include "qemu/timer.h"
 #include "block/aio.h"
 
@@ -25,6 +26,7 @@ static void co_sleep_cb(void *opaque)
 {
     CoSleepCB *sleep_cb = opaque;
 
+    atomic_set(&sleep_cb->co->scheduled, NULL);
     aio_co_wake(sleep_cb->co);
 }
 
@@ -34,6 +36,15 @@ void coroutine_fn co_aio_sleep_ns(AioContext *ctx, QEMUClockType type,
     CoSleepCB sleep_cb = {
         .co = qemu_coroutine_self(),
     };
+    const char *scheduled = atomic_read(&sleep_cb.co->scheduled);
+
+    if (scheduled) {
+        fprintf(stderr,
+                "%s: Co-routine was already scheduled in '%s'\n",
+                __func__, scheduled);
+        abort();
+    }
+    atomic_set(&sleep_cb.co->scheduled, __func__);
     sleep_cb.ts = aio_timer_new(ctx, type, SCALE_NS, co_sleep_cb, &sleep_cb);
     timer_mod(sleep_cb.ts, qemu_clock_get_ns(type) + ns);
     qemu_coroutine_yield();
diff --git a/util/qemu-coroutine.c b/util/qemu-coroutine.c
index d6095c1..fbfd0ad 100644
--- a/util/qemu-coroutine.c
+++ b/util/qemu-coroutine.c
@@ -106,9 +106,20 @@ void qemu_aio_coroutine_enter(AioContext *ctx, Coroutine *co)
 {
     Coroutine *self = qemu_coroutine_self();
     CoroutineAction ret;
+    const char *scheduled = atomic_read(&co->scheduled);
 
     trace_qemu_aio_coroutine_enter(ctx, self, co, co->entry_arg);
 
+    /* if the Coroutine has already been scheduled, entering it again will
+     * cause us to enter it twice, potentially even after the coroutine has
+     * been deleted */
+    if (scheduled) {
+        fprintf(stderr,
+                "%s: Co-routine was already scheduled in '%s'\n",
+                __func__, scheduled);
+        abort();
+    }
+
     if (co->caller) {
         fprintf(stderr, "Co-routine re-entered recursively\n");
         abort();
-- 
2.9.5

  parent reply	other threads:[~2017-11-21  2:23 UTC|newest]

Thread overview: 13+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-11-21  2:23 [Qemu-devel] [PATCH v2 for-2.11 0/4] Fix segfault in blockjob race condition Jeff Cody
2017-11-21  2:23 ` [Qemu-devel] [PATCH v2 for-2.11 1/4] blockjob: do not allow coroutine double entry or entry-after-completion Jeff Cody
2017-11-21 10:49   ` Stefan Hajnoczi
2017-11-21 13:12     ` Paolo Bonzini
2017-11-21 13:26       ` Jeff Cody
2017-11-21  2:23 ` Jeff Cody [this message]
2017-11-21 10:59   ` [Qemu-devel] [PATCH v2 for-2.11 2/4] coroutine: abort if we try to schedule or enter a pending coroutine Stefan Hajnoczi
2017-11-21 13:11     ` Paolo Bonzini
2017-11-21 12:20   ` Eric Blake
2017-11-21 13:47   ` Kevin Wolf
2017-11-21 15:11     ` Paolo Bonzini
2017-11-21  2:23 ` [Qemu-devel] [PATCH v2 for-2.11 3/4] qemu-iotests: add option in common.qemu for mismatch only Jeff Cody
2017-11-21  2:23 ` [Qemu-devel] [PATCH v2 for-2.11 4/4] qemu-iotest: add test for blockjob coroutine race condition Jeff Cody

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1f0fd95c2096688add2c7b3cfcd7016756ef19fb.1511230683.git.jcody@redhat.com \
    --to=jcody@redhat.com \
    --cc=famz@redhat.com \
    --cc=kwolf@redhat.com \
    --cc=mreitz@redhat.com \
    --cc=pbonzini@redhat.com \
    --cc=qemu-block@nongnu.org \
    --cc=qemu-devel@nongnu.org \
    --cc=stefanha@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.