All of lore.kernel.org
 help / color / mirror / Atom feed
From: Kevin Wolf <kwolf@redhat.com>
To: qemu-block@nongnu.org
Cc: kwolf@redhat.com, mreitz@redhat.com, pbonzini@redhat.com,
	famz@redhat.com, stefanha@redhat.com, eblake@redhat.com,
	qemu-devel@nongnu.org
Subject: [Qemu-devel] [PATCH v2 07/20] block: Really pause block jobs on drain
Date: Tue, 29 May 2018 19:21:43 +0200	[thread overview]
Message-ID: <20180529172156.29311-8-kwolf@redhat.com> (raw)
In-Reply-To: <20180529172156.29311-1-kwolf@redhat.com>

We already requested that block jobs be paused in .bdrv_drained_begin,
but no guarantee was made that the job was actually inactive at the
point where bdrv_drained_begin() returned.

This introduces a new callback BdrvChildRole.bdrv_drained_poll() and
uses it to make bdrv_drain_poll() consider block jobs using the node to
be drained.

For the test case to work as expected, we have to switch from
block_job_sleep_ns() to qemu_co_sleep_ns() so that the test job is even
considered active and must be waited for when draining the node.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 include/block/block.h        |  8 ++++++++
 include/block/block_int.h    |  7 +++++++
 include/block/blockjob_int.h |  8 ++++++++
 block.c                      |  9 +++++++++
 block/io.c                   | 40 ++++++++++++++++++++++++++++++++++------
 block/mirror.c               |  8 ++++++++
 blockjob.c                   | 23 +++++++++++++++++++++++
 tests/test-bdrv-drain.c      | 18 ++++++++++--------
 8 files changed, 107 insertions(+), 14 deletions(-)

diff --git a/include/block/block.h b/include/block/block.h
index 3894edda9d..52f07da3d3 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -566,6 +566,14 @@ void bdrv_parent_drained_begin(BlockDriverState *bs, BdrvChild *ignore);
 void bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore);
 
 /**
+ * bdrv_drain_poll:
+ *
+ * Poll for pending requests in @bs and its parents (except for
+ * @ignore_parent). This is part of bdrv_drained_begin.
+ */
+bool bdrv_drain_poll(BlockDriverState *bs, BdrvChild *ignore_parent);
+
+/**
  * bdrv_drained_begin:
  *
  * Begin a quiesced section for exclusive access to the BDS, by disabling
diff --git a/include/block/block_int.h b/include/block/block_int.h
index 6c0927bce3..feba86888e 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -573,6 +573,13 @@ struct BdrvChildRole {
     void (*drained_begin)(BdrvChild *child);
     void (*drained_end)(BdrvChild *child);
 
+    /*
+     * Returns whether the parent has pending requests for the child. This
+     * callback is polled after .drained_begin() has been called until all
+     * activity on the child has stopped.
+     */
+    bool (*drained_poll)(BdrvChild *child);
+
     /* Notifies the parent that the child has been activated/inactivated (e.g.
      * when migration is completing) and it can start/stop requesting
      * permissions and doing I/O on it. */
diff --git a/include/block/blockjob_int.h b/include/block/blockjob_int.h
index 5cd50c6639..e4a318dd15 100644
--- a/include/block/blockjob_int.h
+++ b/include/block/blockjob_int.h
@@ -39,6 +39,14 @@ struct BlockJobDriver {
     JobDriver job_driver;
 
     /*
+     * Returns whether the job has pending requests for the child or will
+     * submit new requests before the next pause point. This callback is polled
+     * in the context of draining a job node after requesting that the job be
+     * paused, until all activity on the child has stopped.
+     */
+    bool (*drained_poll)(BlockJob *job);
+
+    /*
      * If the callback is not NULL, it will be invoked before the job is
      * resumed in a new AioContext.  This is the place to move any resources
      * besides job->blk to the new AioContext.
diff --git a/block.c b/block.c
index 501b64c819..3c654a8a63 100644
--- a/block.c
+++ b/block.c
@@ -820,6 +820,12 @@ static void bdrv_child_cb_drained_begin(BdrvChild *child)
     bdrv_drained_begin(bs);
 }
 
+static bool bdrv_child_cb_drained_poll(BdrvChild *child)
+{
+    BlockDriverState *bs = child->opaque;
+    return bdrv_drain_poll(bs, NULL);
+}
+
 static void bdrv_child_cb_drained_end(BdrvChild *child)
 {
     BlockDriverState *bs = child->opaque;
@@ -904,6 +910,7 @@ const BdrvChildRole child_file = {
     .get_parent_desc = bdrv_child_get_parent_desc,
     .inherit_options = bdrv_inherited_options,
     .drained_begin   = bdrv_child_cb_drained_begin,
+    .drained_poll    = bdrv_child_cb_drained_poll,
     .drained_end     = bdrv_child_cb_drained_end,
     .attach          = bdrv_child_cb_attach,
     .detach          = bdrv_child_cb_detach,
@@ -928,6 +935,7 @@ const BdrvChildRole child_format = {
     .get_parent_desc = bdrv_child_get_parent_desc,
     .inherit_options = bdrv_inherited_fmt_options,
     .drained_begin   = bdrv_child_cb_drained_begin,
+    .drained_poll    = bdrv_child_cb_drained_poll,
     .drained_end     = bdrv_child_cb_drained_end,
     .attach          = bdrv_child_cb_attach,
     .detach          = bdrv_child_cb_detach,
@@ -1047,6 +1055,7 @@ const BdrvChildRole child_backing = {
     .detach          = bdrv_backing_detach,
     .inherit_options = bdrv_backing_options,
     .drained_begin   = bdrv_child_cb_drained_begin,
+    .drained_poll    = bdrv_child_cb_drained_poll,
     .drained_end     = bdrv_child_cb_drained_end,
     .inactivate      = bdrv_child_cb_inactivate,
     .update_filename = bdrv_backing_update_filename,
diff --git a/block/io.c b/block/io.c
index aa973f1d4a..b76ef92009 100644
--- a/block/io.c
+++ b/block/io.c
@@ -69,6 +69,23 @@ void bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore)
     }
 }
 
+static bool bdrv_parent_drained_poll(BlockDriverState *bs, BdrvChild *ignore)
+{
+    BdrvChild *c, *next;
+    bool busy = false;
+
+    QLIST_FOREACH_SAFE(c, &bs->parents, next_parent, next) {
+        if (c == ignore) {
+            continue;
+        }
+        if (c->role->drained_poll) {
+            busy |= c->role->drained_poll(c);
+        }
+    }
+
+    return busy;
+}
+
 static void bdrv_merge_limits(BlockLimits *dst, const BlockLimits *src)
 {
     dst->opt_transfer = MAX(dst->opt_transfer, src->opt_transfer);
@@ -183,21 +200,32 @@ static void bdrv_drain_invoke(BlockDriverState *bs, bool begin)
 }
 
 /* Returns true if BDRV_POLL_WHILE() should go into a blocking aio_poll() */
-static bool bdrv_drain_poll(BlockDriverState *bs)
+bool bdrv_drain_poll(BlockDriverState *bs, BdrvChild *ignore_parent)
+{
+    if (bdrv_parent_drained_poll(bs, ignore_parent)) {
+        return true;
+    }
+
+    return atomic_read(&bs->in_flight);
+}
+
+static bool bdrv_drain_poll_top_level(BlockDriverState *bs,
+                                      BdrvChild *ignore_parent)
 {
     /* Execute pending BHs first and check everything else only after the BHs
      * have executed. */
     while (aio_poll(bs->aio_context, false));
-    return atomic_read(&bs->in_flight);
+
+    return bdrv_drain_poll(bs, ignore_parent);
 }
 
-static bool bdrv_drain_recurse(BlockDriverState *bs)
+static bool bdrv_drain_recurse(BlockDriverState *bs, BdrvChild *parent)
 {
     BdrvChild *child, *tmp;
     bool waited;
 
     /* Wait for drained requests to finish */
-    waited = BDRV_POLL_WHILE(bs, bdrv_drain_poll(bs));
+    waited = BDRV_POLL_WHILE(bs, bdrv_drain_poll_top_level(bs, parent));
 
     QLIST_FOREACH_SAFE(child, &bs->children, next, tmp) {
         BlockDriverState *bs = child->bs;
@@ -214,7 +242,7 @@ static bool bdrv_drain_recurse(BlockDriverState *bs)
              */
             bdrv_ref(bs);
         }
-        waited |= bdrv_drain_recurse(bs);
+        waited |= bdrv_drain_recurse(bs, child);
         if (in_main_loop) {
             bdrv_unref(bs);
         }
@@ -290,7 +318,7 @@ void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive,
 
     bdrv_parent_drained_begin(bs, parent);
     bdrv_drain_invoke(bs, true);
-    bdrv_drain_recurse(bs);
+    bdrv_drain_recurse(bs, parent);
 
     if (recursive) {
         bs->recursive_quiesce_counter++;
diff --git a/block/mirror.c b/block/mirror.c
index dcb66ec3be..fb1d686ff6 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -964,6 +964,12 @@ static void mirror_pause(Job *job)
     mirror_wait_for_all_io(s);
 }
 
+static bool mirror_drained_poll(BlockJob *job)
+{
+    MirrorBlockJob *s = container_of(job, MirrorBlockJob, common);
+    return !!s->in_flight;
+}
+
 static void mirror_attached_aio_context(BlockJob *job, AioContext *new_context)
 {
     MirrorBlockJob *s = container_of(job, MirrorBlockJob, common);
@@ -997,6 +1003,7 @@ static const BlockJobDriver mirror_job_driver = {
         .pause                  = mirror_pause,
         .complete               = mirror_complete,
     },
+    .drained_poll           = mirror_drained_poll,
     .attached_aio_context   = mirror_attached_aio_context,
     .drain                  = mirror_drain,
 };
@@ -1012,6 +1019,7 @@ static const BlockJobDriver commit_active_job_driver = {
         .pause                  = mirror_pause,
         .complete               = mirror_complete,
     },
+    .drained_poll           = mirror_drained_poll,
     .attached_aio_context   = mirror_attached_aio_context,
     .drain                  = mirror_drain,
 };
diff --git a/blockjob.c b/blockjob.c
index 0306533a2e..be5903aa96 100644
--- a/blockjob.c
+++ b/blockjob.c
@@ -155,6 +155,28 @@ static void child_job_drained_begin(BdrvChild *c)
     job_pause(&job->job);
 }
 
+static bool child_job_drained_poll(BdrvChild *c)
+{
+    BlockJob *bjob = c->opaque;
+    Job *job = &bjob->job;
+    const BlockJobDriver *drv = block_job_driver(bjob);
+
+    /* An inactive or completed job doesn't have any pending requests. Jobs
+     * with !job->busy are either already paused or have a pause point after
+     * being reentered, so no job driver code will run before they pause. */
+    if (!job->busy || job_is_completed(job) || job->deferred_to_main_loop) {
+        return false;
+    }
+
+    /* Otherwise, assume that it isn't fully stopped yet, but allow the job to
+     * override this assumption. */
+    if (drv->drained_poll) {
+        return drv->drained_poll(bjob);
+    } else {
+        return true;
+    }
+}
+
 static void child_job_drained_end(BdrvChild *c)
 {
     BlockJob *job = c->opaque;
@@ -164,6 +186,7 @@ static void child_job_drained_end(BdrvChild *c)
 static const BdrvChildRole child_job = {
     .get_parent_desc    = child_job_get_parent_desc,
     .drained_begin      = child_job_drained_begin,
+    .drained_poll       = child_job_drained_poll,
     .drained_end        = child_job_drained_end,
     .stay_at_node       = true,
 };
diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c
index 17127e63e4..756e7a1a7f 100644
--- a/tests/test-bdrv-drain.c
+++ b/tests/test-bdrv-drain.c
@@ -686,7 +686,11 @@ static void coroutine_fn test_job_start(void *opaque)
 
     job_transition_to_ready(&s->common.job);
     while (!s->should_complete) {
-        job_sleep_ns(&s->common.job, 100000);
+        /* Avoid block_job_sleep_ns() because it marks the job as !busy. We
+         * want to emulate some actual activity (probably some I/O) here so
+         * that drain has to wait for this acitivity to stop. */
+        qemu_co_sleep_ns(QEMU_CLOCK_REALTIME, 100000);
+        job_pause_point(&s->common.job);
     }
 
     job_defer_to_main_loop(&s->common.job, test_job_completed, NULL);
@@ -733,7 +737,7 @@ static void test_blockjob_common(enum drain_type drain_type)
 
     g_assert_cmpint(job->job.pause_count, ==, 0);
     g_assert_false(job->job.paused);
-    g_assert_false(job->job.busy); /* We're in job_sleep_ns() */
+    g_assert_true(job->job.busy); /* We're in job_sleep_ns() */
 
     do_drain_begin(drain_type, src);
 
@@ -743,15 +747,14 @@ static void test_blockjob_common(enum drain_type drain_type)
     } else {
         g_assert_cmpint(job->job.pause_count, ==, 1);
     }
-    /* XXX We don't wait until the job is actually paused. Is this okay? */
-    /* g_assert_true(job->job.paused); */
+    g_assert_true(job->job.paused);
     g_assert_false(job->job.busy); /* The job is paused */
 
     do_drain_end(drain_type, src);
 
     g_assert_cmpint(job->job.pause_count, ==, 0);
     g_assert_false(job->job.paused);
-    g_assert_false(job->job.busy); /* We're in job_sleep_ns() */
+    g_assert_true(job->job.busy); /* We're in qemu_co_sleep_ns() */
 
     do_drain_begin(drain_type, target);
 
@@ -761,15 +764,14 @@ static void test_blockjob_common(enum drain_type drain_type)
     } else {
         g_assert_cmpint(job->job.pause_count, ==, 1);
     }
-    /* XXX We don't wait until the job is actually paused. Is this okay? */
-    /* g_assert_true(job->job.paused); */
+    g_assert_true(job->job.paused);
     g_assert_false(job->job.busy); /* The job is paused */
 
     do_drain_end(drain_type, target);
 
     g_assert_cmpint(job->job.pause_count, ==, 0);
     g_assert_false(job->job.paused);
-    g_assert_false(job->job.busy); /* We're in job_sleep_ns() */
+    g_assert_true(job->job.busy); /* We're in job_sleep_ns() */
 
     ret = job_complete_sync(&job->job, &error_abort);
     g_assert_cmpint(ret, ==, 0);
-- 
2.13.6

  parent reply	other threads:[~2018-05-29 17:22 UTC|newest]

Thread overview: 24+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-05-29 17:21 [Qemu-devel] [PATCH v2 00/20] Drain fixes and cleanups, part 3 Kevin Wolf
2018-05-29 17:21 ` [Qemu-devel] [PATCH v2 01/20] test-bdrv-drain: bdrv_drain() works with cross-AioContext events Kevin Wolf
2018-05-29 17:21 ` [Qemu-devel] [PATCH v2 02/20] block: Use bdrv_do_drain_begin/end in bdrv_drain_all() Kevin Wolf
2018-05-29 17:21 ` [Qemu-devel] [PATCH v2 03/20] block: Remove 'recursive' parameter from bdrv_drain_invoke() Kevin Wolf
2018-05-29 17:21 ` [Qemu-devel] [PATCH v2 04/20] block: Don't manually poll in bdrv_drain_all() Kevin Wolf
2018-05-29 17:21 ` [Qemu-devel] [PATCH v2 05/20] tests/test-bdrv-drain: bdrv_drain_all() works in coroutines now Kevin Wolf
2018-05-29 17:21 ` [Qemu-devel] [PATCH v2 06/20] block: Avoid unnecessary aio_poll() in AIO_WAIT_WHILE() Kevin Wolf
2018-05-29 17:21 ` Kevin Wolf [this message]
2018-05-29 17:21 ` [Qemu-devel] [PATCH v2 08/20] block: Remove bdrv_drain_recurse() Kevin Wolf
2018-05-29 17:21 ` [Qemu-devel] [PATCH v2 09/20] test-bdrv-drain: Add test for node deletion Kevin Wolf
2018-05-29 17:21 ` [Qemu-devel] [PATCH v2 10/20] block: Drain recursively with a single BDRV_POLL_WHILE() Kevin Wolf
2018-05-29 17:21 ` [Qemu-devel] [PATCH v2 11/20] test-bdrv-drain: Test node deletion in subtree recursion Kevin Wolf
2018-05-29 17:21 ` [Qemu-devel] [PATCH v2 12/20] block: Don't poll in parent drain callbacks Kevin Wolf
2018-05-29 17:21 ` [Qemu-devel] [PATCH v2 13/20] test-bdrv-drain: Graph change through parent callback Kevin Wolf
2018-05-29 17:21 ` [Qemu-devel] [PATCH v2 14/20] block: Defer .bdrv_drain_begin callback to polling phase Kevin Wolf
2018-05-29 17:21 ` [Qemu-devel] [PATCH v2 15/20] test-bdrv-drain: Test that bdrv_drain_invoke() doesn't poll Kevin Wolf
2018-05-29 17:21 ` [Qemu-devel] [PATCH v2 16/20] block: Allow AIO_WAIT_WHILE with NULL ctx Kevin Wolf
2018-05-29 17:21 ` [Qemu-devel] [PATCH v2 17/20] block: Move bdrv_drain_all_begin() out of coroutine context Kevin Wolf
2018-05-29 17:21 ` [Qemu-devel] [PATCH v2 18/20] block: ignore_bds_parents parameter for drain functions Kevin Wolf
2018-05-29 17:21 ` [Qemu-devel] [PATCH v2 19/20] block: Allow graph changes in bdrv_drain_all_begin/end sections Kevin Wolf
2018-05-29 17:21 ` [Qemu-devel] [PATCH v2 20/20] test-bdrv-drain: Test graph changes in drain_all section Kevin Wolf
2018-05-29 17:45 ` [Qemu-devel] [PATCH v2 00/20] Drain fixes and cleanups, part 3 no-reply
2018-06-11 12:23 ` Kevin Wolf
2018-06-15 16:08   ` [Qemu-devel] [Qemu-block] " Kevin Wolf

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20180529172156.29311-8-kwolf@redhat.com \
    --to=kwolf@redhat.com \
    --cc=eblake@redhat.com \
    --cc=famz@redhat.com \
    --cc=mreitz@redhat.com \
    --cc=pbonzini@redhat.com \
    --cc=qemu-block@nongnu.org \
    --cc=qemu-devel@nongnu.org \
    --cc=stefanha@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.