All of lore.kernel.org
 help / color / mirror / Atom feed
From: Andrey Shinkevich <andrey.shinkevich@virtuozzo.com>
To: "qemu-devel@nongnu.org" <qemu-devel@nongnu.org>,
	"qemu-block@nongnu.org" <qemu-block@nongnu.org>
Cc: "jcody@redhat.com" <jcody@redhat.com>,
	"kwolf@redhat.com" <kwolf@redhat.com>,
	"mreitz@redhat.com" <mreitz@redhat.com>,
	Denis Lunev <den@virtuozzo.com>,
	Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Subject: Re: [Qemu-devel] [PATCH RFC 1/1] Stream block job involves copy-on-read filter
Date: Thu, 31 Jan 2019 14:02:14 +0000	[thread overview]
Message-ID: <7ccbcc44-9d41-2df9-e1af-88715f81c99a@virtuozzo.com> (raw)
In-Reply-To: <1548244464-633186-1-git-send-email-andrey.shinkevich@virtuozzo.com>

PINGing
Please help!

On 23/01/2019 14:54, Andrey Shinkevich wrote:
> The copy-on-read filter driver is applied to block-stream operations.
> The 'test_stream_parallel' in the file tests/qemu-iotests/030 runs
> jobs that use nodes for streaming in parallel through the backing chain.
> We've got filters being inserted to and removed from the backing chain
> while jobs are running. As a result, a filter node may be passed as the
> 'base' parameter to the stream_start() function when the base node name
> is not specified (the base node is identified by its file name which is
> the same to the related filter node).
> Another issue is that a function keeps the pointer to the filter BDS
> object that can be replaced and deleted after the co-routine switch.
> For example, the function backing_bs() returns the pointer to the
> backing BDS and the BDS reference counter is not incremented.
> A solution (or workaround) made with the given patch for block-stream
> job helps to pass all the iotests in the file tests/qemu-iotests/030.
> Any piece of advice how to amend the solution will be appreciated.
> I am looking forward to hearing from you.
> 
> Signed-off-by: Andrey Shinkevich <andrey.shinkevich@virtuozzo.com>
> ---
>   block/stream.c | 154 ++++++++++++++++++++++++++++++++++++++++++++++++++++-----
>   1 file changed, 143 insertions(+), 11 deletions(-)
> 
> diff --git a/block/stream.c b/block/stream.c
> index 7a49ac0..18e51b3 100644
> --- a/block/stream.c
> +++ b/block/stream.c
> @@ -16,6 +16,7 @@
>   #include "block/block_int.h"
>   #include "block/blockjob_int.h"
>   #include "qapi/error.h"
> +#include "qapi/qmp/qdict.h"
>   #include "qapi/qmp/qerror.h"
>   #include "qemu/ratelimit.h"
>   #include "sysemu/block-backend.h"
> @@ -35,8 +36,26 @@ typedef struct StreamBlockJob {
>       BlockdevOnError on_error;
>       char *backing_file_str;
>       bool bs_read_only;
> +    BlockDriverState *cor_filter_bs;
> +    BlockDriverState *target_bs;
>   } StreamBlockJob;
>   
> +static BlockDriverState *child_file_bs(BlockDriverState *bs)
> +{
> +    return bs->file ? bs->file->bs : NULL;
> +}
> +
> +static BlockDriverState *skip_filter(BlockDriverState *bs)
> +{
> +    BlockDriverState *ret_bs = bs;
> +
> +    while (ret_bs && ret_bs->drv && ret_bs->drv->is_filter) {
> +        ret_bs = child_file_bs(ret_bs);
> +    }
> +
> +    return ret_bs;
> +}
> +
>   static int coroutine_fn stream_populate(BlockBackend *blk,
>                                           int64_t offset, uint64_t bytes,
>                                           void *buf)
> @@ -51,14 +70,12 @@ static int coroutine_fn stream_populate(BlockBackend *blk,
>       qemu_iovec_init_external(&qiov, &iov, 1);
>   
>       /* Copy-on-read the unallocated clusters */
> -    return blk_co_preadv(blk, offset, qiov.size, &qiov, BDRV_REQ_COPY_ON_READ);
> +    return blk_co_preadv(blk, offset, qiov.size, &qiov, 0);
>   }
>   
> -static int stream_prepare(Job *job)
> +static int stream_change_backing_file(StreamBlockJob *s)
>   {
> -    StreamBlockJob *s = container_of(job, StreamBlockJob, common.job);
> -    BlockJob *bjob = &s->common;
> -    BlockDriverState *bs = blk_bs(bjob->blk);
> +    BlockDriverState *bs = s->target_bs;
>       BlockDriverState *base = s->base;
>       Error *local_err = NULL;
>       int ret = 0;
> @@ -82,11 +99,53 @@ static int stream_prepare(Job *job)
>       return ret;
>   }
>   
> +static int stream_exit(Job *job, bool abort)
> +{
> +    StreamBlockJob *s = container_of(job, StreamBlockJob, common.job);
> +    BlockJob *bjob = &s->common;
> +    BlockDriverState *target_bs = s->target_bs;
> +    int ret = 0;
> +
> +    /* Retain the BDS until we complete the graph change. */
> +    bdrv_ref(target_bs);
> +    /* Hold a guest back from writing while permissions are being reset. */
> +    bdrv_drained_begin(target_bs);
> +    /* Drop permissions before the graph change. */
> +    bdrv_child_try_set_perm(s->cor_filter_bs->file, 0, BLK_PERM_ALL,
> +                            &error_abort);
> +    if (!abort) {
> +        ret = stream_change_backing_file(s);
> +    }
> +
> +    bdrv_replace_node(s->cor_filter_bs, target_bs, &error_abort);
> +    /* Switch the BB back to the filter so that job terminated properly. */
> +    blk_remove_bs(bjob->blk);
> +    blk_set_perm(bjob->blk, 0, BLK_PERM_ALL, &error_abort);
> +    blk_insert_bs(bjob->blk, s->cor_filter_bs, &error_abort);
> +
> +    bdrv_drained_end(target_bs);
> +    bdrv_unref(target_bs);
> +    /* Submit control over filter to the job instance. */
> +    bdrv_unref(s->cor_filter_bs);
> +
> +    return ret;
> +}
> +
> +static int stream_prepare(Job *job)
> +{
> +    return stream_exit(job, false);
> +}
> +
> +static void stream_abort(Job *job)
> +{
> +    stream_exit(job, job->ret < 0);
> +}
> +
>   static void stream_clean(Job *job)
>   {
>       StreamBlockJob *s = container_of(job, StreamBlockJob, common.job);
>       BlockJob *bjob = &s->common;
> -    BlockDriverState *bs = blk_bs(bjob->blk);
> +    BlockDriverState *bs = s->target_bs;
>   
>       /* Reopen the image back in read-only mode if necessary */
>       if (s->bs_read_only) {
> @@ -102,7 +161,7 @@ static int coroutine_fn stream_run(Job *job, Error **errp)
>   {
>       StreamBlockJob *s = container_of(job, StreamBlockJob, common.job);
>       BlockBackend *blk = s->common.blk;
> -    BlockDriverState *bs = blk_bs(blk);
> +    BlockDriverState *bs = s->target_bs;
>       BlockDriverState *base = s->base;
>       int64_t len;
>       int64_t offset = 0;
> @@ -213,12 +272,64 @@ static const BlockJobDriver stream_job_driver = {
>           .free          = block_job_free,
>           .run           = stream_run,
>           .prepare       = stream_prepare,
> +        .abort         = stream_abort,
>           .clean         = stream_clean,
>           .user_resume   = block_job_user_resume,
>           .drain         = block_job_drain,
>       },
>   };
>   
> +static BlockDriverState *create_filter_node(BlockDriverState *bs,
> +                                            Error **errp)
> +{
> +    QDict *opts = qdict_new();
> +
> +    qdict_put_str(opts, "driver", "copy-on-read");
> +    qdict_put_str(opts, "file", bdrv_get_node_name(bs));
> +
> +    return bdrv_open(NULL, NULL, opts, BDRV_O_RDWR, errp);
> +}
> +
> +static void remove_filter(BlockDriverState *cor_filter_bs)
> +{
> +    BlockDriverState *bs = child_file_bs(cor_filter_bs);
> +
> +    /* Hold a guest back from writing until we remove the filter */
> +    bdrv_drained_begin(bs);
> +    bdrv_child_try_set_perm(cor_filter_bs->file, 0, BLK_PERM_ALL,
> +                            &error_abort);
> +    bdrv_replace_node(cor_filter_bs, bs, &error_abort);
> +    bdrv_drained_end(bs);
> +
> +    bdrv_unref(cor_filter_bs);
> +}
> +
> +static BlockDriverState *insert_filter(BlockDriverState *bs, Error **errp)
> +{
> +    BlockDriverState *cor_filter_bs;
> +    Error *local_err = NULL;
> +
> +    cor_filter_bs = create_filter_node(bs, errp);
> +    if (cor_filter_bs == NULL) {
> +        error_prepend(errp, "Could not create filter node: ");
> +        return NULL;
> +    }
> +
> +    bdrv_set_aio_context(cor_filter_bs, bdrv_get_aio_context(bs));
> +
> +    bdrv_drained_begin(bs);
> +    bdrv_replace_node(bs, cor_filter_bs, &local_err);
> +    bdrv_drained_end(bs);
> +
> +    if (local_err) {
> +        bdrv_unref(cor_filter_bs);
> +        error_propagate(errp, local_err);
> +        return NULL;
> +    }
> +
> +    return cor_filter_bs;
> +}
> +
>   void stream_start(const char *job_id, BlockDriverState *bs,
>                     BlockDriverState *base, const char *backing_file_str,
>                     int creation_flags, int64_t speed,
> @@ -227,6 +338,14 @@ void stream_start(const char *job_id, BlockDriverState *bs,
>       StreamBlockJob *s;
>       BlockDriverState *iter;
>       bool bs_read_only;
> +    BlockDriverState *cor_filter_bs;
> +
> +    /*
> +     * The base node might be identified by its file name rather than
> +     * by its node name. In that case, we can encounter a filter node
> +     * instead which has other BS pointer and the same file name.
> +     */
> +    base = skip_filter(base);
>   
>       /* Make sure that the image is opened in read-write mode */
>       bs_read_only = bdrv_is_read_only(bs);
> @@ -236,10 +355,15 @@ void stream_start(const char *job_id, BlockDriverState *bs,
>           }
>       }
>   
> +    cor_filter_bs = insert_filter(bs, errp);
> +    if (cor_filter_bs == NULL) {
> +        goto fail;
> +    }
> +
>       /* Prevent concurrent jobs trying to modify the graph structure here, we
>        * already have our own plans. Also don't allow resize as the image size is
>        * queried only at the job start and then cached. */
> -    s = block_job_create(job_id, &stream_job_driver, NULL, bs,
> +    s = block_job_create(job_id, &stream_job_driver, NULL, cor_filter_bs,
>                            BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED |
>                            BLK_PERM_GRAPH_MOD,
>                            BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED |
> @@ -249,16 +373,21 @@ void stream_start(const char *job_id, BlockDriverState *bs,
>           goto fail;
>       }
>   
> -    /* Block all intermediate nodes between bs and base, because they will
> +    /*
> +     * Block all intermediate nodes between bs and base, because they will
>        * disappear from the chain after this operation. The streaming job reads
>        * every block only once, assuming that it doesn't change, so block writes
> -     * and resizes. */
> -    for (iter = backing_bs(bs); iter && iter != base; iter = backing_bs(iter)) {
> +     * and resizes. We account a filter node may be a part of the graph.
> +     */
> +    for (iter = skip_filter(backing_bs(bs)); iter && iter != base;
> +         iter = skip_filter(backing_bs(iter))) {
>           block_job_add_bdrv(&s->common, "intermediate node", iter, 0,
>                              BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED,
>                              &error_abort);
>       }
>   
> +    s->cor_filter_bs = cor_filter_bs;
> +    s->target_bs = bs;
>       s->base = base;
>       s->backing_file_str = g_strdup(backing_file_str);
>       s->bs_read_only = bs_read_only;
> @@ -269,6 +398,9 @@ void stream_start(const char *job_id, BlockDriverState *bs,
>       return;
>   
>   fail:
> +    if (cor_filter_bs) {
> +        remove_filter(cor_filter_bs);
> +    }
>       if (bs_read_only) {
>           bdrv_reopen_set_read_only(bs, true, NULL);
>       }
> 

-- 
With the best regards,
Andrey Shinkevich

  parent reply	other threads:[~2019-01-31 14:02 UTC|newest]

Thread overview: 14+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-01-23 11:54 [Qemu-devel] [PATCH RFC 1/1] Stream block job involves copy-on-read filter Andrey Shinkevich
2019-01-23 13:10 ` Vladimir Sementsov-Ogievskiy
2019-01-31 14:02 ` Andrey Shinkevich [this message]
2019-02-08 13:13 ` [Qemu-devel] [Qemu-block] " Alberto Garcia
2019-02-08 15:29   ` Andrey Shinkevich
2019-02-11 14:07     ` Alberto Garcia
2019-02-11 14:51       ` Vladimir Sementsov-Ogievskiy
2019-02-11 15:52         ` Alberto Garcia
2019-02-11 16:58           ` Vladimir Sementsov-Ogievskiy
2019-02-12 11:35             ` Alberto Garcia
2019-02-14 13:43               ` Andrey Shinkevich
2019-02-18 10:08                 ` Vladimir Sementsov-Ogievskiy
2019-02-20  9:10                   ` Andrey Shinkevich
2019-02-11 14:54       ` Andrey Shinkevich

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=7ccbcc44-9d41-2df9-e1af-88715f81c99a@virtuozzo.com \
    --to=andrey.shinkevich@virtuozzo.com \
    --cc=den@virtuozzo.com \
    --cc=jcody@redhat.com \
    --cc=kwolf@redhat.com \
    --cc=mreitz@redhat.com \
    --cc=qemu-block@nongnu.org \
    --cc=qemu-devel@nongnu.org \
    --cc=vsementsov@virtuozzo.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.