All of lore.kernel.org
 help / color / mirror / Atom feed
* [Qemu-devel] [PATCH 0/3] Support streaming to an intermediate layer
@ 2015-02-20 13:53 Alberto Garcia
  2015-02-20 13:53 ` [Qemu-devel] [PATCH 1/3] block: " Alberto Garcia
                   ` (3 more replies)
  0 siblings, 4 replies; 26+ messages in thread
From: Alberto Garcia @ 2015-02-20 13:53 UTC (permalink / raw)
  To: qemu-devel; +Cc: Kevin Wolf, Alberto Garcia, Stefan Hajnoczi

Hello,

I added support to the Block Stream API for streaming to an
intermediate layer.

I followed the proposed API from the wiki, which simply adds an
additional 'top' parameter to block-stream specifying the image that
data is written to:

http://wiki.qemu.org/Features/Snapshots#Streaming_to_an_Intermediate_Layer_.5Bproposal.5D

I split the work in three patches, I think the code is pretty
straightforward and doesn't need much explanation, but feedback and
questions are welcome.

Regards,

Berto

Alberto Garcia (3):
  block: Support streaming to an intermediate layer
  block: Add QMP support for streaming to an intermediate layer
  docs: Document how to stream to an intermediate layer

 block.c                   |  4 +++-
 block/stream.c            | 49 ++++++++++++++++++++++++++++++++++++-----------
 blockdev.c                | 19 ++++++++++++++----
 docs/live-block-ops.txt   | 28 ++++++++++++++++++---------
 hmp.c                     |  2 +-
 include/block/block_int.h | 19 +++++++++---------
 include/qapi/qmp/qerror.h |  3 +++
 qapi/block-core.json      | 18 +++++++++++++----
 qmp-commands.hx           |  2 +-
 9 files changed, 104 insertions(+), 40 deletions(-)

-- 
2.1.4

^ permalink raw reply	[flat|nested] 26+ messages in thread

* [Qemu-devel] [PATCH 1/3] block: Support streaming to an intermediate layer
  2015-02-20 13:53 [Qemu-devel] [PATCH 0/3] Support streaming to an intermediate layer Alberto Garcia
@ 2015-02-20 13:53 ` Alberto Garcia
  2015-03-05 14:04   ` Kevin Wolf
  2015-02-20 13:53 ` [Qemu-devel] [PATCH 2/3] block: Add QMP support for " Alberto Garcia
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 26+ messages in thread
From: Alberto Garcia @ 2015-02-20 13:53 UTC (permalink / raw)
  To: qemu-devel; +Cc: Kevin Wolf, Alberto Garcia, Stefan Hajnoczi

This adds a new 'top' parameter to stream_start(), that specifies the
block device where the data will be written. The image is changed to
read-write mode during the streaming and back to read-only afterwards.

This also unblocks the stream operation in backing files.

Signed-off-by: Alberto Garcia <berto@igalia.com>
---
 block.c                   |  4 +++-
 block/stream.c            | 49 ++++++++++++++++++++++++++++++++++++-----------
 blockdev.c                |  2 +-
 include/block/block_int.h | 19 +++++++++---------
 4 files changed, 52 insertions(+), 22 deletions(-)

diff --git a/block.c b/block.c
index 210fd5f..6b68d90 100644
--- a/block.c
+++ b/block.c
@@ -1199,9 +1199,11 @@ void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd)
             backing_hd->drv ? backing_hd->drv->format_name : "");
 
     bdrv_op_block_all(bs->backing_hd, bs->backing_blocker);
-    /* Otherwise we won't be able to commit due to check in bdrv_commit */
+    /* Otherwise we won't be able to commit or stream */
     bdrv_op_unblock(bs->backing_hd, BLOCK_OP_TYPE_COMMIT_TARGET,
                     bs->backing_blocker);
+    bdrv_op_unblock(bs->backing_hd, BLOCK_OP_TYPE_STREAM,
+                    bs->backing_blocker);
 out:
     bdrv_refresh_limits(bs, NULL);
 }
diff --git a/block/stream.c b/block/stream.c
index a628901..ecf5831 100644
--- a/block/stream.c
+++ b/block/stream.c
@@ -31,8 +31,10 @@ typedef struct StreamBlockJob {
     BlockJob common;
     RateLimit limit;
     BlockDriverState *base;
+    BlockDriverState *top;
     BlockdevOnError on_error;
     char *backing_file_str;
+    int top_flags;
 } StreamBlockJob;
 
 static int coroutine_fn stream_populate(BlockDriverState *bs,
@@ -99,8 +101,13 @@ static void stream_complete(BlockJob *job, void *opaque)
                 base_fmt = base->drv->format_name;
             }
         }
-        data->ret = bdrv_change_backing_file(job->bs, base_id, base_fmt);
-        close_unused_images(job->bs, base, base_id);
+        data->ret = bdrv_change_backing_file(s->top, base_id, base_fmt);
+        close_unused_images(s->top, base, base_id);
+    }
+
+    /* Reopen the image back in read-only mode if necessary */
+    if (s->top_flags != bdrv_get_flags(s->top)) {
+        bdrv_reopen(s->top, s->top_flags, NULL);
     }
 
     g_free(s->backing_file_str);
@@ -112,23 +119,23 @@ static void coroutine_fn stream_run(void *opaque)
 {
     StreamBlockJob *s = opaque;
     StreamCompleteData *data;
-    BlockDriverState *bs = s->common.bs;
+    BlockDriverState *bs = s->top;
     BlockDriverState *base = s->base;
-    int64_t sector_num, end;
+    int64_t sector_num = 0;
+    int64_t end = -1;
     int error = 0;
     int ret = 0;
     int n = 0;
     void *buf;
 
     if (!bs->backing_hd) {
-        block_job_completed(&s->common, 0);
-        return;
+        goto out;
     }
 
     s->common.len = bdrv_getlength(bs);
     if (s->common.len < 0) {
-        block_job_completed(&s->common, s->common.len);
-        return;
+        ret = s->common.len;
+        goto out;
     }
 
     end = s->common.len >> BDRV_SECTOR_BITS;
@@ -215,6 +222,7 @@ wait:
 
     qemu_vfree(buf);
 
+out:
     /* Modify backing chain and close BDSes in main loop */
     data = g_malloc(sizeof(*data));
     data->ret = ret;
@@ -239,13 +247,18 @@ static const BlockJobDriver stream_job_driver = {
     .set_speed     = stream_set_speed,
 };
 
-void stream_start(BlockDriverState *bs, BlockDriverState *base,
-                  const char *backing_file_str, int64_t speed,
-                  BlockdevOnError on_error,
+void stream_start(BlockDriverState *bs, BlockDriverState *top,
+                  BlockDriverState *base, const char *backing_file_str,
+                  int64_t speed, BlockdevOnError on_error,
                   BlockCompletionFunc *cb,
                   void *opaque, Error **errp)
 {
     StreamBlockJob *s;
+    int orig_top_flags;
+
+    if (!top) {
+        top = bs;
+    }
 
     if ((on_error == BLOCKDEV_ON_ERROR_STOP ||
          on_error == BLOCKDEV_ON_ERROR_ENOSPC) &&
@@ -254,14 +267,28 @@ void stream_start(BlockDriverState *bs, BlockDriverState *base,
         return;
     }
 
+    /* Make sure that the top image in opened in read-write mode */
+    orig_top_flags = bdrv_get_flags(top);
+    if (!(orig_top_flags & BDRV_O_RDWR)) {
+        Error *local_err = NULL;
+        bdrv_reopen(top, orig_top_flags | BDRV_O_RDWR, &local_err);
+        if (local_err != NULL) {
+            error_propagate(errp, local_err);
+            return;
+        }
+    }
+
     s = block_job_create(&stream_job_driver, bs, speed, cb, opaque, errp);
     if (!s) {
         return;
     }
 
+    s->top = top;
     s->base = base;
     s->backing_file_str = g_strdup(backing_file_str);
 
+    s->top_flags = orig_top_flags;
+
     s->on_error = on_error;
     s->common.co = qemu_coroutine_create(stream_run);
     trace_stream_start(bs, base, s, s->common.co, opaque);
diff --git a/blockdev.c b/blockdev.c
index 7d34960..06628ca 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -2148,7 +2148,7 @@ void qmp_block_stream(const char *device,
     /* backing_file string overrides base bs filename */
     base_name = has_backing_file ? backing_file : base_name;
 
-    stream_start(bs, base_bs, base_name, has_speed ? speed : 0,
+    stream_start(bs, NULL, base_bs, base_name, has_speed ? speed : 0,
                  on_error, block_job_cb, bs, &local_err);
     if (local_err) {
         error_propagate(errp, local_err);
diff --git a/include/block/block_int.h b/include/block/block_int.h
index 7ad1950..959d19c 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -498,10 +498,11 @@ int is_windows_drive(const char *filename);
 
 /**
  * stream_start:
- * @bs: Block device to operate on.
+ * @bs: Active block device.
+ * @top: Block device to copy the data to, defaults to @bs if %NULL.
  * @base: Block device that will become the new base, or %NULL to
- * flatten the whole backing file chain onto @bs.
- * @base_id: The file name that will be written to @bs as the new
+ * flatten the whole backing file chain onto @top.
+ * @base_id: The file name that will be written to @top as the new
  * backing file if the job completes.  Ignored if @base is %NULL.
  * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
  * @on_error: The action to take upon error.
@@ -510,14 +511,14 @@ int is_windows_drive(const char *filename);
  * @errp: Error object.
  *
  * Start a streaming operation on @bs.  Clusters that are unallocated
- * in @bs, but allocated in any image between @base and @bs (both
- * exclusive) will be written to @bs.  At the end of a successful
- * streaming job, the backing file of @bs will be changed to
+ * in @top, but allocated in any image between @base and @top (both
+ * exclusive) will be written to @top.  At the end of a successful
+ * streaming job, the backing file of @top will be changed to
  * @base_id in the written image and to @base in the live BlockDriverState.
  */
-void stream_start(BlockDriverState *bs, BlockDriverState *base,
-                  const char *base_id, int64_t speed, BlockdevOnError on_error,
-                  BlockCompletionFunc *cb,
+void stream_start(BlockDriverState *bs, BlockDriverState *top,
+                  BlockDriverState *base, const char *base_id, int64_t speed,
+                  BlockdevOnError on_error, BlockCompletionFunc *cb,
                   void *opaque, Error **errp);
 
 /**
-- 
2.1.4

^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [Qemu-devel] [PATCH 2/3] block: Add QMP support for streaming to an intermediate layer
  2015-02-20 13:53 [Qemu-devel] [PATCH 0/3] Support streaming to an intermediate layer Alberto Garcia
  2015-02-20 13:53 ` [Qemu-devel] [PATCH 1/3] block: " Alberto Garcia
@ 2015-02-20 13:53 ` Alberto Garcia
  2015-02-20 22:38   ` Eric Blake
  2015-03-05 14:09   ` Kevin Wolf
  2015-02-20 13:53 ` [Qemu-devel] [PATCH 3/3] docs: Document how to stream " Alberto Garcia
  2015-02-20 17:34 ` [Qemu-devel] [PATCH 0/3] Support streaming " Eric Blake
  3 siblings, 2 replies; 26+ messages in thread
From: Alberto Garcia @ 2015-02-20 13:53 UTC (permalink / raw)
  To: qemu-devel; +Cc: Kevin Wolf, Alberto Garcia, Stefan Hajnoczi

This adds the 'top' parameter to the 'block-stream' QMP command and
checks that its value is valid before passing it to stream_start().

Signed-off-by: Alberto Garcia <berto@igalia.com>
---
 blockdev.c                | 19 +++++++++++++++----
 hmp.c                     |  2 +-
 include/qapi/qmp/qerror.h |  3 +++
 qapi/block-core.json      | 18 ++++++++++++++----
 qmp-commands.hx           |  2 +-
 5 files changed, 34 insertions(+), 10 deletions(-)

diff --git a/blockdev.c b/blockdev.c
index 06628ca..2404f89 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -2099,6 +2099,7 @@ static void block_job_cb(void *opaque, int ret)
 
 void qmp_block_stream(const char *device,
                       bool has_base, const char *base,
+                      bool has_top, const char *top,
                       bool has_backing_file, const char *backing_file,
                       bool has_speed, int64_t speed,
                       bool has_on_error, BlockdevOnError on_error,
@@ -2106,6 +2107,7 @@ void qmp_block_stream(const char *device,
 {
     BlockDriverState *bs;
     BlockDriverState *base_bs = NULL;
+    BlockDriverState *top_bs;
     AioContext *aio_context;
     Error *local_err = NULL;
     const char *base_name = NULL;
@@ -2114,7 +2116,7 @@ void qmp_block_stream(const char *device,
         on_error = BLOCKDEV_ON_ERROR_REPORT;
     }
 
-    bs = bdrv_find(device);
+    top_bs = bs = bdrv_find(device);
     if (!bs) {
         error_set(errp, QERR_DEVICE_NOT_FOUND, device);
         return;
@@ -2123,12 +2125,21 @@ void qmp_block_stream(const char *device,
     aio_context = bdrv_get_aio_context(bs);
     aio_context_acquire(aio_context);
 
-    if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_STREAM, errp)) {
+    if (has_top) {
+        top_bs = bdrv_find_backing_image(bs, top);
+        if (top_bs == NULL) {
+            error_set(errp, QERR_TOP_NOT_FOUND, top);
+            goto out;
+        }
+        assert(bdrv_get_aio_context(top_bs) == aio_context);
+    }
+
+    if (bdrv_op_is_blocked(top_bs, BLOCK_OP_TYPE_STREAM, errp)) {
         goto out;
     }
 
     if (has_base) {
-        base_bs = bdrv_find_backing_image(bs, base);
+        base_bs = bdrv_find_backing_image(top_bs, base);
         if (base_bs == NULL) {
             error_set(errp, QERR_BASE_NOT_FOUND, base);
             goto out;
@@ -2148,7 +2159,7 @@ void qmp_block_stream(const char *device,
     /* backing_file string overrides base bs filename */
     base_name = has_backing_file ? backing_file : base_name;
 
-    stream_start(bs, NULL, base_bs, base_name, has_speed ? speed : 0,
+    stream_start(bs, top_bs, base_bs, base_name, has_speed ? speed : 0,
                  on_error, block_job_cb, bs, &local_err);
     if (local_err) {
         error_propagate(errp, local_err);
diff --git a/hmp.c b/hmp.c
index b47f331..28f7adb 100644
--- a/hmp.c
+++ b/hmp.c
@@ -1239,7 +1239,7 @@ void hmp_block_stream(Monitor *mon, const QDict *qdict)
     const char *base = qdict_get_try_str(qdict, "base");
     int64_t speed = qdict_get_try_int(qdict, "speed", 0);
 
-    qmp_block_stream(device, base != NULL, base, false, NULL,
+    qmp_block_stream(device, base != NULL, base, false, NULL, false, NULL,
                      qdict_haskey(qdict, "speed"), speed,
                      true, BLOCKDEV_ON_ERROR_REPORT, &error);
 
diff --git a/include/qapi/qmp/qerror.h b/include/qapi/qmp/qerror.h
index 986260f..d075f78 100644
--- a/include/qapi/qmp/qerror.h
+++ b/include/qapi/qmp/qerror.h
@@ -127,6 +127,9 @@ void qerror_report_err(Error *err);
 #define QERR_SET_PASSWD_FAILED \
     ERROR_CLASS_GENERIC_ERROR, "Could not set password"
 
+#define QERR_TOP_NOT_FOUND \
+    ERROR_CLASS_GENERIC_ERROR, "Top '%s' not found"
+
 #define QERR_UNDEFINED_ERROR \
     ERROR_CLASS_GENERIC_ERROR, "An undefined error has occurred"
 
diff --git a/qapi/block-core.json b/qapi/block-core.json
index a3fdaf0..3073be6 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -1013,6 +1013,9 @@
 # with query-block-jobs.  The operation can be stopped before it has completed
 # using the block-job-cancel command.
 #
+# Data is copied to the top image, which defaults to the active layer if no other
+# file is selected.
+#
 # If a base file is specified then sectors are not copied from that base file and
 # its backing chain.  When streaming completes the image file will have the base
 # file as its backing file.  This can be used to stream a subset of the backing
@@ -1025,8 +1028,14 @@
 #
 # @base:   #optional the common backing file name
 #
-# @backing-file: #optional The backing file string to write into the active
-#                          layer. This filename is not validated.
+# @top:    #optional Top image, only sectors below this image are streamed
+#                    into it.
+#
+#                    If not specified, the top image is the active layer.
+#                    (Since 2.3)
+#
+# @backing-file: #optional The backing file string to write into the top
+#                          image. This filename is not validated.
 #
 #                          If a pathname string is such that it cannot be
 #                          resolved by QEMU, that means that subsequent QMP or
@@ -1052,8 +1061,9 @@
 # Since: 1.1
 ##
 { 'command': 'block-stream',
-  'data': { 'device': 'str', '*base': 'str', '*backing-file': 'str',
-            '*speed': 'int', '*on-error': 'BlockdevOnError' } }
+  'data': { 'device': 'str', '*base': 'str', '*top': 'str',
+            '*backing-file': 'str', '*speed': 'int',
+            '*on-error': 'BlockdevOnError' } }
 
 ##
 # @block-job-set-speed:
diff --git a/qmp-commands.hx b/qmp-commands.hx
index a85d847..a02f19f 100644
--- a/qmp-commands.hx
+++ b/qmp-commands.hx
@@ -978,7 +978,7 @@ EQMP
 
     {
         .name       = "block-stream",
-        .args_type  = "device:B,base:s?,speed:o?,backing-file:s?,on-error:s?",
+        .args_type  = "device:B,base:s?,top:s?,speed:o?,backing-file:s?,on-error:s?",
         .mhandler.cmd_new = qmp_marshal_input_block_stream,
     },
 
-- 
2.1.4

^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [Qemu-devel] [PATCH 3/3] docs: Document how to stream to an intermediate layer
  2015-02-20 13:53 [Qemu-devel] [PATCH 0/3] Support streaming to an intermediate layer Alberto Garcia
  2015-02-20 13:53 ` [Qemu-devel] [PATCH 1/3] block: " Alberto Garcia
  2015-02-20 13:53 ` [Qemu-devel] [PATCH 2/3] block: Add QMP support for " Alberto Garcia
@ 2015-02-20 13:53 ` Alberto Garcia
  2015-02-20 17:34 ` [Qemu-devel] [PATCH 0/3] Support streaming " Eric Blake
  3 siblings, 0 replies; 26+ messages in thread
From: Alberto Garcia @ 2015-02-20 13:53 UTC (permalink / raw)
  To: qemu-devel; +Cc: Kevin Wolf, Alberto Garcia, Stefan Hajnoczi

Signed-off-by: Alberto Garcia <berto@igalia.com>
---
 docs/live-block-ops.txt | 28 +++++++++++++++++++---------
 1 file changed, 19 insertions(+), 9 deletions(-)

diff --git a/docs/live-block-ops.txt b/docs/live-block-ops.txt
index a257087..63530e8 100644
--- a/docs/live-block-ops.txt
+++ b/docs/live-block-ops.txt
@@ -10,9 +10,9 @@ Snapshot live merge
 Given a snapshot chain, described in this document in the following
 format:
 
-[A] -> [B] -> [C] -> [D]
+[A] -> [B] -> [C] -> [D] -> [E]
 
-Where the rightmost object ([D] in the example) described is the current
+Where the rightmost object ([E] in the example) described is the current
 image which the guest OS has write access to. To the left of it is its base
 image, and so on accordingly until the leftmost image, which has no
 base.
@@ -21,28 +21,38 @@ The snapshot live merge operation transforms such a chain into a
 smaller one with fewer elements, such as this transformation relative
 to the first example:
 
-[A] -> [D]
+[A] -> [E]
 
-Currently only forward merge with target being the active image is
-supported, that is, data copy is performed in the right direction with
-destination being the rightmost image.
+Data is copied in the right direction with destination being the
+rightmost image, but any other intermediate image can be specified
+instead, [D] in this example:
+
+[A] -> [B] -> [D] -> [E]
 
 The operation is implemented in QEMU through image streaming facilities.
 
 The basic idea is to execute 'block_stream virtio0' while the guest is
 running. Progress can be monitored using 'info block-jobs'. When the
 streaming operation completes it raises a QMP event. 'block_stream'
-copies data from the backing file(s) into the active image. When finished,
+copies data from the backing file(s) into the destination image. When finished,
 it adjusts the backing file pointer.
 
 The 'base' parameter specifies an image which data need not be streamed from.
-This image will be used as the backing file for the active image when the
+This image will be used as the backing file for the destination image when the
 operation is finished.
 
-In the example above, the command would be:
+In the first example above, the command would be:
 
 (qemu) block_stream virtio0 A
 
+The 'top' parameter specifies the destination image if it's different
+from the active (rightmost) one. Currently it can only be set using
+QMP.
+
+In the second example above this would be the QMP shell command:
+
+(QEMU) block-stream device=virtio0 top=D base=B
+
 
 Live block copy
 ===============
-- 
2.1.4

^ permalink raw reply related	[flat|nested] 26+ messages in thread

* Re: [Qemu-devel] [PATCH 0/3] Support streaming to an intermediate layer
  2015-02-20 13:53 [Qemu-devel] [PATCH 0/3] Support streaming to an intermediate layer Alberto Garcia
                   ` (2 preceding siblings ...)
  2015-02-20 13:53 ` [Qemu-devel] [PATCH 3/3] docs: Document how to stream " Alberto Garcia
@ 2015-02-20 17:34 ` Eric Blake
  2015-02-20 19:05   ` Alberto Garcia
  3 siblings, 1 reply; 26+ messages in thread
From: Eric Blake @ 2015-02-20 17:34 UTC (permalink / raw)
  To: Alberto Garcia, qemu-devel; +Cc: Kevin Wolf, Stefan Hajnoczi

[-- Attachment #1: Type: text/plain, Size: 686 bytes --]

On 02/20/2015 06:53 AM, Alberto Garcia wrote:
> Hello,
> 
> I added support to the Block Stream API for streaming to an
> intermediate layer.
> 
> I followed the proposed API from the wiki, which simply adds an
> additional 'top' parameter to block-stream specifying the image that
> data is written to:

How does one learn whether qemu is new enough to support this mode?
Until we add QMP introspection, learning whether an optional parameter
exists requires attempting the command and seeing a different error
depending on whether the argument is recognized.

-- 
Eric Blake   eblake redhat com    +1-919-301-3266
Libvirt virtualization library http://libvirt.org


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 604 bytes --]

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [Qemu-devel] [PATCH 0/3] Support streaming to an intermediate layer
  2015-02-20 17:34 ` [Qemu-devel] [PATCH 0/3] Support streaming " Eric Blake
@ 2015-02-20 19:05   ` Alberto Garcia
  2015-02-20 22:49     ` Eric Blake
  0 siblings, 1 reply; 26+ messages in thread
From: Alberto Garcia @ 2015-02-20 19:05 UTC (permalink / raw)
  To: Eric Blake; +Cc: Kevin Wolf, qemu-devel, Stefan Hajnoczi

On Fri, Feb 20, 2015 at 10:34:58AM -0700, Eric Blake wrote:

> > I followed the proposed API from the wiki, which simply adds an
> > additional 'top' parameter to block-stream specifying the image
> > that data is written to:
> 
> How does one learn whether qemu is new enough to support this
> mode? Until we add QMP introspection, learning whether an optional
> parameter exists requires attempting the command and seeing a
> different error depending on whether the argument is recognized.

Isn't it possible to just check the QEMU version number?

If not, what's the recommended way to do it?

Thanks,

Berto

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [Qemu-devel] [PATCH 2/3] block: Add QMP support for streaming to an intermediate layer
  2015-02-20 13:53 ` [Qemu-devel] [PATCH 2/3] block: Add QMP support for " Alberto Garcia
@ 2015-02-20 22:38   ` Eric Blake
  2015-02-23 12:23     ` Alberto Garcia
  2015-03-05 14:09   ` Kevin Wolf
  1 sibling, 1 reply; 26+ messages in thread
From: Eric Blake @ 2015-02-20 22:38 UTC (permalink / raw)
  To: Alberto Garcia, qemu-devel; +Cc: Kevin Wolf, Stefan Hajnoczi

[-- Attachment #1: Type: text/plain, Size: 1864 bytes --]

On 02/20/2015 06:53 AM, Alberto Garcia wrote:
> This adds the 'top' parameter to the 'block-stream' QMP command and
> checks that its value is valid before passing it to stream_start().
> 
> Signed-off-by: Alberto Garcia <berto@igalia.com>
> ---
>  blockdev.c                | 19 +++++++++++++++----
>  hmp.c                     |  2 +-
>  include/qapi/qmp/qerror.h |  3 +++
>  qapi/block-core.json      | 18 ++++++++++++++----
>  qmp-commands.hx           |  2 +-
>  5 files changed, 34 insertions(+), 10 deletions(-)

> @@ -2123,12 +2125,21 @@ void qmp_block_stream(const char *device,
>      aio_context = bdrv_get_aio_context(bs);
>      aio_context_acquire(aio_context);
>  
> -    if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_STREAM, errp)) {
> +    if (has_top) {
> +        top_bs = bdrv_find_backing_image(bs, top);
> +        if (top_bs == NULL) {
> +            error_set(errp, QERR_TOP_NOT_FOUND, top);
> +            goto out;
> +        }

If I understand correctly, bdrv_find_backing_image has problems for
backing nodes that don't have a file name.  Given our shift towards node
names, I think we really want to target node names rather than file
names when specifying which node we will use as the top bound receiving
the stream operations.


> +++ b/include/qapi/qmp/qerror.h
> @@ -127,6 +127,9 @@ void qerror_report_err(Error *err);
>  #define QERR_SET_PASSWD_FAILED \
>      ERROR_CLASS_GENERIC_ERROR, "Could not set password"
>  
> +#define QERR_TOP_NOT_FOUND \
> +    ERROR_CLASS_GENERIC_ERROR, "Top '%s' not found"
> +

Please don't.  Just use error_setg() at the right place with the direct
message (existing QERR_ macros are a legacy holdover, and we shouldn't
be creating more of them).

-- 
Eric Blake   eblake redhat com    +1-919-301-3266
Libvirt virtualization library http://libvirt.org


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 604 bytes --]

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [Qemu-devel] [PATCH 0/3] Support streaming to an intermediate layer
  2015-02-20 19:05   ` Alberto Garcia
@ 2015-02-20 22:49     ` Eric Blake
  2015-02-22 15:08       ` Alberto Garcia
  0 siblings, 1 reply; 26+ messages in thread
From: Eric Blake @ 2015-02-20 22:49 UTC (permalink / raw)
  To: Alberto Garcia; +Cc: Kevin Wolf, qemu-devel, Stefan Hajnoczi

[-- Attachment #1: Type: text/plain, Size: 2308 bytes --]

On 02/20/2015 12:05 PM, Alberto Garcia wrote:
> On Fri, Feb 20, 2015 at 10:34:58AM -0700, Eric Blake wrote:
> 
>>> I followed the proposed API from the wiki, which simply adds an
>>> additional 'top' parameter to block-stream specifying the image
>>> that data is written to:
>>
>> How does one learn whether qemu is new enough to support this
>> mode? Until we add QMP introspection, learning whether an optional
>> parameter exists requires attempting the command and seeing a
>> different error depending on whether the argument is recognized.
> 
> Isn't it possible to just check the QEMU version number?

Absolutely not.  Vendors are very likely to backport this to earlier
version numbers.  Feature probes are ALWAYS a better idea than version
probes.

> 
> If not, what's the recommended way to do it?

Several design possibilities, but not all of them feasible at the moment.

1. implement QAPI introspection (we've been dreaming about this since
qemu 1.5 days), then the caller just queries to see if the version of
QMP has the optional 'top' parameter.

2. implement the new feature as a new command (then the existing
'query-commands' becomes an easy probe; but we have code duplication
with existing commands) (on the other hand, if we are going to frame
'stream' in terms of node operations instead of file name operations,
this may be the best idea after all)

3. guarantee that we have sane error messages for bogus commands used as
the probe. If {"execute":"block-stream",
"data":{"device":"no-such","top":"bogus"}} gives a reliable
DeviceNotFound error for qemu that supports optional 'top', and a
reliable GenericError about an unknown argument 'top' in older qemu,
then libvirt could use that as a poor-man's probe for the functionality
existing. (ugly, and only works if you can guarantee a difference in
error type between old and new qemu for a specific bogus command usage)
 Precedent: we do this sort of bogus command call on 'block-commit' to
learn if qemu is new enough to support active commit, and the comments
in the qemu code are explicit that a particular error message must not
be changed because libvirt depends on it.

-- 
Eric Blake   eblake redhat com    +1-919-301-3266
Libvirt virtualization library http://libvirt.org


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 604 bytes --]

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [Qemu-devel] [PATCH 0/3] Support streaming to an intermediate layer
  2015-02-20 22:49     ` Eric Blake
@ 2015-02-22 15:08       ` Alberto Garcia
  0 siblings, 0 replies; 26+ messages in thread
From: Alberto Garcia @ 2015-02-22 15:08 UTC (permalink / raw)
  To: Eric Blake; +Cc: Kevin Wolf, qemu-devel, Stefan Hajnoczi

On Fri, Feb 20, 2015 at 03:49:05PM -0700, Eric Blake wrote:

Hello and thanks for your comments.

> 1. implement QAPI introspection (we've been dreaming about this
> since qemu 1.5 days), then the caller just queries to see if the
> version of QMP has the optional 'top' parameter.

This sounds like the best option. Is there a roadmap? What's the
status?

I guess the amount of work depends a lot on the goals, but from what
I can see, something like a 'query-command-args' command that returns
the list of parameters is trivial, there's already the qmp_cmds array
in memory with all the necessary information.

> 2. implement the new feature as a new command (then the existing
> 'query-commands' becomes an easy probe; but we have code duplication
> with existing commands)

I don't think there would be a lot of code duplication because one
operation is a strict superset of the other, we would be just adding
unnecessary commands. I would go for option 1) if possible.

> 3. guarantee that we have sane error messages for bogus
> commands used as the probe. If {"execute":"block-stream",
> "data":{"device":"no-such","top":"bogus"}} gives a reliable
> DeviceNotFound error for qemu that supports optional 'top', and a
> reliable GenericError about an unknown argument 'top' in older qemu

It doesn't seem like the best solution considering the alternatives,
but for what it's worth:

Current QEMU:

(QEMU) block-stream device=no-such top=bogus
{u'error': {u'class': u'GenericError', u'desc': u"Invalid parameter 'top'"}}

With my patch:

(QEMU) block-stream device=no-such top=bogus
{u'error': {u'class': u'DeviceNotFound', u'desc': u"Device 'no-such' not found"}}

Regards,

Berto

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [Qemu-devel] [PATCH 2/3] block: Add QMP support for streaming to an intermediate layer
  2015-02-20 22:38   ` Eric Blake
@ 2015-02-23 12:23     ` Alberto Garcia
  2015-02-23 13:04       ` Kevin Wolf
  0 siblings, 1 reply; 26+ messages in thread
From: Alberto Garcia @ 2015-02-23 12:23 UTC (permalink / raw)
  To: Eric Blake; +Cc: Kevin Wolf, qemu-devel, Stefan Hajnoczi

On Fri, Feb 20, 2015 at 03:38:04PM -0700, Eric Blake wrote:

> > +    if (has_top) {
> > +        top_bs = bdrv_find_backing_image(bs, top);
> > +        if (top_bs == NULL) {
> > +            error_set(errp, QERR_TOP_NOT_FOUND, top);
> > +            goto out;
> > +        }
> 
> If I understand correctly, bdrv_find_backing_image has problems for
> backing nodes that don't have a file name.  Given our shift towards
> node names, I think we really want to target node names rather than
> file names when specifying which node we will use as the top bound
> receiving the stream operations.

Sure I can change that, but note that the 'base' parameter also
receives a file name and uses bdrv_find_backing_image, so I guess it
makes sense to change it in both sides.

> > +#define QERR_TOP_NOT_FOUND \
> > +    ERROR_CLASS_GENERIC_ERROR, "Top '%s' not found"
> > +
> 
> Please don't.  Just use error_setg() at the right place with the
> direct message (existing QERR_ macros are a legacy holdover, and we
> shouldn't be creating more of them).

Ok, I'll fix that.

I'll wait for more comments regarding the top / base parameters before
resubmitting the patches.

Thanks,

Berto

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [Qemu-devel] [PATCH 2/3] block: Add QMP support for streaming to an intermediate layer
  2015-02-23 12:23     ` Alberto Garcia
@ 2015-02-23 13:04       ` Kevin Wolf
  2015-02-24 14:08         ` Markus Armbruster
  0 siblings, 1 reply; 26+ messages in thread
From: Kevin Wolf @ 2015-02-23 13:04 UTC (permalink / raw)
  To: Alberto Garcia; +Cc: qemu-devel, Stefan Hajnoczi, armbru

Am 23.02.2015 um 13:23 hat Alberto Garcia geschrieben:
> On Fri, Feb 20, 2015 at 03:38:04PM -0700, Eric Blake wrote:
> 
> > > +    if (has_top) {
> > > +        top_bs = bdrv_find_backing_image(bs, top);
> > > +        if (top_bs == NULL) {
> > > +            error_set(errp, QERR_TOP_NOT_FOUND, top);
> > > +            goto out;
> > > +        }
> > 
> > If I understand correctly, bdrv_find_backing_image has problems for
> > backing nodes that don't have a file name.  Given our shift towards
> > node names, I think we really want to target node names rather than
> > file names when specifying which node we will use as the top bound
> > receiving the stream operations.
> 
> Sure I can change that, but note that the 'base' parameter also
> receives a file name and uses bdrv_find_backing_image, so I guess it
> makes sense to change it in both sides.

Yes, using the file name for identifying nodes was a mistake. We're
going to replace all occurrences of it sooner or later. Not sure if
someone is actively working on this currently - Markus?

For your patch series, I think it's good enough to use node names for
the new parameter. Converting old parameters is a separate issue.

Kevin

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [Qemu-devel] [PATCH 2/3] block: Add QMP support for streaming to an intermediate layer
  2015-02-23 13:04       ` Kevin Wolf
@ 2015-02-24 14:08         ` Markus Armbruster
  0 siblings, 0 replies; 26+ messages in thread
From: Markus Armbruster @ 2015-02-24 14:08 UTC (permalink / raw)
  To: Kevin Wolf; +Cc: Alberto Garcia, qemu-devel, Stefan Hajnoczi

Kevin Wolf <kwolf@redhat.com> writes:

> Am 23.02.2015 um 13:23 hat Alberto Garcia geschrieben:
>> On Fri, Feb 20, 2015 at 03:38:04PM -0700, Eric Blake wrote:
>> 
>> > > +    if (has_top) {
>> > > +        top_bs = bdrv_find_backing_image(bs, top);
>> > > +        if (top_bs == NULL) {
>> > > +            error_set(errp, QERR_TOP_NOT_FOUND, top);
>> > > +            goto out;
>> > > +        }
>> > 
>> > If I understand correctly, bdrv_find_backing_image has problems for
>> > backing nodes that don't have a file name.  Given our shift towards
>> > node names, I think we really want to target node names rather than
>> > file names when specifying which node we will use as the top bound
>> > receiving the stream operations.
>> 
>> Sure I can change that, but note that the 'base' parameter also
>> receives a file name and uses bdrv_find_backing_image, so I guess it
>> makes sense to change it in both sides.
>
> Yes, using the file name for identifying nodes was a mistake. We're
> going to replace all occurrences of it sooner or later. Not sure if
> someone is actively working on this currently - Markus?

Not currently, sorry.  I agree it needs doing.

> For your patch series, I think it's good enough to use node names for
> the new parameter. Converting old parameters is a separate issue.

Makes sense.

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [Qemu-devel] [PATCH 1/3] block: Support streaming to an intermediate layer
  2015-02-20 13:53 ` [Qemu-devel] [PATCH 1/3] block: " Alberto Garcia
@ 2015-03-05 14:04   ` Kevin Wolf
  2015-03-05 14:58     ` Alberto Garcia
  2015-03-12 13:18     ` Alberto Garcia
  0 siblings, 2 replies; 26+ messages in thread
From: Kevin Wolf @ 2015-03-05 14:04 UTC (permalink / raw)
  To: Alberto Garcia; +Cc: qemu-devel, Stefan Hajnoczi

Am 20.02.2015 um 14:53 hat Alberto Garcia geschrieben:
> This adds a new 'top' parameter to stream_start(), that specifies the
> block device where the data will be written. The image is changed to
> read-write mode during the streaming and back to read-only afterwards.
> 
> This also unblocks the stream operation in backing files.
> 
> Signed-off-by: Alberto Garcia <berto@igalia.com>

The bs parameter is now only used for the following things:

1. As the default for top

2. For error handling: Any errors are reported for bs, even though they
   are actually for top. Is this correct behaviour? It looks
   questionable to me.

3. As the BDS that owns the job

My question is whether we can't simply call stream_start() with an
intermediate node as bs instead of introducing a new parameter. I'm not
completely sure about the consequences of 3., i.e. moving ownership of a
block job to some BDS somewhere down the chain, but otherwise it should
be possible and seems cleaner.

Kevin

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [Qemu-devel] [PATCH 2/3] block: Add QMP support for streaming to an intermediate layer
  2015-02-20 13:53 ` [Qemu-devel] [PATCH 2/3] block: Add QMP support for " Alberto Garcia
  2015-02-20 22:38   ` Eric Blake
@ 2015-03-05 14:09   ` Kevin Wolf
  2015-03-05 15:12     ` Alberto Garcia
  2015-03-11 16:38     ` Alberto Garcia
  1 sibling, 2 replies; 26+ messages in thread
From: Kevin Wolf @ 2015-03-05 14:09 UTC (permalink / raw)
  To: Alberto Garcia; +Cc: qemu-devel, Stefan Hajnoczi

Am 20.02.2015 um 14:53 hat Alberto Garcia geschrieben:
> This adds the 'top' parameter to the 'block-stream' QMP command and
> checks that its value is valid before passing it to stream_start().
> 
> Signed-off-by: Alberto Garcia <berto@igalia.com>

> --- a/qapi/block-core.json
> +++ b/qapi/block-core.json
> @@ -1013,6 +1013,9 @@
>  # with query-block-jobs.  The operation can be stopped before it has completed
>  # using the block-job-cancel command.
>  #
> +# Data is copied to the top image, which defaults to the active layer if no other
> +# file is selected.
> +#
>  # If a base file is specified then sectors are not copied from that base file and
>  # its backing chain.  When streaming completes the image file will have the base
>  # file as its backing file.  This can be used to stream a subset of the backing
> @@ -1025,8 +1028,14 @@
>  #
>  # @base:   #optional the common backing file name
>  #
> -# @backing-file: #optional The backing file string to write into the active
> -#                          layer. This filename is not validated.
> +# @top:    #optional Top image, only sectors below this image are streamed
> +#                    into it.
> +#
> +#                    If not specified, the top image is the active layer.
> +#                    (Since 2.3)
> +#
> +# @backing-file: #optional The backing file string to write into the top
> +#                          image. This filename is not validated.
>  #
>  #                          If a pathname string is such that it cannot be
>  #                          resolved by QEMU, that means that subsequent QMP or
> @@ -1052,8 +1061,9 @@
>  # Since: 1.1
>  ##
>  { 'command': 'block-stream',
> -  'data': { 'device': 'str', '*base': 'str', '*backing-file': 'str',
> -            '*speed': 'int', '*on-error': 'BlockdevOnError' } }
> +  'data': { 'device': 'str', '*base': 'str', '*top': 'str',
> +            '*backing-file': 'str', '*speed': 'int',
> +            '*on-error': 'BlockdevOnError' } }

While in patch 1 it would only be nice to avoid the additional argument,
I think we absolutely have to avoid it here in the external interface.

There is no point in specifying some root node as 'device' that isn't
actually involved in the operation; worse, it isn't even possible in the
general case because 'top' could have multiple users/parents.

A better interface would probably be to allow node names for 'device'
and leave everything else as it is.

Kevin

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [Qemu-devel] [PATCH 1/3] block: Support streaming to an intermediate layer
  2015-03-05 14:04   ` Kevin Wolf
@ 2015-03-05 14:58     ` Alberto Garcia
  2015-03-05 15:15       ` Kevin Wolf
  2015-03-12 13:18     ` Alberto Garcia
  1 sibling, 1 reply; 26+ messages in thread
From: Alberto Garcia @ 2015-03-05 14:58 UTC (permalink / raw)
  To: Kevin Wolf; +Cc: qemu-devel, Stefan Hajnoczi

On Thu, Mar 05, 2015 at 03:04:25PM +0100, Kevin Wolf wrote:

> The bs parameter is now only used for the following things:
> 
> 1. As the default for top

Right.

> 2. For error handling: Any errors are reported for bs, even though
>    they are actually for top. Is this correct behaviour? It looks
>    questionable to me.

Hmm... I guess you mean when calling block_job_error_action(), I
probably overlooked that.

> 3. As the BDS that owns the job
> 
> My question is whether we can't simply call stream_start() with an
> intermediate node as bs instead of introducing a new parameter. I'm
> not completely sure about the consequences of 3., i.e. moving
> ownership of a block job to some BDS somewhere down the chain, but
> otherwise it should be possible and seems cleaner.

We can, that was actually the first thing I tried and it does work,
but then I noticed that other parts of the code would need changes,
e.g. qmp_query_block_jobs() must be modified to iterate over all nodes
of each device.

Since I was also not sure about the consequences of such a change I
opted for the conservative approach.

Berto

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [Qemu-devel] [PATCH 2/3] block: Add QMP support for streaming to an intermediate layer
  2015-03-05 14:09   ` Kevin Wolf
@ 2015-03-05 15:12     ` Alberto Garcia
  2015-03-11 16:38     ` Alberto Garcia
  1 sibling, 0 replies; 26+ messages in thread
From: Alberto Garcia @ 2015-03-05 15:12 UTC (permalink / raw)
  To: Kevin Wolf; +Cc: qemu-devel, Stefan Hajnoczi

On Thu, Mar 05, 2015 at 03:09:58PM +0100, Kevin Wolf wrote:

> >  { 'command': 'block-stream',
> > -  'data': { 'device': 'str', '*base': 'str', '*backing-file': 'str',
> > -            '*speed': 'int', '*on-error': 'BlockdevOnError' } }
> > +  'data': { 'device': 'str', '*base': 'str', '*top': 'str',
> > +            '*backing-file': 'str', '*speed': 'int',
> > +            '*on-error': 'BlockdevOnError' } }

> A better interface would probably be to allow node names for
> 'device' and leave everything else as it is.

That's possible, but if the API is the same how does libvirt know if
streaming to an intermediate node is possible or not?

> There is no point in specifying some root node as 'device' that
> isn't actually involved in the operation; worse, it isn't even
> possible in the general case because 'top' could have multiple
> users/parents.

The latter is actually a good point, if 'top' is used by 2+ parents
then it makes sense that the ownership of the block job in in 'top',
not in the root node.

I think I will need to investigate the consequences of that.

Berto

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [Qemu-devel] [PATCH 1/3] block: Support streaming to an intermediate layer
  2015-03-05 14:58     ` Alberto Garcia
@ 2015-03-05 15:15       ` Kevin Wolf
  2015-03-05 15:47         ` Alberto Garcia
  0 siblings, 1 reply; 26+ messages in thread
From: Kevin Wolf @ 2015-03-05 15:15 UTC (permalink / raw)
  To: Alberto Garcia; +Cc: qemu-devel, Stefan Hajnoczi

Am 05.03.2015 um 15:58 hat Alberto Garcia geschrieben:
> On Thu, Mar 05, 2015 at 03:04:25PM +0100, Kevin Wolf wrote:
> 
> > The bs parameter is now only used for the following things:
> > 
> > 1. As the default for top
> 
> Right.
> 
> > 2. For error handling: Any errors are reported for bs, even though
> >    they are actually for top. Is this correct behaviour? It looks
> >    questionable to me.
> 
> Hmm... I guess you mean when calling block_job_error_action(), I
> probably overlooked that.

Yes, that and the check whether iostatus is enabled.

> > 3. As the BDS that owns the job
> > 
> > My question is whether we can't simply call stream_start() with an
> > intermediate node as bs instead of introducing a new parameter. I'm
> > not completely sure about the consequences of 3., i.e. moving
> > ownership of a block job to some BDS somewhere down the chain, but
> > otherwise it should be possible and seems cleaner.
> 
> We can, that was actually the first thing I tried and it does work,
> but then I noticed that other parts of the code would need changes,
> e.g. qmp_query_block_jobs() must be modified to iterate over all nodes
> of each device.
> 
> Since I was also not sure about the consequences of such a change I
> opted for the conservative approach.

I see. I'm worried about the external API. If we let the root node own
the job, we may paint ourselves into a corner with respect to nodes with
multiple users and therefore multiple possible root nodes.

Kevin

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [Qemu-devel] [PATCH 1/3] block: Support streaming to an intermediate layer
  2015-03-05 15:15       ` Kevin Wolf
@ 2015-03-05 15:47         ` Alberto Garcia
  0 siblings, 0 replies; 26+ messages in thread
From: Alberto Garcia @ 2015-03-05 15:47 UTC (permalink / raw)
  To: Kevin Wolf; +Cc: qemu-devel, Stefan Hajnoczi

On Thu, Mar 05, 2015 at 04:15:52PM +0100, Kevin Wolf wrote:

> > > 3. As the BDS that owns the job
> > > 
> > > My question is whether we can't simply call stream_start()
> > > with an intermediate node as bs instead of introducing a new
> > > parameter. I'm not completely sure about the consequences of
> > > 3., i.e. moving ownership of a block job to some BDS somewhere
> > > down the chain, but otherwise it should be possible and seems
> > > cleaner.
> > 
> > Since I was also not sure about the consequences of such a change
> > I opted for the conservative approach.
> 
> I see. I'm worried about the external API. If we let the root node
> own the job, we may paint ourselves into a corner with respect to
> nodes with multiple users and therefore multiple possible root
> nodes.

I'll try to move the job to the intermediate node and see where I can
get then.

Berto

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [Qemu-devel] [PATCH 2/3] block: Add QMP support for streaming to an intermediate layer
  2015-03-05 14:09   ` Kevin Wolf
  2015-03-05 15:12     ` Alberto Garcia
@ 2015-03-11 16:38     ` Alberto Garcia
  2015-03-12 15:45       ` Kevin Wolf
  1 sibling, 1 reply; 26+ messages in thread
From: Alberto Garcia @ 2015-03-11 16:38 UTC (permalink / raw)
  To: Kevin Wolf; +Cc: Jeff Cody, qemu-devel, Stefan Hajnoczi

On Thu, Mar 05, 2015 at 03:09:58PM +0100, Kevin Wolf wrote:

> >  { 'command': 'block-stream',
> > -  'data': { 'device': 'str', '*base': 'str', '*backing-file': 'str',
> > -            '*speed': 'int', '*on-error': 'BlockdevOnError' } }
> > +  'data': { 'device': 'str', '*base': 'str', '*top': 'str',
> > +            '*backing-file': 'str', '*speed': 'int',
> > +            '*on-error': 'BlockdevOnError' } }
> 
> There is no point in specifying some root node as 'device' that
> isn't actually involved in the operation; worse, it isn't even
> possible in the general case because 'top' could have multiple
> users/parents.
> 
> A better interface would probably be to allow node names for
> 'device' and leave everything else as it is.

Ok, I changed the code and it does make the implementation simpler.

One issue that I'm finding is that when we move the block-stream
job to an intermediate node, where the device name is empty, we get
messages like "Device '' is busy".

I can use node names instead, but they are also not guaranteed to
exist. I heard there was a plan to auto-generate names, and searching
the archives I found this patch by Jeff Cody:

http://lists.gnu.org/archive/html/qemu-devel/2014-06/msg04057.html

But it seems that it was never merged?

If we are going to have a scenario where a parameter can mean either a
device or a node name, we need a clear way to identify that node.

Berto

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [Qemu-devel] [PATCH 1/3] block: Support streaming to an intermediate layer
  2015-03-05 14:04   ` Kevin Wolf
  2015-03-05 14:58     ` Alberto Garcia
@ 2015-03-12 13:18     ` Alberto Garcia
  1 sibling, 0 replies; 26+ messages in thread
From: Alberto Garcia @ 2015-03-12 13:18 UTC (permalink / raw)
  To: Kevin Wolf; +Cc: Jeff Cody, qemu-devel, Stefan Hajnoczi, Markus Armbruster

(Ccing Markus and Jeff as suggested)

On Thu, Mar 05, 2015 at 03:04:25PM +0100, Kevin Wolf wrote:

> My question is whether we can't simply call stream_start() with an
> intermediate node as bs instead of introducing a new parameter. I'm
> not completely sure about the consequences of 3., i.e. moving
> ownership of a block job to some BDS somewhere down the chain, but
> otherwise it should be possible and seems cleaner.

I would like to get some feedback about how to properly block jobs
during a block streaming operation to an intermediate node.

So let's suppose we have a graph like this:

[A] <- [B] <- [C] <- [D] <- [E] <- [F]

[F] is the active layer, and to its left is the chain of backing
files.

So if we stream from [B] (base) to [D] (top) this is the result:

[A] <- [B] <- [D] <- [E] <- [F]

The idea is that the block job would be owned by the node that is
receiving the data ([D] in this example) so other operations would
still be allowed in other parts of the chain. I would also update
query-block-jobs so it would return jobs owned by inner nodes, not
just the ones at the root (there's still the issue of how to refer to
those nodes, yesterday I wrote a separate e-mail about that).

During this process we would block all operations involving any node
between base and top ([C] in this example):

 - Streaming from [D] to [F] would be allowed.
 - Streaming from [A], [B] or [C] would not be allowed.
 - Streaming with no base would not be allowed either.

Are those assumptions correct? What would we do if part of the chain
is shared, like in this case?

[A] <- [B] <- [C] <- [D] <- [E] <- [F]
               ^
                \
                 [G] <- [H] <- [I] <- [J]

Berto

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [Qemu-devel] [PATCH 2/3] block: Add QMP support for streaming to an intermediate layer
  2015-03-11 16:38     ` Alberto Garcia
@ 2015-03-12 15:45       ` Kevin Wolf
  2015-03-17 15:00         ` Alberto Garcia
  2015-03-18 12:29         ` Alberto Garcia
  0 siblings, 2 replies; 26+ messages in thread
From: Kevin Wolf @ 2015-03-12 15:45 UTC (permalink / raw)
  To: Alberto Garcia; +Cc: Jeff Cody, qemu-devel, Stefan Hajnoczi

Am 11.03.2015 um 17:38 hat Alberto Garcia geschrieben:
> On Thu, Mar 05, 2015 at 03:09:58PM +0100, Kevin Wolf wrote:
> 
> > >  { 'command': 'block-stream',
> > > -  'data': { 'device': 'str', '*base': 'str', '*backing-file': 'str',
> > > -            '*speed': 'int', '*on-error': 'BlockdevOnError' } }
> > > +  'data': { 'device': 'str', '*base': 'str', '*top': 'str',
> > > +            '*backing-file': 'str', '*speed': 'int',
> > > +            '*on-error': 'BlockdevOnError' } }
> > 
> > There is no point in specifying some root node as 'device' that
> > isn't actually involved in the operation; worse, it isn't even
> > possible in the general case because 'top' could have multiple
> > users/parents.
> > 
> > A better interface would probably be to allow node names for
> > 'device' and leave everything else as it is.
> 
> Ok, I changed the code and it does make the implementation simpler.
> 
> One issue that I'm finding is that when we move the block-stream
> job to an intermediate node, where the device name is empty, we get
> messages like "Device '' is busy".
> 
> I can use node names instead, but they are also not guaranteed to
> exist. I heard there was a plan to auto-generate names, and searching
> the archives I found this patch by Jeff Cody:
> 
> http://lists.gnu.org/archive/html/qemu-devel/2014-06/msg04057.html
> 
> But it seems that it was never merged?
> 
> If we are going to have a scenario where a parameter can mean either a
> device or a node name, we need a clear way to identify that node.

Yes, autogenerated node names were not merged yet. And if they were,
they wouldn't make for very good error messages either.

My first thought was "then make it 'Source/Target device is busy'
without mentioning any name". In the context of any given command, it
would still be clear which BDS is meant. In fact, I have argued before
that mentioning the device name in an error to a command that refers to
a specific device is redundant and should be avoided.

The problem here is that it's not stream_start() that generates the
error, but block_job_create(), which doesn't know which role it's bs
argument has for the block job. So it can't decide whether to say
"source device", "target device" or something completely different.

On the other hand, having an owner BDS for a block job is considered a
mistake meanwhile because there is no clear rule which BDS to pick when
the job involves more than one. In fact, without tying a job to a BDS,
it could be just a background job instead of specifically a block job.
I'm not saying that this conversion should be done now, but just to give
you some background about the direction we're generally taking.

So in the light of this, it might be reasonable to move the bs->job
check with the error check to the callers.

Another, less invasive, option would be to replace the error_set() call
in block_job_create() by error_copy(bs->job->blocker). We're not really
op blockers code here, so might be somewhat ugly, but I think eventually
the check is going to be fully replaced by op blockers anyway, so using
the same message now could make sense.

Jeff, as you are working on op blockers, do you have an opinion on this?

Kevin

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [Qemu-devel] [PATCH 2/3] block: Add QMP support for streaming to an intermediate layer
  2015-03-12 15:45       ` Kevin Wolf
@ 2015-03-17 15:00         ` Alberto Garcia
  2015-03-17 15:22           ` Eric Blake
  2015-03-17 15:28           ` Kevin Wolf
  2015-03-18 12:29         ` Alberto Garcia
  1 sibling, 2 replies; 26+ messages in thread
From: Alberto Garcia @ 2015-03-17 15:00 UTC (permalink / raw)
  To: Kevin Wolf; +Cc: Jeff Cody, qemu-devel, Stefan Hajnoczi

On Thu, Mar 12, 2015 at 04:45:17PM +0100, Kevin Wolf wrote:

> > One issue that I'm finding is that when we move the block-stream
> > job to an intermediate node, where the device name is empty, we
> > get messages like "Device '' is busy".
> > 
> > I can use node names instead, but they are also not guaranteed to
> > exist.

> My first thought was "then make it 'Source/Target device is busy'
> without mentioning any name". In the context of any given command,
> it would still be clear which BDS is meant.

There's a related problem that I discussed on IRC with Kevin and Eric
but that I think needs further deliberation.

The BlockJobInfo object returned by query-block-jobs identifies the
owner of the job using the 'device' field. If jobs can be in any
intermediate node then we cannot simply rely on the device name. We
also cannot simply replace it with a node name because 1) it might not
exist and 2) existing libvirt versions expect a device name.

So I see several alternatives:

   a) Add a new 'node-name' field to BlockJobInfo. It's simple,
      'device' keeps the current semantics so we don't break
      compatibility.

   b) Make 'device' return the device name as it currently does, or
      the node name if it's not present. The main problem is that
      libvirt cannot easily know what to expect. On the other hand
      since both device and node-name share the same namespace the
      returned value is not ambiguous.

   c) Make 'device' return the same name that was used when the job
      was created. It's maybe simpler for libvirt than option b),
      but it would require us to remember how the job was created,
      possibly in the BlockJob structure. This is personally my least
      favorite option.

   d) Create a new query command that returns a different data
      structure.

I would opt for a) or b), but I'd like to hear if you have a different
opinion.

Regarding the 'block-stream' command, I think the current option to
reuse the 'device' parameter to refer to either a device or a node
name is ok, so I'll go forward with that one.

> On the other hand, having an owner BDS for a block job is considered
> a mistake meanwhile because there is no clear rule which BDS to pick
> when the job involves more than one.

Does it really matter as long as all the operations blockers are
correctly set?

> In fact, without tying a job to a BDS, it could be just a background
> job instead of specifically a block job.

I don't understand what you mean by this.

Berto

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [Qemu-devel] [PATCH 2/3] block: Add QMP support for streaming to an intermediate layer
  2015-03-17 15:00         ` Alberto Garcia
@ 2015-03-17 15:22           ` Eric Blake
  2015-03-17 15:40             ` Alberto Garcia
  2015-03-17 15:28           ` Kevin Wolf
  1 sibling, 1 reply; 26+ messages in thread
From: Eric Blake @ 2015-03-17 15:22 UTC (permalink / raw)
  To: Alberto Garcia, Kevin Wolf; +Cc: Jeff Cody, qemu-devel, Stefan Hajnoczi

[-- Attachment #1: Type: text/plain, Size: 3445 bytes --]

On 03/17/2015 09:00 AM, Alberto Garcia wrote:

> The BlockJobInfo object returned by query-block-jobs identifies the
> owner of the job using the 'device' field. If jobs can be in any
> intermediate node then we cannot simply rely on the device name. We
> also cannot simply replace it with a node name because 1) it might not
> exist and 2) existing libvirt versions expect a device name.
> 
> So I see several alternatives:
> 
>    a) Add a new 'node-name' field to BlockJobInfo. It's simple,
>       'device' keeps the current semantics so we don't break
>       compatibility.
> 
>    b) Make 'device' return the device name as it currently does, or
>       the node name if it's not present. The main problem is that
>       libvirt cannot easily know what to expect. On the other hand
>       since both device and node-name share the same namespace the
>       returned value is not ambiguous.

If libvirt is new enough to create the block job via node name instead
of device name, then it is also new enough to expect a node name instead
of device name in the returned job information.  That is, I'm okay with
either:

old libvirt: creates job using 'device' as a device name, status about
the job is reported with 'device' as the device name

new libvirt: creates job using 'device' as a node name, status about the
job is reported with 'device' as the node name

(no new parameter, 'device' is used on both creation and query as a
[poorly-named] device-or-node, back-compat is obvious)


or with:

old libvirt: creates job using 'device' as a device name, status about
the job is reported with 'device' as the device name

new libvirt: creates job using 'node' as a node name, status about the
job is reported with 'node' as the node name

(new parameter; old usage remains the same, and new usage has proper
naming, but now we have to track which name is in use)


> 
>    c) Make 'device' return the same name that was used when the job
>       was created. It's maybe simpler for libvirt than option b),
>       but it would require us to remember how the job was created,
>       possibly in the BlockJob structure. This is personally my least
>       favorite option.

If you're going to reuse 'device' on the creation, then reuse it on the
reporting.

> 
>    d) Create a new query command that returns a different data
>       structure.
> 
> I would opt for a) or b), but I'd like to hear if you have a different
> opinion.

I'm kind of leaning towards b).

> 
> Regarding the 'block-stream' command, I think the current option to
> reuse the 'device' parameter to refer to either a device or a node
> name is ok, so I'll go forward with that one.

Particularly if we don't have two parameters for starting the job, then
we don't need two parameters for reporting it.

> 
>> On the other hand, having an owner BDS for a block job is considered
>> a mistake meanwhile because there is no clear rule which BDS to pick
>> when the job involves more than one.
> 
> Does it really matter as long as all the operations blockers are
> correctly set?
> 
>> In fact, without tying a job to a BDS, it could be just a background
>> job instead of specifically a block job.
> 
> I don't understand what you mean by this.
> 
> Berto
> 
> 

-- 
Eric Blake   eblake redhat com    +1-919-301-3266
Libvirt virtualization library http://libvirt.org


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 604 bytes --]

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [Qemu-devel] [PATCH 2/3] block: Add QMP support for streaming to an intermediate layer
  2015-03-17 15:00         ` Alberto Garcia
  2015-03-17 15:22           ` Eric Blake
@ 2015-03-17 15:28           ` Kevin Wolf
  1 sibling, 0 replies; 26+ messages in thread
From: Kevin Wolf @ 2015-03-17 15:28 UTC (permalink / raw)
  To: Alberto Garcia; +Cc: Jeff Cody, qemu-devel, Stefan Hajnoczi

Am 17.03.2015 um 16:00 hat Alberto Garcia geschrieben:
> On Thu, Mar 12, 2015 at 04:45:17PM +0100, Kevin Wolf wrote:
> 
> > > One issue that I'm finding is that when we move the block-stream
> > > job to an intermediate node, where the device name is empty, we
> > > get messages like "Device '' is busy".
> > > 
> > > I can use node names instead, but they are also not guaranteed to
> > > exist.
> 
> > My first thought was "then make it 'Source/Target device is busy'
> > without mentioning any name". In the context of any given command,
> > it would still be clear which BDS is meant.
> 
> There's a related problem that I discussed on IRC with Kevin and Eric
> but that I think needs further deliberation.
> 
> The BlockJobInfo object returned by query-block-jobs identifies the
> owner of the job using the 'device' field. If jobs can be in any
> intermediate node then we cannot simply rely on the device name. We
> also cannot simply replace it with a node name because 1) it might not
> exist and 2) existing libvirt versions expect a device name.
> 
> So I see several alternatives:
> 
>    a) Add a new 'node-name' field to BlockJobInfo. It's simple,
>       'device' keeps the current semantics so we don't break
>       compatibility.
> 
>    b) Make 'device' return the device name as it currently does, or
>       the node name if it's not present. The main problem is that
>       libvirt cannot easily know what to expect. On the other hand
>       since both device and node-name share the same namespace the
>       returned value is not ambiguous.
> 
>    c) Make 'device' return the same name that was used when the job
>       was created. It's maybe simpler for libvirt than option b),
>       but it would require us to remember how the job was created,
>       possibly in the BlockJob structure. This is personally my least
>       favorite option.
> 
>    d) Create a new query command that returns a different data
>       structure.
> 
> I would opt for a) or b), but I'd like to hear if you have a different
> opinion.

e) Considering that we want to generalise block jobs into background
   jobs, make 'device' optional and fill it only if the owner BDS has a
   device name. If not, the field is omitted.

If e) is possible for libvirt (Eric?), I would vote for that. Otherwise,
I think b) would be nicest.

> Regarding the 'block-stream' command, I think the current option to
> reuse the 'device' parameter to refer to either a device or a node
> name is ok, so I'll go forward with that one.

Great!

> > On the other hand, having an owner BDS for a block job is considered
> > a mistake meanwhile because there is no clear rule which BDS to pick
> > when the job involves more than one.
> 
> Does it really matter as long as all the operations blockers are
> correctly set?

No, it doesn't actively break anything, it's just a little arbitrary.

> > In fact, without tying a job to a BDS, it could be just a background
> > job instead of specifically a block job.
> 
> I don't understand what you mean by this.

We could have other long-running operations in the background that could
make use of the same infrastructure if it weren't tied to block devices
(for no reason, the actual block job infrastructure doesn't need
anything block-like). One example that came up in the past is live
migration.

This is not directly related to your series, but some background to keep
in mind while evolving the external APIs.

Kevin

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [Qemu-devel] [PATCH 2/3] block: Add QMP support for streaming to an intermediate layer
  2015-03-17 15:22           ` Eric Blake
@ 2015-03-17 15:40             ` Alberto Garcia
  0 siblings, 0 replies; 26+ messages in thread
From: Alberto Garcia @ 2015-03-17 15:40 UTC (permalink / raw)
  To: Eric Blake; +Cc: Kevin Wolf, Jeff Cody, qemu-devel, Stefan Hajnoczi

On Tue, Mar 17, 2015 at 09:22:55AM -0600, Eric Blake wrote:

> > The BlockJobInfo object returned by query-block-jobs identifies
> > the owner of the job using the 'device' field. If jobs can be in
> > any intermediate node then we cannot simply rely on the device
> > name. We also cannot simply replace it with a node name because
> > 1) it might not exist and 2) existing libvirt versions expect a
> > device name.
> > 
> > So I see several alternatives:
> > 
> >    a) Add a new 'node-name' field to BlockJobInfo. It's simple,
> >       'device' keeps the current semantics so we don't break
> >       compatibility.
> > 
> >    b) Make 'device' return the device name as it currently does,
> >       or the node name if it's not present. The main problem is
> >       that libvirt cannot easily know what to expect. On the
> >       other hand since both device and node-name share the same
> >       namespace the returned value is not ambiguous.
> 
> If libvirt is new enough to create the block job via node name
> instead of device name, then it is also new enough to expect a node
> name instead of device name in the returned job information.

That is clear.

> >    c) Make 'device' return the same name that was used when
> >       the job was created. It's maybe simpler for libvirt than
> >       option b), but it would require us to remember how the job
> >       was created, possibly in the BlockJob structure. This is
> >       personally my least favorite option.
> 
> If you're going to reuse 'device' on the creation, then reuse it on
> the reporting.

The problem with c) is that the name is only needed early in the
operation to get a BlockDriverState, we don't use it afterwards.

So returning the same name that was used to request the operation
would force us to keep that information internally, because in the
case of a job owned by a BlockDriverState with both device name
'virtio0' and node name 'node0' it's otherwise impossible to know if
the job was requested using 'virtio0' or 'node0'.

> >    d) Create a new query command that returns a different data
> >       structure.
> > 
> > I would opt for a) or b), but I'd like to hear if you have a
> > different opinion.
> 
> I'm kind of leaning towards b).

But note that in the example I just mentioned, if you create a job
using 'node0' to refer to the node, you would still get 'virtio0' in
return, and not 'node0'.

With b) you only get 'node0' if the node does not have a device name.

Berto

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [Qemu-devel] [PATCH 2/3] block: Add QMP support for streaming to an intermediate layer
  2015-03-12 15:45       ` Kevin Wolf
  2015-03-17 15:00         ` Alberto Garcia
@ 2015-03-18 12:29         ` Alberto Garcia
  1 sibling, 0 replies; 26+ messages in thread
From: Alberto Garcia @ 2015-03-18 12:29 UTC (permalink / raw)
  To: Kevin Wolf; +Cc: Jeff Cody, qemu-devel, Stefan Hajnoczi

On Thu, Mar 12, 2015 at 04:45:17PM +0100, Kevin Wolf wrote:

> > One issue that I'm finding is that when we move the block-stream
> > job to an intermediate node, where the device name is empty, we
> > get messages like "Device '' is busy".

> My first thought was "then make it 'Source/Target device is busy'
> without mentioning any name". In the context of any given command,
> it would still be clear which BDS is meant. In fact, I have argued
> before that mentioning the device name in an error to a command that
> refers to a specific device is redundant and should be avoided.
> 
> The problem here is that it's not stream_start() that generates the
> error, but block_job_create(), which doesn't know which role it's bs
> argument has for the block job. So it can't decide whether to say
> "source device", "target device" or something completely different.

The problem is actually not there. The error message generated by
block_job_create() is "block device is in use by block job: stream".

It's bdrv_op_is_blocked() that adds the extra "Device '' is busy".

            error_setg(errp, "Device '%s' is busy: %s",
                       bdrv_get_device_name(bs),
                       error_get_pretty(blocker->reason));

I can use the same approach as in the BlockJobInfo case and fall back
to the node name if the device name is empty, but the problem is that
bdrv_get_device_name() is used all over the place, so this probably
needs a more general solution.

Even at the moment the backing blocker set by bdrv_set_backing_hd()
has problems:

        error_setg(&bs->backing_blocker,
                   "device is used as backing hd of '%s'",
                   bdrv_get_device_name(bs));

This only works if 'bs' is a root node, but if you try to perform an
operation on the backing image of another backing image, you get a
"device is used as backing hd of ''".

Error messages aside, I would probably need to check all uses of
bdrv_get_device_name() because there could be more surprises if the
node is not at the root.

Berto

^ permalink raw reply	[flat|nested] 26+ messages in thread

end of thread, other threads:[~2015-03-18 12:29 UTC | newest]

Thread overview: 26+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-02-20 13:53 [Qemu-devel] [PATCH 0/3] Support streaming to an intermediate layer Alberto Garcia
2015-02-20 13:53 ` [Qemu-devel] [PATCH 1/3] block: " Alberto Garcia
2015-03-05 14:04   ` Kevin Wolf
2015-03-05 14:58     ` Alberto Garcia
2015-03-05 15:15       ` Kevin Wolf
2015-03-05 15:47         ` Alberto Garcia
2015-03-12 13:18     ` Alberto Garcia
2015-02-20 13:53 ` [Qemu-devel] [PATCH 2/3] block: Add QMP support for " Alberto Garcia
2015-02-20 22:38   ` Eric Blake
2015-02-23 12:23     ` Alberto Garcia
2015-02-23 13:04       ` Kevin Wolf
2015-02-24 14:08         ` Markus Armbruster
2015-03-05 14:09   ` Kevin Wolf
2015-03-05 15:12     ` Alberto Garcia
2015-03-11 16:38     ` Alberto Garcia
2015-03-12 15:45       ` Kevin Wolf
2015-03-17 15:00         ` Alberto Garcia
2015-03-17 15:22           ` Eric Blake
2015-03-17 15:40             ` Alberto Garcia
2015-03-17 15:28           ` Kevin Wolf
2015-03-18 12:29         ` Alberto Garcia
2015-02-20 13:53 ` [Qemu-devel] [PATCH 3/3] docs: Document how to stream " Alberto Garcia
2015-02-20 17:34 ` [Qemu-devel] [PATCH 0/3] Support streaming " Eric Blake
2015-02-20 19:05   ` Alberto Garcia
2015-02-20 22:49     ` Eric Blake
2015-02-22 15:08       ` Alberto Garcia

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.