All of lore.kernel.org
 help / color / mirror / Atom feed
* [Qemu-devel] [PATCH] block: Close a BlockDriverState completely even when bs->drv is NULL
@ 2017-11-06 14:53 Alberto Garcia
  2017-11-07 22:29 ` Eric Blake
                   ` (3 more replies)
  0 siblings, 4 replies; 8+ messages in thread
From: Alberto Garcia @ 2017-11-06 14:53 UTC (permalink / raw)
  To: qemu-devel; +Cc: Alberto Garcia, qemu-block, Max Reitz, Kevin Wolf

bdrv_close() skips much of its logic when bs->drv is NULL. This is
fine when we're closing a BlockDriverState that has just been created
(because e.g the initialization process failed), but it's not enough
in other cases.

For example, when a valid qcow2 image is found to be corrupted then
QEMU marks it as such in the file header and then sets bs->drv to
NULL in order to make the BlockDriverState unusable. When that BDS is
later closed then many of its data structures are not freed (leaking
their memory) and none of its children are detached. This results in
bdrv_close_all() failing to close all BDSs and making this assertion
fail when QEMU is being shut down:

   bdrv_close_all: Assertion `QTAILQ_EMPTY(&all_bdrv_states)' failed.

This patch makes bdrv_close() do the full uninitialization process
in all cases. This fixes the problem with corrupted images and still
works fine with freshly created BDSs.

Signed-off-by: Alberto Garcia <berto@igalia.com>
---
 block.c                    | 57 +++++++++++++++++++++++-----------------------
 tests/qemu-iotests/060     | 13 +++++++++++
 tests/qemu-iotests/060.out | 12 ++++++++++
 3 files changed, 53 insertions(+), 29 deletions(-)

diff --git a/block.c b/block.c
index 684cb018da..389a6edad2 100644
--- a/block.c
+++ b/block.c
@@ -3179,6 +3179,7 @@ void bdrv_reopen_abort(BDRVReopenState *reopen_state)
 static void bdrv_close(BlockDriverState *bs)
 {
     BdrvAioNotifier *ban, *ban_next;
+    BdrvChild *child, *next;
 
     assert(!bs->job);
     assert(!bs->refcnt);
@@ -3188,43 +3189,41 @@ static void bdrv_close(BlockDriverState *bs)
     bdrv_drain(bs); /* in case flush left pending I/O */
 
     if (bs->drv) {
-        BdrvChild *child, *next;
-
         bs->drv->bdrv_close(bs);
         bs->drv = NULL;
+    }
 
-        bdrv_set_backing_hd(bs, NULL, &error_abort);
+    bdrv_set_backing_hd(bs, NULL, &error_abort);
 
-        if (bs->file != NULL) {
-            bdrv_unref_child(bs, bs->file);
-            bs->file = NULL;
-        }
+    if (bs->file != NULL) {
+        bdrv_unref_child(bs, bs->file);
+        bs->file = NULL;
+    }
 
-        QLIST_FOREACH_SAFE(child, &bs->children, next, next) {
-            /* TODO Remove bdrv_unref() from drivers' close function and use
-             * bdrv_unref_child() here */
-            if (child->bs->inherits_from == bs) {
-                child->bs->inherits_from = NULL;
-            }
-            bdrv_detach_child(child);
+    QLIST_FOREACH_SAFE(child, &bs->children, next, next) {
+        /* TODO Remove bdrv_unref() from drivers' close function and use
+         * bdrv_unref_child() here */
+        if (child->bs->inherits_from == bs) {
+            child->bs->inherits_from = NULL;
         }
-
-        g_free(bs->opaque);
-        bs->opaque = NULL;
-        atomic_set(&bs->copy_on_read, 0);
-        bs->backing_file[0] = '\0';
-        bs->backing_format[0] = '\0';
-        bs->total_sectors = 0;
-        bs->encrypted = false;
-        bs->sg = false;
-        QDECREF(bs->options);
-        QDECREF(bs->explicit_options);
-        bs->options = NULL;
-        bs->explicit_options = NULL;
-        QDECREF(bs->full_open_options);
-        bs->full_open_options = NULL;
+        bdrv_detach_child(child);
     }
 
+    g_free(bs->opaque);
+    bs->opaque = NULL;
+    atomic_set(&bs->copy_on_read, 0);
+    bs->backing_file[0] = '\0';
+    bs->backing_format[0] = '\0';
+    bs->total_sectors = 0;
+    bs->encrypted = false;
+    bs->sg = false;
+    QDECREF(bs->options);
+    QDECREF(bs->explicit_options);
+    bs->options = NULL;
+    bs->explicit_options = NULL;
+    QDECREF(bs->full_open_options);
+    bs->full_open_options = NULL;
+
     bdrv_release_named_dirty_bitmaps(bs);
     assert(QLIST_EMPTY(&bs->dirty_bitmaps));
 
diff --git a/tests/qemu-iotests/060 b/tests/qemu-iotests/060
index 66a8fa4aea..1cebe4c775 100755
--- a/tests/qemu-iotests/060
+++ b/tests/qemu-iotests/060
@@ -291,6 +291,19 @@ _make_test_img 64M
 poke_file "$TEST_IMG" "48"                "\x00\x00\x00\x00\x00\x00\x00\x00"
 $QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io
 
+echo
+echo "=== Testing the QEMU shutdown with a corrupted image ==="
+echo
+_make_test_img 64M
+poke_file "$TEST_IMG" "$rt_offset"        "\x00\x00\x00\x00\x00\x00\x00\x00"
+echo "{'execute': 'qmp_capabilities'}
+      {'execute': 'human-monitor-command',
+       'arguments': {'command-line': 'qemu-io drive \"write 0 512\"'}}
+      {'execute': 'quit'}" \
+    | $QEMU -qmp stdio -nographic -nodefaults \
+            -drive if=none,node-name=drive,file="$TEST_IMG",driver=qcow2 \
+    | _filter_qmp | _filter_qemu_io
+
 # success, all done
 echo "*** done"
 rm -f $seq.full
diff --git a/tests/qemu-iotests/060.out b/tests/qemu-iotests/060.out
index cfd78f87a9..a5093fab8e 100644
--- a/tests/qemu-iotests/060.out
+++ b/tests/qemu-iotests/060.out
@@ -220,4 +220,16 @@ can't open device TEST_DIR/t.IMGFMT: Image does not contain a reference count ta
 Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864
 qcow2: Marking image as corrupt: Preventing invalid allocation of L2 table at offset 0; further corruption events will be suppressed
 write failed: Input/output error
+
+=== Testing the QEMU shutdown with a corrupted image ===
+
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864
+qcow2: Marking image as corrupt: Preventing invalid write on metadata (overlaps with refcount table); further corruption events will be suppressed
+QMP_VERSION
+{"return": {}}
+{"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "BLOCK_IMAGE_CORRUPTED", "data": {"device": "none0", "msg": "Preventing invalid write on metadata (overlaps with refcount table)", "offset": 65536, "node-name": "drive", "fatal": true, "size": 65536}}
+write failed: Input/output error
+{"return": ""}
+{"return": {}}
+{"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false}}
 *** done
-- 
2.11.0

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [Qemu-devel] [PATCH] block: Close a BlockDriverState completely even when bs->drv is NULL
  2017-11-06 14:53 [Qemu-devel] [PATCH] block: Close a BlockDriverState completely even when bs->drv is NULL Alberto Garcia
@ 2017-11-07 22:29 ` Eric Blake
  2017-11-08 14:33 ` Kevin Wolf
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 8+ messages in thread
From: Eric Blake @ 2017-11-07 22:29 UTC (permalink / raw)
  To: Alberto Garcia, qemu-devel; +Cc: Kevin Wolf, qemu-block, Max Reitz

[-- Attachment #1: Type: text/plain, Size: 1495 bytes --]

On 11/06/2017 08:53 AM, Alberto Garcia wrote:
> bdrv_close() skips much of its logic when bs->drv is NULL. This is
> fine when we're closing a BlockDriverState that has just been created
> (because e.g the initialization process failed), but it's not enough
> in other cases.
> 
> For example, when a valid qcow2 image is found to be corrupted then
> QEMU marks it as such in the file header and then sets bs->drv to
> NULL in order to make the BlockDriverState unusable. When that BDS is
> later closed then many of its data structures are not freed (leaking
> their memory) and none of its children are detached. This results in
> bdrv_close_all() failing to close all BDSs and making this assertion
> fail when QEMU is being shut down:
> 
>    bdrv_close_all: Assertion `QTAILQ_EMPTY(&all_bdrv_states)' failed.
> 
> This patch makes bdrv_close() do the full uninitialization process
> in all cases. This fixes the problem with corrupted images and still
> works fine with freshly created BDSs.
> 
> Signed-off-by: Alberto Garcia <berto@igalia.com>
> ---
>  block.c                    | 57 +++++++++++++++++++++++-----------------------
>  tests/qemu-iotests/060     | 13 +++++++++++
>  tests/qemu-iotests/060.out | 12 ++++++++++
>  3 files changed, 53 insertions(+), 29 deletions(-)
> 

Reviewed-by: Eric Blake <eblake@redhat.com>

-- 
Eric Blake, Principal Software Engineer
Red Hat, Inc.           +1-919-301-3266
Virtualization:  qemu.org | libvirt.org


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 619 bytes --]

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [Qemu-devel] [PATCH] block: Close a BlockDriverState completely even when bs->drv is NULL
  2017-11-06 14:53 [Qemu-devel] [PATCH] block: Close a BlockDriverState completely even when bs->drv is NULL Alberto Garcia
  2017-11-07 22:29 ` Eric Blake
@ 2017-11-08 14:33 ` Kevin Wolf
  2017-11-13 14:03   ` Alberto Garcia
  2017-11-17 16:14 ` Max Reitz
  2017-11-20 20:50 ` Max Reitz
  3 siblings, 1 reply; 8+ messages in thread
From: Kevin Wolf @ 2017-11-08 14:33 UTC (permalink / raw)
  To: Alberto Garcia; +Cc: qemu-devel, qemu-block, Max Reitz

Am 06.11.2017 um 15:53 hat Alberto Garcia geschrieben:
> bdrv_close() skips much of its logic when bs->drv is NULL. This is
> fine when we're closing a BlockDriverState that has just been created
> (because e.g the initialization process failed), but it's not enough
> in other cases.
> 
> For example, when a valid qcow2 image is found to be corrupted then
> QEMU marks it as such in the file header and then sets bs->drv to
> NULL in order to make the BlockDriverState unusable. When that BDS is
> later closed then many of its data structures are not freed (leaking
> their memory) and none of its children are detached. This results in
> bdrv_close_all() failing to close all BDSs and making this assertion
> fail when QEMU is being shut down:
> 
>    bdrv_close_all: Assertion `QTAILQ_EMPTY(&all_bdrv_states)' failed.
> 
> This patch makes bdrv_close() do the full uninitialization process
> in all cases. This fixes the problem with corrupted images and still
> works fine with freshly created BDSs.
> 
> Signed-off-by: Alberto Garcia <berto@igalia.com>

This doesn't apply cleanly in the test case. Does it depend on your
qcow2 fixes?

Also, while I think the change makes sense, it's also clear that we're
trying to fix up an inconsistent state here. Maybe we could also improve
the state that block drivers leave behind when marking an image as
corrupt. Just setting bs->drv = NULL means that at least any internal
data structures will not get cleaned up.

On the other hand, we can't just call bdrv_close() from a failing
request because closing requires that we drain the request first. Maybe
it would be possible to call drv->bdrv_close() with a BH or something.

Kevin

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [Qemu-devel] [PATCH] block: Close a BlockDriverState completely even when bs->drv is NULL
  2017-11-08 14:33 ` Kevin Wolf
@ 2017-11-13 14:03   ` Alberto Garcia
  0 siblings, 0 replies; 8+ messages in thread
From: Alberto Garcia @ 2017-11-13 14:03 UTC (permalink / raw)
  To: Kevin Wolf; +Cc: qemu-devel, qemu-block, Max Reitz

On Wed 08 Nov 2017 03:33:54 PM CET, Kevin Wolf wrote:
>> This patch makes bdrv_close() do the full uninitialization process in
>> all cases. This fixes the problem with corrupted images and still
>> works fine with freshly created BDSs.
>> 
>> Signed-off-by: Alberto Garcia <berto@igalia.com>
>
> This doesn't apply cleanly in the test case. Does it depend on your
> qcow2 fixes?

Ah, yes. It doesn't have any semantic dependency though, so as long as
you add the new tests to qemu-iotests/060 it will work.

I can resend it properly rebased.

> Also, while I think the change makes sense, it's also clear that we're
> trying to fix up an inconsistent state here. Maybe we could also
> improve the state that block drivers leave behind when marking an
> image as corrupt. Just setting bs->drv = NULL means that at least any
> internal data structures will not get cleaned up.
>
> On the other hand, we can't just call bdrv_close() from a failing
> request because closing requires that we drain the request
> first. Maybe it would be possible to call drv->bdrv_close() with a BH
> or something.

I'm not sure if I'm following you here, where would you add the
bottom-half and what kind of problem it would solve?

Berto

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [Qemu-devel] [PATCH] block: Close a BlockDriverState completely even when bs->drv is NULL
  2017-11-06 14:53 [Qemu-devel] [PATCH] block: Close a BlockDriverState completely even when bs->drv is NULL Alberto Garcia
  2017-11-07 22:29 ` Eric Blake
  2017-11-08 14:33 ` Kevin Wolf
@ 2017-11-17 16:14 ` Max Reitz
  2017-11-17 16:19   ` Alberto Garcia
  2017-11-20 20:50 ` Max Reitz
  3 siblings, 1 reply; 8+ messages in thread
From: Max Reitz @ 2017-11-17 16:14 UTC (permalink / raw)
  To: Alberto Garcia, qemu-devel; +Cc: qemu-block, Kevin Wolf

[-- Attachment #1: Type: text/plain, Size: 1373 bytes --]

On 2017-11-06 15:53, Alberto Garcia wrote:
> bdrv_close() skips much of its logic when bs->drv is NULL. This is
> fine when we're closing a BlockDriverState that has just been created
> (because e.g the initialization process failed), but it's not enough
> in other cases.
> 
> For example, when a valid qcow2 image is found to be corrupted then
> QEMU marks it as such in the file header and then sets bs->drv to
> NULL in order to make the BlockDriverState unusable. When that BDS is
> later closed then many of its data structures are not freed (leaking
> their memory) and none of its children are detached. This results in
> bdrv_close_all() failing to close all BDSs and making this assertion
> fail when QEMU is being shut down:
> 
>    bdrv_close_all: Assertion `QTAILQ_EMPTY(&all_bdrv_states)' failed.
> 
> This patch makes bdrv_close() do the full uninitialization process
> in all cases. This fixes the problem with corrupted images and still
> works fine with freshly created BDSs.
> 
> Signed-off-by: Alberto Garcia <berto@igalia.com>
> ---
>  block.c                    | 57 +++++++++++++++++++++++-----------------------
>  tests/qemu-iotests/060     | 13 +++++++++++
>  tests/qemu-iotests/060.out | 12 ++++++++++
>  3 files changed, 53 insertions(+), 29 deletions(-)

Sooo...  What's the exact status of this patch? :-)

Max


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 512 bytes --]

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [Qemu-devel] [PATCH] block: Close a BlockDriverState completely even when bs->drv is NULL
  2017-11-17 16:14 ` Max Reitz
@ 2017-11-17 16:19   ` Alberto Garcia
  2017-11-17 17:03     ` Kevin Wolf
  0 siblings, 1 reply; 8+ messages in thread
From: Alberto Garcia @ 2017-11-17 16:19 UTC (permalink / raw)
  To: Max Reitz, qemu-devel; +Cc: qemu-block, Kevin Wolf

On Fri 17 Nov 2017 05:14:08 PM CET, Max Reitz wrote:
> On 2017-11-06 15:53, Alberto Garcia wrote:
>> bdrv_close() skips much of its logic when bs->drv is NULL. This is
>> fine when we're closing a BlockDriverState that has just been created
>> (because e.g the initialization process failed), but it's not enough
>> in other cases.
>> 
>> For example, when a valid qcow2 image is found to be corrupted then
>> QEMU marks it as such in the file header and then sets bs->drv to
>> NULL in order to make the BlockDriverState unusable. When that BDS is
>> later closed then many of its data structures are not freed (leaking
>> their memory) and none of its children are detached. This results in
>> bdrv_close_all() failing to close all BDSs and making this assertion
>> fail when QEMU is being shut down:
>> 
>>    bdrv_close_all: Assertion `QTAILQ_EMPTY(&all_bdrv_states)' failed.
>> 
>> This patch makes bdrv_close() do the full uninitialization process
>> in all cases. This fixes the problem with corrupted images and still
>> works fine with freshly created BDSs.
>> 
>> Signed-off-by: Alberto Garcia <berto@igalia.com>
>> ---
>>  block.c                    | 57 +++++++++++++++++++++++-----------------------
>>  tests/qemu-iotests/060     | 13 +++++++++++
>>  tests/qemu-iotests/060.out | 12 ++++++++++
>>  3 files changed, 53 insertions(+), 29 deletions(-)
>
> Sooo...  What's the exact status of this patch? :-)

I can resend it rebased on top of your block branch, but I'm fine if you
merge the iotest manually (it's a trivial merge).

I'm not sure about Kevin's comments though, it wasn't clear to me if
he's fine if we apply this patch or not.

Berto

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [Qemu-devel] [PATCH] block: Close a BlockDriverState completely even when bs->drv is NULL
  2017-11-17 16:19   ` Alberto Garcia
@ 2017-11-17 17:03     ` Kevin Wolf
  0 siblings, 0 replies; 8+ messages in thread
From: Kevin Wolf @ 2017-11-17 17:03 UTC (permalink / raw)
  To: Alberto Garcia; +Cc: Max Reitz, qemu-devel, qemu-block

Am 17.11.2017 um 17:19 hat Alberto Garcia geschrieben:
> On Fri 17 Nov 2017 05:14:08 PM CET, Max Reitz wrote:
> > On 2017-11-06 15:53, Alberto Garcia wrote:
> >> bdrv_close() skips much of its logic when bs->drv is NULL. This is
> >> fine when we're closing a BlockDriverState that has just been created
> >> (because e.g the initialization process failed), but it's not enough
> >> in other cases.
> >> 
> >> For example, when a valid qcow2 image is found to be corrupted then
> >> QEMU marks it as such in the file header and then sets bs->drv to
> >> NULL in order to make the BlockDriverState unusable. When that BDS is
> >> later closed then many of its data structures are not freed (leaking
> >> their memory) and none of its children are detached. This results in
> >> bdrv_close_all() failing to close all BDSs and making this assertion
> >> fail when QEMU is being shut down:
> >> 
> >>    bdrv_close_all: Assertion `QTAILQ_EMPTY(&all_bdrv_states)' failed.
> >> 
> >> This patch makes bdrv_close() do the full uninitialization process
> >> in all cases. This fixes the problem with corrupted images and still
> >> works fine with freshly created BDSs.
> >> 
> >> Signed-off-by: Alberto Garcia <berto@igalia.com>
> >> ---
> >>  block.c                    | 57 +++++++++++++++++++++++-----------------------
> >>  tests/qemu-iotests/060     | 13 +++++++++++
> >>  tests/qemu-iotests/060.out | 12 ++++++++++
> >>  3 files changed, 53 insertions(+), 29 deletions(-)
> >
> > Sooo...  What's the exact status of this patch? :-)
> 
> I can resend it rebased on top of your block branch, but I'm fine if you
> merge the iotest manually (it's a trivial merge).
> 
> I'm not sure about Kevin's comments though, it wasn't clear to me if
> he's fine if we apply this patch or not.

I'm not sure if it's enough, but I think the patch is good anyway.

Kevin

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [Qemu-devel] [PATCH] block: Close a BlockDriverState completely even when bs->drv is NULL
  2017-11-06 14:53 [Qemu-devel] [PATCH] block: Close a BlockDriverState completely even when bs->drv is NULL Alberto Garcia
                   ` (2 preceding siblings ...)
  2017-11-17 16:14 ` Max Reitz
@ 2017-11-20 20:50 ` Max Reitz
  3 siblings, 0 replies; 8+ messages in thread
From: Max Reitz @ 2017-11-20 20:50 UTC (permalink / raw)
  To: Alberto Garcia, qemu-devel; +Cc: qemu-block, Kevin Wolf

[-- Attachment #1: Type: text/plain, Size: 1406 bytes --]

On 2017-11-06 15:53, Alberto Garcia wrote:
> bdrv_close() skips much of its logic when bs->drv is NULL. This is
> fine when we're closing a BlockDriverState that has just been created
> (because e.g the initialization process failed), but it's not enough
> in other cases.
> 
> For example, when a valid qcow2 image is found to be corrupted then
> QEMU marks it as such in the file header and then sets bs->drv to
> NULL in order to make the BlockDriverState unusable. When that BDS is
> later closed then many of its data structures are not freed (leaking
> their memory) and none of its children are detached. This results in
> bdrv_close_all() failing to close all BDSs and making this assertion
> fail when QEMU is being shut down:
> 
>    bdrv_close_all: Assertion `QTAILQ_EMPTY(&all_bdrv_states)' failed.
> 
> This patch makes bdrv_close() do the full uninitialization process
> in all cases. This fixes the problem with corrupted images and still
> works fine with freshly created BDSs.
> 
> Signed-off-by: Alberto Garcia <berto@igalia.com>
> ---
>  block.c                    | 57 +++++++++++++++++++++++-----------------------
>  tests/qemu-iotests/060     | 13 +++++++++++
>  tests/qemu-iotests/060.out | 12 ++++++++++
>  3 files changed, 53 insertions(+), 29 deletions(-)

Thanks, applied to my block branch:

https://github.com/XanClic/qemu/commits/block

Max


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 512 bytes --]

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2017-11-20 20:51 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-11-06 14:53 [Qemu-devel] [PATCH] block: Close a BlockDriverState completely even when bs->drv is NULL Alberto Garcia
2017-11-07 22:29 ` Eric Blake
2017-11-08 14:33 ` Kevin Wolf
2017-11-13 14:03   ` Alberto Garcia
2017-11-17 16:14 ` Max Reitz
2017-11-17 16:19   ` Alberto Garcia
2017-11-17 17:03     ` Kevin Wolf
2017-11-20 20:50 ` Max Reitz

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.