All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v2] block/file-posix: Optimize for macOS
@ 2021-03-05 12:17 Akihiko Odaki
  2021-03-05 12:21 ` no-reply
  2021-03-08 15:17 ` Stefan Hajnoczi
  0 siblings, 2 replies; 10+ messages in thread
From: Akihiko Odaki @ 2021-03-05 12:17 UTC (permalink / raw)
  Cc: Kevin Wolf, Fam Zheng, pkrempa, Akihiko Odaki, qemu-block,
	Markus Armbruster, qemu-devel, Max Reitz, Konstantin Nazarov,
	Stefan Hajnoczi, John Snow, dgilbert

This commit introduces "punch hole" operation and optimizes transfer
block size for macOS.

This commit introduces two additional members,
discard_granularity and opt_io to BlockSizes type in
include/block/block.h. Also, the members of the type are now
optional. Set -1 to discard_granularity and 0 to other members
for the default values.

Thanks to Konstantin Nazarov for detailed analysis of a flaw in an
old version of this change:
https://gist.github.com/akihikodaki/87df4149e7ca87f18dc56807ec5a1bc5#gistcomment-3654667

Signed-off-by: Akihiko Odaki <akihiko.odaki@gmail.com>
---
 block/file-posix.c    | 40 ++++++++++++++++++++++++++++++++++++++--
 block/nvme.c          |  2 ++
 block/raw-format.c    |  4 +++-
 hw/block/block.c      | 12 ++++++++++--
 include/block/block.h |  2 ++
 5 files changed, 55 insertions(+), 5 deletions(-)

diff --git a/block/file-posix.c b/block/file-posix.c
index 05079b40cae..21bdaf969c5 100644
--- a/block/file-posix.c
+++ b/block/file-posix.c
@@ -44,6 +44,7 @@
 #if defined(__APPLE__) && (__MACH__)
 #include <paths.h>
 #include <sys/param.h>
+#include <sys/mount.h>
 #include <IOKit/IOKitLib.h>
 #include <IOKit/IOBSD.h>
 #include <IOKit/storage/IOMediaBSDClient.h>
@@ -1292,6 +1293,8 @@ static int hdev_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz)
     if (check_for_dasd(s->fd) < 0) {
         return -ENOTSUP;
     }
+    bsz->opt_io = 0;
+    bsz->discard_granularity = -1;
     ret = probe_logical_blocksize(s->fd, &bsz->log);
     if (ret < 0) {
         return ret;
@@ -1586,6 +1589,7 @@ out:
     }
 }
 
+G_GNUC_UNUSED
 static int translate_err(int err)
 {
     if (err == -ENODEV || err == -ENOSYS || err == -EOPNOTSUPP ||
@@ -1795,16 +1799,27 @@ static int handle_aiocb_discard(void *opaque)
             }
         } while (errno == EINTR);
 
-        ret = -errno;
+        ret = translate_err(-errno);
 #endif
     } else {
 #ifdef CONFIG_FALLOCATE_PUNCH_HOLE
         ret = do_fallocate(s->fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
                            aiocb->aio_offset, aiocb->aio_nbytes);
+        ret = translate_err(-errno);
+#elif defined(__APPLE__) && (__MACH__)
+        fpunchhole_t fpunchhole;
+        fpunchhole.fp_flags = 0;
+        fpunchhole.reserved = 0;
+        fpunchhole.fp_offset = aiocb->aio_offset;
+        fpunchhole.fp_length = aiocb->aio_nbytes;
+        if (fcntl(s->fd, F_PUNCHHOLE, &fpunchhole) == -1) {
+            ret = errno == ENODEV ? -ENOTSUP : -errno;
+        } else {
+            ret = 0;
+        }
 #endif
     }
 
-    ret = translate_err(ret);
     if (ret == -ENOTSUP) {
         s->has_discard = false;
     }
@@ -2113,6 +2128,26 @@ static int raw_co_flush_to_disk(BlockDriverState *bs)
     return raw_thread_pool_submit(bs, handle_aiocb_flush, &acb);
 }
 
+static int raw_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz)
+{
+#if defined(__APPLE__) && (__MACH__)
+    BDRVRawState *s = bs->opaque;
+    struct statfs buf;
+
+    if (!fstatfs(s->fd, &buf)) {
+        bsz->phys = 0;
+        bsz->log = 0;
+        bsz->opt_io = buf.f_iosize;
+        bsz->discard_granularity = buf.f_bsize;
+        return 0;
+    }
+
+    return -errno;
+#else
+    return -ENOTSUP;
+#endif
+}
+
 static void raw_aio_attach_aio_context(BlockDriverState *bs,
                                        AioContext *new_context)
 {
@@ -3247,6 +3282,7 @@ BlockDriver bdrv_file = {
     .bdrv_refresh_limits = raw_refresh_limits,
     .bdrv_io_plug = raw_aio_plug,
     .bdrv_io_unplug = raw_aio_unplug,
+    .bdrv_probe_blocksizes = raw_probe_blocksizes,
     .bdrv_attach_aio_context = raw_aio_attach_aio_context,
 
     .bdrv_co_truncate = raw_co_truncate,
diff --git a/block/nvme.c b/block/nvme.c
index 2b5421e7aa6..1845d07577b 100644
--- a/block/nvme.c
+++ b/block/nvme.c
@@ -989,6 +989,8 @@ static int nvme_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz)
     uint32_t blocksize = nvme_get_blocksize(bs);
     bsz->phys = blocksize;
     bsz->log = blocksize;
+    bsz->opt_io = 0;
+    bsz->discard_granularity = -1;
     return 0;
 }
 
diff --git a/block/raw-format.c b/block/raw-format.c
index 7717578ed6a..847df11f2ae 100644
--- a/block/raw-format.c
+++ b/block/raw-format.c
@@ -507,6 +507,7 @@ static int raw_probe(const uint8_t *buf, int buf_size, const char *filename)
 static int raw_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz)
 {
     BDRVRawState *s = bs->opaque;
+    uint32_t size;
     int ret;
 
     ret = bdrv_probe_blocksizes(bs->file->bs, bsz);
@@ -514,7 +515,8 @@ static int raw_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz)
         return ret;
     }
 
-    if (!QEMU_IS_ALIGNED(s->offset, MAX(bsz->log, bsz->phys))) {
+    size = MAX(bsz->log, bsz->phys);
+    if (size && !QEMU_IS_ALIGNED(s->offset, size)) {
         return -ENOTSUP;
     }
 
diff --git a/hw/block/block.c b/hw/block/block.c
index 1e34573da71..c907e5a7722 100644
--- a/hw/block/block.c
+++ b/hw/block/block.c
@@ -70,19 +70,27 @@ bool blkconf_blocksizes(BlockConf *conf, Error **errp)
     backend_ret = blk_probe_blocksizes(blk, &blocksizes);
     /* fill in detected values if they are not defined via qemu command line */
     if (!conf->physical_block_size) {
-        if (!backend_ret) {
+        if (!backend_ret && blocksizes.phys) {
            conf->physical_block_size = blocksizes.phys;
         } else {
             conf->physical_block_size = BDRV_SECTOR_SIZE;
         }
     }
     if (!conf->logical_block_size) {
-        if (!backend_ret) {
+        if (!backend_ret && blocksizes.log) {
             conf->logical_block_size = blocksizes.log;
         } else {
             conf->logical_block_size = BDRV_SECTOR_SIZE;
         }
     }
+    if (!backend_ret) {
+        if (!conf->opt_io_size) {
+            conf->opt_io_size = blocksizes.opt_io;
+        }
+        if (conf->discard_granularity == -1) {
+            conf->discard_granularity = blocksizes.discard_granularity;
+        }
+    }
 
     if (conf->logical_block_size > conf->physical_block_size) {
         error_setg(errp,
diff --git a/include/block/block.h b/include/block/block.h
index b3f6e509d49..d12471a6cc4 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -93,6 +93,8 @@ typedef enum {
 typedef struct BlockSizes {
     uint32_t phys;
     uint32_t log;
+    uint32_t discard_granularity;
+    uint32_t opt_io;
 } BlockSizes;
 
 typedef struct HDGeometry {
-- 
2.24.3 (Apple Git-128)



^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [PATCH v2] block/file-posix: Optimize for macOS
  2021-03-05 12:17 [PATCH v2] block/file-posix: Optimize for macOS Akihiko Odaki
@ 2021-03-05 12:21 ` no-reply
  2021-03-08 15:17 ` Stefan Hajnoczi
  1 sibling, 0 replies; 10+ messages in thread
From: no-reply @ 2021-03-05 12:21 UTC (permalink / raw)
  To: akihiko.odaki
  Cc: kwolf, fam, pkrempa, stefanha, qemu-block, armbru, qemu-devel,
	mail, akihiko.odaki, mreitz, jsnow, dgilbert

Patchew URL: https://patchew.org/QEMU/20210305121748.65173-1-akihiko.odaki@gmail.com/



Hi,

This series seems to have some coding style problems. See output below for
more information:

Type: series
Message-id: 20210305121748.65173-1-akihiko.odaki@gmail.com
Subject: [PATCH v2] block/file-posix: Optimize for macOS

=== TEST SCRIPT BEGIN ===
#!/bin/bash
git rev-parse base > /dev/null || exit 0
git config --local diff.renamelimit 0
git config --local diff.renames True
git config --local diff.algorithm histogram
./scripts/checkpatch.pl --mailback base..
=== TEST SCRIPT END ===

Updating 3c8cf5a9c21ff8782164d1def7f44bd888713384
From https://github.com/patchew-project/qemu
 * [new tag]         patchew/20210305121748.65173-1-akihiko.odaki@gmail.com -> patchew/20210305121748.65173-1-akihiko.odaki@gmail.com
Switched to a new branch 'test'
16b639a block/file-posix: Optimize for macOS

=== OUTPUT BEGIN ===
WARNING: architecture specific defines should be avoided
#95: FILE: block/file-posix.c:2133:
+#if defined(__APPLE__) && (__MACH__)

ERROR: suspect code indent for conditional statements (8, 11)
#168: FILE: hw/block/block.c:73:
+        if (!backend_ret && blocksizes.phys) {
            conf->physical_block_size = blocksizes.phys;

total: 1 errors, 1 warnings, 145 lines checked

Commit 16b639a81c45 (block/file-posix: Optimize for macOS) has style problems, please review.  If any of these errors
are false positives report them to the maintainer, see
CHECKPATCH in MAINTAINERS.
=== OUTPUT END ===

Test command exited with code: 1


The full log is available at
http://patchew.org/logs/20210305121748.65173-1-akihiko.odaki@gmail.com/testing.checkpatch/?type=message.
---
Email generated automatically by Patchew [https://patchew.org/].
Please send your feedback to patchew-devel@redhat.com

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v2] block/file-posix: Optimize for macOS
  2021-03-05 12:17 [PATCH v2] block/file-posix: Optimize for macOS Akihiko Odaki
  2021-03-05 12:21 ` no-reply
@ 2021-03-08 15:17 ` Stefan Hajnoczi
  2021-03-08 15:37   ` Akihiko Odaki
  1 sibling, 1 reply; 10+ messages in thread
From: Stefan Hajnoczi @ 2021-03-08 15:17 UTC (permalink / raw)
  To: Akihiko Odaki
  Cc: Kevin Wolf, Fam Zheng, pkrempa, qemu-block, Markus Armbruster,
	qemu-devel, Max Reitz, Konstantin Nazarov, John Snow, dgilbert

[-- Attachment #1: Type: text/plain, Size: 1685 bytes --]

On Fri, Mar 05, 2021 at 09:17:48PM +0900, Akihiko Odaki wrote:
> This commit introduces "punch hole" operation and optimizes transfer
> block size for macOS.
> 
> This commit introduces two additional members,
> discard_granularity and opt_io to BlockSizes type in
> include/block/block.h. Also, the members of the type are now
> optional. Set -1 to discard_granularity and 0 to other members
> for the default values.
> 
> Thanks to Konstantin Nazarov for detailed analysis of a flaw in an
> old version of this change:
> https://gist.github.com/akihikodaki/87df4149e7ca87f18dc56807ec5a1bc5#gistcomment-3654667
> 
> Signed-off-by: Akihiko Odaki <akihiko.odaki@gmail.com>
> ---
>  block/file-posix.c    | 40 ++++++++++++++++++++++++++++++++++++++--
>  block/nvme.c          |  2 ++
>  block/raw-format.c    |  4 +++-
>  hw/block/block.c      | 12 ++++++++++--
>  include/block/block.h |  2 ++
>  5 files changed, 55 insertions(+), 5 deletions(-)

The live migration compatibility issue is still present. Migrating to
another host might not work if the block limits are different.

Here is an idea for solving it:

Modify include/hw/block/block.h:DEFINE_BLOCK_PROPERTIES_BASE() to
support a new value called "host". The default behavior remains
unchanged for live migration compatibility but now you can use "host" if
you know it's okay but don't care about migration compatibility.

The downside to this approach is that users must explicitly say
something like --drive ...,opt_io_size=host. But it's still better than
the situation we have today where user must manually enter values for
their disk.

Does this sound okay to everyone?

Stefan

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 488 bytes --]

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v2] block/file-posix: Optimize for macOS
  2021-03-08 15:17 ` Stefan Hajnoczi
@ 2021-03-08 15:37   ` Akihiko Odaki
  2021-03-09  4:52     ` Akihiko Odaki
  2021-03-10 11:38     ` Stefan Hajnoczi
  0 siblings, 2 replies; 10+ messages in thread
From: Akihiko Odaki @ 2021-03-08 15:37 UTC (permalink / raw)
  To: Stefan Hajnoczi
  Cc: Kevin Wolf, Fam Zheng, pkrempa, qemu-block, Markus Armbruster,
	qemu Developers, Max Reitz, Konstantin Nazarov, John Snow,
	dgilbert

2021年3月9日(火) 0:17 Stefan Hajnoczi <stefanha@redhat.com>:
>
> The live migration compatibility issue is still present. Migrating to
> another host might not work if the block limits are different.
>
> Here is an idea for solving it:
>
> Modify include/hw/block/block.h:DEFINE_BLOCK_PROPERTIES_BASE() to
> support a new value called "host". The default behavior remains
> unchanged for live migration compatibility but now you can use "host" if
> you know it's okay but don't care about migration compatibility.
>
> The downside to this approach is that users must explicitly say
> something like --drive ...,opt_io_size=host. But it's still better than
> the situation we have today where user must manually enter values for
> their disk.
>
> Does this sound okay to everyone?
>
> Stefan

I wonder how that change affects other block drivers implementing
bdrv_probe_blocksizes. As far as I know, the values they report are
already used by default, which is contrary to the default not being
"host".

Regards,
Akihiko Odaki


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v2] block/file-posix: Optimize for macOS
  2021-03-08 15:37   ` Akihiko Odaki
@ 2021-03-09  4:52     ` Akihiko Odaki
  2021-03-09  9:37       ` Kevin Wolf
  2021-03-10 11:38     ` Stefan Hajnoczi
  1 sibling, 1 reply; 10+ messages in thread
From: Akihiko Odaki @ 2021-03-09  4:52 UTC (permalink / raw)
  To: Stefan Hajnoczi
  Cc: Kevin Wolf, Fam Zheng, pkrempa, qemu-block, Markus Armbruster,
	qemu Developers, Max Reitz, Konstantin Nazarov, John Snow,
	dgilbert

2021年3月9日(火) 0:37 Akihiko Odaki <akihiko.odaki@gmail.com>:
>
> 2021年3月9日(火) 0:17 Stefan Hajnoczi <stefanha@redhat.com>:
> >
> > The live migration compatibility issue is still present. Migrating to
> > another host might not work if the block limits are different.
> >
> > Here is an idea for solving it:
> >
> > Modify include/hw/block/block.h:DEFINE_BLOCK_PROPERTIES_BASE() to
> > support a new value called "host". The default behavior remains
> > unchanged for live migration compatibility but now you can use "host" if
> > you know it's okay but don't care about migration compatibility.
> >
> > The downside to this approach is that users must explicitly say
> > something like --drive ...,opt_io_size=host. But it's still better than
> > the situation we have today where user must manually enter values for
> > their disk.
> >
> > Does this sound okay to everyone?
> >
> > Stefan
>
> I wonder how that change affects other block drivers implementing
> bdrv_probe_blocksizes. As far as I know, the values they report are
> already used by default, which is contrary to the default not being
> "host".
>
> Regards,
> Akihiko Odaki

Let me suggest a variant of Stefan's approach:

Modify include/hw/block/block.h:DEFINE_BLOCK_PROPERTIES_BASE() to
support a new value called "host". The default values for block size
properties may be "host" or not, but they should be consistent. If
they are "host" by default, add global properties which sets
discard_granularity and opt_io_size to the old default to
hw_compat_5_2 in hw/core/machine.c. Otherwise, add global properties
which sets logical_block_size and physical_block_size to "host".

Does it sound good? I'd also like to know others opinions for the
default value ("host" or something else). I prefer "host" as the
default a little because those who need live migration should be
careful enough to set proper configurations for each device. We may
also assist users who need live migration by adding a property which
defaults all block size properties to something else "host".

Regards,
Akihiko Odaki


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v2] block/file-posix: Optimize for macOS
  2021-03-09  4:52     ` Akihiko Odaki
@ 2021-03-09  9:37       ` Kevin Wolf
  2021-03-09 10:26         ` Daniel P. Berrangé
  0 siblings, 1 reply; 10+ messages in thread
From: Kevin Wolf @ 2021-03-09  9:37 UTC (permalink / raw)
  To: Akihiko Odaki
  Cc: Fam Zheng, pkrempa, qemu-block, Markus Armbruster,
	qemu Developers, Max Reitz, Konstantin Nazarov, Stefan Hajnoczi,
	John Snow, dgilbert

Am 09.03.2021 um 05:52 hat Akihiko Odaki geschrieben:
> 2021年3月9日(火) 0:37 Akihiko Odaki <akihiko.odaki@gmail.com>:
> >
> > 2021年3月9日(火) 0:17 Stefan Hajnoczi <stefanha@redhat.com>:
> > >
> > > The live migration compatibility issue is still present. Migrating to
> > > another host might not work if the block limits are different.
> > >
> > > Here is an idea for solving it:
> > >
> > > Modify include/hw/block/block.h:DEFINE_BLOCK_PROPERTIES_BASE() to
> > > support a new value called "host". The default behavior remains
> > > unchanged for live migration compatibility but now you can use "host" if
> > > you know it's okay but don't care about migration compatibility.
> > >
> > > The downside to this approach is that users must explicitly say
> > > something like --drive ...,opt_io_size=host. But it's still better than
> > > the situation we have today where user must manually enter values for
> > > their disk.
> > >
> > > Does this sound okay to everyone?
> > >
> > > Stefan
> >
> > I wonder how that change affects other block drivers implementing
> > bdrv_probe_blocksizes. As far as I know, the values they report are
> > already used by default, which is contrary to the default not being
> > "host".
> >
> > Regards,
> > Akihiko Odaki
> 
> Let me suggest a variant of Stefan's approach:
> 
> Modify include/hw/block/block.h:DEFINE_BLOCK_PROPERTIES_BASE() to
> support a new value called "host". The default values for block size
> properties may be "host" or not, but they should be consistent. If
> they are "host" by default

I'm not sure if it's a good idea, but maybe we could make it so that the
default is "host" only as long as you didn't specify -nodefaults? Then
libvirt would automatically keep the old behaviour (because it always
sets -nodefaults) and manual invocations would usually get the new one.

Of course, when I start with "I'm not sure if it's a good idea", it's
usually not, but I wanted to share the thought anyway...

> add global properties which sets
> discard_granularity and opt_io_size to the old default to
> hw_compat_5_2 in hw/core/machine.c. Otherwise, add global properties
> which sets logical_block_size and physical_block_size to "host".

Would we have to do this for explicitly for every single block device in
the tree? That sounds like a lot of cases and therefore rather error
prone.

> Does it sound good? I'd also like to know others opinions for the
> default value ("host" or something else). I prefer "host" as the
> default a little because those who need live migration should be
> careful enough to set proper configurations for each device. We may
> also assist users who need live migration by adding a property which
> defaults all block size properties to something else "host".

Adding new requirements is always a bit problematic. Live migration
works fine today without specifying these properties, so users will
expect it to keep working. If live migration were a new feature and we
required the options from the start, it would be different.

Kevin



^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v2] block/file-posix: Optimize for macOS
  2021-03-09  9:37       ` Kevin Wolf
@ 2021-03-09 10:26         ` Daniel P. Berrangé
  2021-03-09 10:47           ` Kevin Wolf
  2021-03-09 10:52           ` Akihiko Odaki
  0 siblings, 2 replies; 10+ messages in thread
From: Daniel P. Berrangé @ 2021-03-09 10:26 UTC (permalink / raw)
  To: Kevin Wolf
  Cc: Fam Zheng, pkrempa, Stefan Hajnoczi, qemu-block, qemu Developers,
	Markus Armbruster, Konstantin Nazarov, Akihiko Odaki, Max Reitz,
	John Snow, dgilbert

On Tue, Mar 09, 2021 at 10:37:18AM +0100, Kevin Wolf wrote:
> Am 09.03.2021 um 05:52 hat Akihiko Odaki geschrieben:
> > 2021年3月9日(火) 0:37 Akihiko Odaki <akihiko.odaki@gmail.com>:
> > >
> > > 2021年3月9日(火) 0:17 Stefan Hajnoczi <stefanha@redhat.com>:
> > > >
> > > > The live migration compatibility issue is still present. Migrating to
> > > > another host might not work if the block limits are different.
> > > >
> > > > Here is an idea for solving it:
> > > >
> > > > Modify include/hw/block/block.h:DEFINE_BLOCK_PROPERTIES_BASE() to
> > > > support a new value called "host". The default behavior remains
> > > > unchanged for live migration compatibility but now you can use "host" if
> > > > you know it's okay but don't care about migration compatibility.
> > > >
> > > > The downside to this approach is that users must explicitly say
> > > > something like --drive ...,opt_io_size=host. But it's still better than
> > > > the situation we have today where user must manually enter values for
> > > > their disk.
> > > >
> > > > Does this sound okay to everyone?
> > > >
> > > > Stefan
> > >
> > > I wonder how that change affects other block drivers implementing
> > > bdrv_probe_blocksizes. As far as I know, the values they report are
> > > already used by default, which is contrary to the default not being
> > > "host".
> > >
> > > Regards,
> > > Akihiko Odaki
> > 
> > Let me suggest a variant of Stefan's approach:
> > 
> > Modify include/hw/block/block.h:DEFINE_BLOCK_PROPERTIES_BASE() to
> > support a new value called "host". The default values for block size
> > properties may be "host" or not, but they should be consistent. If
> > they are "host" by default
> 
> I'm not sure if it's a good idea, but maybe we could make it so that the
> default is "host" only as long as you didn't specify -nodefaults? Then
> libvirt would automatically keep the old behaviour (because it always
> sets -nodefaults) and manual invocations would usually get the new one.
> 
> Of course, when I start with "I'm not sure if it's a good idea", it's
> usually not, but I wanted to share the thought anyway...

Can you elaborate on what the actual live migration problem is, and
its impact ?  This patch is touching the block backends, so I'm
wondering how backend data ends up having an impact on the migration
stream which is all frontend device data ?  I'm especially concerned
by the mention that some block backends already have this problem,
and wondering if it already impacts libvirt ?

Using -nodefaults is good practice, but I'm still uncomfortable saying
that its use is a requirement if you want migration to work, as that
feels like a change in semantics for non-libvirt users (who can be
mgmt apps, nor merely human interactive users).

Regards,
Daniel
-- 
|: https://berrange.com      -o-    https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org         -o-            https://fstop138.berrange.com :|
|: https://entangle-photo.org    -o-    https://www.instagram.com/dberrange :|



^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v2] block/file-posix: Optimize for macOS
  2021-03-09 10:26         ` Daniel P. Berrangé
@ 2021-03-09 10:47           ` Kevin Wolf
  2021-03-09 10:52           ` Akihiko Odaki
  1 sibling, 0 replies; 10+ messages in thread
From: Kevin Wolf @ 2021-03-09 10:47 UTC (permalink / raw)
  To: Daniel P. Berrangé
  Cc: Fam Zheng, pkrempa, Stefan Hajnoczi, qemu-block, qemu Developers,
	Markus Armbruster, Konstantin Nazarov, Akihiko Odaki, Max Reitz,
	John Snow, dgilbert

Am 09.03.2021 um 11:26 hat Daniel P. Berrangé geschrieben:
> On Tue, Mar 09, 2021 at 10:37:18AM +0100, Kevin Wolf wrote:
> > Am 09.03.2021 um 05:52 hat Akihiko Odaki geschrieben:
> > > 2021年3月9日(火) 0:37 Akihiko Odaki <akihiko.odaki@gmail.com>:
> > > >
> > > > 2021年3月9日(火) 0:17 Stefan Hajnoczi <stefanha@redhat.com>:
> > > > >
> > > > > The live migration compatibility issue is still present. Migrating to
> > > > > another host might not work if the block limits are different.
> > > > >
> > > > > Here is an idea for solving it:
> > > > >
> > > > > Modify include/hw/block/block.h:DEFINE_BLOCK_PROPERTIES_BASE() to
> > > > > support a new value called "host". The default behavior remains
> > > > > unchanged for live migration compatibility but now you can use "host" if
> > > > > you know it's okay but don't care about migration compatibility.
> > > > >
> > > > > The downside to this approach is that users must explicitly say
> > > > > something like --drive ...,opt_io_size=host. But it's still better than
> > > > > the situation we have today where user must manually enter values for
> > > > > their disk.
> > > > >
> > > > > Does this sound okay to everyone?
> > > > >
> > > > > Stefan
> > > >
> > > > I wonder how that change affects other block drivers implementing
> > > > bdrv_probe_blocksizes. As far as I know, the values they report are
> > > > already used by default, which is contrary to the default not being
> > > > "host".
> > > >
> > > > Regards,
> > > > Akihiko Odaki
> > > 
> > > Let me suggest a variant of Stefan's approach:
> > > 
> > > Modify include/hw/block/block.h:DEFINE_BLOCK_PROPERTIES_BASE() to
> > > support a new value called "host". The default values for block size
> > > properties may be "host" or not, but they should be consistent. If
> > > they are "host" by default
> > 
> > I'm not sure if it's a good idea, but maybe we could make it so that the
> > default is "host" only as long as you didn't specify -nodefaults? Then
> > libvirt would automatically keep the old behaviour (because it always
> > sets -nodefaults) and manual invocations would usually get the new one.
> > 
> > Of course, when I start with "I'm not sure if it's a good idea", it's
> > usually not, but I wanted to share the thought anyway...
> 
> Can you elaborate on what the actual live migration problem is, and
> its impact ?  This patch is touching the block backends, so I'm
> wondering how backend data ends up having an impact on the migration
> stream which is all frontend device data ?  I'm especially concerned
> by the mention that some block backends already have this problem,
> and wondering if it already impacts libvirt ?

The part that modifies the backend is the boring part (I haven't even
looked at the code for that one yet).

The interesting part is the change to hw/block/block.c, which modifies
the defaults for some qdev properties of block devices. The reason why
it does so is that it wants to let them default to autodetecting
whatever is optimal on the host.

The potential conflict between autodetecting qdev property defaults in
the backend and live migration should be obvious.

> Using -nodefaults is good practice, but I'm still uncomfortable saying
> that its use is a requirement if you want migration to work, as that
> feels like a change in semantics for non-libvirt users (who can be
> mgmt apps, nor merely human interactive users).

We can make live migration work in a way, by including these properties
in the VM state and then overriding whatever was set on the command line
with the values from the VM state. (This patch doesn't do this yet, nor
does it disable live migration, but it just lets the device magically
change its properties during migration, which is incorrect.)

Of course, this would still mean that the old value is only preserved on
the destination host as long as the QEMU instance runs. On the next
boot, the guest visible disk will change.

So if you want stable guest devices, you can't really have any of this
autodetection. If you set the properties explicitly instead of relying
on the defaults (which is what you should be doing ideally), you don't
have the problem, but I'm not sure if we can expect that users are
actually doing that. Considering -nodefaults would be just another
attempt to cover most users who care about a stable guest view, but
don't specify the value for each qdev property.

Kevin



^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v2] block/file-posix: Optimize for macOS
  2021-03-09 10:26         ` Daniel P. Berrangé
  2021-03-09 10:47           ` Kevin Wolf
@ 2021-03-09 10:52           ` Akihiko Odaki
  1 sibling, 0 replies; 10+ messages in thread
From: Akihiko Odaki @ 2021-03-09 10:52 UTC (permalink / raw)
  To: Daniel P. Berrangé
  Cc: Kevin Wolf, Fam Zheng, pkrempa, qemu-block, Markus Armbruster,
	qemu Developers, Konstantin Nazarov, Stefan Hajnoczi, Max Reitz,
	John Snow, dgilbert

2021年3月9日(火) 19:26 Daniel P. Berrangé <berrange@redhat.com>:
>
> On Tue, Mar 09, 2021 at 10:37:18AM +0100, Kevin Wolf wrote:
> > Am 09.03.2021 um 05:52 hat Akihiko Odaki geschrieben:
> > > 2021年3月9日(火) 0:37 Akihiko Odaki <akihiko.odaki@gmail.com>:
> > > >
> > > > 2021年3月9日(火) 0:17 Stefan Hajnoczi <stefanha@redhat.com>:
> > > > >
> > > > > The live migration compatibility issue is still present. Migrating to
> > > > > another host might not work if the block limits are different.
> > > > >
> > > > > Here is an idea for solving it:
> > > > >
> > > > > Modify include/hw/block/block.h:DEFINE_BLOCK_PROPERTIES_BASE() to
> > > > > support a new value called "host". The default behavior remains
> > > > > unchanged for live migration compatibility but now you can use "host" if
> > > > > you know it's okay but don't care about migration compatibility.
> > > > >
> > > > > The downside to this approach is that users must explicitly say
> > > > > something like --drive ...,opt_io_size=host. But it's still better than
> > > > > the situation we have today where user must manually enter values for
> > > > > their disk.
> > > > >
> > > > > Does this sound okay to everyone?
> > > > >
> > > > > Stefan
> > > >
> > > > I wonder how that change affects other block drivers implementing
> > > > bdrv_probe_blocksizes. As far as I know, the values they report are
> > > > already used by default, which is contrary to the default not being
> > > > "host".
> > > >
> > > > Regards,
> > > > Akihiko Odaki
> > >
> > > Let me suggest a variant of Stefan's approach:
> > >
> > > Modify include/hw/block/block.h:DEFINE_BLOCK_PROPERTIES_BASE() to
> > > support a new value called "host". The default values for block size
> > > properties may be "host" or not, but they should be consistent. If
> > > they are "host" by default
> >
> > I'm not sure if it's a good idea, but maybe we could make it so that the
> > default is "host" only as long as you didn't specify -nodefaults? Then
> > libvirt would automatically keep the old behaviour (because it always
> > sets -nodefaults) and manual invocations would usually get the new one.
> >
> > Of course, when I start with "I'm not sure if it's a good idea", it's
> > usually not, but I wanted to share the thought anyway...
>
> Can you elaborate on what the actual live migration problem is, and
> its impact ?  This patch is touching the block backends, so I'm
> wondering how backend data ends up having an impact on the migration
> stream which is all frontend device data ?  I'm especially concerned
> by the mention that some block backends already have this problem,
> and wondering if it already impacts libvirt ?
>
> Using -nodefaults is good practice, but I'm still uncomfortable saying
> that its use is a requirement if you want migration to work, as that
> feels like a change in semantics for non-libvirt users (who can be
> mgmt apps, nor merely human interactive users).
>
> Regards,
> Daniel
> --
> |: https://berrange.com      -o-    https://www.flickr.com/photos/dberrange :|
> |: https://libvirt.org         -o-            https://fstop138.berrange.com :|
> |: https://entangle-photo.org    -o-    https://www.instagram.com/dberrange :|
>

A problem is that block drivers else "file" already provide
logical_block_sizes and physical_block_sizes, which depends on their
backends, via bdrv_probe_blocksizes. It is apparently tolerated
because those block drivers have physical backends, unlike "file"
block driver which has files as its backends.

This patch has two important changes:
1. It adds new members, discard_granularity and opt_io_size, to the
value bdrv_probe_blocksizes.
2. It implements bdrv_probe_blocksizes which returns
discard_granularity and opt_io_size for "file" block driver.

2 is unacceptable here because it is a change for "file" block driver.
Now we are discussing how it can be fixed. A possible solution is to
make the feature to infer the default value from the backend opt-in.
Stefan's suggestion adds a non-default value, "host" to opt-in the
feature. However, it breaks the current behaviors of other block
drivers which already decides logical_block_sizes and
physical_block_sizes from their backends. My variant fixes the
compatibility issue by adding a global property to hw_compat_5_2.

Regards,
Akihiko Odaki


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v2] block/file-posix: Optimize for macOS
  2021-03-08 15:37   ` Akihiko Odaki
  2021-03-09  4:52     ` Akihiko Odaki
@ 2021-03-10 11:38     ` Stefan Hajnoczi
  1 sibling, 0 replies; 10+ messages in thread
From: Stefan Hajnoczi @ 2021-03-10 11:38 UTC (permalink / raw)
  To: Akihiko Odaki
  Cc: Kevin Wolf, Fam Zheng, pkrempa, qemu-block, Markus Armbruster,
	qemu Developers, Max Reitz, Konstantin Nazarov, John Snow,
	dgilbert

[-- Attachment #1: Type: text/plain, Size: 1686 bytes --]

On Tue, Mar 09, 2021 at 12:37:35AM +0900, Akihiko Odaki wrote:
> 2021年3月9日(火) 0:17 Stefan Hajnoczi <stefanha@redhat.com>:
> >
> > The live migration compatibility issue is still present. Migrating to
> > another host might not work if the block limits are different.
> >
> > Here is an idea for solving it:
> >
> > Modify include/hw/block/block.h:DEFINE_BLOCK_PROPERTIES_BASE() to
> > support a new value called "host". The default behavior remains
> > unchanged for live migration compatibility but now you can use "host" if
> > you know it's okay but don't care about migration compatibility.
> >
> > The downside to this approach is that users must explicitly say
> > something like --drive ...,opt_io_size=host. But it's still better than
> > the situation we have today where user must manually enter values for
> > their disk.
> >
> > Does this sound okay to everyone?
> >
> > Stefan
> 
> I wonder how that change affects other block drivers implementing
> bdrv_probe_blocksizes. As far as I know, the values they report are
> already used by default, which is contrary to the default not being
> "host".

The behavior should remain unchanged for existing QEMU command-lines.
The block drivers that report values by default should continue to do
so:

block/file-posix.c: only reports values for s390 DASD devices.

block/nvme.c: reports the NVMe PCI adapter's values.

block/raw-format.c: propagates its child's values unless the "offset"
                    option is incompatible with them.

These block drivers would default to logical_block_size=host and
physical_block_size=host, while other block drivers would not.

Stefan

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 488 bytes --]

^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2021-03-10 11:40 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-03-05 12:17 [PATCH v2] block/file-posix: Optimize for macOS Akihiko Odaki
2021-03-05 12:21 ` no-reply
2021-03-08 15:17 ` Stefan Hajnoczi
2021-03-08 15:37   ` Akihiko Odaki
2021-03-09  4:52     ` Akihiko Odaki
2021-03-09  9:37       ` Kevin Wolf
2021-03-09 10:26         ` Daniel P. Berrangé
2021-03-09 10:47           ` Kevin Wolf
2021-03-09 10:52           ` Akihiko Odaki
2021-03-10 11:38     ` Stefan Hajnoczi

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.