qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v3 1/2] block/file-posix: Optimize for macOS
@ 2021-03-10 15:39 Akihiko Odaki
  2021-03-10 15:39 ` [PATCH v3 2/2] block: Add backend_defaults property Akihiko Odaki
  2021-04-01 15:45 ` [PATCH v3 1/2] block/file-posix: Optimize for macOS Stefan Hajnoczi
  0 siblings, 2 replies; 4+ messages in thread
From: Akihiko Odaki @ 2021-03-10 15:39 UTC (permalink / raw)
  Cc: Kevin Wolf, Fam Zheng, pkrempa, Akihiko Odaki, qemu-block,
	Markus Armbruster, qemu-devel, Max Reitz, Konstantin Nazarov,
	Stefan Hajnoczi, John Snow, dgilbert

This commit introduces "punch hole" operation and optimizes transfer
block size for macOS.

This commit introduces two additional members,
discard_granularity and opt_io to BlockSizes type in
include/block/block.h. Also, the members of the type are now
optional. Set -1 to discard_granularity and 0 to other members
for the default values.

Thanks to Konstantin Nazarov for detailed analysis of a flaw in an
old version of this change:
https://gist.github.com/akihikodaki/87df4149e7ca87f18dc56807ec5a1bc5#gistcomment-3654667

Signed-off-by: Akihiko Odaki <akihiko.odaki@gmail.com>
---
 block/file-posix.c    | 40 ++++++++++++++++++++++++++++++++++++++--
 block/nvme.c          |  2 ++
 block/raw-format.c    |  4 +++-
 hw/block/block.c      | 12 ++++++++++--
 include/block/block.h |  2 ++
 5 files changed, 55 insertions(+), 5 deletions(-)

diff --git a/block/file-posix.c b/block/file-posix.c
index 05079b40cae..21bdaf969c5 100644
--- a/block/file-posix.c
+++ b/block/file-posix.c
@@ -44,6 +44,7 @@
 #if defined(__APPLE__) && (__MACH__)
 #include <paths.h>
 #include <sys/param.h>
+#include <sys/mount.h>
 #include <IOKit/IOKitLib.h>
 #include <IOKit/IOBSD.h>
 #include <IOKit/storage/IOMediaBSDClient.h>
@@ -1292,6 +1293,8 @@ static int hdev_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz)
     if (check_for_dasd(s->fd) < 0) {
         return -ENOTSUP;
     }
+    bsz->opt_io = 0;
+    bsz->discard_granularity = -1;
     ret = probe_logical_blocksize(s->fd, &bsz->log);
     if (ret < 0) {
         return ret;
@@ -1586,6 +1589,7 @@ out:
     }
 }
 
+G_GNUC_UNUSED
 static int translate_err(int err)
 {
     if (err == -ENODEV || err == -ENOSYS || err == -EOPNOTSUPP ||
@@ -1795,16 +1799,27 @@ static int handle_aiocb_discard(void *opaque)
             }
         } while (errno == EINTR);
 
-        ret = -errno;
+        ret = translate_err(-errno);
 #endif
     } else {
 #ifdef CONFIG_FALLOCATE_PUNCH_HOLE
         ret = do_fallocate(s->fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
                            aiocb->aio_offset, aiocb->aio_nbytes);
+        ret = translate_err(-errno);
+#elif defined(__APPLE__) && (__MACH__)
+        fpunchhole_t fpunchhole;
+        fpunchhole.fp_flags = 0;
+        fpunchhole.reserved = 0;
+        fpunchhole.fp_offset = aiocb->aio_offset;
+        fpunchhole.fp_length = aiocb->aio_nbytes;
+        if (fcntl(s->fd, F_PUNCHHOLE, &fpunchhole) == -1) {
+            ret = errno == ENODEV ? -ENOTSUP : -errno;
+        } else {
+            ret = 0;
+        }
 #endif
     }
 
-    ret = translate_err(ret);
     if (ret == -ENOTSUP) {
         s->has_discard = false;
     }
@@ -2113,6 +2128,26 @@ static int raw_co_flush_to_disk(BlockDriverState *bs)
     return raw_thread_pool_submit(bs, handle_aiocb_flush, &acb);
 }
 
+static int raw_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz)
+{
+#if defined(__APPLE__) && (__MACH__)
+    BDRVRawState *s = bs->opaque;
+    struct statfs buf;
+
+    if (!fstatfs(s->fd, &buf)) {
+        bsz->phys = 0;
+        bsz->log = 0;
+        bsz->opt_io = buf.f_iosize;
+        bsz->discard_granularity = buf.f_bsize;
+        return 0;
+    }
+
+    return -errno;
+#else
+    return -ENOTSUP;
+#endif
+}
+
 static void raw_aio_attach_aio_context(BlockDriverState *bs,
                                        AioContext *new_context)
 {
@@ -3247,6 +3282,7 @@ BlockDriver bdrv_file = {
     .bdrv_refresh_limits = raw_refresh_limits,
     .bdrv_io_plug = raw_aio_plug,
     .bdrv_io_unplug = raw_aio_unplug,
+    .bdrv_probe_blocksizes = raw_probe_blocksizes,
     .bdrv_attach_aio_context = raw_aio_attach_aio_context,
 
     .bdrv_co_truncate = raw_co_truncate,
diff --git a/block/nvme.c b/block/nvme.c
index 2b5421e7aa6..1845d07577b 100644
--- a/block/nvme.c
+++ b/block/nvme.c
@@ -989,6 +989,8 @@ static int nvme_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz)
     uint32_t blocksize = nvme_get_blocksize(bs);
     bsz->phys = blocksize;
     bsz->log = blocksize;
+    bsz->opt_io = 0;
+    bsz->discard_granularity = -1;
     return 0;
 }
 
diff --git a/block/raw-format.c b/block/raw-format.c
index 7717578ed6a..847df11f2ae 100644
--- a/block/raw-format.c
+++ b/block/raw-format.c
@@ -507,6 +507,7 @@ static int raw_probe(const uint8_t *buf, int buf_size, const char *filename)
 static int raw_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz)
 {
     BDRVRawState *s = bs->opaque;
+    uint32_t size;
     int ret;
 
     ret = bdrv_probe_blocksizes(bs->file->bs, bsz);
@@ -514,7 +515,8 @@ static int raw_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz)
         return ret;
     }
 
-    if (!QEMU_IS_ALIGNED(s->offset, MAX(bsz->log, bsz->phys))) {
+    size = MAX(bsz->log, bsz->phys);
+    if (size && !QEMU_IS_ALIGNED(s->offset, size)) {
         return -ENOTSUP;
     }
 
diff --git a/hw/block/block.c b/hw/block/block.c
index 1e34573da71..c907e5a7722 100644
--- a/hw/block/block.c
+++ b/hw/block/block.c
@@ -70,19 +70,27 @@ bool blkconf_blocksizes(BlockConf *conf, Error **errp)
     backend_ret = blk_probe_blocksizes(blk, &blocksizes);
     /* fill in detected values if they are not defined via qemu command line */
     if (!conf->physical_block_size) {
-        if (!backend_ret) {
+        if (!backend_ret && blocksizes.phys) {
            conf->physical_block_size = blocksizes.phys;
         } else {
             conf->physical_block_size = BDRV_SECTOR_SIZE;
         }
     }
     if (!conf->logical_block_size) {
-        if (!backend_ret) {
+        if (!backend_ret && blocksizes.log) {
             conf->logical_block_size = blocksizes.log;
         } else {
             conf->logical_block_size = BDRV_SECTOR_SIZE;
         }
     }
+    if (!backend_ret) {
+        if (!conf->opt_io_size) {
+            conf->opt_io_size = blocksizes.opt_io;
+        }
+        if (conf->discard_granularity == -1) {
+            conf->discard_granularity = blocksizes.discard_granularity;
+        }
+    }
 
     if (conf->logical_block_size > conf->physical_block_size) {
         error_setg(errp,
diff --git a/include/block/block.h b/include/block/block.h
index b3f6e509d49..d12471a6cc4 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -93,6 +93,8 @@ typedef enum {
 typedef struct BlockSizes {
     uint32_t phys;
     uint32_t log;
+    uint32_t discard_granularity;
+    uint32_t opt_io;
 } BlockSizes;
 
 typedef struct HDGeometry {
-- 
2.24.3 (Apple Git-128)



^ permalink raw reply related	[flat|nested] 4+ messages in thread

* [PATCH v3 2/2] block: Add backend_defaults property
  2021-03-10 15:39 [PATCH v3 1/2] block/file-posix: Optimize for macOS Akihiko Odaki
@ 2021-03-10 15:39 ` Akihiko Odaki
  2021-04-01 15:58   ` Stefan Hajnoczi
  2021-04-01 15:45 ` [PATCH v3 1/2] block/file-posix: Optimize for macOS Stefan Hajnoczi
  1 sibling, 1 reply; 4+ messages in thread
From: Akihiko Odaki @ 2021-03-10 15:39 UTC (permalink / raw)
  Cc: Kevin Wolf, Fam Zheng, pkrempa, Akihiko Odaki, qemu-block,
	Markus Armbruster, qemu-devel, Max Reitz, Konstantin Nazarov,
	Stefan Hajnoczi, John Snow, dgilbert

backend_defaults property allow users to control if default block
properties should be decided with backend information.

If it is off, any backend information will be discarded, which is
suitable if you plan to perform live migration to a different disk backend.

If it is on, a block device may utilize backend information more
aggressively.

By default, it is auto, which uses backend information from physical
devices and ignores one from file. It is consistent with the older
versions, and should be aligned with the user expectation that a file
backend is more independent of host than a physical device backend.

Signed-off-by: Akihiko Odaki <akihiko.odaki@gmail.com>
---
 block/file-posix.c       |  2 ++
 block/nvme.c             |  1 +
 hw/block/block.c         | 27 ++++++++++++++++++++++-----
 include/block/block.h    |  1 +
 include/hw/block/block.h |  3 +++
 5 files changed, 29 insertions(+), 5 deletions(-)

diff --git a/block/file-posix.c b/block/file-posix.c
index 21bdaf969c5..2631bd972a5 100644
--- a/block/file-posix.c
+++ b/block/file-posix.c
@@ -1293,6 +1293,7 @@ static int hdev_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz)
     if (check_for_dasd(s->fd) < 0) {
         return -ENOTSUP;
     }
+    bsz->file = false;
     bsz->opt_io = 0;
     bsz->discard_granularity = -1;
     ret = probe_logical_blocksize(s->fd, &bsz->log);
@@ -2135,6 +2136,7 @@ static int raw_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz)
     struct statfs buf;
 
     if (!fstatfs(s->fd, &buf)) {
+        bsz->file = true;
         bsz->phys = 0;
         bsz->log = 0;
         bsz->opt_io = buf.f_iosize;
diff --git a/block/nvme.c b/block/nvme.c
index 1845d07577b..8438c8a2b90 100644
--- a/block/nvme.c
+++ b/block/nvme.c
@@ -987,6 +987,7 @@ static uint32_t nvme_get_blocksize(BlockDriverState *bs)
 static int nvme_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz)
 {
     uint32_t blocksize = nvme_get_blocksize(bs);
+    bsz->file = false;
     bsz->phys = blocksize;
     bsz->log = blocksize;
     bsz->opt_io = 0;
diff --git a/hw/block/block.c b/hw/block/block.c
index c907e5a7722..d42cc40a1c8 100644
--- a/hw/block/block.c
+++ b/hw/block/block.c
@@ -65,25 +65,42 @@ bool blkconf_blocksizes(BlockConf *conf, Error **errp)
 {
     BlockBackend *blk = conf->blk;
     BlockSizes blocksizes;
-    int backend_ret;
+    bool use_blocksizes;
+
+    switch (conf->backend_defaults) {
+    case ON_OFF_AUTO_AUTO:
+        use_blocksizes = !blk_probe_blocksizes(blk, &blocksizes) &&
+                         !blocksizes.file;
+        break;
+
+    case ON_OFF_AUTO_ON:
+        use_blocksizes = !blk_probe_blocksizes(blk, &blocksizes);
+        break;
+
+    case ON_OFF_AUTO_OFF:
+        use_blocksizes = false;
+        break;
+
+    default:
+        abort();
+    }
 
-    backend_ret = blk_probe_blocksizes(blk, &blocksizes);
     /* fill in detected values if they are not defined via qemu command line */
     if (!conf->physical_block_size) {
-        if (!backend_ret && blocksizes.phys) {
+        if (use_blocksizes && blocksizes.phys) {
            conf->physical_block_size = blocksizes.phys;
         } else {
             conf->physical_block_size = BDRV_SECTOR_SIZE;
         }
     }
     if (!conf->logical_block_size) {
-        if (!backend_ret && blocksizes.log) {
+        if (use_blocksizes && blocksizes.log) {
             conf->logical_block_size = blocksizes.log;
         } else {
             conf->logical_block_size = BDRV_SECTOR_SIZE;
         }
     }
-    if (!backend_ret) {
+    if (use_blocksizes) {
         if (!conf->opt_io_size) {
             conf->opt_io_size = blocksizes.opt_io;
         }
diff --git a/include/block/block.h b/include/block/block.h
index d12471a6cc4..ccc3ec5c847 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -91,6 +91,7 @@ typedef enum {
 } BdrvRequestFlags;
 
 typedef struct BlockSizes {
+    bool file;
     uint32_t phys;
     uint32_t log;
     uint32_t discard_granularity;
diff --git a/include/hw/block/block.h b/include/hw/block/block.h
index c172cbe65f1..5902c0440a5 100644
--- a/include/hw/block/block.h
+++ b/include/hw/block/block.h
@@ -19,6 +19,7 @@
 
 typedef struct BlockConf {
     BlockBackend *blk;
+    OnOffAuto backend_defaults;
     uint32_t physical_block_size;
     uint32_t logical_block_size;
     uint32_t min_io_size;
@@ -48,6 +49,8 @@ static inline unsigned int get_physical_block_exp(BlockConf *conf)
 }
 
 #define DEFINE_BLOCK_PROPERTIES_BASE(_state, _conf)                     \
+    DEFINE_PROP_ON_OFF_AUTO("backend_defaults", _state,                 \
+                            _conf.backend_defaults, ON_OFF_AUTO_AUTO),  \
     DEFINE_PROP_BLOCKSIZE("logical_block_size", _state,                 \
                           _conf.logical_block_size),                    \
     DEFINE_PROP_BLOCKSIZE("physical_block_size", _state,                \
-- 
2.24.3 (Apple Git-128)



^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH v3 1/2] block/file-posix: Optimize for macOS
  2021-03-10 15:39 [PATCH v3 1/2] block/file-posix: Optimize for macOS Akihiko Odaki
  2021-03-10 15:39 ` [PATCH v3 2/2] block: Add backend_defaults property Akihiko Odaki
@ 2021-04-01 15:45 ` Stefan Hajnoczi
  1 sibling, 0 replies; 4+ messages in thread
From: Stefan Hajnoczi @ 2021-04-01 15:45 UTC (permalink / raw)
  To: Akihiko Odaki
  Cc: Kevin Wolf, Fam Zheng, pkrempa, qemu-block, Markus Armbruster,
	qemu-devel, Max Reitz, Konstantin Nazarov, John Snow, dgilbert

[-- Attachment #1: Type: text/plain, Size: 1055 bytes --]

On Thu, Mar 11, 2021 at 12:39:15AM +0900, Akihiko Odaki wrote:
> @@ -1586,6 +1589,7 @@ out:
>      }
>  }
>  
> +G_GNUC_UNUSED

Why isn't translate_err() used in the F_PUNCHHOLE case below?

If you really want to avoid using it on macOS, please add a #if with the
necessary conditions here so it's clear when this translate_err() is
needed.

> @@ -514,7 +515,8 @@ static int raw_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz)
>          return ret;
>      }
>  
> -    if (!QEMU_IS_ALIGNED(s->offset, MAX(bsz->log, bsz->phys))) {
> +    size = MAX(bsz->log, bsz->phys);
> +    if (size && !QEMU_IS_ALIGNED(s->offset, size)) {
>          return -ENOTSUP;
>      }
>  

This patch changes the semantics of bdrv_probe_blocksizes(). It used to
return -ENOTSUP when phys/log weren't available. Now it returns 0 and
the fields are 0. Please update the bdrv_probe_blocksizes doc comment in
include/block/block_int.h to mention phys and log, as well as that
fields can be set to 0 (or -1 in the case of discard_granularity).

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 488 bytes --]

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH v3 2/2] block: Add backend_defaults property
  2021-03-10 15:39 ` [PATCH v3 2/2] block: Add backend_defaults property Akihiko Odaki
@ 2021-04-01 15:58   ` Stefan Hajnoczi
  0 siblings, 0 replies; 4+ messages in thread
From: Stefan Hajnoczi @ 2021-04-01 15:58 UTC (permalink / raw)
  To: Akihiko Odaki
  Cc: Kevin Wolf, Fam Zheng, pkrempa, qemu-block, Markus Armbruster,
	qemu-devel, Max Reitz, Konstantin Nazarov, John Snow, dgilbert

[-- Attachment #1: Type: text/plain, Size: 1022 bytes --]

On Thu, Mar 11, 2021 at 12:39:16AM +0900, Akihiko Odaki wrote:
> backend_defaults property allow users to control if default block
> properties should be decided with backend information.
> 
> If it is off, any backend information will be discarded, which is
> suitable if you plan to perform live migration to a different disk backend.
> 
> If it is on, a block device may utilize backend information more
> aggressively.
> 
> By default, it is auto, which uses backend information from physical
> devices and ignores one from file. It is consistent with the older
> versions, and should be aligned with the user expectation that a file
> backend is more independent of host than a physical device backend.

Can BlockLimits pdiscard_alignment and opt_transfer be used instead of
adding discard_granularity and opt_io fields to BlockSizes? They seem to
do the same thing except the QEMU block layer currently uses BlockLimits
values internally to ensure that requests are suitable for the host
device.

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 488 bytes --]

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2021-04-01 15:59 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-03-10 15:39 [PATCH v3 1/2] block/file-posix: Optimize for macOS Akihiko Odaki
2021-03-10 15:39 ` [PATCH v3 2/2] block: Add backend_defaults property Akihiko Odaki
2021-04-01 15:58   ` Stefan Hajnoczi
2021-04-01 15:45 ` [PATCH v3 1/2] block/file-posix: Optimize for macOS Stefan Hajnoczi

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).