* [PATCH V3 1/6] block/rbd: bump librbd requirement to luminous release
2021-05-19 14:23 [PATCH V3 0/6] block/rbd: migrate to coroutines and add write zeroes support Peter Lieven
@ 2021-05-19 14:23 ` Peter Lieven
2021-06-16 12:26 ` Ilya Dryomov
2021-05-19 14:23 ` [PATCH V3 2/6] block/rbd: store object_size in BDRVRBDState Peter Lieven
` (4 subsequent siblings)
5 siblings, 1 reply; 22+ messages in thread
From: Peter Lieven @ 2021-05-19 14:23 UTC (permalink / raw)
To: qemu-block
Cc: kwolf, idryomov, berrange, Peter Lieven, qemu-devel, ct,
pbonzini, mreitz, dillaman
Even luminous (version 12.2) has been unmaintained for over 3 years now.
Bump the requirement to get rid of the ifdef'ry in the code.
Qemu 6.1 dropped the support for RHEL-7 which was the last supported
OS that required an older librbd.
Signed-off-by: Peter Lieven <pl@kamp.de>
---
block/rbd.c | 120 ++++------------------------------------------------
meson.build | 7 ++-
2 files changed, 13 insertions(+), 114 deletions(-)
diff --git a/block/rbd.c b/block/rbd.c
index 26f64cce7c..6b1cbe1d75 100644
--- a/block/rbd.c
+++ b/block/rbd.c
@@ -55,24 +55,10 @@
* leading "\".
*/
-/* rbd_aio_discard added in 0.1.2 */
-#if LIBRBD_VERSION_CODE >= LIBRBD_VERSION(0, 1, 2)
-#define LIBRBD_SUPPORTS_DISCARD
-#else
-#undef LIBRBD_SUPPORTS_DISCARD
-#endif
-
#define OBJ_MAX_SIZE (1UL << OBJ_DEFAULT_OBJ_ORDER)
#define RBD_MAX_SNAPS 100
-/* The LIBRBD_SUPPORTS_IOVEC is defined in librbd.h */
-#ifdef LIBRBD_SUPPORTS_IOVEC
-#define LIBRBD_USE_IOVEC 1
-#else
-#define LIBRBD_USE_IOVEC 0
-#endif
-
typedef enum {
RBD_AIO_READ,
RBD_AIO_WRITE,
@@ -84,7 +70,6 @@ typedef struct RBDAIOCB {
BlockAIOCB common;
int64_t ret;
QEMUIOVector *qiov;
- char *bounce;
RBDAIOCmd cmd;
int error;
struct BDRVRBDState *s;
@@ -94,7 +79,6 @@ typedef struct RADOSCB {
RBDAIOCB *acb;
struct BDRVRBDState *s;
int64_t size;
- char *buf;
int64_t ret;
} RADOSCB;
@@ -342,13 +326,9 @@ static int qemu_rbd_set_keypairs(rados_t cluster, const char *keypairs_json,
static void qemu_rbd_memset(RADOSCB *rcb, int64_t offs)
{
- if (LIBRBD_USE_IOVEC) {
- RBDAIOCB *acb = rcb->acb;
- iov_memset(acb->qiov->iov, acb->qiov->niov, offs, 0,
- acb->qiov->size - offs);
- } else {
- memset(rcb->buf + offs, 0, rcb->size - offs);
- }
+ RBDAIOCB *acb = rcb->acb;
+ iov_memset(acb->qiov->iov, acb->qiov->niov, offs, 0,
+ acb->qiov->size - offs);
}
/* FIXME Deprecate and remove keypairs or make it available in QMP. */
@@ -504,13 +484,6 @@ static void qemu_rbd_complete_aio(RADOSCB *rcb)
g_free(rcb);
- if (!LIBRBD_USE_IOVEC) {
- if (acb->cmd == RBD_AIO_READ) {
- qemu_iovec_from_buf(acb->qiov, 0, acb->bounce, acb->qiov->size);
- }
- qemu_vfree(acb->bounce);
- }
-
acb->common.cb(acb->common.opaque, (acb->ret > 0 ? 0 : acb->ret));
qemu_aio_unref(acb);
@@ -878,28 +851,6 @@ static void rbd_finish_aiocb(rbd_completion_t c, RADOSCB *rcb)
rbd_finish_bh, rcb);
}
-static int rbd_aio_discard_wrapper(rbd_image_t image,
- uint64_t off,
- uint64_t len,
- rbd_completion_t comp)
-{
-#ifdef LIBRBD_SUPPORTS_DISCARD
- return rbd_aio_discard(image, off, len, comp);
-#else
- return -ENOTSUP;
-#endif
-}
-
-static int rbd_aio_flush_wrapper(rbd_image_t image,
- rbd_completion_t comp)
-{
-#ifdef LIBRBD_SUPPORTS_AIO_FLUSH
- return rbd_aio_flush(image, comp);
-#else
- return -ENOTSUP;
-#endif
-}
-
static BlockAIOCB *rbd_start_aio(BlockDriverState *bs,
int64_t off,
QEMUIOVector *qiov,
@@ -922,21 +873,6 @@ static BlockAIOCB *rbd_start_aio(BlockDriverState *bs,
rcb = g_new(RADOSCB, 1);
- if (!LIBRBD_USE_IOVEC) {
- if (cmd == RBD_AIO_DISCARD || cmd == RBD_AIO_FLUSH) {
- acb->bounce = NULL;
- } else {
- acb->bounce = qemu_try_blockalign(bs, qiov->size);
- if (acb->bounce == NULL) {
- goto failed;
- }
- }
- if (cmd == RBD_AIO_WRITE) {
- qemu_iovec_to_buf(acb->qiov, 0, acb->bounce, qiov->size);
- }
- rcb->buf = acb->bounce;
- }
-
acb->ret = 0;
acb->error = 0;
acb->s = s;
@@ -950,7 +886,7 @@ static BlockAIOCB *rbd_start_aio(BlockDriverState *bs,
}
switch (cmd) {
- case RBD_AIO_WRITE: {
+ case RBD_AIO_WRITE:
/*
* RBD APIs don't allow us to write more than actual size, so in order
* to support growing images, we resize the image before write
@@ -962,25 +898,16 @@ static BlockAIOCB *rbd_start_aio(BlockDriverState *bs,
goto failed_completion;
}
}
-#ifdef LIBRBD_SUPPORTS_IOVEC
- r = rbd_aio_writev(s->image, qiov->iov, qiov->niov, off, c);
-#else
- r = rbd_aio_write(s->image, off, size, rcb->buf, c);
-#endif
+ r = rbd_aio_writev(s->image, qiov->iov, qiov->niov, off, c);
break;
- }
case RBD_AIO_READ:
-#ifdef LIBRBD_SUPPORTS_IOVEC
- r = rbd_aio_readv(s->image, qiov->iov, qiov->niov, off, c);
-#else
- r = rbd_aio_read(s->image, off, size, rcb->buf, c);
-#endif
+ r = rbd_aio_readv(s->image, qiov->iov, qiov->niov, off, c);
break;
case RBD_AIO_DISCARD:
- r = rbd_aio_discard_wrapper(s->image, off, size, c);
+ r = rbd_aio_discard(s->image, off, size, c);
break;
case RBD_AIO_FLUSH:
- r = rbd_aio_flush_wrapper(s->image, c);
+ r = rbd_aio_flush(s->image, c);
break;
default:
r = -EINVAL;
@@ -995,9 +922,6 @@ failed_completion:
rbd_aio_release(c);
failed:
g_free(rcb);
- if (!LIBRBD_USE_IOVEC) {
- qemu_vfree(acb->bounce);
- }
qemu_aio_unref(acb);
return NULL;
@@ -1023,7 +947,6 @@ static BlockAIOCB *qemu_rbd_aio_pwritev(BlockDriverState *bs,
RBD_AIO_WRITE);
}
-#ifdef LIBRBD_SUPPORTS_AIO_FLUSH
static BlockAIOCB *qemu_rbd_aio_flush(BlockDriverState *bs,
BlockCompletionFunc *cb,
void *opaque)
@@ -1031,20 +954,6 @@ static BlockAIOCB *qemu_rbd_aio_flush(BlockDriverState *bs,
return rbd_start_aio(bs, 0, NULL, 0, cb, opaque, RBD_AIO_FLUSH);
}
-#else
-
-static int qemu_rbd_co_flush(BlockDriverState *bs)
-{
-#if LIBRBD_VERSION_CODE >= LIBRBD_VERSION(0, 1, 1)
- /* rbd_flush added in 0.1.1 */
- BDRVRBDState *s = bs->opaque;
- return rbd_flush(s->image);
-#else
- return 0;
-#endif
-}
-#endif
-
static int qemu_rbd_getinfo(BlockDriverState *bs, BlockDriverInfo *bdi)
{
BDRVRBDState *s = bs->opaque;
@@ -1210,7 +1119,6 @@ static int qemu_rbd_snap_list(BlockDriverState *bs,
return snap_count;
}
-#ifdef LIBRBD_SUPPORTS_DISCARD
static BlockAIOCB *qemu_rbd_aio_pdiscard(BlockDriverState *bs,
int64_t offset,
int bytes,
@@ -1220,9 +1128,7 @@ static BlockAIOCB *qemu_rbd_aio_pdiscard(BlockDriverState *bs,
return rbd_start_aio(bs, offset, NULL, bytes, cb, opaque,
RBD_AIO_DISCARD);
}
-#endif
-#ifdef LIBRBD_SUPPORTS_INVALIDATE
static void coroutine_fn qemu_rbd_co_invalidate_cache(BlockDriverState *bs,
Error **errp)
{
@@ -1232,7 +1138,6 @@ static void coroutine_fn qemu_rbd_co_invalidate_cache(BlockDriverState *bs,
error_setg_errno(errp, -r, "Failed to invalidate the cache");
}
}
-#endif
static QemuOptsList qemu_rbd_create_opts = {
.name = "rbd-create-opts",
@@ -1290,23 +1195,14 @@ static BlockDriver bdrv_rbd = {
.bdrv_aio_preadv = qemu_rbd_aio_preadv,
.bdrv_aio_pwritev = qemu_rbd_aio_pwritev,
-#ifdef LIBRBD_SUPPORTS_AIO_FLUSH
.bdrv_aio_flush = qemu_rbd_aio_flush,
-#else
- .bdrv_co_flush_to_disk = qemu_rbd_co_flush,
-#endif
-
-#ifdef LIBRBD_SUPPORTS_DISCARD
.bdrv_aio_pdiscard = qemu_rbd_aio_pdiscard,
-#endif
.bdrv_snapshot_create = qemu_rbd_snap_create,
.bdrv_snapshot_delete = qemu_rbd_snap_remove,
.bdrv_snapshot_list = qemu_rbd_snap_list,
.bdrv_snapshot_goto = qemu_rbd_snap_rollback,
-#ifdef LIBRBD_SUPPORTS_INVALIDATE
.bdrv_co_invalidate_cache = qemu_rbd_co_invalidate_cache,
-#endif
.strong_runtime_opts = qemu_rbd_strong_runtime_opts,
};
diff --git a/meson.build b/meson.build
index 1559e8d873..644ef36476 100644
--- a/meson.build
+++ b/meson.build
@@ -721,13 +721,16 @@ if not get_option('rbd').auto() or have_block
int main(void) {
rados_t cluster;
rados_create(&cluster, NULL);
+ #if LIBRBD_VERSION_CODE < LIBRBD_VERSION(1, 12, 0)
+ #error
+ #endif
return 0;
}''', dependencies: [librbd, librados])
rbd = declare_dependency(dependencies: [librbd, librados])
elif get_option('rbd').enabled()
- error('could not link librados')
+ error('librados/librbd >= 12.0.0 required')
else
- warning('could not link librados, disabling')
+ warning('librados/librbd >= 12.0.0 not found, disabling rbd support')
endif
endif
endif
--
2.17.1
^ permalink raw reply related [flat|nested] 22+ messages in thread
* Re: [PATCH V3 1/6] block/rbd: bump librbd requirement to luminous release
2021-05-19 14:23 ` [PATCH V3 1/6] block/rbd: bump librbd requirement to luminous release Peter Lieven
@ 2021-06-16 12:26 ` Ilya Dryomov
2021-06-18 8:58 ` Peter Lieven
0 siblings, 1 reply; 22+ messages in thread
From: Ilya Dryomov @ 2021-06-16 12:26 UTC (permalink / raw)
To: Peter Lieven
Cc: kwolf, berrange, qemu-block, qemu-devel, ct, Paolo Bonzini, mreitz
On Wed, May 19, 2021 at 4:26 PM Peter Lieven <pl@kamp.de> wrote:
>
> even luminous (version 12.2) is unmaintained for over 3 years now.
> Bump the requirement to get rid of the ifdef'ry in the code.
> Qemu 6.1 dropped the support for RHEL-7 which was the last supported
> OS that required an older librbd.
>
> Signed-off-by: Peter Lieven <pl@kamp.de>
> ---
> block/rbd.c | 120 ++++------------------------------------------------
> meson.build | 7 ++-
> 2 files changed, 13 insertions(+), 114 deletions(-)
>
> diff --git a/block/rbd.c b/block/rbd.c
> index 26f64cce7c..6b1cbe1d75 100644
> --- a/block/rbd.c
> +++ b/block/rbd.c
> @@ -55,24 +55,10 @@
> * leading "\".
> */
>
> -/* rbd_aio_discard added in 0.1.2 */
> -#if LIBRBD_VERSION_CODE >= LIBRBD_VERSION(0, 1, 2)
> -#define LIBRBD_SUPPORTS_DISCARD
> -#else
> -#undef LIBRBD_SUPPORTS_DISCARD
> -#endif
> -
> #define OBJ_MAX_SIZE (1UL << OBJ_DEFAULT_OBJ_ORDER)
>
> #define RBD_MAX_SNAPS 100
>
> -/* The LIBRBD_SUPPORTS_IOVEC is defined in librbd.h */
> -#ifdef LIBRBD_SUPPORTS_IOVEC
> -#define LIBRBD_USE_IOVEC 1
> -#else
> -#define LIBRBD_USE_IOVEC 0
> -#endif
> -
> typedef enum {
> RBD_AIO_READ,
> RBD_AIO_WRITE,
> @@ -84,7 +70,6 @@ typedef struct RBDAIOCB {
> BlockAIOCB common;
> int64_t ret;
> QEMUIOVector *qiov;
> - char *bounce;
> RBDAIOCmd cmd;
> int error;
> struct BDRVRBDState *s;
> @@ -94,7 +79,6 @@ typedef struct RADOSCB {
> RBDAIOCB *acb;
> struct BDRVRBDState *s;
> int64_t size;
> - char *buf;
> int64_t ret;
> } RADOSCB;
>
> @@ -342,13 +326,9 @@ static int qemu_rbd_set_keypairs(rados_t cluster, const char *keypairs_json,
>
> static void qemu_rbd_memset(RADOSCB *rcb, int64_t offs)
> {
> - if (LIBRBD_USE_IOVEC) {
> - RBDAIOCB *acb = rcb->acb;
> - iov_memset(acb->qiov->iov, acb->qiov->niov, offs, 0,
> - acb->qiov->size - offs);
> - } else {
> - memset(rcb->buf + offs, 0, rcb->size - offs);
> - }
> + RBDAIOCB *acb = rcb->acb;
> + iov_memset(acb->qiov->iov, acb->qiov->niov, offs, 0,
> + acb->qiov->size - offs);
> }
>
> /* FIXME Deprecate and remove keypairs or make it available in QMP. */
> @@ -504,13 +484,6 @@ static void qemu_rbd_complete_aio(RADOSCB *rcb)
>
> g_free(rcb);
>
> - if (!LIBRBD_USE_IOVEC) {
> - if (acb->cmd == RBD_AIO_READ) {
> - qemu_iovec_from_buf(acb->qiov, 0, acb->bounce, acb->qiov->size);
> - }
> - qemu_vfree(acb->bounce);
> - }
> -
> acb->common.cb(acb->common.opaque, (acb->ret > 0 ? 0 : acb->ret));
>
> qemu_aio_unref(acb);
> @@ -878,28 +851,6 @@ static void rbd_finish_aiocb(rbd_completion_t c, RADOSCB *rcb)
> rbd_finish_bh, rcb);
> }
>
> -static int rbd_aio_discard_wrapper(rbd_image_t image,
> - uint64_t off,
> - uint64_t len,
> - rbd_completion_t comp)
> -{
> -#ifdef LIBRBD_SUPPORTS_DISCARD
> - return rbd_aio_discard(image, off, len, comp);
> -#else
> - return -ENOTSUP;
> -#endif
> -}
> -
> -static int rbd_aio_flush_wrapper(rbd_image_t image,
> - rbd_completion_t comp)
> -{
> -#ifdef LIBRBD_SUPPORTS_AIO_FLUSH
> - return rbd_aio_flush(image, comp);
> -#else
> - return -ENOTSUP;
> -#endif
> -}
> -
> static BlockAIOCB *rbd_start_aio(BlockDriverState *bs,
> int64_t off,
> QEMUIOVector *qiov,
> @@ -922,21 +873,6 @@ static BlockAIOCB *rbd_start_aio(BlockDriverState *bs,
>
> rcb = g_new(RADOSCB, 1);
>
> - if (!LIBRBD_USE_IOVEC) {
> - if (cmd == RBD_AIO_DISCARD || cmd == RBD_AIO_FLUSH) {
> - acb->bounce = NULL;
> - } else {
> - acb->bounce = qemu_try_blockalign(bs, qiov->size);
> - if (acb->bounce == NULL) {
> - goto failed;
> - }
> - }
> - if (cmd == RBD_AIO_WRITE) {
> - qemu_iovec_to_buf(acb->qiov, 0, acb->bounce, qiov->size);
> - }
> - rcb->buf = acb->bounce;
> - }
> -
> acb->ret = 0;
> acb->error = 0;
> acb->s = s;
> @@ -950,7 +886,7 @@ static BlockAIOCB *rbd_start_aio(BlockDriverState *bs,
> }
>
> switch (cmd) {
> - case RBD_AIO_WRITE: {
> + case RBD_AIO_WRITE:
> /*
> * RBD APIs don't allow us to write more than actual size, so in order
> * to support growing images, we resize the image before write
> @@ -962,25 +898,16 @@ static BlockAIOCB *rbd_start_aio(BlockDriverState *bs,
> goto failed_completion;
> }
> }
> -#ifdef LIBRBD_SUPPORTS_IOVEC
> - r = rbd_aio_writev(s->image, qiov->iov, qiov->niov, off, c);
> -#else
> - r = rbd_aio_write(s->image, off, size, rcb->buf, c);
> -#endif
> + r = rbd_aio_writev(s->image, qiov->iov, qiov->niov, off, c);
> break;
> - }
> case RBD_AIO_READ:
> -#ifdef LIBRBD_SUPPORTS_IOVEC
> - r = rbd_aio_readv(s->image, qiov->iov, qiov->niov, off, c);
> -#else
> - r = rbd_aio_read(s->image, off, size, rcb->buf, c);
> -#endif
> + r = rbd_aio_readv(s->image, qiov->iov, qiov->niov, off, c);
> break;
> case RBD_AIO_DISCARD:
> - r = rbd_aio_discard_wrapper(s->image, off, size, c);
> + r = rbd_aio_discard(s->image, off, size, c);
> break;
> case RBD_AIO_FLUSH:
> - r = rbd_aio_flush_wrapper(s->image, c);
> + r = rbd_aio_flush(s->image, c);
> break;
> default:
> r = -EINVAL;
> @@ -995,9 +922,6 @@ failed_completion:
> rbd_aio_release(c);
> failed:
> g_free(rcb);
> - if (!LIBRBD_USE_IOVEC) {
> - qemu_vfree(acb->bounce);
> - }
>
> qemu_aio_unref(acb);
> return NULL;
> @@ -1023,7 +947,6 @@ static BlockAIOCB *qemu_rbd_aio_pwritev(BlockDriverState *bs,
> RBD_AIO_WRITE);
> }
>
> -#ifdef LIBRBD_SUPPORTS_AIO_FLUSH
> static BlockAIOCB *qemu_rbd_aio_flush(BlockDriverState *bs,
> BlockCompletionFunc *cb,
> void *opaque)
> @@ -1031,20 +954,6 @@ static BlockAIOCB *qemu_rbd_aio_flush(BlockDriverState *bs,
> return rbd_start_aio(bs, 0, NULL, 0, cb, opaque, RBD_AIO_FLUSH);
> }
>
> -#else
> -
> -static int qemu_rbd_co_flush(BlockDriverState *bs)
> -{
> -#if LIBRBD_VERSION_CODE >= LIBRBD_VERSION(0, 1, 1)
> - /* rbd_flush added in 0.1.1 */
> - BDRVRBDState *s = bs->opaque;
> - return rbd_flush(s->image);
> -#else
> - return 0;
> -#endif
> -}
> -#endif
> -
> static int qemu_rbd_getinfo(BlockDriverState *bs, BlockDriverInfo *bdi)
> {
> BDRVRBDState *s = bs->opaque;
> @@ -1210,7 +1119,6 @@ static int qemu_rbd_snap_list(BlockDriverState *bs,
> return snap_count;
> }
>
> -#ifdef LIBRBD_SUPPORTS_DISCARD
> static BlockAIOCB *qemu_rbd_aio_pdiscard(BlockDriverState *bs,
> int64_t offset,
> int bytes,
> @@ -1220,9 +1128,7 @@ static BlockAIOCB *qemu_rbd_aio_pdiscard(BlockDriverState *bs,
> return rbd_start_aio(bs, offset, NULL, bytes, cb, opaque,
> RBD_AIO_DISCARD);
> }
> -#endif
>
> -#ifdef LIBRBD_SUPPORTS_INVALIDATE
> static void coroutine_fn qemu_rbd_co_invalidate_cache(BlockDriverState *bs,
> Error **errp)
> {
> @@ -1232,7 +1138,6 @@ static void coroutine_fn qemu_rbd_co_invalidate_cache(BlockDriverState *bs,
> error_setg_errno(errp, -r, "Failed to invalidate the cache");
> }
> }
> -#endif
>
> static QemuOptsList qemu_rbd_create_opts = {
> .name = "rbd-create-opts",
> @@ -1290,23 +1195,14 @@ static BlockDriver bdrv_rbd = {
> .bdrv_aio_preadv = qemu_rbd_aio_preadv,
> .bdrv_aio_pwritev = qemu_rbd_aio_pwritev,
>
> -#ifdef LIBRBD_SUPPORTS_AIO_FLUSH
> .bdrv_aio_flush = qemu_rbd_aio_flush,
> -#else
> - .bdrv_co_flush_to_disk = qemu_rbd_co_flush,
> -#endif
> -
> -#ifdef LIBRBD_SUPPORTS_DISCARD
> .bdrv_aio_pdiscard = qemu_rbd_aio_pdiscard,
> -#endif
>
> .bdrv_snapshot_create = qemu_rbd_snap_create,
> .bdrv_snapshot_delete = qemu_rbd_snap_remove,
> .bdrv_snapshot_list = qemu_rbd_snap_list,
> .bdrv_snapshot_goto = qemu_rbd_snap_rollback,
> -#ifdef LIBRBD_SUPPORTS_INVALIDATE
> .bdrv_co_invalidate_cache = qemu_rbd_co_invalidate_cache,
> -#endif
>
> .strong_runtime_opts = qemu_rbd_strong_runtime_opts,
> };
> diff --git a/meson.build b/meson.build
> index 1559e8d873..644ef36476 100644
> --- a/meson.build
> +++ b/meson.build
> @@ -721,13 +721,16 @@ if not get_option('rbd').auto() or have_block
> int main(void) {
> rados_t cluster;
> rados_create(&cluster, NULL);
> + #if LIBRBD_VERSION_CODE < LIBRBD_VERSION(1, 12, 0)
> + #error
Hi Peter,
Just a nit, but I would stick to the actual version of the library
instead of the package version. librbd major version is 1, librados
major version is 2 -- it shows up in ldd, when listing /usr/lib, etc.
Something like
#error librbd >= 1.12.0 required
here
> + #endif
> return 0;
> }''', dependencies: [librbd, librados])
> rbd = declare_dependency(dependencies: [librbd, librados])
> elif get_option('rbd').enabled()
> - error('could not link librados')
> + error('librados/librbd >= 12.0.0 required')
and
error('could not link librados/librbd')
> else
> - warning('could not link librados, disabling')
> + warning('librados/librbd >= 12.0.0 not found, disabling rbd support')
warning('could not link librados/librbd, disabling')
here to avoid repeating the version.
Thanks,
Ilya
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [PATCH V3 1/6] block/rbd: bump librbd requirement to luminous release
2021-06-16 12:26 ` Ilya Dryomov
@ 2021-06-18 8:58 ` Peter Lieven
2021-06-18 10:21 ` Ilya Dryomov
0 siblings, 1 reply; 22+ messages in thread
From: Peter Lieven @ 2021-06-18 8:58 UTC (permalink / raw)
To: Ilya Dryomov
Cc: kwolf, berrange, qemu-block, qemu-devel, ct, Paolo Bonzini, mreitz
Am 16.06.21 um 14:26 schrieb Ilya Dryomov:
> On Wed, May 19, 2021 at 4:26 PM Peter Lieven <pl@kamp.de> wrote:
>> even luminous (version 12.2) is unmaintained for over 3 years now.
>> Bump the requirement to get rid of the ifdef'ry in the code.
>> Qemu 6.1 dropped the support for RHEL-7 which was the last supported
>> OS that required an older librbd.
>>
>> Signed-off-by: Peter Lieven <pl@kamp.de>
>> ---
>> block/rbd.c | 120 ++++------------------------------------------------
>> meson.build | 7 ++-
>> 2 files changed, 13 insertions(+), 114 deletions(-)
>>
>> diff --git a/block/rbd.c b/block/rbd.c
>> index 26f64cce7c..6b1cbe1d75 100644
>> --- a/block/rbd.c
>> +++ b/block/rbd.c
>> @@ -55,24 +55,10 @@
>> * leading "\".
>> */
>>
>> -/* rbd_aio_discard added in 0.1.2 */
>> -#if LIBRBD_VERSION_CODE >= LIBRBD_VERSION(0, 1, 2)
>> -#define LIBRBD_SUPPORTS_DISCARD
>> -#else
>> -#undef LIBRBD_SUPPORTS_DISCARD
>> -#endif
>> -
>> #define OBJ_MAX_SIZE (1UL << OBJ_DEFAULT_OBJ_ORDER)
>>
>> #define RBD_MAX_SNAPS 100
>>
>> -/* The LIBRBD_SUPPORTS_IOVEC is defined in librbd.h */
>> -#ifdef LIBRBD_SUPPORTS_IOVEC
>> -#define LIBRBD_USE_IOVEC 1
>> -#else
>> -#define LIBRBD_USE_IOVEC 0
>> -#endif
>> -
>> typedef enum {
>> RBD_AIO_READ,
>> RBD_AIO_WRITE,
>> @@ -84,7 +70,6 @@ typedef struct RBDAIOCB {
>> BlockAIOCB common;
>> int64_t ret;
>> QEMUIOVector *qiov;
>> - char *bounce;
>> RBDAIOCmd cmd;
>> int error;
>> struct BDRVRBDState *s;
>> @@ -94,7 +79,6 @@ typedef struct RADOSCB {
>> RBDAIOCB *acb;
>> struct BDRVRBDState *s;
>> int64_t size;
>> - char *buf;
>> int64_t ret;
>> } RADOSCB;
>>
>> @@ -342,13 +326,9 @@ static int qemu_rbd_set_keypairs(rados_t cluster, const char *keypairs_json,
>>
>> static void qemu_rbd_memset(RADOSCB *rcb, int64_t offs)
>> {
>> - if (LIBRBD_USE_IOVEC) {
>> - RBDAIOCB *acb = rcb->acb;
>> - iov_memset(acb->qiov->iov, acb->qiov->niov, offs, 0,
>> - acb->qiov->size - offs);
>> - } else {
>> - memset(rcb->buf + offs, 0, rcb->size - offs);
>> - }
>> + RBDAIOCB *acb = rcb->acb;
>> + iov_memset(acb->qiov->iov, acb->qiov->niov, offs, 0,
>> + acb->qiov->size - offs);
>> }
>>
>> /* FIXME Deprecate and remove keypairs or make it available in QMP. */
>> @@ -504,13 +484,6 @@ static void qemu_rbd_complete_aio(RADOSCB *rcb)
>>
>> g_free(rcb);
>>
>> - if (!LIBRBD_USE_IOVEC) {
>> - if (acb->cmd == RBD_AIO_READ) {
>> - qemu_iovec_from_buf(acb->qiov, 0, acb->bounce, acb->qiov->size);
>> - }
>> - qemu_vfree(acb->bounce);
>> - }
>> -
>> acb->common.cb(acb->common.opaque, (acb->ret > 0 ? 0 : acb->ret));
>>
>> qemu_aio_unref(acb);
>> @@ -878,28 +851,6 @@ static void rbd_finish_aiocb(rbd_completion_t c, RADOSCB *rcb)
>> rbd_finish_bh, rcb);
>> }
>>
>> -static int rbd_aio_discard_wrapper(rbd_image_t image,
>> - uint64_t off,
>> - uint64_t len,
>> - rbd_completion_t comp)
>> -{
>> -#ifdef LIBRBD_SUPPORTS_DISCARD
>> - return rbd_aio_discard(image, off, len, comp);
>> -#else
>> - return -ENOTSUP;
>> -#endif
>> -}
>> -
>> -static int rbd_aio_flush_wrapper(rbd_image_t image,
>> - rbd_completion_t comp)
>> -{
>> -#ifdef LIBRBD_SUPPORTS_AIO_FLUSH
>> - return rbd_aio_flush(image, comp);
>> -#else
>> - return -ENOTSUP;
>> -#endif
>> -}
>> -
>> static BlockAIOCB *rbd_start_aio(BlockDriverState *bs,
>> int64_t off,
>> QEMUIOVector *qiov,
>> @@ -922,21 +873,6 @@ static BlockAIOCB *rbd_start_aio(BlockDriverState *bs,
>>
>> rcb = g_new(RADOSCB, 1);
>>
>> - if (!LIBRBD_USE_IOVEC) {
>> - if (cmd == RBD_AIO_DISCARD || cmd == RBD_AIO_FLUSH) {
>> - acb->bounce = NULL;
>> - } else {
>> - acb->bounce = qemu_try_blockalign(bs, qiov->size);
>> - if (acb->bounce == NULL) {
>> - goto failed;
>> - }
>> - }
>> - if (cmd == RBD_AIO_WRITE) {
>> - qemu_iovec_to_buf(acb->qiov, 0, acb->bounce, qiov->size);
>> - }
>> - rcb->buf = acb->bounce;
>> - }
>> -
>> acb->ret = 0;
>> acb->error = 0;
>> acb->s = s;
>> @@ -950,7 +886,7 @@ static BlockAIOCB *rbd_start_aio(BlockDriverState *bs,
>> }
>>
>> switch (cmd) {
>> - case RBD_AIO_WRITE: {
>> + case RBD_AIO_WRITE:
>> /*
>> * RBD APIs don't allow us to write more than actual size, so in order
>> * to support growing images, we resize the image before write
>> @@ -962,25 +898,16 @@ static BlockAIOCB *rbd_start_aio(BlockDriverState *bs,
>> goto failed_completion;
>> }
>> }
>> -#ifdef LIBRBD_SUPPORTS_IOVEC
>> - r = rbd_aio_writev(s->image, qiov->iov, qiov->niov, off, c);
>> -#else
>> - r = rbd_aio_write(s->image, off, size, rcb->buf, c);
>> -#endif
>> + r = rbd_aio_writev(s->image, qiov->iov, qiov->niov, off, c);
>> break;
>> - }
>> case RBD_AIO_READ:
>> -#ifdef LIBRBD_SUPPORTS_IOVEC
>> - r = rbd_aio_readv(s->image, qiov->iov, qiov->niov, off, c);
>> -#else
>> - r = rbd_aio_read(s->image, off, size, rcb->buf, c);
>> -#endif
>> + r = rbd_aio_readv(s->image, qiov->iov, qiov->niov, off, c);
>> break;
>> case RBD_AIO_DISCARD:
>> - r = rbd_aio_discard_wrapper(s->image, off, size, c);
>> + r = rbd_aio_discard(s->image, off, size, c);
>> break;
>> case RBD_AIO_FLUSH:
>> - r = rbd_aio_flush_wrapper(s->image, c);
>> + r = rbd_aio_flush(s->image, c);
>> break;
>> default:
>> r = -EINVAL;
>> @@ -995,9 +922,6 @@ failed_completion:
>> rbd_aio_release(c);
>> failed:
>> g_free(rcb);
>> - if (!LIBRBD_USE_IOVEC) {
>> - qemu_vfree(acb->bounce);
>> - }
>>
>> qemu_aio_unref(acb);
>> return NULL;
>> @@ -1023,7 +947,6 @@ static BlockAIOCB *qemu_rbd_aio_pwritev(BlockDriverState *bs,
>> RBD_AIO_WRITE);
>> }
>>
>> -#ifdef LIBRBD_SUPPORTS_AIO_FLUSH
>> static BlockAIOCB *qemu_rbd_aio_flush(BlockDriverState *bs,
>> BlockCompletionFunc *cb,
>> void *opaque)
>> @@ -1031,20 +954,6 @@ static BlockAIOCB *qemu_rbd_aio_flush(BlockDriverState *bs,
>> return rbd_start_aio(bs, 0, NULL, 0, cb, opaque, RBD_AIO_FLUSH);
>> }
>>
>> -#else
>> -
>> -static int qemu_rbd_co_flush(BlockDriverState *bs)
>> -{
>> -#if LIBRBD_VERSION_CODE >= LIBRBD_VERSION(0, 1, 1)
>> - /* rbd_flush added in 0.1.1 */
>> - BDRVRBDState *s = bs->opaque;
>> - return rbd_flush(s->image);
>> -#else
>> - return 0;
>> -#endif
>> -}
>> -#endif
>> -
>> static int qemu_rbd_getinfo(BlockDriverState *bs, BlockDriverInfo *bdi)
>> {
>> BDRVRBDState *s = bs->opaque;
>> @@ -1210,7 +1119,6 @@ static int qemu_rbd_snap_list(BlockDriverState *bs,
>> return snap_count;
>> }
>>
>> -#ifdef LIBRBD_SUPPORTS_DISCARD
>> static BlockAIOCB *qemu_rbd_aio_pdiscard(BlockDriverState *bs,
>> int64_t offset,
>> int bytes,
>> @@ -1220,9 +1128,7 @@ static BlockAIOCB *qemu_rbd_aio_pdiscard(BlockDriverState *bs,
>> return rbd_start_aio(bs, offset, NULL, bytes, cb, opaque,
>> RBD_AIO_DISCARD);
>> }
>> -#endif
>>
>> -#ifdef LIBRBD_SUPPORTS_INVALIDATE
>> static void coroutine_fn qemu_rbd_co_invalidate_cache(BlockDriverState *bs,
>> Error **errp)
>> {
>> @@ -1232,7 +1138,6 @@ static void coroutine_fn qemu_rbd_co_invalidate_cache(BlockDriverState *bs,
>> error_setg_errno(errp, -r, "Failed to invalidate the cache");
>> }
>> }
>> -#endif
>>
>> static QemuOptsList qemu_rbd_create_opts = {
>> .name = "rbd-create-opts",
>> @@ -1290,23 +1195,14 @@ static BlockDriver bdrv_rbd = {
>> .bdrv_aio_preadv = qemu_rbd_aio_preadv,
>> .bdrv_aio_pwritev = qemu_rbd_aio_pwritev,
>>
>> -#ifdef LIBRBD_SUPPORTS_AIO_FLUSH
>> .bdrv_aio_flush = qemu_rbd_aio_flush,
>> -#else
>> - .bdrv_co_flush_to_disk = qemu_rbd_co_flush,
>> -#endif
>> -
>> -#ifdef LIBRBD_SUPPORTS_DISCARD
>> .bdrv_aio_pdiscard = qemu_rbd_aio_pdiscard,
>> -#endif
>>
>> .bdrv_snapshot_create = qemu_rbd_snap_create,
>> .bdrv_snapshot_delete = qemu_rbd_snap_remove,
>> .bdrv_snapshot_list = qemu_rbd_snap_list,
>> .bdrv_snapshot_goto = qemu_rbd_snap_rollback,
>> -#ifdef LIBRBD_SUPPORTS_INVALIDATE
>> .bdrv_co_invalidate_cache = qemu_rbd_co_invalidate_cache,
>> -#endif
>>
>> .strong_runtime_opts = qemu_rbd_strong_runtime_opts,
>> };
>> diff --git a/meson.build b/meson.build
>> index 1559e8d873..644ef36476 100644
>> --- a/meson.build
>> +++ b/meson.build
>> @@ -721,13 +721,16 @@ if not get_option('rbd').auto() or have_block
>> int main(void) {
>> rados_t cluster;
>> rados_create(&cluster, NULL);
>> + #if LIBRBD_VERSION_CODE < LIBRBD_VERSION(1, 12, 0)
>> + #error
> Hi Peter,
>
> Just a nit, but I would stick to the actual version of the library
> instead of the package version. librbd major version is 1, librados
> major version is 2 -- it shows up in ldd, when listing /usr/lib, etc.
> Something like
>
> #error librbd >= 1.12.0 required
>
> here
Okay.
>
>> + #endif
>> return 0;
>> }''', dependencies: [librbd, librados])
>> rbd = declare_dependency(dependencies: [librbd, librados])
>> elif get_option('rbd').enabled()
>> - error('could not link librados')
>> + error('librados/librbd >= 12.0.0 required')
> and
>
> error('could not link librados/librbd')
>
>> else
>> - warning('could not link librados, disabling')
>> + warning('librados/librbd >= 12.0.0 not found, disabling rbd support')
> warning('could not link librados/librbd, disabling')
>
> here to avoid repeating the version.
I would only like to change it if the "#error" is visible in the actual configure output. I have not tested your proposal yet.
If it's visible I am fine to change it.
Peter
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [PATCH V3 1/6] block/rbd: bump librbd requirement to luminous release
2021-06-18 8:58 ` Peter Lieven
@ 2021-06-18 10:21 ` Ilya Dryomov
0 siblings, 0 replies; 22+ messages in thread
From: Ilya Dryomov @ 2021-06-18 10:21 UTC (permalink / raw)
To: Peter Lieven
Cc: kwolf, berrange, qemu-block, qemu-devel, ct, Paolo Bonzini, mreitz
On Fri, Jun 18, 2021 at 10:58 AM Peter Lieven <pl@kamp.de> wrote:
>
> Am 16.06.21 um 14:26 schrieb Ilya Dryomov:
> > On Wed, May 19, 2021 at 4:26 PM Peter Lieven <pl@kamp.de> wrote:
> >> even luminous (version 12.2) is unmaintained for over 3 years now.
> >> Bump the requirement to get rid of the ifdef'ry in the code.
> >> Qemu 6.1 dropped the support for RHEL-7 which was the last supported
> >> OS that required an older librbd.
> >>
> >> Signed-off-by: Peter Lieven <pl@kamp.de>
> >> ---
> >> block/rbd.c | 120 ++++------------------------------------------------
> >> meson.build | 7 ++-
> >> 2 files changed, 13 insertions(+), 114 deletions(-)
> >>
> >> diff --git a/block/rbd.c b/block/rbd.c
> >> index 26f64cce7c..6b1cbe1d75 100644
> >> --- a/block/rbd.c
> >> +++ b/block/rbd.c
> >> @@ -55,24 +55,10 @@
> >> * leading "\".
> >> */
> >>
> >> -/* rbd_aio_discard added in 0.1.2 */
> >> -#if LIBRBD_VERSION_CODE >= LIBRBD_VERSION(0, 1, 2)
> >> -#define LIBRBD_SUPPORTS_DISCARD
> >> -#else
> >> -#undef LIBRBD_SUPPORTS_DISCARD
> >> -#endif
> >> -
> >> #define OBJ_MAX_SIZE (1UL << OBJ_DEFAULT_OBJ_ORDER)
> >>
> >> #define RBD_MAX_SNAPS 100
> >>
> >> -/* The LIBRBD_SUPPORTS_IOVEC is defined in librbd.h */
> >> -#ifdef LIBRBD_SUPPORTS_IOVEC
> >> -#define LIBRBD_USE_IOVEC 1
> >> -#else
> >> -#define LIBRBD_USE_IOVEC 0
> >> -#endif
> >> -
> >> typedef enum {
> >> RBD_AIO_READ,
> >> RBD_AIO_WRITE,
> >> @@ -84,7 +70,6 @@ typedef struct RBDAIOCB {
> >> BlockAIOCB common;
> >> int64_t ret;
> >> QEMUIOVector *qiov;
> >> - char *bounce;
> >> RBDAIOCmd cmd;
> >> int error;
> >> struct BDRVRBDState *s;
> >> @@ -94,7 +79,6 @@ typedef struct RADOSCB {
> >> RBDAIOCB *acb;
> >> struct BDRVRBDState *s;
> >> int64_t size;
> >> - char *buf;
> >> int64_t ret;
> >> } RADOSCB;
> >>
> >> @@ -342,13 +326,9 @@ static int qemu_rbd_set_keypairs(rados_t cluster, const char *keypairs_json,
> >>
> >> static void qemu_rbd_memset(RADOSCB *rcb, int64_t offs)
> >> {
> >> - if (LIBRBD_USE_IOVEC) {
> >> - RBDAIOCB *acb = rcb->acb;
> >> - iov_memset(acb->qiov->iov, acb->qiov->niov, offs, 0,
> >> - acb->qiov->size - offs);
> >> - } else {
> >> - memset(rcb->buf + offs, 0, rcb->size - offs);
> >> - }
> >> + RBDAIOCB *acb = rcb->acb;
> >> + iov_memset(acb->qiov->iov, acb->qiov->niov, offs, 0,
> >> + acb->qiov->size - offs);
> >> }
> >>
> >> /* FIXME Deprecate and remove keypairs or make it available in QMP. */
> >> @@ -504,13 +484,6 @@ static void qemu_rbd_complete_aio(RADOSCB *rcb)
> >>
> >> g_free(rcb);
> >>
> >> - if (!LIBRBD_USE_IOVEC) {
> >> - if (acb->cmd == RBD_AIO_READ) {
> >> - qemu_iovec_from_buf(acb->qiov, 0, acb->bounce, acb->qiov->size);
> >> - }
> >> - qemu_vfree(acb->bounce);
> >> - }
> >> -
> >> acb->common.cb(acb->common.opaque, (acb->ret > 0 ? 0 : acb->ret));
> >>
> >> qemu_aio_unref(acb);
> >> @@ -878,28 +851,6 @@ static void rbd_finish_aiocb(rbd_completion_t c, RADOSCB *rcb)
> >> rbd_finish_bh, rcb);
> >> }
> >>
> >> -static int rbd_aio_discard_wrapper(rbd_image_t image,
> >> - uint64_t off,
> >> - uint64_t len,
> >> - rbd_completion_t comp)
> >> -{
> >> -#ifdef LIBRBD_SUPPORTS_DISCARD
> >> - return rbd_aio_discard(image, off, len, comp);
> >> -#else
> >> - return -ENOTSUP;
> >> -#endif
> >> -}
> >> -
> >> -static int rbd_aio_flush_wrapper(rbd_image_t image,
> >> - rbd_completion_t comp)
> >> -{
> >> -#ifdef LIBRBD_SUPPORTS_AIO_FLUSH
> >> - return rbd_aio_flush(image, comp);
> >> -#else
> >> - return -ENOTSUP;
> >> -#endif
> >> -}
> >> -
> >> static BlockAIOCB *rbd_start_aio(BlockDriverState *bs,
> >> int64_t off,
> >> QEMUIOVector *qiov,
> >> @@ -922,21 +873,6 @@ static BlockAIOCB *rbd_start_aio(BlockDriverState *bs,
> >>
> >> rcb = g_new(RADOSCB, 1);
> >>
> >> - if (!LIBRBD_USE_IOVEC) {
> >> - if (cmd == RBD_AIO_DISCARD || cmd == RBD_AIO_FLUSH) {
> >> - acb->bounce = NULL;
> >> - } else {
> >> - acb->bounce = qemu_try_blockalign(bs, qiov->size);
> >> - if (acb->bounce == NULL) {
> >> - goto failed;
> >> - }
> >> - }
> >> - if (cmd == RBD_AIO_WRITE) {
> >> - qemu_iovec_to_buf(acb->qiov, 0, acb->bounce, qiov->size);
> >> - }
> >> - rcb->buf = acb->bounce;
> >> - }
> >> -
> >> acb->ret = 0;
> >> acb->error = 0;
> >> acb->s = s;
> >> @@ -950,7 +886,7 @@ static BlockAIOCB *rbd_start_aio(BlockDriverState *bs,
> >> }
> >>
> >> switch (cmd) {
> >> - case RBD_AIO_WRITE: {
> >> + case RBD_AIO_WRITE:
> >> /*
> >> * RBD APIs don't allow us to write more than actual size, so in order
> >> * to support growing images, we resize the image before write
> >> @@ -962,25 +898,16 @@ static BlockAIOCB *rbd_start_aio(BlockDriverState *bs,
> >> goto failed_completion;
> >> }
> >> }
> >> -#ifdef LIBRBD_SUPPORTS_IOVEC
> >> - r = rbd_aio_writev(s->image, qiov->iov, qiov->niov, off, c);
> >> -#else
> >> - r = rbd_aio_write(s->image, off, size, rcb->buf, c);
> >> -#endif
> >> + r = rbd_aio_writev(s->image, qiov->iov, qiov->niov, off, c);
> >> break;
> >> - }
> >> case RBD_AIO_READ:
> >> -#ifdef LIBRBD_SUPPORTS_IOVEC
> >> - r = rbd_aio_readv(s->image, qiov->iov, qiov->niov, off, c);
> >> -#else
> >> - r = rbd_aio_read(s->image, off, size, rcb->buf, c);
> >> -#endif
> >> + r = rbd_aio_readv(s->image, qiov->iov, qiov->niov, off, c);
> >> break;
> >> case RBD_AIO_DISCARD:
> >> - r = rbd_aio_discard_wrapper(s->image, off, size, c);
> >> + r = rbd_aio_discard(s->image, off, size, c);
> >> break;
> >> case RBD_AIO_FLUSH:
> >> - r = rbd_aio_flush_wrapper(s->image, c);
> >> + r = rbd_aio_flush(s->image, c);
> >> break;
> >> default:
> >> r = -EINVAL;
> >> @@ -995,9 +922,6 @@ failed_completion:
> >> rbd_aio_release(c);
> >> failed:
> >> g_free(rcb);
> >> - if (!LIBRBD_USE_IOVEC) {
> >> - qemu_vfree(acb->bounce);
> >> - }
> >>
> >> qemu_aio_unref(acb);
> >> return NULL;
> >> @@ -1023,7 +947,6 @@ static BlockAIOCB *qemu_rbd_aio_pwritev(BlockDriverState *bs,
> >> RBD_AIO_WRITE);
> >> }
> >>
> >> -#ifdef LIBRBD_SUPPORTS_AIO_FLUSH
> >> static BlockAIOCB *qemu_rbd_aio_flush(BlockDriverState *bs,
> >> BlockCompletionFunc *cb,
> >> void *opaque)
> >> @@ -1031,20 +954,6 @@ static BlockAIOCB *qemu_rbd_aio_flush(BlockDriverState *bs,
> >> return rbd_start_aio(bs, 0, NULL, 0, cb, opaque, RBD_AIO_FLUSH);
> >> }
> >>
> >> -#else
> >> -
> >> -static int qemu_rbd_co_flush(BlockDriverState *bs)
> >> -{
> >> -#if LIBRBD_VERSION_CODE >= LIBRBD_VERSION(0, 1, 1)
> >> - /* rbd_flush added in 0.1.1 */
> >> - BDRVRBDState *s = bs->opaque;
> >> - return rbd_flush(s->image);
> >> -#else
> >> - return 0;
> >> -#endif
> >> -}
> >> -#endif
> >> -
> >> static int qemu_rbd_getinfo(BlockDriverState *bs, BlockDriverInfo *bdi)
> >> {
> >> BDRVRBDState *s = bs->opaque;
> >> @@ -1210,7 +1119,6 @@ static int qemu_rbd_snap_list(BlockDriverState *bs,
> >> return snap_count;
> >> }
> >>
> >> -#ifdef LIBRBD_SUPPORTS_DISCARD
> >> static BlockAIOCB *qemu_rbd_aio_pdiscard(BlockDriverState *bs,
> >> int64_t offset,
> >> int bytes,
> >> @@ -1220,9 +1128,7 @@ static BlockAIOCB *qemu_rbd_aio_pdiscard(BlockDriverState *bs,
> >> return rbd_start_aio(bs, offset, NULL, bytes, cb, opaque,
> >> RBD_AIO_DISCARD);
> >> }
> >> -#endif
> >>
> >> -#ifdef LIBRBD_SUPPORTS_INVALIDATE
> >> static void coroutine_fn qemu_rbd_co_invalidate_cache(BlockDriverState *bs,
> >> Error **errp)
> >> {
> >> @@ -1232,7 +1138,6 @@ static void coroutine_fn qemu_rbd_co_invalidate_cache(BlockDriverState *bs,
> >> error_setg_errno(errp, -r, "Failed to invalidate the cache");
> >> }
> >> }
> >> -#endif
> >>
> >> static QemuOptsList qemu_rbd_create_opts = {
> >> .name = "rbd-create-opts",
> >> @@ -1290,23 +1195,14 @@ static BlockDriver bdrv_rbd = {
> >> .bdrv_aio_preadv = qemu_rbd_aio_preadv,
> >> .bdrv_aio_pwritev = qemu_rbd_aio_pwritev,
> >>
> >> -#ifdef LIBRBD_SUPPORTS_AIO_FLUSH
> >> .bdrv_aio_flush = qemu_rbd_aio_flush,
> >> -#else
> >> - .bdrv_co_flush_to_disk = qemu_rbd_co_flush,
> >> -#endif
> >> -
> >> -#ifdef LIBRBD_SUPPORTS_DISCARD
> >> .bdrv_aio_pdiscard = qemu_rbd_aio_pdiscard,
> >> -#endif
> >>
> >> .bdrv_snapshot_create = qemu_rbd_snap_create,
> >> .bdrv_snapshot_delete = qemu_rbd_snap_remove,
> >> .bdrv_snapshot_list = qemu_rbd_snap_list,
> >> .bdrv_snapshot_goto = qemu_rbd_snap_rollback,
> >> -#ifdef LIBRBD_SUPPORTS_INVALIDATE
> >> .bdrv_co_invalidate_cache = qemu_rbd_co_invalidate_cache,
> >> -#endif
> >>
> >> .strong_runtime_opts = qemu_rbd_strong_runtime_opts,
> >> };
> >> diff --git a/meson.build b/meson.build
> >> index 1559e8d873..644ef36476 100644
> >> --- a/meson.build
> >> +++ b/meson.build
> >> @@ -721,13 +721,16 @@ if not get_option('rbd').auto() or have_block
> >> int main(void) {
> >> rados_t cluster;
> >> rados_create(&cluster, NULL);
> >> + #if LIBRBD_VERSION_CODE < LIBRBD_VERSION(1, 12, 0)
> >> + #error
> > Hi Peter,
> >
> > Just a nit, but I would stick to the actual version of the library
> > instead of the package version. librbd major version is 1, librados
> > major version is 2 -- it shows up in ldd, when listing /usr/lib, etc.
> > Something like
> >
> > #error librbd >= 1.12.0 required
> >
> > here
>
>
> Okay.
>
>
> >
> >> + #endif
> >> return 0;
> >> }''', dependencies: [librbd, librados])
> >> rbd = declare_dependency(dependencies: [librbd, librados])
> >> elif get_option('rbd').enabled()
> >> - error('could not link librados')
> >> + error('librados/librbd >= 12.0.0 required')
> > and
> >
> > error('could not link librados/librbd')
> >
> >> else
> >> - warning('could not link librados, disabling')
> >> + warning('librados/librbd >= 12.0.0 not found, disabling rbd support')
> > warning('could not link librados/librbd, disabling')
> >
> > here to avoid repeating the version.
>
>
> I would only like to change it if the "#error" is visible in the actual configure output. Have not tested your proposal yet.
Looks like it's not, only in build/meson-logs/meson-log.txt.
In that case, just tweak the version numbers and since librados and
librbd have different versions, don't mention librados.
Thanks,
Ilya
^ permalink raw reply [flat|nested] 22+ messages in thread
* [PATCH V3 2/6] block/rbd: store object_size in BDRVRBDState
2021-05-19 14:23 [PATCH V3 0/6] block/rbd: migrate to coroutines and add write zeroes support Peter Lieven
2021-05-19 14:23 ` [PATCH V3 1/6] block/rbd: bump librbd requirement to luminous release Peter Lieven
@ 2021-05-19 14:23 ` Peter Lieven
2021-06-19 20:02 ` Ilya Dryomov
2021-05-19 14:23 ` [PATCH V3 3/6] block/rbd: update s->image_size in qemu_rbd_getlength Peter Lieven
` (3 subsequent siblings)
5 siblings, 1 reply; 22+ messages in thread
From: Peter Lieven @ 2021-05-19 14:23 UTC (permalink / raw)
To: qemu-block
Cc: kwolf, idryomov, berrange, Peter Lieven, qemu-devel, ct,
pbonzini, mreitz, dillaman
Signed-off-by: Peter Lieven <pl@kamp.de>
---
block/rbd.c | 18 +++++++-----------
1 file changed, 7 insertions(+), 11 deletions(-)
diff --git a/block/rbd.c b/block/rbd.c
index 6b1cbe1d75..b4caea4f1b 100644
--- a/block/rbd.c
+++ b/block/rbd.c
@@ -90,6 +90,7 @@ typedef struct BDRVRBDState {
char *snap;
char *namespace;
uint64_t image_size;
+ uint64_t object_size;
} BDRVRBDState;
static int qemu_rbd_connect(rados_t *cluster, rados_ioctx_t *io_ctx,
@@ -675,6 +676,7 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags,
const QDictEntry *e;
Error *local_err = NULL;
char *keypairs, *secretid;
+ rbd_image_info_t info;
int r;
keypairs = g_strdup(qdict_get_try_str(options, "=keyvalue-pairs"));
@@ -739,13 +741,15 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags,
goto failed_open;
}
- r = rbd_get_size(s->image, &s->image_size);
+ r = rbd_stat(s->image, &info, sizeof(info));
if (r < 0) {
- error_setg_errno(errp, -r, "error getting image size from %s",
+ error_setg_errno(errp, -r, "error getting image info from %s",
s->image_name);
rbd_close(s->image);
goto failed_open;
}
+ s->image_size = info.size;
+ s->object_size = info.obj_size;
/* If we are using an rbd snapshot, we must be r/o, otherwise
* leave as-is */
@@ -957,15 +961,7 @@ static BlockAIOCB *qemu_rbd_aio_flush(BlockDriverState *bs,
static int qemu_rbd_getinfo(BlockDriverState *bs, BlockDriverInfo *bdi)
{
BDRVRBDState *s = bs->opaque;
- rbd_image_info_t info;
- int r;
-
- r = rbd_stat(s->image, &info, sizeof(info));
- if (r < 0) {
- return r;
- }
-
- bdi->cluster_size = info.obj_size;
+ bdi->cluster_size = s->object_size;
return 0;
}
--
2.17.1
^ permalink raw reply related [flat|nested] 22+ messages in thread
* Re: [PATCH V3 2/6] block/rbd: store object_size in BDRVRBDState
2021-05-19 14:23 ` [PATCH V3 2/6] block/rbd: store object_size in BDRVRBDState Peter Lieven
@ 2021-06-19 20:02 ` Ilya Dryomov
0 siblings, 0 replies; 22+ messages in thread
From: Ilya Dryomov @ 2021-06-19 20:02 UTC (permalink / raw)
To: Peter Lieven
Cc: kwolf, berrange, qemu-block, qemu-devel, ct, mreitz,
Paolo Bonzini, Ilya Dryomov, Jason Dillaman
On Wed, May 19, 2021 at 4:29 PM Peter Lieven <pl@kamp.de> wrote:
>
> Signed-off-by: Peter Lieven <pl@kamp.de>
> ---
> block/rbd.c | 18 +++++++-----------
> 1 file changed, 7 insertions(+), 11 deletions(-)
>
> diff --git a/block/rbd.c b/block/rbd.c
> index 6b1cbe1d75..b4caea4f1b 100644
> --- a/block/rbd.c
> +++ b/block/rbd.c
> @@ -90,6 +90,7 @@ typedef struct BDRVRBDState {
> char *snap;
> char *namespace;
> uint64_t image_size;
> + uint64_t object_size;
> } BDRVRBDState;
>
> static int qemu_rbd_connect(rados_t *cluster, rados_ioctx_t *io_ctx,
> @@ -675,6 +676,7 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags,
> const QDictEntry *e;
> Error *local_err = NULL;
> char *keypairs, *secretid;
> + rbd_image_info_t info;
> int r;
>
> keypairs = g_strdup(qdict_get_try_str(options, "=keyvalue-pairs"));
> @@ -739,13 +741,15 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags,
> goto failed_open;
> }
>
> - r = rbd_get_size(s->image, &s->image_size);
> + r = rbd_stat(s->image, &info, sizeof(info));
> if (r < 0) {
> - error_setg_errno(errp, -r, "error getting image size from %s",
> + error_setg_errno(errp, -r, "error getting image info from %s",
> s->image_name);
> rbd_close(s->image);
> goto failed_open;
> }
> + s->image_size = info.size;
> + s->object_size = info.obj_size;
>
> /* If we are using an rbd snapshot, we must be r/o, otherwise
> * leave as-is */
> @@ -957,15 +961,7 @@ static BlockAIOCB *qemu_rbd_aio_flush(BlockDriverState *bs,
> static int qemu_rbd_getinfo(BlockDriverState *bs, BlockDriverInfo *bdi)
> {
> BDRVRBDState *s = bs->opaque;
> - rbd_image_info_t info;
> - int r;
> -
> - r = rbd_stat(s->image, &info, sizeof(info));
> - if (r < 0) {
> - return r;
> - }
> -
> - bdi->cluster_size = info.obj_size;
> + bdi->cluster_size = s->object_size;
> return 0;
> }
>
> --
> 2.17.1
>
>
>
Reviewed-by: Ilya Dryomov <idryomov@gmail.com>
Thanks,
Ilya
^ permalink raw reply [flat|nested] 22+ messages in thread
* [PATCH V3 3/6] block/rbd: update s->image_size in qemu_rbd_getlength
2021-05-19 14:23 [PATCH V3 0/6] block/rbd: migrate to coroutines and add write zeroes support Peter Lieven
2021-05-19 14:23 ` [PATCH V3 1/6] block/rbd: bump librbd requirement to luminous release Peter Lieven
2021-05-19 14:23 ` [PATCH V3 2/6] block/rbd: store object_size in BDRVRBDState Peter Lieven
@ 2021-05-19 14:23 ` Peter Lieven
2021-06-19 19:57 ` Ilya Dryomov
2021-05-19 14:23 ` [PATCH V3 4/6] block/rbd: migrate from aio to coroutines Peter Lieven
` (2 subsequent siblings)
5 siblings, 1 reply; 22+ messages in thread
From: Peter Lieven @ 2021-05-19 14:23 UTC (permalink / raw)
To: qemu-block
Cc: kwolf, idryomov, berrange, Peter Lieven, qemu-devel, ct,
pbonzini, mreitz, dillaman
In case the image size changed, we should adjust our internally stored size as well.
Signed-off-by: Peter Lieven <pl@kamp.de>
---
block/rbd.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/block/rbd.c b/block/rbd.c
index b4caea4f1b..97a2ae4c84 100644
--- a/block/rbd.c
+++ b/block/rbd.c
@@ -976,6 +976,7 @@ static int64_t qemu_rbd_getlength(BlockDriverState *bs)
return r;
}
+ s->image_size = info.size;
return info.size;
}
--
2.17.1
^ permalink raw reply related [flat|nested] 22+ messages in thread
* Re: [PATCH V3 3/6] block/rbd: update s->image_size in qemu_rbd_getlength
2021-05-19 14:23 ` [PATCH V3 3/6] block/rbd: update s->image_size in qemu_rbd_getlength Peter Lieven
@ 2021-06-19 19:57 ` Ilya Dryomov
0 siblings, 0 replies; 22+ messages in thread
From: Ilya Dryomov @ 2021-06-19 19:57 UTC (permalink / raw)
To: Peter Lieven
Cc: kwolf, berrange, qemu-block, qemu-devel, ct, Paolo Bonzini, mreitz
On Wed, May 19, 2021 at 4:26 PM Peter Lieven <pl@kamp.de> wrote:
>
> in case the image size changed we should adjust our internally stored size as well.
>
> Signed-off-by: Peter Lieven <pl@kamp.de>
> ---
> block/rbd.c | 1 +
> 1 file changed, 1 insertion(+)
>
> diff --git a/block/rbd.c b/block/rbd.c
> index b4caea4f1b..97a2ae4c84 100644
> --- a/block/rbd.c
> +++ b/block/rbd.c
> @@ -976,6 +976,7 @@ static int64_t qemu_rbd_getlength(BlockDriverState *bs)
> return r;
> }
>
> + s->image_size = info.size;
> return info.size;
Since you are touching this function might as well switch to
rbd_get_size() to put size directly into s->image_size and return
s->image_size, skipping rbd_image_info_t.
Thanks,
Ilya
^ permalink raw reply [flat|nested] 22+ messages in thread
* [PATCH V3 4/6] block/rbd: migrate from aio to coroutines
2021-05-19 14:23 [PATCH V3 0/6] block/rbd: migrate to coroutines and add write zeroes support Peter Lieven
` (2 preceding siblings ...)
2021-05-19 14:23 ` [PATCH V3 3/6] block/rbd: update s->image_size in qemu_rbd_getlength Peter Lieven
@ 2021-05-19 14:23 ` Peter Lieven
2021-06-17 14:43 ` Ilya Dryomov
2021-05-19 14:23 ` [PATCH V3 5/6] block/rbd: add write zeroes support Peter Lieven
2021-05-19 14:23 ` [PATCH V3 6/6] block/rbd: drop qemu_rbd_refresh_limits Peter Lieven
5 siblings, 1 reply; 22+ messages in thread
From: Peter Lieven @ 2021-05-19 14:23 UTC (permalink / raw)
To: qemu-block
Cc: kwolf, idryomov, berrange, Peter Lieven, qemu-devel, ct,
pbonzini, mreitz, dillaman
Signed-off-by: Peter Lieven <pl@kamp.de>
---
block/rbd.c | 255 ++++++++++++++++++----------------------------------
1 file changed, 87 insertions(+), 168 deletions(-)
diff --git a/block/rbd.c b/block/rbd.c
index 97a2ae4c84..0d8612a988 100644
--- a/block/rbd.c
+++ b/block/rbd.c
@@ -66,22 +66,6 @@ typedef enum {
RBD_AIO_FLUSH
} RBDAIOCmd;
-typedef struct RBDAIOCB {
- BlockAIOCB common;
- int64_t ret;
- QEMUIOVector *qiov;
- RBDAIOCmd cmd;
- int error;
- struct BDRVRBDState *s;
-} RBDAIOCB;
-
-typedef struct RADOSCB {
- RBDAIOCB *acb;
- struct BDRVRBDState *s;
- int64_t size;
- int64_t ret;
-} RADOSCB;
-
typedef struct BDRVRBDState {
rados_t cluster;
rados_ioctx_t io_ctx;
@@ -93,6 +77,13 @@ typedef struct BDRVRBDState {
uint64_t object_size;
} BDRVRBDState;
+typedef struct RBDTask {
+ BlockDriverState *bs;
+ Coroutine *co;
+ bool complete;
+ int64_t ret;
+} RBDTask;
+
static int qemu_rbd_connect(rados_t *cluster, rados_ioctx_t *io_ctx,
BlockdevOptionsRbd *opts, bool cache,
const char *keypairs, const char *secretid,
@@ -325,13 +316,6 @@ static int qemu_rbd_set_keypairs(rados_t cluster, const char *keypairs_json,
return ret;
}
-static void qemu_rbd_memset(RADOSCB *rcb, int64_t offs)
-{
- RBDAIOCB *acb = rcb->acb;
- iov_memset(acb->qiov->iov, acb->qiov->niov, offs, 0,
- acb->qiov->size - offs);
-}
-
/* FIXME Deprecate and remove keypairs or make it available in QMP. */
static int qemu_rbd_do_create(BlockdevCreateOptions *options,
const char *keypairs, const char *password_secret,
@@ -450,46 +434,6 @@ exit:
return ret;
}
-/*
- * This aio completion is being called from rbd_finish_bh() and runs in qemu
- * BH context.
- */
-static void qemu_rbd_complete_aio(RADOSCB *rcb)
-{
- RBDAIOCB *acb = rcb->acb;
- int64_t r;
-
- r = rcb->ret;
-
- if (acb->cmd != RBD_AIO_READ) {
- if (r < 0) {
- acb->ret = r;
- acb->error = 1;
- } else if (!acb->error) {
- acb->ret = rcb->size;
- }
- } else {
- if (r < 0) {
- qemu_rbd_memset(rcb, 0);
- acb->ret = r;
- acb->error = 1;
- } else if (r < rcb->size) {
- qemu_rbd_memset(rcb, r);
- if (!acb->error) {
- acb->ret = rcb->size;
- }
- } else if (!acb->error) {
- acb->ret = r;
- }
- }
-
- g_free(rcb);
-
- acb->common.cb(acb->common.opaque, (acb->ret > 0 ? 0 : acb->ret));
-
- qemu_aio_unref(acb);
-}
-
static char *qemu_rbd_mon_host(BlockdevOptionsRbd *opts, Error **errp)
{
const char **vals;
@@ -826,89 +770,50 @@ static int qemu_rbd_resize(BlockDriverState *bs, uint64_t size)
return 0;
}
-static const AIOCBInfo rbd_aiocb_info = {
- .aiocb_size = sizeof(RBDAIOCB),
-};
-
-static void rbd_finish_bh(void *opaque)
+static void qemu_rbd_finish_bh(void *opaque)
{
- RADOSCB *rcb = opaque;
- qemu_rbd_complete_aio(rcb);
+ RBDTask *task = opaque;
+ task->complete = 1;
+ aio_co_wake(task->co);
}
-/*
- * This is the callback function for rbd_aio_read and _write
- *
- * Note: this function is being called from a non qemu thread so
- * we need to be careful about what we do here. Generally we only
- * schedule a BH, and do the rest of the io completion handling
- * from rbd_finish_bh() which runs in a qemu context.
- */
-static void rbd_finish_aiocb(rbd_completion_t c, RADOSCB *rcb)
+static void qemu_rbd_completion_cb(rbd_completion_t c, RBDTask *task)
{
- RBDAIOCB *acb = rcb->acb;
-
- rcb->ret = rbd_aio_get_return_value(c);
+ task->ret = rbd_aio_get_return_value(c);
rbd_aio_release(c);
-
- replay_bh_schedule_oneshot_event(bdrv_get_aio_context(acb->common.bs),
- rbd_finish_bh, rcb);
+ aio_bh_schedule_oneshot(bdrv_get_aio_context(task->bs),
+ qemu_rbd_finish_bh, task);
}
-static BlockAIOCB *rbd_start_aio(BlockDriverState *bs,
- int64_t off,
- QEMUIOVector *qiov,
- int64_t size,
- BlockCompletionFunc *cb,
- void *opaque,
- RBDAIOCmd cmd)
+static int coroutine_fn qemu_rbd_start_co(BlockDriverState *bs,
+ uint64_t offset,
+ uint64_t bytes,
+ QEMUIOVector *qiov,
+ int flags,
+ RBDAIOCmd cmd)
{
- RBDAIOCB *acb;
- RADOSCB *rcb = NULL;
+ BDRVRBDState *s = bs->opaque;
+ RBDTask task = { .bs = bs, .co = qemu_coroutine_self() };
rbd_completion_t c;
int r;
- BDRVRBDState *s = bs->opaque;
+ assert(!qiov || qiov->size == bytes);
- acb = qemu_aio_get(&rbd_aiocb_info, bs, cb, opaque);
- acb->cmd = cmd;
- acb->qiov = qiov;
- assert(!qiov || qiov->size == size);
-
- rcb = g_new(RADOSCB, 1);
-
- acb->ret = 0;
- acb->error = 0;
- acb->s = s;
-
- rcb->acb = acb;
- rcb->s = acb->s;
- rcb->size = size;
- r = rbd_aio_create_completion(rcb, (rbd_callback_t) rbd_finish_aiocb, &c);
+ r = rbd_aio_create_completion(&task,
+ (rbd_callback_t) qemu_rbd_completion_cb, &c);
if (r < 0) {
- goto failed;
+ return r;
}
switch (cmd) {
- case RBD_AIO_WRITE:
- /*
- * RBD APIs don't allow us to write more than actual size, so in order
- * to support growing images, we resize the image before write
- * operations that exceed the current size.
- */
- if (off + size > s->image_size) {
- r = qemu_rbd_resize(bs, off + size);
- if (r < 0) {
- goto failed_completion;
- }
- }
- r = rbd_aio_writev(s->image, qiov->iov, qiov->niov, off, c);
- break;
case RBD_AIO_READ:
- r = rbd_aio_readv(s->image, qiov->iov, qiov->niov, off, c);
+ r = rbd_aio_readv(s->image, qiov->iov, qiov->niov, offset, c);
+ break;
+ case RBD_AIO_WRITE:
+ r = rbd_aio_writev(s->image, qiov->iov, qiov->niov, offset, c);
break;
case RBD_AIO_DISCARD:
- r = rbd_aio_discard(s->image, off, size, c);
+ r = rbd_aio_discard(s->image, offset, bytes, c);
break;
case RBD_AIO_FLUSH:
r = rbd_aio_flush(s->image, c);
@@ -918,44 +823,69 @@ static BlockAIOCB *rbd_start_aio(BlockDriverState *bs,
}
if (r < 0) {
- goto failed_completion;
+ error_report("rbd request failed early: cmd %d offset %" PRIu64
+ " bytes %" PRIu64 " flags %d r %d (%s)", cmd, offset,
+ bytes, flags, r, strerror(-r));
+ rbd_aio_release(c);
+ return r;
}
- return &acb->common;
-failed_completion:
- rbd_aio_release(c);
-failed:
- g_free(rcb);
+ while (!task.complete) {
+ qemu_coroutine_yield();
+ }
- qemu_aio_unref(acb);
- return NULL;
+ if (task.ret < 0) {
+ error_report("rbd request failed: cmd %d offset %" PRIu64 " bytes %"
+ PRIu64 " flags %d task.ret %" PRIi64 " (%s)", cmd, offset,
+ bytes, flags, task.ret, strerror(-task.ret));
+ return task.ret;
+ }
+
+ /* zero pad short reads */
+ if (cmd == RBD_AIO_READ && task.ret < qiov->size) {
+ qemu_iovec_memset(qiov, task.ret, 0, qiov->size - task.ret);
+ }
+
+ return 0;
+}
+
+static int
+coroutine_fn qemu_rbd_co_preadv(BlockDriverState *bs, uint64_t offset,
+ uint64_t bytes, QEMUIOVector *qiov,
+ int flags)
+{
+ return qemu_rbd_start_co(bs, offset, bytes, qiov, flags, RBD_AIO_READ);
}
-static BlockAIOCB *qemu_rbd_aio_preadv(BlockDriverState *bs,
- uint64_t offset, uint64_t bytes,
- QEMUIOVector *qiov, int flags,
- BlockCompletionFunc *cb,
- void *opaque)
+static int
+coroutine_fn qemu_rbd_co_pwritev(BlockDriverState *bs, uint64_t offset,
+ uint64_t bytes, QEMUIOVector *qiov,
+ int flags)
{
- return rbd_start_aio(bs, offset, qiov, bytes, cb, opaque,
- RBD_AIO_READ);
+ BDRVRBDState *s = bs->opaque;
+ /*
+ * RBD APIs don't allow us to write more than actual size, so in order
+ * to support growing images, we resize the image before write
+ * operations that exceed the current size.
+ */
+ if (offset + bytes > s->image_size) {
+ int r = qemu_rbd_resize(bs, offset + bytes);
+ if (r < 0) {
+ return r;
+ }
+ }
+ return qemu_rbd_start_co(bs, offset, bytes, qiov, flags, RBD_AIO_WRITE);
}
-static BlockAIOCB *qemu_rbd_aio_pwritev(BlockDriverState *bs,
- uint64_t offset, uint64_t bytes,
- QEMUIOVector *qiov, int flags,
- BlockCompletionFunc *cb,
- void *opaque)
+static int coroutine_fn qemu_rbd_co_flush(BlockDriverState *bs)
{
- return rbd_start_aio(bs, offset, qiov, bytes, cb, opaque,
- RBD_AIO_WRITE);
+ return qemu_rbd_start_co(bs, 0, 0, NULL, 0, RBD_AIO_FLUSH);
}
-static BlockAIOCB *qemu_rbd_aio_flush(BlockDriverState *bs,
- BlockCompletionFunc *cb,
- void *opaque)
+static int coroutine_fn qemu_rbd_co_pdiscard(BlockDriverState *bs,
+ int64_t offset, int count)
{
- return rbd_start_aio(bs, 0, NULL, 0, cb, opaque, RBD_AIO_FLUSH);
+ return qemu_rbd_start_co(bs, offset, count, NULL, 0, RBD_AIO_DISCARD);
}
static int qemu_rbd_getinfo(BlockDriverState *bs, BlockDriverInfo *bdi)
@@ -1116,16 +1046,6 @@ static int qemu_rbd_snap_list(BlockDriverState *bs,
return snap_count;
}
-static BlockAIOCB *qemu_rbd_aio_pdiscard(BlockDriverState *bs,
- int64_t offset,
- int bytes,
- BlockCompletionFunc *cb,
- void *opaque)
-{
- return rbd_start_aio(bs, offset, NULL, bytes, cb, opaque,
- RBD_AIO_DISCARD);
-}
-
static void coroutine_fn qemu_rbd_co_invalidate_cache(BlockDriverState *bs,
Error **errp)
{
@@ -1189,11 +1109,10 @@ static BlockDriver bdrv_rbd = {
.bdrv_co_truncate = qemu_rbd_co_truncate,
.protocol_name = "rbd",
- .bdrv_aio_preadv = qemu_rbd_aio_preadv,
- .bdrv_aio_pwritev = qemu_rbd_aio_pwritev,
-
- .bdrv_aio_flush = qemu_rbd_aio_flush,
- .bdrv_aio_pdiscard = qemu_rbd_aio_pdiscard,
+ .bdrv_co_preadv = qemu_rbd_co_preadv,
+ .bdrv_co_pwritev = qemu_rbd_co_pwritev,
+ .bdrv_co_flush_to_disk = qemu_rbd_co_flush,
+ .bdrv_co_pdiscard = qemu_rbd_co_pdiscard,
.bdrv_snapshot_create = qemu_rbd_snap_create,
.bdrv_snapshot_delete = qemu_rbd_snap_remove,
--
2.17.1
^ permalink raw reply related [flat|nested] 22+ messages in thread
* Re: [PATCH V3 4/6] block/rbd: migrate from aio to coroutines
2021-05-19 14:23 ` [PATCH V3 4/6] block/rbd: migrate from aio to coroutines Peter Lieven
@ 2021-06-17 14:43 ` Ilya Dryomov
2021-06-18 9:07 ` Peter Lieven
0 siblings, 1 reply; 22+ messages in thread
From: Ilya Dryomov @ 2021-06-17 14:43 UTC (permalink / raw)
To: Peter Lieven
Cc: kwolf, berrange, qemu-block, qemu-devel, ct, Paolo Bonzini, mreitz
On Wed, May 19, 2021 at 4:27 PM Peter Lieven <pl@kamp.de> wrote:
>
> Signed-off-by: Peter Lieven <pl@kamp.de>
> ---
> block/rbd.c | 255 ++++++++++++++++++----------------------------------
> 1 file changed, 87 insertions(+), 168 deletions(-)
>
> diff --git a/block/rbd.c b/block/rbd.c
> index 97a2ae4c84..0d8612a988 100644
> --- a/block/rbd.c
> +++ b/block/rbd.c
> @@ -66,22 +66,6 @@ typedef enum {
> RBD_AIO_FLUSH
> } RBDAIOCmd;
>
> -typedef struct RBDAIOCB {
> - BlockAIOCB common;
> - int64_t ret;
> - QEMUIOVector *qiov;
> - RBDAIOCmd cmd;
> - int error;
> - struct BDRVRBDState *s;
> -} RBDAIOCB;
> -
> -typedef struct RADOSCB {
> - RBDAIOCB *acb;
> - struct BDRVRBDState *s;
> - int64_t size;
> - int64_t ret;
> -} RADOSCB;
> -
> typedef struct BDRVRBDState {
> rados_t cluster;
> rados_ioctx_t io_ctx;
> @@ -93,6 +77,13 @@ typedef struct BDRVRBDState {
> uint64_t object_size;
> } BDRVRBDState;
>
> +typedef struct RBDTask {
> + BlockDriverState *bs;
> + Coroutine *co;
> + bool complete;
> + int64_t ret;
> +} RBDTask;
> +
> static int qemu_rbd_connect(rados_t *cluster, rados_ioctx_t *io_ctx,
> BlockdevOptionsRbd *opts, bool cache,
> const char *keypairs, const char *secretid,
> @@ -325,13 +316,6 @@ static int qemu_rbd_set_keypairs(rados_t cluster, const char *keypairs_json,
> return ret;
> }
>
> -static void qemu_rbd_memset(RADOSCB *rcb, int64_t offs)
> -{
> - RBDAIOCB *acb = rcb->acb;
> - iov_memset(acb->qiov->iov, acb->qiov->niov, offs, 0,
> - acb->qiov->size - offs);
> -}
> -
> /* FIXME Deprecate and remove keypairs or make it available in QMP. */
> static int qemu_rbd_do_create(BlockdevCreateOptions *options,
> const char *keypairs, const char *password_secret,
> @@ -450,46 +434,6 @@ exit:
> return ret;
> }
>
> -/*
> - * This aio completion is being called from rbd_finish_bh() and runs in qemu
> - * BH context.
> - */
> -static void qemu_rbd_complete_aio(RADOSCB *rcb)
> -{
> - RBDAIOCB *acb = rcb->acb;
> - int64_t r;
> -
> - r = rcb->ret;
> -
> - if (acb->cmd != RBD_AIO_READ) {
> - if (r < 0) {
> - acb->ret = r;
> - acb->error = 1;
> - } else if (!acb->error) {
> - acb->ret = rcb->size;
> - }
> - } else {
> - if (r < 0) {
> - qemu_rbd_memset(rcb, 0);
> - acb->ret = r;
> - acb->error = 1;
> - } else if (r < rcb->size) {
> - qemu_rbd_memset(rcb, r);
> - if (!acb->error) {
> - acb->ret = rcb->size;
> - }
> - } else if (!acb->error) {
> - acb->ret = r;
> - }
> - }
> -
> - g_free(rcb);
> -
> - acb->common.cb(acb->common.opaque, (acb->ret > 0 ? 0 : acb->ret));
> -
> - qemu_aio_unref(acb);
> -}
> -
> static char *qemu_rbd_mon_host(BlockdevOptionsRbd *opts, Error **errp)
> {
> const char **vals;
> @@ -826,89 +770,50 @@ static int qemu_rbd_resize(BlockDriverState *bs, uint64_t size)
> return 0;
> }
>
> -static const AIOCBInfo rbd_aiocb_info = {
> - .aiocb_size = sizeof(RBDAIOCB),
> -};
> -
> -static void rbd_finish_bh(void *opaque)
> +static void qemu_rbd_finish_bh(void *opaque)
> {
> - RADOSCB *rcb = opaque;
> - qemu_rbd_complete_aio(rcb);
> + RBDTask *task = opaque;
> + task->complete = 1;
> + aio_co_wake(task->co);
> }
>
> -/*
> - * This is the callback function for rbd_aio_read and _write
> - *
> - * Note: this function is being called from a non qemu thread so
> - * we need to be careful about what we do here. Generally we only
> - * schedule a BH, and do the rest of the io completion handling
> - * from rbd_finish_bh() which runs in a qemu context.
> - */
I would adapt this comment instead of removing it. I mean, it is
still true and the reason for going through aio_bh_schedule_oneshot()
instead of calling aio_co_wake() directly, right?
> -static void rbd_finish_aiocb(rbd_completion_t c, RADOSCB *rcb)
> +static void qemu_rbd_completion_cb(rbd_completion_t c, RBDTask *task)
> {
> - RBDAIOCB *acb = rcb->acb;
> -
> - rcb->ret = rbd_aio_get_return_value(c);
> + task->ret = rbd_aio_get_return_value(c);
> rbd_aio_release(c);
> -
> - replay_bh_schedule_oneshot_event(bdrv_get_aio_context(acb->common.bs),
> - rbd_finish_bh, rcb);
> + aio_bh_schedule_oneshot(bdrv_get_aio_context(task->bs),
> + qemu_rbd_finish_bh, task);
> }
>
> -static BlockAIOCB *rbd_start_aio(BlockDriverState *bs,
> - int64_t off,
> - QEMUIOVector *qiov,
> - int64_t size,
> - BlockCompletionFunc *cb,
> - void *opaque,
> - RBDAIOCmd cmd)
> +static int coroutine_fn qemu_rbd_start_co(BlockDriverState *bs,
> + uint64_t offset,
> + uint64_t bytes,
> + QEMUIOVector *qiov,
> + int flags,
> + RBDAIOCmd cmd)
> {
> - RBDAIOCB *acb;
> - RADOSCB *rcb = NULL;
> + BDRVRBDState *s = bs->opaque;
> + RBDTask task = { .bs = bs, .co = qemu_coroutine_self() };
> rbd_completion_t c;
> int r;
>
> - BDRVRBDState *s = bs->opaque;
> + assert(!qiov || qiov->size == bytes);
>
> - acb = qemu_aio_get(&rbd_aiocb_info, bs, cb, opaque);
> - acb->cmd = cmd;
> - acb->qiov = qiov;
> - assert(!qiov || qiov->size == size);
> -
> - rcb = g_new(RADOSCB, 1);
> -
> - acb->ret = 0;
> - acb->error = 0;
> - acb->s = s;
> -
> - rcb->acb = acb;
> - rcb->s = acb->s;
> - rcb->size = size;
> - r = rbd_aio_create_completion(rcb, (rbd_callback_t) rbd_finish_aiocb, &c);
> + r = rbd_aio_create_completion(&task,
> + (rbd_callback_t) qemu_rbd_completion_cb, &c);
> if (r < 0) {
> - goto failed;
> + return r;
> }
>
> switch (cmd) {
> - case RBD_AIO_WRITE:
> - /*
> - * RBD APIs don't allow us to write more than actual size, so in order
> - * to support growing images, we resize the image before write
> - * operations that exceed the current size.
> - */
> - if (off + size > s->image_size) {
> - r = qemu_rbd_resize(bs, off + size);
> - if (r < 0) {
> - goto failed_completion;
> - }
> - }
> - r = rbd_aio_writev(s->image, qiov->iov, qiov->niov, off, c);
> - break;
> case RBD_AIO_READ:
> - r = rbd_aio_readv(s->image, qiov->iov, qiov->niov, off, c);
> + r = rbd_aio_readv(s->image, qiov->iov, qiov->niov, offset, c);
> + break;
> + case RBD_AIO_WRITE:
> + r = rbd_aio_writev(s->image, qiov->iov, qiov->niov, offset, c);
> break;
> case RBD_AIO_DISCARD:
> - r = rbd_aio_discard(s->image, off, size, c);
> + r = rbd_aio_discard(s->image, offset, bytes, c);
> break;
> case RBD_AIO_FLUSH:
> r = rbd_aio_flush(s->image, c);
> @@ -918,44 +823,69 @@ static BlockAIOCB *rbd_start_aio(BlockDriverState *bs,
> }
>
> if (r < 0) {
> - goto failed_completion;
> + error_report("rbd request failed early: cmd %d offset %" PRIu64
> + " bytes %" PRIu64 " flags %d r %d (%s)", cmd, offset,
> + bytes, flags, r, strerror(-r));
> + rbd_aio_release(c);
> + return r;
> }
> - return &acb->common;
>
> -failed_completion:
> - rbd_aio_release(c);
> -failed:
> - g_free(rcb);
> + while (!task.complete) {
> + qemu_coroutine_yield();
> + }
>
> - qemu_aio_unref(acb);
> - return NULL;
> + if (task.ret < 0) {
> + error_report("rbd request failed: cmd %d offset %" PRIu64 " bytes %"
> + PRIu64 " flags %d task.ret %" PRIi64 " (%s)", cmd, offset,
> + bytes, flags, task.ret, strerror(-task.ret));
> + return task.ret;
> + }
> +
> + /* zero pad short reads */
> + if (cmd == RBD_AIO_READ && task.ret < qiov->size) {
> + qemu_iovec_memset(qiov, task.ret, 0, qiov->size - task.ret);
I would use bytes instead of qiov->size here and on the previous
line. In many systems the iovec can be larger than the op and it
took me a couple of minutes to spot the "qiov->size == bytes" assert.
Also, previously we zeroed the entire op on read errors. I don't think
it matters but want to make sure this was dropped on purpose.
> + }
> +
> + return 0;
> +}
> +
> +static int
> +coroutine_fn qemu_rbd_co_preadv(BlockDriverState *bs, uint64_t offset,
> + uint64_t bytes, QEMUIOVector *qiov,
> + int flags)
> +{
> + return qemu_rbd_start_co(bs, offset, bytes, qiov, flags, RBD_AIO_READ);
> }
>
> -static BlockAIOCB *qemu_rbd_aio_preadv(BlockDriverState *bs,
> - uint64_t offset, uint64_t bytes,
> - QEMUIOVector *qiov, int flags,
> - BlockCompletionFunc *cb,
> - void *opaque)
> +static int
> +coroutine_fn qemu_rbd_co_pwritev(BlockDriverState *bs, uint64_t offset,
> + uint64_t bytes, QEMUIOVector *qiov,
> + int flags)
> {
> - return rbd_start_aio(bs, offset, qiov, bytes, cb, opaque,
> - RBD_AIO_READ);
> + BDRVRBDState *s = bs->opaque;
> + /*
> + * RBD APIs don't allow us to write more than actual size, so in order
> + * to support growing images, we resize the image before write
> + * operations that exceed the current size.
> + */
> + if (offset + bytes > s->image_size) {
> + int r = qemu_rbd_resize(bs, offset + bytes);
> + if (r < 0) {
> + return r;
> + }
> + }
How can this be triggered today? qemu-io used to be able to, but that
support was removed a long time ago:
https://mail.gnu.org/archive/html/qemu-devel/2014-12/msg01592.html
Just checking that this piece of code is not vestigial at this point.
Thanks,
Ilya
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [PATCH V3 4/6] block/rbd: migrate from aio to coroutines
2021-06-17 14:43 ` Ilya Dryomov
@ 2021-06-18 9:07 ` Peter Lieven
2021-06-18 11:26 ` Ilya Dryomov
0 siblings, 1 reply; 22+ messages in thread
From: Peter Lieven @ 2021-06-18 9:07 UTC (permalink / raw)
To: Ilya Dryomov
Cc: kwolf, berrange, qemu-block, qemu-devel, ct, Paolo Bonzini, mreitz
Am 17.06.21 um 16:43 schrieb Ilya Dryomov:
> On Wed, May 19, 2021 at 4:27 PM Peter Lieven <pl@kamp.de> wrote:
>> Signed-off-by: Peter Lieven <pl@kamp.de>
>> ---
>> block/rbd.c | 255 ++++++++++++++++++----------------------------------
>> 1 file changed, 87 insertions(+), 168 deletions(-)
>>
>> diff --git a/block/rbd.c b/block/rbd.c
>> index 97a2ae4c84..0d8612a988 100644
>> --- a/block/rbd.c
>> +++ b/block/rbd.c
>> @@ -66,22 +66,6 @@ typedef enum {
>> RBD_AIO_FLUSH
>> } RBDAIOCmd;
>>
>> -typedef struct RBDAIOCB {
>> - BlockAIOCB common;
>> - int64_t ret;
>> - QEMUIOVector *qiov;
>> - RBDAIOCmd cmd;
>> - int error;
>> - struct BDRVRBDState *s;
>> -} RBDAIOCB;
>> -
>> -typedef struct RADOSCB {
>> - RBDAIOCB *acb;
>> - struct BDRVRBDState *s;
>> - int64_t size;
>> - int64_t ret;
>> -} RADOSCB;
>> -
>> typedef struct BDRVRBDState {
>> rados_t cluster;
>> rados_ioctx_t io_ctx;
>> @@ -93,6 +77,13 @@ typedef struct BDRVRBDState {
>> uint64_t object_size;
>> } BDRVRBDState;
>>
>> +typedef struct RBDTask {
>> + BlockDriverState *bs;
>> + Coroutine *co;
>> + bool complete;
>> + int64_t ret;
>> +} RBDTask;
>> +
>> static int qemu_rbd_connect(rados_t *cluster, rados_ioctx_t *io_ctx,
>> BlockdevOptionsRbd *opts, bool cache,
>> const char *keypairs, const char *secretid,
>> @@ -325,13 +316,6 @@ static int qemu_rbd_set_keypairs(rados_t cluster, const char *keypairs_json,
>> return ret;
>> }
>>
>> -static void qemu_rbd_memset(RADOSCB *rcb, int64_t offs)
>> -{
>> - RBDAIOCB *acb = rcb->acb;
>> - iov_memset(acb->qiov->iov, acb->qiov->niov, offs, 0,
>> - acb->qiov->size - offs);
>> -}
>> -
>> /* FIXME Deprecate and remove keypairs or make it available in QMP. */
>> static int qemu_rbd_do_create(BlockdevCreateOptions *options,
>> const char *keypairs, const char *password_secret,
>> @@ -450,46 +434,6 @@ exit:
>> return ret;
>> }
>>
>> -/*
>> - * This aio completion is being called from rbd_finish_bh() and runs in qemu
>> - * BH context.
>> - */
>> -static void qemu_rbd_complete_aio(RADOSCB *rcb)
>> -{
>> - RBDAIOCB *acb = rcb->acb;
>> - int64_t r;
>> -
>> - r = rcb->ret;
>> -
>> - if (acb->cmd != RBD_AIO_READ) {
>> - if (r < 0) {
>> - acb->ret = r;
>> - acb->error = 1;
>> - } else if (!acb->error) {
>> - acb->ret = rcb->size;
>> - }
>> - } else {
>> - if (r < 0) {
>> - qemu_rbd_memset(rcb, 0);
>> - acb->ret = r;
>> - acb->error = 1;
>> - } else if (r < rcb->size) {
>> - qemu_rbd_memset(rcb, r);
>> - if (!acb->error) {
>> - acb->ret = rcb->size;
>> - }
>> - } else if (!acb->error) {
>> - acb->ret = r;
>> - }
>> - }
>> -
>> - g_free(rcb);
>> -
>> - acb->common.cb(acb->common.opaque, (acb->ret > 0 ? 0 : acb->ret));
>> -
>> - qemu_aio_unref(acb);
>> -}
>> -
>> static char *qemu_rbd_mon_host(BlockdevOptionsRbd *opts, Error **errp)
>> {
>> const char **vals;
>> @@ -826,89 +770,50 @@ static int qemu_rbd_resize(BlockDriverState *bs, uint64_t size)
>> return 0;
>> }
>>
>> -static const AIOCBInfo rbd_aiocb_info = {
>> - .aiocb_size = sizeof(RBDAIOCB),
>> -};
>> -
>> -static void rbd_finish_bh(void *opaque)
>> +static void qemu_rbd_finish_bh(void *opaque)
>> {
>> - RADOSCB *rcb = opaque;
>> - qemu_rbd_complete_aio(rcb);
>> + RBDTask *task = opaque;
>> + task->complete = 1;
>> + aio_co_wake(task->co);
>> }
>>
>> -/*
>> - * This is the callback function for rbd_aio_read and _write
>> - *
>> - * Note: this function is being called from a non qemu thread so
>> - * we need to be careful about what we do here. Generally we only
>> - * schedule a BH, and do the rest of the io completion handling
>> - * from rbd_finish_bh() which runs in a qemu context.
>> - */
> I would adapt this comment instead of removing it. I mean, it is
> still true and the reason for going through aio_bh_schedule_oneshot()
> instead of calling aio_co_wake() directly, right?
Sure, it's still right, but I think rbd is the only driver with this comment.
I can leave it in, no problem.
>
>> -static void rbd_finish_aiocb(rbd_completion_t c, RADOSCB *rcb)
>> +static void qemu_rbd_completion_cb(rbd_completion_t c, RBDTask *task)
>> {
>> - RBDAIOCB *acb = rcb->acb;
>> -
>> - rcb->ret = rbd_aio_get_return_value(c);
>> + task->ret = rbd_aio_get_return_value(c);
>> rbd_aio_release(c);
>> -
>> - replay_bh_schedule_oneshot_event(bdrv_get_aio_context(acb->common.bs),
>> - rbd_finish_bh, rcb);
>> + aio_bh_schedule_oneshot(bdrv_get_aio_context(task->bs),
>> + qemu_rbd_finish_bh, task);
>> }
>>
>> -static BlockAIOCB *rbd_start_aio(BlockDriverState *bs,
>> - int64_t off,
>> - QEMUIOVector *qiov,
>> - int64_t size,
>> - BlockCompletionFunc *cb,
>> - void *opaque,
>> - RBDAIOCmd cmd)
>> +static int coroutine_fn qemu_rbd_start_co(BlockDriverState *bs,
>> + uint64_t offset,
>> + uint64_t bytes,
>> + QEMUIOVector *qiov,
>> + int flags,
>> + RBDAIOCmd cmd)
>> {
>> - RBDAIOCB *acb;
>> - RADOSCB *rcb = NULL;
>> + BDRVRBDState *s = bs->opaque;
>> + RBDTask task = { .bs = bs, .co = qemu_coroutine_self() };
>> rbd_completion_t c;
>> int r;
>>
>> - BDRVRBDState *s = bs->opaque;
>> + assert(!qiov || qiov->size == bytes);
>>
>> - acb = qemu_aio_get(&rbd_aiocb_info, bs, cb, opaque);
>> - acb->cmd = cmd;
>> - acb->qiov = qiov;
>> - assert(!qiov || qiov->size == size);
>> -
>> - rcb = g_new(RADOSCB, 1);
>> -
>> - acb->ret = 0;
>> - acb->error = 0;
>> - acb->s = s;
>> -
>> - rcb->acb = acb;
>> - rcb->s = acb->s;
>> - rcb->size = size;
>> - r = rbd_aio_create_completion(rcb, (rbd_callback_t) rbd_finish_aiocb, &c);
>> + r = rbd_aio_create_completion(&task,
>> + (rbd_callback_t) qemu_rbd_completion_cb, &c);
>> if (r < 0) {
>> - goto failed;
>> + return r;
>> }
>>
>> switch (cmd) {
>> - case RBD_AIO_WRITE:
>> - /*
>> - * RBD APIs don't allow us to write more than actual size, so in order
>> - * to support growing images, we resize the image before write
>> - * operations that exceed the current size.
>> - */
>> - if (off + size > s->image_size) {
>> - r = qemu_rbd_resize(bs, off + size);
>> - if (r < 0) {
>> - goto failed_completion;
>> - }
>> - }
>> - r = rbd_aio_writev(s->image, qiov->iov, qiov->niov, off, c);
>> - break;
>> case RBD_AIO_READ:
>> - r = rbd_aio_readv(s->image, qiov->iov, qiov->niov, off, c);
>> + r = rbd_aio_readv(s->image, qiov->iov, qiov->niov, offset, c);
>> + break;
>> + case RBD_AIO_WRITE:
>> + r = rbd_aio_writev(s->image, qiov->iov, qiov->niov, offset, c);
>> break;
>> case RBD_AIO_DISCARD:
>> - r = rbd_aio_discard(s->image, off, size, c);
>> + r = rbd_aio_discard(s->image, offset, bytes, c);
>> break;
>> case RBD_AIO_FLUSH:
>> r = rbd_aio_flush(s->image, c);
>> @@ -918,44 +823,69 @@ static BlockAIOCB *rbd_start_aio(BlockDriverState *bs,
>> }
>>
>> if (r < 0) {
>> - goto failed_completion;
>> + error_report("rbd request failed early: cmd %d offset %" PRIu64
>> + " bytes %" PRIu64 " flags %d r %d (%s)", cmd, offset,
>> + bytes, flags, r, strerror(-r));
>> + rbd_aio_release(c);
>> + return r;
>> }
>> - return &acb->common;
>>
>> -failed_completion:
>> - rbd_aio_release(c);
>> -failed:
>> - g_free(rcb);
>> + while (!task.complete) {
>> + qemu_coroutine_yield();
>> + }
>>
>> - qemu_aio_unref(acb);
>> - return NULL;
>> + if (task.ret < 0) {
>> + error_report("rbd request failed: cmd %d offset %" PRIu64 " bytes %"
>> + PRIu64 " flags %d task.ret %" PRIi64 " (%s)", cmd, offset,
>> + bytes, flags, task.ret, strerror(-task.ret));
>> + return task.ret;
>> + }
>> +
>> + /* zero pad short reads */
>> + if (cmd == RBD_AIO_READ && task.ret < qiov->size) {
>> + qemu_iovec_memset(qiov, task.ret, 0, qiov->size - task.ret);
> I would use bytes instead of qiov->size here and on the previous
> line. In many systems the iovec can be larger than the op and it
> took me a couple of minutes to spot the "qiov->size == bytes" assert.
qemu_iovec_memset is an operation on the qiov. If one missed
the assertion one could argue that we memset beyond the size of the qiov
if we pass bytes - task.ret as length.
Is it true that there are systems which pass a bigger iovec than the actual op?
In this case the assertion would trigger there.
>
> Also, previously we zeroed the entire op on read errors. I don't think
> it matters but want to make sure this was dropped on purpose.
I dropped it because it's not done in other drivers. If you feel that we could reveal sensitive information I can put the wiping back in.
>
>> + }
>> +
>> + return 0;
>> +}
>> +
>> +static int
>> +coroutine_fn qemu_rbd_co_preadv(BlockDriverState *bs, uint64_t offset,
>> + uint64_t bytes, QEMUIOVector *qiov,
>> + int flags)
>> +{
>> + return qemu_rbd_start_co(bs, offset, bytes, qiov, flags, RBD_AIO_READ);
>> }
>>
>> -static BlockAIOCB *qemu_rbd_aio_preadv(BlockDriverState *bs,
>> - uint64_t offset, uint64_t bytes,
>> - QEMUIOVector *qiov, int flags,
>> - BlockCompletionFunc *cb,
>> - void *opaque)
>> +static int
>> +coroutine_fn qemu_rbd_co_pwritev(BlockDriverState *bs, uint64_t offset,
>> + uint64_t bytes, QEMUIOVector *qiov,
>> + int flags)
>> {
>> - return rbd_start_aio(bs, offset, qiov, bytes, cb, opaque,
>> - RBD_AIO_READ);
>> + BDRVRBDState *s = bs->opaque;
>> + /*
>> + * RBD APIs don't allow us to write more than actual size, so in order
>> + * to support growing images, we resize the image before write
>> + * operations that exceed the current size.
>> + */
>> + if (offset + bytes > s->image_size) {
>> + int r = qemu_rbd_resize(bs, offset + bytes);
>> + if (r < 0) {
>> + return r;
>> + }
>> + }
> How can this be triggered today? qemu-io used to be able to, but that
> support was removed a long time ago:
>
> https://mail.gnu.org/archive/html/qemu-devel/2014-12/msg01592.html
>
> Just checking that this piece of code is not vestigial at this point.
We had this discussion before. The way to hit this is if you want to create e.g. a qcow2 image on rbd. It does not make that much sense, but it was supported in the past.
I would vote for removing it, but it might break something somewhere.
Thanks,
Peter
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [PATCH V3 4/6] block/rbd: migrate from aio to coroutines
2021-06-18 9:07 ` Peter Lieven
@ 2021-06-18 11:26 ` Ilya Dryomov
0 siblings, 0 replies; 22+ messages in thread
From: Ilya Dryomov @ 2021-06-18 11:26 UTC (permalink / raw)
To: Peter Lieven
Cc: kwolf, berrange, qemu-block, qemu-devel, ct, Paolo Bonzini, mreitz
On Fri, Jun 18, 2021 at 11:07 AM Peter Lieven <pl@kamp.de> wrote:
>
> Am 17.06.21 um 16:43 schrieb Ilya Dryomov:
> > On Wed, May 19, 2021 at 4:27 PM Peter Lieven <pl@kamp.de> wrote:
> >> Signed-off-by: Peter Lieven <pl@kamp.de>
> >> ---
> >> block/rbd.c | 255 ++++++++++++++++++----------------------------------
> >> 1 file changed, 87 insertions(+), 168 deletions(-)
> >>
> >> diff --git a/block/rbd.c b/block/rbd.c
> >> index 97a2ae4c84..0d8612a988 100644
> >> --- a/block/rbd.c
> >> +++ b/block/rbd.c
> >> @@ -66,22 +66,6 @@ typedef enum {
> >> RBD_AIO_FLUSH
> >> } RBDAIOCmd;
> >>
> >> -typedef struct RBDAIOCB {
> >> - BlockAIOCB common;
> >> - int64_t ret;
> >> - QEMUIOVector *qiov;
> >> - RBDAIOCmd cmd;
> >> - int error;
> >> - struct BDRVRBDState *s;
> >> -} RBDAIOCB;
> >> -
> >> -typedef struct RADOSCB {
> >> - RBDAIOCB *acb;
> >> - struct BDRVRBDState *s;
> >> - int64_t size;
> >> - int64_t ret;
> >> -} RADOSCB;
> >> -
> >> typedef struct BDRVRBDState {
> >> rados_t cluster;
> >> rados_ioctx_t io_ctx;
> >> @@ -93,6 +77,13 @@ typedef struct BDRVRBDState {
> >> uint64_t object_size;
> >> } BDRVRBDState;
> >>
> >> +typedef struct RBDTask {
> >> + BlockDriverState *bs;
> >> + Coroutine *co;
> >> + bool complete;
> >> + int64_t ret;
> >> +} RBDTask;
> >> +
> >> static int qemu_rbd_connect(rados_t *cluster, rados_ioctx_t *io_ctx,
> >> BlockdevOptionsRbd *opts, bool cache,
> >> const char *keypairs, const char *secretid,
> >> @@ -325,13 +316,6 @@ static int qemu_rbd_set_keypairs(rados_t cluster, const char *keypairs_json,
> >> return ret;
> >> }
> >>
> >> -static void qemu_rbd_memset(RADOSCB *rcb, int64_t offs)
> >> -{
> >> - RBDAIOCB *acb = rcb->acb;
> >> - iov_memset(acb->qiov->iov, acb->qiov->niov, offs, 0,
> >> - acb->qiov->size - offs);
> >> -}
> >> -
> >> /* FIXME Deprecate and remove keypairs or make it available in QMP. */
> >> static int qemu_rbd_do_create(BlockdevCreateOptions *options,
> >> const char *keypairs, const char *password_secret,
> >> @@ -450,46 +434,6 @@ exit:
> >> return ret;
> >> }
> >>
> >> -/*
> >> - * This aio completion is being called from rbd_finish_bh() and runs in qemu
> >> - * BH context.
> >> - */
> >> -static void qemu_rbd_complete_aio(RADOSCB *rcb)
> >> -{
> >> - RBDAIOCB *acb = rcb->acb;
> >> - int64_t r;
> >> -
> >> - r = rcb->ret;
> >> -
> >> - if (acb->cmd != RBD_AIO_READ) {
> >> - if (r < 0) {
> >> - acb->ret = r;
> >> - acb->error = 1;
> >> - } else if (!acb->error) {
> >> - acb->ret = rcb->size;
> >> - }
> >> - } else {
> >> - if (r < 0) {
> >> - qemu_rbd_memset(rcb, 0);
> >> - acb->ret = r;
> >> - acb->error = 1;
> >> - } else if (r < rcb->size) {
> >> - qemu_rbd_memset(rcb, r);
> >> - if (!acb->error) {
> >> - acb->ret = rcb->size;
> >> - }
> >> - } else if (!acb->error) {
> >> - acb->ret = r;
> >> - }
> >> - }
> >> -
> >> - g_free(rcb);
> >> -
> >> - acb->common.cb(acb->common.opaque, (acb->ret > 0 ? 0 : acb->ret));
> >> -
> >> - qemu_aio_unref(acb);
> >> -}
> >> -
> >> static char *qemu_rbd_mon_host(BlockdevOptionsRbd *opts, Error **errp)
> >> {
> >> const char **vals;
> >> @@ -826,89 +770,50 @@ static int qemu_rbd_resize(BlockDriverState *bs, uint64_t size)
> >> return 0;
> >> }
> >>
> >> -static const AIOCBInfo rbd_aiocb_info = {
> >> - .aiocb_size = sizeof(RBDAIOCB),
> >> -};
> >> -
> >> -static void rbd_finish_bh(void *opaque)
> >> +static void qemu_rbd_finish_bh(void *opaque)
> >> {
> >> - RADOSCB *rcb = opaque;
> >> - qemu_rbd_complete_aio(rcb);
> >> + RBDTask *task = opaque;
> >> + task->complete = 1;
> >> + aio_co_wake(task->co);
> >> }
> >>
> >> -/*
> >> - * This is the callback function for rbd_aio_read and _write
> >> - *
> >> - * Note: this function is being called from a non qemu thread so
> >> - * we need to be careful about what we do here. Generally we only
> >> - * schedule a BH, and do the rest of the io completion handling
> >> - * from rbd_finish_bh() which runs in a qemu context.
> >> - */
> > I would adapt this comment instead of removing it. I mean, it is
> > still true and the reason for going through aio_bh_schedule_oneshot()
> > instead of calling aio_co_wake() directly, right?
>
>
> Sure, it's still right, but I think rbd is the only driver with this comment.
>
> I can leave it in, no problem.
Yeah, just massage it a bit to reflect the reality (at least the
function name).
>
>
> >
> >> -static void rbd_finish_aiocb(rbd_completion_t c, RADOSCB *rcb)
> >> +static void qemu_rbd_completion_cb(rbd_completion_t c, RBDTask *task)
> >> {
> >> - RBDAIOCB *acb = rcb->acb;
> >> -
> >> - rcb->ret = rbd_aio_get_return_value(c);
> >> + task->ret = rbd_aio_get_return_value(c);
> >> rbd_aio_release(c);
> >> -
> >> - replay_bh_schedule_oneshot_event(bdrv_get_aio_context(acb->common.bs),
> >> - rbd_finish_bh, rcb);
> >> + aio_bh_schedule_oneshot(bdrv_get_aio_context(task->bs),
> >> + qemu_rbd_finish_bh, task);
> >> }
> >>
> >> -static BlockAIOCB *rbd_start_aio(BlockDriverState *bs,
> >> - int64_t off,
> >> - QEMUIOVector *qiov,
> >> - int64_t size,
> >> - BlockCompletionFunc *cb,
> >> - void *opaque,
> >> - RBDAIOCmd cmd)
> >> +static int coroutine_fn qemu_rbd_start_co(BlockDriverState *bs,
> >> + uint64_t offset,
> >> + uint64_t bytes,
> >> + QEMUIOVector *qiov,
> >> + int flags,
> >> + RBDAIOCmd cmd)
> >> {
> >> - RBDAIOCB *acb;
> >> - RADOSCB *rcb = NULL;
> >> + BDRVRBDState *s = bs->opaque;
> >> + RBDTask task = { .bs = bs, .co = qemu_coroutine_self() };
> >> rbd_completion_t c;
> >> int r;
> >>
> >> - BDRVRBDState *s = bs->opaque;
> >> + assert(!qiov || qiov->size == bytes);
> >>
> >> - acb = qemu_aio_get(&rbd_aiocb_info, bs, cb, opaque);
> >> - acb->cmd = cmd;
> >> - acb->qiov = qiov;
> >> - assert(!qiov || qiov->size == size);
> >> -
> >> - rcb = g_new(RADOSCB, 1);
> >> -
> >> - acb->ret = 0;
> >> - acb->error = 0;
> >> - acb->s = s;
> >> -
> >> - rcb->acb = acb;
> >> - rcb->s = acb->s;
> >> - rcb->size = size;
> >> - r = rbd_aio_create_completion(rcb, (rbd_callback_t) rbd_finish_aiocb, &c);
> >> + r = rbd_aio_create_completion(&task,
> >> + (rbd_callback_t) qemu_rbd_completion_cb, &c);
> >> if (r < 0) {
> >> - goto failed;
> >> + return r;
> >> }
> >>
> >> switch (cmd) {
> >> - case RBD_AIO_WRITE:
> >> - /*
> >> - * RBD APIs don't allow us to write more than actual size, so in order
> >> - * to support growing images, we resize the image before write
> >> - * operations that exceed the current size.
> >> - */
> >> - if (off + size > s->image_size) {
> >> - r = qemu_rbd_resize(bs, off + size);
> >> - if (r < 0) {
> >> - goto failed_completion;
> >> - }
> >> - }
> >> - r = rbd_aio_writev(s->image, qiov->iov, qiov->niov, off, c);
> >> - break;
> >> case RBD_AIO_READ:
> >> - r = rbd_aio_readv(s->image, qiov->iov, qiov->niov, off, c);
> >> + r = rbd_aio_readv(s->image, qiov->iov, qiov->niov, offset, c);
> >> + break;
> >> + case RBD_AIO_WRITE:
> >> + r = rbd_aio_writev(s->image, qiov->iov, qiov->niov, offset, c);
> >> break;
> >> case RBD_AIO_DISCARD:
> >> - r = rbd_aio_discard(s->image, off, size, c);
> >> + r = rbd_aio_discard(s->image, offset, bytes, c);
> >> break;
> >> case RBD_AIO_FLUSH:
> >> r = rbd_aio_flush(s->image, c);
> >> @@ -918,44 +823,69 @@ static BlockAIOCB *rbd_start_aio(BlockDriverState *bs,
> >> }
> >>
> >> if (r < 0) {
> >> - goto failed_completion;
> >> + error_report("rbd request failed early: cmd %d offset %" PRIu64
> >> + " bytes %" PRIu64 " flags %d r %d (%s)", cmd, offset,
> >> + bytes, flags, r, strerror(-r));
> >> + rbd_aio_release(c);
> >> + return r;
> >> }
> >> - return &acb->common;
> >>
> >> -failed_completion:
> >> - rbd_aio_release(c);
> >> -failed:
> >> - g_free(rcb);
> >> + while (!task.complete) {
> >> + qemu_coroutine_yield();
> >> + }
> >>
> >> - qemu_aio_unref(acb);
> >> - return NULL;
> >> + if (task.ret < 0) {
> >> + error_report("rbd request failed: cmd %d offset %" PRIu64 " bytes %"
> >> + PRIu64 " flags %d task.ret %" PRIi64 " (%s)", cmd, offset,
> >> + bytes, flags, task.ret, strerror(-task.ret));
> >> + return task.ret;
> >> + }
> >> +
> >> + /* zero pad short reads */
> >> + if (cmd == RBD_AIO_READ && task.ret < qiov->size) {
> >> + qemu_iovec_memset(qiov, task.ret, 0, qiov->size - task.ret);
> > I would use bytes instead of qiov->size here and on the previous
> > line. In many systems the iovec can be larger than the op and it
> > took me a couple of minutes to spot the "qiov->size == bytes" assert.
>
>
> qemu_iovec_memset is an operation on the qiov. If one missed
>
> the assertion one could argue that we memset beyond the size of the qiov
>
> if we pass bytes - task.ret as length.
Sorry, disregard -- I missed that rbd_aio_readv/writev() are specified
in terms of iovecs (i.e. iovec base + iovec count instead of iovec base +
number of bytes).
>
>
> Is it true that there are systems which pass a bigger iovec than the actual op?
Perhaps not in QEMU but it comes in many other places. For example
in the kernel when the block device driver gets a "bio" which it doesn't
own and should act only on a small piece of it, etc.
>
> In this case the assertion would trigger there.
>
>
> >
> > Also, previously we zeroed the entire op on read errors. I don't think
> > it matters but want to make sure this was dropped on purpose.
>
>
> I dropped it because it's not done in other drivers. If you feel that we could reveal sensitive information I can put the wiping back in.
I don't think so and we don't do this in kernel rbd either.
>
>
> >
> >> + }
> >> +
> >> + return 0;
> >> +}
> >> +
> >> +static int
> >> +coroutine_fn qemu_rbd_co_preadv(BlockDriverState *bs, uint64_t offset,
> >> + uint64_t bytes, QEMUIOVector *qiov,
> >> + int flags)
> >> +{
> >> + return qemu_rbd_start_co(bs, offset, bytes, qiov, flags, RBD_AIO_READ);
> >> }
> >>
> >> -static BlockAIOCB *qemu_rbd_aio_preadv(BlockDriverState *bs,
> >> - uint64_t offset, uint64_t bytes,
> >> - QEMUIOVector *qiov, int flags,
> >> - BlockCompletionFunc *cb,
> >> - void *opaque)
> >> +static int
> >> +coroutine_fn qemu_rbd_co_pwritev(BlockDriverState *bs, uint64_t offset,
> >> + uint64_t bytes, QEMUIOVector *qiov,
> >> + int flags)
> >> {
> >> - return rbd_start_aio(bs, offset, qiov, bytes, cb, opaque,
> >> - RBD_AIO_READ);
> >> + BDRVRBDState *s = bs->opaque;
> >> + /*
> >> + * RBD APIs don't allow us to write more than actual size, so in order
> >> + * to support growing images, we resize the image before write
> >> + * operations that exceed the current size.
> >> + */
> >> + if (offset + bytes > s->image_size) {
> >> + int r = qemu_rbd_resize(bs, offset + bytes);
> >> + if (r < 0) {
> >> + return r;
> >> + }
> >> + }
> > How can this be triggered today? qemu-io used to be able to, but that
> > support was removed a long time ago:
> >
> > https://mail.gnu.org/archive/html/qemu-devel/2014-12/msg01592.html
> >
> > Just checking that this piece of code is not vestigial at this point.
>
>
> We had this discussion before. The way to hit this is if you want to create e.g. a qcow2 image on rbd. It does not make that much sense, but it was supported in the past.
Ah, so something like
qemu-img create -f qcow2 rbd:foo/bar 10G
>
> I would vote for removing it, but it might break something somewhere.
Yeah, it's weird but definitely not up for removal in a patch that
migrates from one internal API to another.
Thanks,
Ilya
^ permalink raw reply [flat|nested] 22+ messages in thread
* [PATCH V3 5/6] block/rbd: add write zeroes support
2021-05-19 14:23 [PATCH V3 0/6] block/rbd: migrate to coroutines and add write zeroes support Peter Lieven
` (3 preceding siblings ...)
2021-05-19 14:23 ` [PATCH V3 4/6] block/rbd: migrate from aio to coroutines Peter Lieven
@ 2021-05-19 14:23 ` Peter Lieven
2021-06-16 12:34 ` Ilya Dryomov
2021-05-19 14:23 ` [PATCH V3 6/6] block/rbd: drop qemu_rbd_refresh_limits Peter Lieven
5 siblings, 1 reply; 22+ messages in thread
From: Peter Lieven @ 2021-05-19 14:23 UTC (permalink / raw)
To: qemu-block
Cc: kwolf, idryomov, berrange, Peter Lieven, qemu-devel, ct,
pbonzini, mreitz, dillaman
Signed-off-by: Peter Lieven <pl@kamp.de>
---
block/rbd.c | 37 ++++++++++++++++++++++++++++++++++++-
1 file changed, 36 insertions(+), 1 deletion(-)
diff --git a/block/rbd.c b/block/rbd.c
index 0d8612a988..ee13f08a74 100644
--- a/block/rbd.c
+++ b/block/rbd.c
@@ -63,7 +63,8 @@ typedef enum {
RBD_AIO_READ,
RBD_AIO_WRITE,
RBD_AIO_DISCARD,
- RBD_AIO_FLUSH
+ RBD_AIO_FLUSH,
+ RBD_AIO_WRITE_ZEROES
} RBDAIOCmd;
typedef struct BDRVRBDState {
@@ -705,6 +706,10 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags,
}
}
+#ifdef LIBRBD_SUPPORTS_WRITE_ZEROES
+ bs->supported_zero_flags = BDRV_REQ_MAY_UNMAP;
+#endif
+
/* When extending regular files, we get zeros from the OS */
bs->supported_truncate_flags = BDRV_REQ_ZERO_WRITE;
@@ -818,6 +823,18 @@ static int coroutine_fn qemu_rbd_start_co(BlockDriverState *bs,
case RBD_AIO_FLUSH:
r = rbd_aio_flush(s->image, c);
break;
+#ifdef LIBRBD_SUPPORTS_WRITE_ZEROES
+ case RBD_AIO_WRITE_ZEROES: {
+ int zero_flags = 0;
+#ifdef RBD_WRITE_ZEROES_FLAG_THICK_PROVISION
+ if (!(flags & BDRV_REQ_MAY_UNMAP)) {
+ zero_flags = RBD_WRITE_ZEROES_FLAG_THICK_PROVISION;
+ }
+#endif
+ r = rbd_aio_write_zeroes(s->image, offset, bytes, c, zero_flags, 0);
+ break;
+ }
+#endif
default:
r = -EINVAL;
}
@@ -888,6 +905,21 @@ static int coroutine_fn qemu_rbd_co_pdiscard(BlockDriverState *bs,
return qemu_rbd_start_co(bs, offset, count, NULL, 0, RBD_AIO_DISCARD);
}
+#ifdef LIBRBD_SUPPORTS_WRITE_ZEROES
+static int
+coroutine_fn qemu_rbd_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset,
+ int count, BdrvRequestFlags flags)
+{
+#ifndef RBD_WRITE_ZEROES_FLAG_THICK_PROVISION
+ if (!(flags & BDRV_REQ_MAY_UNMAP)) {
+ return -ENOTSUP;
+ }
+#endif
+ return qemu_rbd_start_co(bs, offset, count, NULL, flags,
+ RBD_AIO_WRITE_ZEROES);
+}
+#endif
+
static int qemu_rbd_getinfo(BlockDriverState *bs, BlockDriverInfo *bdi)
{
BDRVRBDState *s = bs->opaque;
@@ -1113,6 +1145,9 @@ static BlockDriver bdrv_rbd = {
.bdrv_co_pwritev = qemu_rbd_co_pwritev,
.bdrv_co_flush_to_disk = qemu_rbd_co_flush,
.bdrv_co_pdiscard = qemu_rbd_co_pdiscard,
+#ifdef LIBRBD_SUPPORTS_WRITE_ZEROES
+ .bdrv_co_pwrite_zeroes = qemu_rbd_co_pwrite_zeroes,
+#endif
.bdrv_snapshot_create = qemu_rbd_snap_create,
.bdrv_snapshot_delete = qemu_rbd_snap_remove,
--
2.17.1
^ permalink raw reply related [flat|nested] 22+ messages in thread
* Re: [PATCH V3 5/6] block/rbd: add write zeroes support
2021-05-19 14:23 ` [PATCH V3 5/6] block/rbd: add write zeroes support Peter Lieven
@ 2021-06-16 12:34 ` Ilya Dryomov
2021-06-18 9:00 ` Peter Lieven
0 siblings, 1 reply; 22+ messages in thread
From: Ilya Dryomov @ 2021-06-16 12:34 UTC (permalink / raw)
To: Peter Lieven
Cc: kwolf, berrange, qemu-block, qemu-devel, ct, Paolo Bonzini, mreitz
On Wed, May 19, 2021 at 4:28 PM Peter Lieven <pl@kamp.de> wrote:
>
> Signed-off-by: Peter Lieven <pl@kamp.de>
> ---
> block/rbd.c | 37 ++++++++++++++++++++++++++++++++++++-
> 1 file changed, 36 insertions(+), 1 deletion(-)
>
> diff --git a/block/rbd.c b/block/rbd.c
> index 0d8612a988..ee13f08a74 100644
> --- a/block/rbd.c
> +++ b/block/rbd.c
> @@ -63,7 +63,8 @@ typedef enum {
> RBD_AIO_READ,
> RBD_AIO_WRITE,
> RBD_AIO_DISCARD,
> - RBD_AIO_FLUSH
> + RBD_AIO_FLUSH,
> + RBD_AIO_WRITE_ZEROES
> } RBDAIOCmd;
>
> typedef struct BDRVRBDState {
> @@ -705,6 +706,10 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags,
> }
> }
>
> +#ifdef LIBRBD_SUPPORTS_WRITE_ZEROES
> + bs->supported_zero_flags = BDRV_REQ_MAY_UNMAP;
I wonder if we should also set BDRV_REQ_NO_FALLBACK here since librbd
does not really have a notion of non-efficient explicit zeroing.
Thanks,
Ilya
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [PATCH V3 5/6] block/rbd: add write zeroes support
2021-06-16 12:34 ` Ilya Dryomov
@ 2021-06-18 9:00 ` Peter Lieven
2021-06-18 10:34 ` Ilya Dryomov
0 siblings, 1 reply; 22+ messages in thread
From: Peter Lieven @ 2021-06-18 9:00 UTC (permalink / raw)
To: Ilya Dryomov
Cc: kwolf, berrange, qemu-block, qemu-devel, ct, Paolo Bonzini, mreitz
Am 16.06.21 um 14:34 schrieb Ilya Dryomov:
> On Wed, May 19, 2021 at 4:28 PM Peter Lieven <pl@kamp.de> wrote:
>> Signed-off-by: Peter Lieven <pl@kamp.de>
>> ---
>> block/rbd.c | 37 ++++++++++++++++++++++++++++++++++++-
>> 1 file changed, 36 insertions(+), 1 deletion(-)
>>
>> diff --git a/block/rbd.c b/block/rbd.c
>> index 0d8612a988..ee13f08a74 100644
>> --- a/block/rbd.c
>> +++ b/block/rbd.c
>> @@ -63,7 +63,8 @@ typedef enum {
>> RBD_AIO_READ,
>> RBD_AIO_WRITE,
>> RBD_AIO_DISCARD,
>> - RBD_AIO_FLUSH
>> + RBD_AIO_FLUSH,
>> + RBD_AIO_WRITE_ZEROES
>> } RBDAIOCmd;
>>
>> typedef struct BDRVRBDState {
>> @@ -705,6 +706,10 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags,
>> }
>> }
>>
>> +#ifdef LIBRBD_SUPPORTS_WRITE_ZEROES
>> + bs->supported_zero_flags = BDRV_REQ_MAY_UNMAP;
> I wonder if we should also set BDRV_REQ_NO_FALLBACK here since librbd
> does not really have a notion of non-efficient explicit zeroing.
This is only true if thick provisioning is supported which is in Octopus onwards, right?
So it would only be correct to set this if thick provisioning is supported otherwise we could
fail with ENOTSUP and then qemu emulates the zeroing with plain writes.
Peter
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [PATCH V3 5/6] block/rbd: add write zeroes support
2021-06-18 9:00 ` Peter Lieven
@ 2021-06-18 10:34 ` Ilya Dryomov
2021-06-21 8:49 ` Peter Lieven
0 siblings, 1 reply; 22+ messages in thread
From: Ilya Dryomov @ 2021-06-18 10:34 UTC (permalink / raw)
To: Peter Lieven
Cc: kwolf, berrange, qemu-block, qemu-devel, ct, Paolo Bonzini, mreitz
On Fri, Jun 18, 2021 at 11:00 AM Peter Lieven <pl@kamp.de> wrote:
>
> Am 16.06.21 um 14:34 schrieb Ilya Dryomov:
> > On Wed, May 19, 2021 at 4:28 PM Peter Lieven <pl@kamp.de> wrote:
> >> Signed-off-by: Peter Lieven <pl@kamp.de>
> >> ---
> >> block/rbd.c | 37 ++++++++++++++++++++++++++++++++++++-
> >> 1 file changed, 36 insertions(+), 1 deletion(-)
> >>
> >> diff --git a/block/rbd.c b/block/rbd.c
> >> index 0d8612a988..ee13f08a74 100644
> >> --- a/block/rbd.c
> >> +++ b/block/rbd.c
> >> @@ -63,7 +63,8 @@ typedef enum {
> >> RBD_AIO_READ,
> >> RBD_AIO_WRITE,
> >> RBD_AIO_DISCARD,
> >> - RBD_AIO_FLUSH
> >> + RBD_AIO_FLUSH,
> >> + RBD_AIO_WRITE_ZEROES
> >> } RBDAIOCmd;
> >>
> >> typedef struct BDRVRBDState {
> >> @@ -705,6 +706,10 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags,
> >> }
> >> }
> >>
> >> +#ifdef LIBRBD_SUPPORTS_WRITE_ZEROES
> >> + bs->supported_zero_flags = BDRV_REQ_MAY_UNMAP;
> > I wonder if we should also set BDRV_REQ_NO_FALLBACK here since librbd
> > does not really have a notion of non-efficient explicit zeroing.
>
>
> This is only true if thick provisioning is supported which is in Octopus onwards, right?
Since Pacific, I think.
>
> So it would only be correct to set this if thick provisioning is supported otherwise we could
>
> fail with ENOTSUP and then qemu emulates the zeroing with plain writes.
I actually had a question about that. Why are you returning ENOTSUP
in case BDRV_REQ_MAY_UNMAP is not specified and that can't be fulfilled
because librbd is too old for RBD_WRITE_ZEROES_FLAG_THICK_PROVISION?
My understanding has always been that BDRV_REQ_MAY_UNMAP is just
a hint. Deallocating if BDRV_REQ_MAY_UNMAP is specified is not nice
but should be perfectly acceptable. It is certainly better than
returning ENOTSUP, particularly if ENOTSUP causes Qemu to do plain
zeroing.
Thanks,
Ilya
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [PATCH V3 5/6] block/rbd: add write zeroes support
2021-06-18 10:34 ` Ilya Dryomov
@ 2021-06-21 8:49 ` Peter Lieven
2021-06-26 15:56 ` Ilya Dryomov
0 siblings, 1 reply; 22+ messages in thread
From: Peter Lieven @ 2021-06-21 8:49 UTC (permalink / raw)
To: Ilya Dryomov
Cc: kwolf, berrange, qemu-block, qemu-devel, ct, Paolo Bonzini, mreitz
Am 18.06.21 um 12:34 schrieb Ilya Dryomov:
> On Fri, Jun 18, 2021 at 11:00 AM Peter Lieven <pl@kamp.de> wrote:
>> Am 16.06.21 um 14:34 schrieb Ilya Dryomov:
>>> On Wed, May 19, 2021 at 4:28 PM Peter Lieven <pl@kamp.de> wrote:
>>>> Signed-off-by: Peter Lieven <pl@kamp.de>
>>>> ---
>>>> block/rbd.c | 37 ++++++++++++++++++++++++++++++++++++-
>>>> 1 file changed, 36 insertions(+), 1 deletion(-)
>>>>
>>>> diff --git a/block/rbd.c b/block/rbd.c
>>>> index 0d8612a988..ee13f08a74 100644
>>>> --- a/block/rbd.c
>>>> +++ b/block/rbd.c
>>>> @@ -63,7 +63,8 @@ typedef enum {
>>>> RBD_AIO_READ,
>>>> RBD_AIO_WRITE,
>>>> RBD_AIO_DISCARD,
>>>> - RBD_AIO_FLUSH
>>>> + RBD_AIO_FLUSH,
>>>> + RBD_AIO_WRITE_ZEROES
>>>> } RBDAIOCmd;
>>>>
>>>> typedef struct BDRVRBDState {
>>>> @@ -705,6 +706,10 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags,
>>>> }
>>>> }
>>>>
>>>> +#ifdef LIBRBD_SUPPORTS_WRITE_ZEROES
>>>> + bs->supported_zero_flags = BDRV_REQ_MAY_UNMAP;
>>> I wonder if we should also set BDRV_REQ_NO_FALLBACK here since librbd
>>> does not really have a notion of non-efficient explicit zeroing.
>>
>> This is only true if thick provisioning is supported which is in Octopus onwards, right?
> Since Pacific, I think.
>
>> So it would only be correct to set this if thick provisioning is supported otherwise we could
>>
>> fail with ENOTSUP and then qemu emulates the zeroing with plain writes.
> I actually had a question about that. Why are you returning ENOTSUP
> in case BDRV_REQ_MAY_UNMAP is not specified and that can't be fulfilled
> because librbd is too old for RBD_WRITE_ZEROES_FLAG_THICK_PROVISION?
>
> My understanding has always been that BDRV_REQ_MAY_UNMAP is just
> a hint. Deallocating if BDRV_REQ_MAY_UNMAP is specified is not nice
> but should be perfectly acceptable. It is certainly better than
> returning ENOTSUP, particularly if ENOTSUP causes Qemu to do plain
> zeroing.
I think this was introduced to support different provisioning modes. If BDRV_REQ_MAY_UNMAP is not set
the caller of bdrv_write_zeroes expects that the driver does thick provisioning. If the driver cannot handle that (efficiently)
qemu does a plain zero write.
I am still not fully understanding the meaning of the BDRV_REQ_NO_FALLBACK flag. The original commit states that it was introduced for qemu-img to efficiently
zero out the target and avoid the slow fallback. When I last worked on qemu-img convert I remember that there was a call to zero out the target if bdrv_has_zero_init
is not 1. It seems that meanwhile a target_is_zero cmdline switch for qemu-img convert was added to let the user assert that a preexisting target is zero.
Maybe someone can help here if it would be right to set BDRV_REQ_NO_FALLBACK for rbd in either of the 2 cases (thick provisioning is support or not)?
Thanks
Peter
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [PATCH V3 5/6] block/rbd: add write zeroes support
2021-06-21 8:49 ` Peter Lieven
@ 2021-06-26 15:56 ` Ilya Dryomov
2021-06-27 19:29 ` Peter Lieven
0 siblings, 1 reply; 22+ messages in thread
From: Ilya Dryomov @ 2021-06-26 15:56 UTC (permalink / raw)
To: Peter Lieven
Cc: kwolf, Daniel P. Berrangé,
qemu-block, qemu-devel, ct, Paolo Bonzini, mreitz
On Mon, Jun 21, 2021 at 10:49 AM Peter Lieven <pl@kamp.de> wrote:
>
> Am 18.06.21 um 12:34 schrieb Ilya Dryomov:
> > On Fri, Jun 18, 2021 at 11:00 AM Peter Lieven <pl@kamp.de> wrote:
> >> Am 16.06.21 um 14:34 schrieb Ilya Dryomov:
> >>> On Wed, May 19, 2021 at 4:28 PM Peter Lieven <pl@kamp.de> wrote:
> >>>> Signed-off-by: Peter Lieven <pl@kamp.de>
> >>>> ---
> >>>> block/rbd.c | 37 ++++++++++++++++++++++++++++++++++++-
> >>>> 1 file changed, 36 insertions(+), 1 deletion(-)
> >>>>
> >>>> diff --git a/block/rbd.c b/block/rbd.c
> >>>> index 0d8612a988..ee13f08a74 100644
> >>>> --- a/block/rbd.c
> >>>> +++ b/block/rbd.c
> >>>> @@ -63,7 +63,8 @@ typedef enum {
> >>>> RBD_AIO_READ,
> >>>> RBD_AIO_WRITE,
> >>>> RBD_AIO_DISCARD,
> >>>> - RBD_AIO_FLUSH
> >>>> + RBD_AIO_FLUSH,
> >>>> + RBD_AIO_WRITE_ZEROES
> >>>> } RBDAIOCmd;
> >>>>
> >>>> typedef struct BDRVRBDState {
> >>>> @@ -705,6 +706,10 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags,
> >>>> }
> >>>> }
> >>>>
> >>>> +#ifdef LIBRBD_SUPPORTS_WRITE_ZEROES
> >>>> + bs->supported_zero_flags = BDRV_REQ_MAY_UNMAP;
> >>> I wonder if we should also set BDRV_REQ_NO_FALLBACK here since librbd
> >>> does not really have a notion of non-efficient explicit zeroing.
> >>
> >> This is only true if thick provisioning is supported which is in Octopus onwards, right?
> > Since Pacific, I think.
> >
> >> So it would only be correct to set this if thick provisioning is supported otherwise we could
> >>
> >> fail with ENOTSUP and then qemu emulates the zeroing with plain writes.
> > I actually had a question about that. Why are you returning ENOTSUP
> > in case BDRV_REQ_MAY_UNMAP is not specified and that can't be fulfilled
> > because librbd is too old for RBD_WRITE_ZEROES_FLAG_THICK_PROVISION?
> >
> > My understanding has always been that BDRV_REQ_MAY_UNMAP is just
> > a hint. Deallocating if BDRV_REQ_MAY_UNMAP is specified is not nice
> > but should be perfectly acceptable. It is certainly better than
> > returning ENOTSUP, particularly if ENOTSUP causes Qemu to do plain
> > zeroing.
>
>
> I think this was introduced to support different provisioning modes. If BDRV_REQ_MAY_UNMAP is not set
>
> the caller of bdrv_write_zeroes expects that the driver does thick provisioning. If the driver cannot handle that (efficiently)
>
> qemu does a plain zero write.
>
>
> I am still not fully understanding the meaning of the BDRV_REQ_NO_FALLBACK flag. The original commit states that it was introduced for qemu-img to efficiently
>
> zero out the target and avoid the slow fallback. When I last worked on qemu-img convert I remember that there was a call to zero out the target if bdrv_has_zero_init
>
> is not 1. It seems that meanwhile a target_is_zero cmdline switch for qemu-img convert was added to let the user assert that a preexisting target is zero.
>
> Maybe someone can help here if it would be right to set BDRV_REQ_NO_FALLBACK for rbd in either of the 2 cases (thick provisioning is supported or not)?
Since no one spoke up I think we should
a) remove the BDRV_REQ_MAY_UNMAP check in qemu_rbd_co_pwrite_zeroes()
and as a consequence always unmap if librbd is too old
It's not clear what qemu's expectation is but in general Write
Zeroes is allowed to unmap. The only guarantee is that subsequent
reads return zeroes, everything else is a hint. This is how it is
specified in the kernel and in the NVMe spec.
In particular, block/nvme.c implements it as follows:
if (flags & BDRV_REQ_MAY_UNMAP) {
cdw12 |= (1 << 25);
}
This sets the Deallocate bit. But if it's not set, the device may
still deallocate:
"""
If the Deallocate bit (CDW12.DEAC) is set to '1' in a Write Zeroes
command, and the namespace supports clearing all bytes to 0h in the
values read (e.g., bits 2:0 in the DLFEAT field are set to 001b)
from a deallocated logical block and its metadata (excluding
protection information), then for each specified logical block, the
controller:
- should deallocate that logical block;
...
If the Deallocate bit is cleared to '0' in a Write Zeroes command,
and the namespace supports clearing all bytes to 0h in the values
read (e.g., bits 2:0 in the DLFEAT field are set to 001b) from
a deallocated logical block and its metadata (excluding protection
information), then, for each specified logical block, the
controller:
- may deallocate that logical block;
"""
https://nvmexpress.org/wp-content/uploads/NVM-Express-NVM-Command-Set-Specification-2021.06.02-Ratified-1.pdf
b) set BDRV_REQ_NO_FALLBACK in supported_zero_flags
Again, it's not clear what qemu expects here, but without it we end
up in a ridiculous situation where specifying the "don't allow slow
fallback" switch immediately fails all efficient zeroing requests on
a device where Write Zeroes is always efficient:
$ qemu-io -c 'help write' | grep -- '-[zun]'
-n, -- with -z, don't allow slow fallback
-u, -- with -z, allow unmapping
-z, -- write zeroes using blk_co_pwrite_zeroes
$ qemu-io -f rbd -c 'write -z -u -n 0 1M' rbd:foo/bar
write failed: Operation not supported
Thanks,
Ilya
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [PATCH V3 5/6] block/rbd: add write zeroes support
2021-06-26 15:56 ` Ilya Dryomov
@ 2021-06-27 19:29 ` Peter Lieven
0 siblings, 0 replies; 22+ messages in thread
From: Peter Lieven @ 2021-06-27 19:29 UTC (permalink / raw)
To: Ilya Dryomov
Cc: kwolf, Daniel P. Berrangé,
qemu-block, qemu-devel, ct, Paolo Bonzini, mreitz
> Am 26.06.2021 um 17:57 schrieb Ilya Dryomov <idryomov@gmail.com>:
>
> On Mon, Jun 21, 2021 at 10:49 AM Peter Lieven <pl@kamp.de> wrote:
>>
>>> Am 18.06.21 um 12:34 schrieb Ilya Dryomov:
>>> On Fri, Jun 18, 2021 at 11:00 AM Peter Lieven <pl@kamp.de> wrote:
>>>> Am 16.06.21 um 14:34 schrieb Ilya Dryomov:
>>>>> On Wed, May 19, 2021 at 4:28 PM Peter Lieven <pl@kamp.de> wrote:
>>>>>> Signed-off-by: Peter Lieven <pl@kamp.de>
>>>>>> ---
>>>>>> block/rbd.c | 37 ++++++++++++++++++++++++++++++++++++-
>>>>>> 1 file changed, 36 insertions(+), 1 deletion(-)
>>>>>>
>>>>>> diff --git a/block/rbd.c b/block/rbd.c
>>>>>> index 0d8612a988..ee13f08a74 100644
>>>>>> --- a/block/rbd.c
>>>>>> +++ b/block/rbd.c
>>>>>> @@ -63,7 +63,8 @@ typedef enum {
>>>>>> RBD_AIO_READ,
>>>>>> RBD_AIO_WRITE,
>>>>>> RBD_AIO_DISCARD,
>>>>>> - RBD_AIO_FLUSH
>>>>>> + RBD_AIO_FLUSH,
>>>>>> + RBD_AIO_WRITE_ZEROES
>>>>>> } RBDAIOCmd;
>>>>>>
>>>>>> typedef struct BDRVRBDState {
>>>>>> @@ -705,6 +706,10 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags,
>>>>>> }
>>>>>> }
>>>>>>
>>>>>> +#ifdef LIBRBD_SUPPORTS_WRITE_ZEROES
>>>>>> + bs->supported_zero_flags = BDRV_REQ_MAY_UNMAP;
>>>>> I wonder if we should also set BDRV_REQ_NO_FALLBACK here since librbd
>>>>> does not really have a notion of non-efficient explicit zeroing.
>>>>
>>>> This is only true if thick provisioning is supported which is in Octopus onwards, right?
>>> Since Pacific, I think.
>>>
>>>> So it would only be correct to set this if thick provisioning is supported otherwise we could
>>>>
>>>> fail with ENOTSUP and then qemu emulates the zeroing with plain writes.
>>> I actually had a question about that. Why are you returning ENOTSUP
>>> in case BDRV_REQ_MAY_UNMAP is not specified and that can't be fulfilled
>>> because librbd is too old for RBD_WRITE_ZEROES_FLAG_THICK_PROVISION?
>>>
>>> My understanding has always been that BDRV_REQ_MAY_UNMAP is just
>>> a hint. Deallocating if BDRV_REQ_MAY_UNMAP is specified is not nice
>>> but should be perfectly acceptable. It is certainly better than
>>> returning ENOTSUP, particularly if ENOTSUP causes Qemu to do plain
>>> zeroing.
>>
>>
>> I think this was introduced to support different provisioning modes. If BDRV_REQ_MAY_UNMAP is not set
>>
>> the caller of bdrv_write_zeroes expects that the driver does thick provisioning. If the driver cannot handle that (efficiently)
>>
>> qemu does a plain zero write.
>>
>>
>> I am still not fully understanding the meaning of the BDRV_REQ_NO_FALLBACK flag. The original commit states that it was introduced for qemu-img to efficiently
>>
>> zero out the target and avoid the slow fallback. When I last worked on qemu-img convert I remember that there was a call to zero out the target if bdrv_has_zero_init
>>
>> is not 1. It seems that meanwhile a target_is_zero cmdline switch for qemu-img convert was added to let the user assert that a preexisting target is zero.
>>
>> Maybe someone can help here if it would be right to set BDRV_REQ_NO_FALLBACK for rbd in either of the 2 cases (thick provisioning is supported or not)?
>
> Since no one spoke up I think we should
>
> a) remove the BDRV_REQ_MAY_UNMAP check in qemu_rbd_co_pwrite_zeroes()
> and as a consequence always unmap if librbd is too old
>
> It's not clear what qemu's expectation is but in general Write
> Zeroes is allowed to unmap. The only guarantee is that subsequent
> reads return zeroes, everything else is a hint. This is how it is
> specified in the kernel and in the NVMe spec.
>
> In particular, block/nvme.c implements it as follows:
>
> if (flags & BDRV_REQ_MAY_UNMAP) {
> cdw12 |= (1 << 25);
> }
>
> This sets the Deallocate bit. But if it's not set, the device may
> still deallocate:
>
> """
> If the Deallocate bit (CDW12.DEAC) is set to '1' in a Write Zeroes
> command, and the namespace supports clearing all bytes to 0h in the
> values read (e.g., bits 2:0 in the DLFEAT field are set to 001b)
> from a deallocated logical block and its metadata (excluding
> protection information), then for each specified logical block, the
> controller:
> - should deallocate that logical block;
>
> ...
>
> If the Deallocate bit is cleared to '0' in a Write Zeroes command,
> and the namespace supports clearing all bytes to 0h in the values
> read (e.g., bits 2:0 in the DLFEAT field are set to 001b) from
> a deallocated logical block and its metadata (excluding protection
> information), then, for each specified logical block, the
> controller:
> - may deallocate that logical block;
> """
>
> https://nvmexpress.org/wp-content/uploads/NVM-Express-NVM-Command-Set-Specification-2021.06.02-Ratified-1.pdf
>
> b) set BDRV_REQ_NO_FALLBACK in supported_zero_flags
>
> Again, it's not clear what qemu expects here, but without it we end
> up in a ridiculous situation where specifying the "don't allow slow
> fallback" switch immediately fails all efficient zeroing requests on
> a device where Write Zeroes is always efficient:
>
> $ qemu-io -c 'help write' | grep -- '-[zun]'
> -n, -- with -z, don't allow slow fallback
> -u, -- with -z, allow unmapping
> -z, -- write zeroes using blk_co_pwrite_zeroes
>
> $ qemu-io -f rbd -c 'write -z -u -n 0 1M' rbd:foo/bar
> write failed: Operation not supported
Agreed,
I will try to send a V4 early this week.
Peter
^ permalink raw reply [flat|nested] 22+ messages in thread
* [PATCH V3 6/6] block/rbd: drop qemu_rbd_refresh_limits
2021-05-19 14:23 [PATCH V3 0/6] block/rbd: migrate to coroutines and add write zeroes support Peter Lieven
` (4 preceding siblings ...)
2021-05-19 14:23 ` [PATCH V3 5/6] block/rbd: add write zeroes support Peter Lieven
@ 2021-05-19 14:23 ` Peter Lieven
2021-06-19 20:10 ` Ilya Dryomov
5 siblings, 1 reply; 22+ messages in thread
From: Peter Lieven @ 2021-05-19 14:23 UTC (permalink / raw)
To: qemu-block
Cc: kwolf, idryomov, berrange, Peter Lieven, qemu-devel, ct,
pbonzini, mreitz, dillaman
librbd supports 1 byte alignment for all aio operations.
Currently, there is no API call to query limits from the ceph backend.
So drop the bdrv_refresh_limits completely until there is such an API call.
Signed-off-by: Peter Lieven <pl@kamp.de>
---
block/rbd.c | 9 ---------
1 file changed, 9 deletions(-)
diff --git a/block/rbd.c b/block/rbd.c
index ee13f08a74..368a674aa0 100644
--- a/block/rbd.c
+++ b/block/rbd.c
@@ -228,14 +228,6 @@ done:
return;
}
-
-static void qemu_rbd_refresh_limits(BlockDriverState *bs, Error **errp)
-{
- /* XXX Does RBD support AIO on less than 512-byte alignment? */
- bs->bl.request_alignment = 512;
-}
-
-
static int qemu_rbd_set_auth(rados_t cluster, BlockdevOptionsRbd *opts,
Error **errp)
{
@@ -1128,7 +1120,6 @@ static BlockDriver bdrv_rbd = {
.format_name = "rbd",
.instance_size = sizeof(BDRVRBDState),
.bdrv_parse_filename = qemu_rbd_parse_filename,
- .bdrv_refresh_limits = qemu_rbd_refresh_limits,
.bdrv_file_open = qemu_rbd_open,
.bdrv_close = qemu_rbd_close,
.bdrv_reopen_prepare = qemu_rbd_reopen_prepare,
--
2.17.1
^ permalink raw reply related [flat|nested] 22+ messages in thread
* Re: [PATCH V3 6/6] block/rbd: drop qemu_rbd_refresh_limits
2021-05-19 14:23 ` [PATCH V3 6/6] block/rbd: drop qemu_rbd_refresh_limits Peter Lieven
@ 2021-06-19 20:10 ` Ilya Dryomov
0 siblings, 0 replies; 22+ messages in thread
From: Ilya Dryomov @ 2021-06-19 20:10 UTC (permalink / raw)
To: Peter Lieven
Cc: kwolf, berrange, qemu-block, qemu-devel, ct, Paolo Bonzini, mreitz
On Wed, May 19, 2021 at 4:26 PM Peter Lieven <pl@kamp.de> wrote:
>
> librbd supports 1 byte alignment for all aio operations.
>
> Currently, there is no API call to query limits from the ceph backend.
> So drop the bdrv_refresh_limits completely until there is such an API call.
>
> Signed-off-by: Peter Lieven <pl@kamp.de>
> ---
> block/rbd.c | 9 ---------
> 1 file changed, 9 deletions(-)
>
> diff --git a/block/rbd.c b/block/rbd.c
> index ee13f08a74..368a674aa0 100644
> --- a/block/rbd.c
> +++ b/block/rbd.c
> @@ -228,14 +228,6 @@ done:
> return;
> }
>
> -
> -static void qemu_rbd_refresh_limits(BlockDriverState *bs, Error **errp)
> -{
> - /* XXX Does RBD support AIO on less than 512-byte alignment? */
> - bs->bl.request_alignment = 512;
> -}
> -
> -
> static int qemu_rbd_set_auth(rados_t cluster, BlockdevOptionsRbd *opts,
> Error **errp)
> {
> @@ -1128,7 +1120,6 @@ static BlockDriver bdrv_rbd = {
> .format_name = "rbd",
> .instance_size = sizeof(BDRVRBDState),
> .bdrv_parse_filename = qemu_rbd_parse_filename,
> - .bdrv_refresh_limits = qemu_rbd_refresh_limits,
> .bdrv_file_open = qemu_rbd_open,
> .bdrv_close = qemu_rbd_close,
> .bdrv_reopen_prepare = qemu_rbd_reopen_prepare,
> --
> 2.17.1
>
>
>
librbd does support 1-byte-aligned I/O (with the caveat that those
code paths are probably not very well tested). Regardless, I think
it is better to do read-modify-write and similar handling in librbd
than in QEMU.
Reviewed-by: Ilya Dryomov <idryomov@gmail.com>
Thanks,
Ilya
^ permalink raw reply [flat|nested] 22+ messages in thread