* [Qemu-devel] [PATCH v2 1/6] block: Byte-based bdrv_co_do_copy_on_readv()
2016-06-14 13:32 [Qemu-devel] [PATCH v2 0/6] block: Enable byte granularity I/O Kevin Wolf
@ 2016-06-14 13:32 ` Kevin Wolf
2016-06-14 13:32 ` [Qemu-devel] [PATCH v2 2/6] block: Prepare bdrv_aligned_preadv() for byte-aligned requests Kevin Wolf
` (5 subsequent siblings)
6 siblings, 0 replies; 11+ messages in thread
From: Kevin Wolf @ 2016-06-14 13:32 UTC (permalink / raw)
To: qemu-block; +Cc: kwolf, stefanha, famz, eblake, mreitz, qemu-devel
In a first step to convert the common I/O path to work on bytes rather
than sectors, this converts the copy-on-read logic that is used by
bdrv_aligned_preadv().
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
---
block/io.c | 63 +++++++++++++++++++++++++++++++--------------------
block/mirror.c | 10 ++++----
include/block/block.h | 10 +++++---
trace-events | 2 +-
4 files changed, 52 insertions(+), 33 deletions(-)
diff --git a/block/io.c b/block/io.c
index 5b2017f..b6a2c80 100644
--- a/block/io.c
+++ b/block/io.c
@@ -404,12 +404,12 @@ static void mark_request_serialising(BdrvTrackedRequest *req, uint64_t align)
}
/**
- * Round a region to cluster boundaries
+ * Round a region to cluster boundaries (sector-based)
*/
-void bdrv_round_to_clusters(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors,
- int64_t *cluster_sector_num,
- int *cluster_nb_sectors)
+void bdrv_round_sectors_to_clusters(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors,
+ int64_t *cluster_sector_num,
+ int *cluster_nb_sectors)
{
BlockDriverInfo bdi;
@@ -424,6 +424,26 @@ void bdrv_round_to_clusters(BlockDriverState *bs,
}
}
+/**
+ * Round a region to cluster boundaries
+ */
+void bdrv_round_to_clusters(BlockDriverState *bs,
+ int64_t offset, unsigned int bytes,
+ int64_t *cluster_offset,
+ unsigned int *cluster_bytes)
+{
+ BlockDriverInfo bdi;
+
+ if (bdrv_get_info(bs, &bdi) < 0 || bdi.cluster_size == 0) {
+ *cluster_offset = offset;
+ *cluster_bytes = bytes;
+ } else {
+ int64_t c = bdi.cluster_size;
+ *cluster_offset = QEMU_ALIGN_DOWN(offset, c);
+ *cluster_bytes = QEMU_ALIGN_UP(offset - *cluster_offset + bytes, c);
+ }
+}
+
static int bdrv_get_cluster_size(BlockDriverState *bs)
{
BlockDriverInfo bdi;
@@ -865,7 +885,7 @@ emulate_flags:
}
static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
+ int64_t offset, unsigned int bytes, QEMUIOVector *qiov)
{
/* Perform I/O through a temporary buffer so that users who scribble over
* their read buffer while the operation is in progress do not end up
@@ -877,21 +897,20 @@ static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs,
BlockDriver *drv = bs->drv;
struct iovec iov;
QEMUIOVector bounce_qiov;
- int64_t cluster_sector_num;
- int cluster_nb_sectors;
+ int64_t cluster_offset;
+ unsigned int cluster_bytes;
size_t skip_bytes;
int ret;
/* Cover entire cluster so no additional backing file I/O is required when
* allocating cluster in the image file.
*/
- bdrv_round_to_clusters(bs, sector_num, nb_sectors,
- &cluster_sector_num, &cluster_nb_sectors);
+ bdrv_round_to_clusters(bs, offset, bytes, &cluster_offset, &cluster_bytes);
- trace_bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors,
- cluster_sector_num, cluster_nb_sectors);
+ trace_bdrv_co_do_copy_on_readv(bs, offset, bytes,
+ cluster_offset, cluster_bytes);
- iov.iov_len = cluster_nb_sectors * BDRV_SECTOR_SIZE;
+ iov.iov_len = cluster_bytes;
iov.iov_base = bounce_buffer = qemu_try_blockalign(bs, iov.iov_len);
if (bounce_buffer == NULL) {
ret = -ENOMEM;
@@ -900,8 +919,7 @@ static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs,
qemu_iovec_init_external(&bounce_qiov, &iov, 1);
- ret = bdrv_driver_preadv(bs, cluster_sector_num * BDRV_SECTOR_SIZE,
- cluster_nb_sectors * BDRV_SECTOR_SIZE,
+ ret = bdrv_driver_preadv(bs, cluster_offset, cluster_bytes,
&bounce_qiov, 0);
if (ret < 0) {
goto err;
@@ -909,16 +927,12 @@ static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs,
if (drv->bdrv_co_pwrite_zeroes &&
buffer_is_zero(bounce_buffer, iov.iov_len)) {
- ret = bdrv_co_do_pwrite_zeroes(bs,
- cluster_sector_num * BDRV_SECTOR_SIZE,
- cluster_nb_sectors * BDRV_SECTOR_SIZE,
- 0);
+ ret = bdrv_co_do_pwrite_zeroes(bs, cluster_offset, cluster_bytes, 0);
} else {
/* This does not change the data on the disk, it is not necessary
* to flush even in cache=writethrough mode.
*/
- ret = bdrv_driver_pwritev(bs, cluster_sector_num * BDRV_SECTOR_SIZE,
- cluster_nb_sectors * BDRV_SECTOR_SIZE,
+ ret = bdrv_driver_pwritev(bs, cluster_offset, cluster_bytes,
&bounce_qiov, 0);
}
@@ -930,9 +944,8 @@ static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs,
goto err;
}
- skip_bytes = (sector_num - cluster_sector_num) * BDRV_SECTOR_SIZE;
- qemu_iovec_from_buf(qiov, 0, bounce_buffer + skip_bytes,
- nb_sectors * BDRV_SECTOR_SIZE);
+ skip_bytes = offset - cluster_offset;
+ qemu_iovec_from_buf(qiov, 0, bounce_buffer + skip_bytes, bytes);
err:
qemu_vfree(bounce_buffer);
@@ -982,7 +995,7 @@ static int coroutine_fn bdrv_aligned_preadv(BlockDriverState *bs,
}
if (!ret || pnum != nb_sectors) {
- ret = bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors, qiov);
+ ret = bdrv_co_do_copy_on_readv(bs, offset, bytes, qiov);
goto out;
}
}
diff --git a/block/mirror.c b/block/mirror.c
index 1f01f24..41848b2 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -185,8 +185,9 @@ static int mirror_cow_align(MirrorBlockJob *s,
need_cow |= !test_bit((*sector_num + *nb_sectors - 1) / chunk_sectors,
s->cow_bitmap);
if (need_cow) {
- bdrv_round_to_clusters(blk_bs(s->target), *sector_num, *nb_sectors,
- &align_sector_num, &align_nb_sectors);
+ bdrv_round_sectors_to_clusters(blk_bs(s->target), *sector_num,
+ *nb_sectors, &align_sector_num,
+ &align_nb_sectors);
}
if (align_nb_sectors > max_sectors) {
@@ -384,8 +385,9 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
} else if (ret >= 0 && !(ret & BDRV_BLOCK_DATA)) {
int64_t target_sector_num;
int target_nb_sectors;
- bdrv_round_to_clusters(blk_bs(s->target), sector_num, io_sectors,
- &target_sector_num, &target_nb_sectors);
+ bdrv_round_sectors_to_clusters(blk_bs(s->target), sector_num,
+ io_sectors, &target_sector_num,
+ &target_nb_sectors);
if (target_sector_num == sector_num &&
target_nb_sectors == io_sectors) {
mirror_method = ret & BDRV_BLOCK_ZERO ?
diff --git a/include/block/block.h b/include/block/block.h
index 8cabcdd..9c3a62c 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -404,10 +404,14 @@ int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,
const uint8_t *buf, int nb_sectors);
int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi);
ImageInfoSpecific *bdrv_get_specific_info(BlockDriverState *bs);
+void bdrv_round_sectors_to_clusters(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors,
+ int64_t *cluster_sector_num,
+ int *cluster_nb_sectors);
void bdrv_round_to_clusters(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors,
- int64_t *cluster_sector_num,
- int *cluster_nb_sectors);
+ int64_t offset, unsigned int bytes,
+ int64_t *cluster_offset,
+ unsigned int *cluster_bytes);
const char *bdrv_get_encrypted_filename(BlockDriverState *bs);
void bdrv_get_backing_filename(BlockDriverState *bs,
diff --git a/trace-events b/trace-events
index 9891120..c0e586f 100644
--- a/trace-events
+++ b/trace-events
@@ -73,7 +73,7 @@ bdrv_aio_writev(void *bs, int64_t sector_num, int nb_sectors, void *opaque) "bs
bdrv_co_readv(void *bs, int64_t sector_num, int nb_sector) "bs %p sector_num %"PRId64" nb_sectors %d"
bdrv_co_writev(void *bs, int64_t sector_num, int nb_sector) "bs %p sector_num %"PRId64" nb_sectors %d"
bdrv_co_pwrite_zeroes(void *bs, int64_t offset, int count, int flags) "bs %p offset %"PRId64" count %d flags %#x"
-bdrv_co_do_copy_on_readv(void *bs, int64_t sector_num, int nb_sectors, int64_t cluster_sector_num, int cluster_nb_sectors) "bs %p sector_num %"PRId64" nb_sectors %d cluster_sector_num %"PRId64" cluster_nb_sectors %d"
+bdrv_co_do_copy_on_readv(void *bs, int64_t offset, unsigned int bytes, int64_t cluster_offset, unsigned int cluster_bytes) "bs %p offset %"PRId64" bytes %u cluster_offset %"PRId64" cluster_bytes %u"
# block/stream.c
stream_one_iteration(void *s, int64_t sector_num, int nb_sectors, int is_allocated) "s %p sector_num %"PRId64" nb_sectors %d is_allocated %d"
--
1.8.3.1
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [Qemu-devel] [PATCH v2 2/6] block: Prepare bdrv_aligned_preadv() for byte-aligned requests
2016-06-14 13:32 [Qemu-devel] [PATCH v2 0/6] block: Enable byte granularity I/O Kevin Wolf
2016-06-14 13:32 ` [Qemu-devel] [PATCH v2 1/6] block: Byte-based bdrv_co_do_copy_on_readv() Kevin Wolf
@ 2016-06-14 13:32 ` Kevin Wolf
2016-06-14 13:32 ` [Qemu-devel] [PATCH v2 3/6] block: Prepare bdrv_aligned_pwritev() " Kevin Wolf
` (4 subsequent siblings)
6 siblings, 0 replies; 11+ messages in thread
From: Kevin Wolf @ 2016-06-14 13:32 UTC (permalink / raw)
To: qemu-block; +Cc: kwolf, stefanha, famz, eblake, mreitz, qemu-devel
This patch makes bdrv_aligned_preadv() ready to accept byte-aligned
requests. Note that this doesn't mean that such requests are actually
made. The caller still ensures that all requests are aligned to at least
512 bytes.
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
---
block/io.c | 44 ++++++++++++++++++++------------------------
1 file changed, 20 insertions(+), 24 deletions(-)
diff --git a/block/io.c b/block/io.c
index b6a2c80..e75bce2 100644
--- a/block/io.c
+++ b/block/io.c
@@ -963,11 +963,9 @@ static int coroutine_fn bdrv_aligned_preadv(BlockDriverState *bs,
{
int ret;
- int64_t sector_num = offset >> BDRV_SECTOR_BITS;
- unsigned int nb_sectors = bytes >> BDRV_SECTOR_BITS;
-
- assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
- assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
+ assert(is_power_of_2(align));
+ assert((offset & (align - 1)) == 0);
+ assert((bytes & (align - 1)) == 0);
assert(!qiov || bytes == qiov->size);
assert((bs->open_flags & BDRV_O_NO_IO) == 0);
assert(!(flags & ~BDRV_REQ_MASK));
@@ -987,9 +985,12 @@ static int coroutine_fn bdrv_aligned_preadv(BlockDriverState *bs,
}
if (flags & BDRV_REQ_COPY_ON_READ) {
+ int64_t start_sector = offset >> BDRV_SECTOR_BITS;
+ int64_t end_sector = DIV_ROUND_UP(offset + bytes, BDRV_SECTOR_SIZE);
+ unsigned int nb_sectors = end_sector - start_sector;
int pnum;
- ret = bdrv_is_allocated(bs, sector_num, nb_sectors, &pnum);
+ ret = bdrv_is_allocated(bs, start_sector, nb_sectors, &pnum);
if (ret < 0) {
goto out;
}
@@ -1005,28 +1006,24 @@ static int coroutine_fn bdrv_aligned_preadv(BlockDriverState *bs,
ret = bdrv_driver_preadv(bs, offset, bytes, qiov, 0);
} else {
/* Read zeros after EOF */
- int64_t total_sectors, max_nb_sectors;
+ int64_t total_bytes, max_bytes;
- total_sectors = bdrv_nb_sectors(bs);
- if (total_sectors < 0) {
- ret = total_sectors;
+ total_bytes = bdrv_getlength(bs);
+ if (total_bytes < 0) {
+ ret = total_bytes;
goto out;
}
- max_nb_sectors = ROUND_UP(MAX(0, total_sectors - sector_num),
- align >> BDRV_SECTOR_BITS);
- if (nb_sectors < max_nb_sectors) {
+ max_bytes = ROUND_UP(MAX(0, total_bytes - offset), align);
+ if (bytes < max_bytes) {
ret = bdrv_driver_preadv(bs, offset, bytes, qiov, 0);
- } else if (max_nb_sectors > 0) {
+ } else if (max_bytes > 0) {
QEMUIOVector local_qiov;
qemu_iovec_init(&local_qiov, qiov->niov);
- qemu_iovec_concat(&local_qiov, qiov, 0,
- max_nb_sectors * BDRV_SECTOR_SIZE);
+ qemu_iovec_concat(&local_qiov, qiov, 0, max_bytes);
- ret = bdrv_driver_preadv(bs, offset,
- max_nb_sectors * BDRV_SECTOR_SIZE,
- &local_qiov, 0);
+ ret = bdrv_driver_preadv(bs, offset, max_bytes, &local_qiov, 0);
qemu_iovec_destroy(&local_qiov);
} else {
@@ -1034,11 +1031,10 @@ static int coroutine_fn bdrv_aligned_preadv(BlockDriverState *bs,
}
/* Reading beyond end of file is supposed to produce zeroes */
- if (ret == 0 && total_sectors < sector_num + nb_sectors) {
- uint64_t offset = MAX(0, total_sectors - sector_num);
- uint64_t bytes = (sector_num + nb_sectors - offset) *
- BDRV_SECTOR_SIZE;
- qemu_iovec_memset(qiov, offset * BDRV_SECTOR_SIZE, 0, bytes);
+ if (ret == 0 && total_bytes < offset + bytes) {
+ uint64_t zero_offset = MAX(0, total_bytes - offset);
+ uint64_t zero_bytes = offset + bytes - zero_offset;
+ qemu_iovec_memset(qiov, zero_offset, 0, zero_bytes);
}
}
--
1.8.3.1
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [Qemu-devel] [PATCH v2 3/6] block: Prepare bdrv_aligned_pwritev() for byte-aligned requests
2016-06-14 13:32 [Qemu-devel] [PATCH v2 0/6] block: Enable byte granularity I/O Kevin Wolf
2016-06-14 13:32 ` [Qemu-devel] [PATCH v2 1/6] block: Byte-based bdrv_co_do_copy_on_readv() Kevin Wolf
2016-06-14 13:32 ` [Qemu-devel] [PATCH v2 2/6] block: Prepare bdrv_aligned_preadv() for byte-aligned requests Kevin Wolf
@ 2016-06-14 13:32 ` Kevin Wolf
2016-06-14 13:32 ` [Qemu-devel] [PATCH v2 4/6] raw-posix: Switch to bdrv_co_* interfaces Kevin Wolf
` (3 subsequent siblings)
6 siblings, 0 replies; 11+ messages in thread
From: Kevin Wolf @ 2016-06-14 13:32 UTC (permalink / raw)
To: qemu-block; +Cc: kwolf, stefanha, famz, eblake, mreitz, qemu-devel
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
---
block/io.c | 13 +++++--------
1 file changed, 5 insertions(+), 8 deletions(-)
diff --git a/block/io.c b/block/io.c
index e75bce2..b261cc6 100644
--- a/block/io.c
+++ b/block/io.c
@@ -1249,11 +1249,9 @@ static int coroutine_fn bdrv_aligned_pwritev(BlockDriverState *bs,
bool waited;
int ret;
- int64_t sector_num = offset >> BDRV_SECTOR_BITS;
- unsigned int nb_sectors = bytes >> BDRV_SECTOR_BITS;
+ int64_t start_sector = offset >> BDRV_SECTOR_BITS;
+ int64_t end_sector = DIV_ROUND_UP(offset + bytes, BDRV_SECTOR_SIZE);
- assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
- assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
assert(!qiov || bytes == qiov->size);
assert((bs->open_flags & BDRV_O_NO_IO) == 0);
assert(!(flags & ~BDRV_REQ_MASK));
@@ -1278,22 +1276,21 @@ static int coroutine_fn bdrv_aligned_pwritev(BlockDriverState *bs,
/* Do nothing, write notifier decided to fail this request */
} else if (flags & BDRV_REQ_ZERO_WRITE) {
bdrv_debug_event(bs, BLKDBG_PWRITEV_ZERO);
- ret = bdrv_co_do_pwrite_zeroes(bs, sector_num << BDRV_SECTOR_BITS,
- nb_sectors << BDRV_SECTOR_BITS, flags);
+ ret = bdrv_co_do_pwrite_zeroes(bs, offset, bytes, flags);
} else {
bdrv_debug_event(bs, BLKDBG_PWRITEV);
ret = bdrv_driver_pwritev(bs, offset, bytes, qiov, flags);
}
bdrv_debug_event(bs, BLKDBG_PWRITEV_DONE);
- bdrv_set_dirty(bs, sector_num, nb_sectors);
+ bdrv_set_dirty(bs, start_sector, end_sector - start_sector);
if (bs->wr_highest_offset < offset + bytes) {
bs->wr_highest_offset = offset + bytes;
}
if (ret >= 0) {
- bs->total_sectors = MAX(bs->total_sectors, sector_num + nb_sectors);
+ bs->total_sectors = MAX(bs->total_sectors, end_sector);
}
return ret;
--
1.8.3.1
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [Qemu-devel] [PATCH v2 4/6] raw-posix: Switch to bdrv_co_* interfaces
2016-06-14 13:32 [Qemu-devel] [PATCH v2 0/6] block: Enable byte granularity I/O Kevin Wolf
` (2 preceding siblings ...)
2016-06-14 13:32 ` [Qemu-devel] [PATCH v2 3/6] block: Prepare bdrv_aligned_pwritev() " Kevin Wolf
@ 2016-06-14 13:32 ` Kevin Wolf
2016-06-14 14:10 ` Eric Blake
2016-06-14 13:32 ` [Qemu-devel] [PATCH v2 5/6] raw-posix: Implement .bdrv_co_preadv/pwritev Kevin Wolf
` (2 subsequent siblings)
6 siblings, 1 reply; 11+ messages in thread
From: Kevin Wolf @ 2016-06-14 13:32 UTC (permalink / raw)
To: qemu-block; +Cc: kwolf, stefanha, famz, eblake, mreitz, qemu-devel
In order to use the modern byte-based .bdrv_co_preadv/pwritev()
interface, this patch switches raw-posix to coroutine-based interfaces
as a first step. In terms of semantics and performance, it doesn't make
a difference with the existing code whether we go from a coroutine to a
callback-based interface already in block/io.c or only in linux-aio.c
As there have been concerns in the past that this change may be a step
in the wrong direction with respect to a possible AIO fast path, the
old callback-based interface for linux-aio is left around and can be
reactivated when a fast path (e.g. directly from virtio-blk dataplane,
bypassing the whole block layer) is implemented.
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
block/linux-aio.c | 87 +++++++++++++++++++++++++++++++++++++++++--------------
block/raw-aio.h | 4 +++
block/raw-posix.c | 59 +++++++++++++++++--------------------
3 files changed, 96 insertions(+), 54 deletions(-)
diff --git a/block/linux-aio.c b/block/linux-aio.c
index 294b2bf..74d9b33 100644
--- a/block/linux-aio.c
+++ b/block/linux-aio.c
@@ -11,8 +11,10 @@
#include "qemu-common.h"
#include "block/aio.h"
#include "qemu/queue.h"
+#include "block/block.h"
#include "block/raw-aio.h"
#include "qemu/event_notifier.h"
+#include "qemu/coroutine.h"
#include <libaio.h>
@@ -30,6 +32,7 @@
struct qemu_laiocb {
BlockAIOCB common;
+ Coroutine *co;
LinuxAioState *ctx;
struct iocb iocb;
ssize_t ret;
@@ -88,9 +91,14 @@ static void qemu_laio_process_completion(struct qemu_laiocb *laiocb)
}
}
}
- laiocb->common.cb(laiocb->common.opaque, ret);
- qemu_aio_unref(laiocb);
+ laiocb->ret = ret;
+ if (laiocb->co) {
+ qemu_coroutine_enter(laiocb->co, NULL);
+ } else {
+ laiocb->common.cb(laiocb->common.opaque, ret);
+ qemu_aio_unref(laiocb);
+ }
}
/* The completion BH fetches completed I/O requests and invokes their
@@ -232,22 +240,12 @@ void laio_io_unplug(BlockDriverState *bs, LinuxAioState *s)
}
}
-BlockAIOCB *laio_submit(BlockDriverState *bs, LinuxAioState *s, int fd,
- int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
- BlockCompletionFunc *cb, void *opaque, int type)
+static int laio_do_submit(int fd, struct qemu_laiocb *laiocb, off_t offset,
+ int type)
{
- struct qemu_laiocb *laiocb;
- struct iocb *iocbs;
- off_t offset = sector_num * 512;
-
- laiocb = qemu_aio_get(&laio_aiocb_info, bs, cb, opaque);
- laiocb->nbytes = nb_sectors * 512;
- laiocb->ctx = s;
- laiocb->ret = -EINPROGRESS;
- laiocb->is_read = (type == QEMU_AIO_READ);
- laiocb->qiov = qiov;
-
- iocbs = &laiocb->iocb;
+ LinuxAioState *s = laiocb->ctx;
+ struct iocb *iocbs = &laiocb->iocb;
+ QEMUIOVector *qiov = laiocb->qiov;
switch (type) {
case QEMU_AIO_WRITE:
@@ -260,7 +258,7 @@ BlockAIOCB *laio_submit(BlockDriverState *bs, LinuxAioState *s, int fd,
default:
fprintf(stderr, "%s: invalid AIO request type 0x%x.\n",
__func__, type);
- goto out_free_aiocb;
+ return -EIO;
}
io_set_eventfd(&laiocb->iocb, event_notifier_get_fd(&s->e));
@@ -270,11 +268,56 @@ BlockAIOCB *laio_submit(BlockDriverState *bs, LinuxAioState *s, int fd,
(!s->io_q.plugged || s->io_q.n >= MAX_QUEUED_IO)) {
ioq_submit(s);
}
- return &laiocb->common;
-out_free_aiocb:
- qemu_aio_unref(laiocb);
- return NULL;
+ return 0;
+}
+
+int coroutine_fn laio_co_submit(BlockDriverState *bs, LinuxAioState *s, int fd,
+ int64_t sector_num, QEMUIOVector *qiov,
+ int nb_sectors, int type)
+{
+ off_t offset = sector_num * BDRV_SECTOR_SIZE;
+ int ret;
+
+ struct qemu_laiocb laiocb = {
+ .co = qemu_coroutine_self(),
+ .nbytes = nb_sectors * BDRV_SECTOR_SIZE,
+ .ctx = s,
+ .is_read = (type == QEMU_AIO_READ),
+ .qiov = qiov,
+ };
+
+ ret = laio_do_submit(fd, &laiocb, offset, type);
+ if (ret < 0) {
+ return ret;
+ }
+
+ qemu_coroutine_yield();
+ return laiocb.ret;
+}
+
+BlockAIOCB *laio_submit(BlockDriverState *bs, LinuxAioState *s, int fd,
+ int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+ BlockCompletionFunc *cb, void *opaque, int type)
+{
+ struct qemu_laiocb *laiocb;
+ off_t offset = sector_num * BDRV_SECTOR_SIZE;
+ int ret;
+
+ laiocb = qemu_aio_get(&laio_aiocb_info, bs, cb, opaque);
+ laiocb->nbytes = nb_sectors * BDRV_SECTOR_SIZE;
+ laiocb->ctx = s;
+ laiocb->ret = -EINPROGRESS;
+ laiocb->is_read = (type == QEMU_AIO_READ);
+ laiocb->qiov = qiov;
+
+ ret = laio_do_submit(fd, laiocb, offset, type);
+ if (ret < 0) {
+ qemu_aio_unref(laiocb);
+ return NULL;
+ }
+
+ return &laiocb->common;
}
void laio_detach_aio_context(LinuxAioState *s, AioContext *old_context)
diff --git a/block/raw-aio.h b/block/raw-aio.h
index 714714e..03bbfba 100644
--- a/block/raw-aio.h
+++ b/block/raw-aio.h
@@ -15,6 +15,7 @@
#ifndef QEMU_RAW_AIO_H
#define QEMU_RAW_AIO_H
+#include "qemu/coroutine.h"
#include "qemu/iov.h"
/* AIO request types */
@@ -38,6 +39,9 @@
typedef struct LinuxAioState LinuxAioState;
LinuxAioState *laio_init(void);
void laio_cleanup(LinuxAioState *s);
+int coroutine_fn laio_co_submit(BlockDriverState *bs, LinuxAioState *s, int fd,
+ int64_t sector_num, QEMUIOVector *qiov,
+ int nb_sectors, int type);
BlockAIOCB *laio_submit(BlockDriverState *bs, LinuxAioState *s, int fd,
int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
BlockCompletionFunc *cb, void *opaque, int type);
diff --git a/block/raw-posix.c b/block/raw-posix.c
index ce2e20f..cb98769 100644
--- a/block/raw-posix.c
+++ b/block/raw-posix.c
@@ -1325,14 +1325,13 @@ static BlockAIOCB *paio_submit(BlockDriverState *bs, int fd,
return thread_pool_submit_aio(pool, aio_worker, acb, cb, opaque);
}
-static BlockAIOCB *raw_aio_submit(BlockDriverState *bs,
- int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
- BlockCompletionFunc *cb, void *opaque, int type)
+static int coroutine_fn raw_co_rw(BlockDriverState *bs, int64_t sector_num,
+ int nb_sectors, QEMUIOVector *qiov, int type)
{
BDRVRawState *s = bs->opaque;
if (fd_open(bs) < 0)
- return NULL;
+ return -EIO;
/*
* Check if the underlying device requires requests to be aligned,
@@ -1345,14 +1344,26 @@ static BlockAIOCB *raw_aio_submit(BlockDriverState *bs,
type |= QEMU_AIO_MISALIGNED;
#ifdef CONFIG_LINUX_AIO
} else if (s->use_aio) {
- return laio_submit(bs, s->aio_ctx, s->fd, sector_num, qiov,
- nb_sectors, cb, opaque, type);
+ return laio_co_submit(bs, s->aio_ctx, s->fd, sector_num, qiov,
+ nb_sectors, type);
#endif
}
}
- return paio_submit(bs, s->fd, sector_num, qiov, nb_sectors,
- cb, opaque, type);
+ return paio_submit_co(bs, s->fd, sector_num * BDRV_SECTOR_SIZE, qiov,
+ nb_sectors * BDRV_SECTOR_SIZE, type);
+}
+
+static int coroutine_fn raw_co_readv(BlockDriverState *bs, int64_t sector_num,
+ int nb_sectors, QEMUIOVector *qiov)
+{
+ return raw_co_rw(bs, sector_num, nb_sectors, qiov, QEMU_AIO_READ);
+}
+
+static int coroutine_fn raw_co_writev(BlockDriverState *bs, int64_t sector_num,
+ int nb_sectors, QEMUIOVector *qiov)
+{
+ return raw_co_rw(bs, sector_num, nb_sectors, qiov, QEMU_AIO_WRITE);
}
static void raw_aio_plug(BlockDriverState *bs)
@@ -1375,22 +1386,6 @@ static void raw_aio_unplug(BlockDriverState *bs)
#endif
}
-static BlockAIOCB *raw_aio_readv(BlockDriverState *bs,
- int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
- BlockCompletionFunc *cb, void *opaque)
-{
- return raw_aio_submit(bs, sector_num, qiov, nb_sectors,
- cb, opaque, QEMU_AIO_READ);
-}
-
-static BlockAIOCB *raw_aio_writev(BlockDriverState *bs,
- int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
- BlockCompletionFunc *cb, void *opaque)
-{
- return raw_aio_submit(bs, sector_num, qiov, nb_sectors,
- cb, opaque, QEMU_AIO_WRITE);
-}
-
static BlockAIOCB *raw_aio_flush(BlockDriverState *bs,
BlockCompletionFunc *cb, void *opaque)
{
@@ -1957,8 +1952,8 @@ BlockDriver bdrv_file = {
.bdrv_co_get_block_status = raw_co_get_block_status,
.bdrv_co_pwrite_zeroes = raw_co_pwrite_zeroes,
- .bdrv_aio_readv = raw_aio_readv,
- .bdrv_aio_writev = raw_aio_writev,
+ .bdrv_co_readv = raw_co_readv,
+ .bdrv_co_writev = raw_co_writev,
.bdrv_aio_flush = raw_aio_flush,
.bdrv_aio_discard = raw_aio_discard,
.bdrv_refresh_limits = raw_refresh_limits,
@@ -2405,8 +2400,8 @@ static BlockDriver bdrv_host_device = {
.create_opts = &raw_create_opts,
.bdrv_co_pwrite_zeroes = hdev_co_pwrite_zeroes,
- .bdrv_aio_readv = raw_aio_readv,
- .bdrv_aio_writev = raw_aio_writev,
+ .bdrv_co_readv = raw_co_readv,
+ .bdrv_co_writev = raw_co_writev,
.bdrv_aio_flush = raw_aio_flush,
.bdrv_aio_discard = hdev_aio_discard,
.bdrv_refresh_limits = raw_refresh_limits,
@@ -2535,8 +2530,8 @@ static BlockDriver bdrv_host_cdrom = {
.bdrv_create = hdev_create,
.create_opts = &raw_create_opts,
- .bdrv_aio_readv = raw_aio_readv,
- .bdrv_aio_writev = raw_aio_writev,
+ .bdrv_co_readv = raw_co_readv,
+ .bdrv_co_writev = raw_co_writev,
.bdrv_aio_flush = raw_aio_flush,
.bdrv_refresh_limits = raw_refresh_limits,
.bdrv_io_plug = raw_aio_plug,
@@ -2670,8 +2665,8 @@ static BlockDriver bdrv_host_cdrom = {
.bdrv_create = hdev_create,
.create_opts = &raw_create_opts,
- .bdrv_aio_readv = raw_aio_readv,
- .bdrv_aio_writev = raw_aio_writev,
+ .bdrv_co_readv = raw_co_readv,
+ .bdrv_co_writev = raw_co_writev,
.bdrv_aio_flush = raw_aio_flush,
.bdrv_refresh_limits = raw_refresh_limits,
.bdrv_io_plug = raw_aio_plug,
--
1.8.3.1
^ permalink raw reply related [flat|nested] 11+ messages in thread
* Re: [Qemu-devel] [PATCH v2 4/6] raw-posix: Switch to bdrv_co_* interfaces
2016-06-14 13:32 ` [Qemu-devel] [PATCH v2 4/6] raw-posix: Switch to bdrv_co_* interfaces Kevin Wolf
@ 2016-06-14 14:10 ` Eric Blake
0 siblings, 0 replies; 11+ messages in thread
From: Eric Blake @ 2016-06-14 14:10 UTC (permalink / raw)
To: Kevin Wolf, qemu-block; +Cc: stefanha, famz, mreitz, qemu-devel
[-- Attachment #1: Type: text/plain, Size: 2390 bytes --]
On 06/14/2016 07:32 AM, Kevin Wolf wrote:
> In order to use the modern byte-based .bdrv_co_preadv/pwritev()
> interface, this patch switches raw-posix to coroutine-based interfaces
> as a first step. In terms of semantics and performance, it doesn't make
> a difference with the existing code whether we go from a coroutine to a
> callback-based interface already in block/io.c or only in linux-aio.c
>
> As there have been concerns in the past that this change may be a step
> in the wrong direction with respect to a possible AIO fast path, the
> old callback-based interface for linux-aio is left around and can be
> reactivated when a fast path (e.g. directly from virtio-blk dataplane,
> bypassing the whole block layer) is implemented.
>
> Signed-off-by: Kevin Wolf <kwolf@redhat.com>
> ---
> block/linux-aio.c | 87 +++++++++++++++++++++++++++++++++++++++++--------------
> block/raw-aio.h | 4 +++
> block/raw-posix.c | 59 +++++++++++++++++--------------------
> 3 files changed, 96 insertions(+), 54 deletions(-)
Reviewed-by: Eric Blake <eblake@redhat.com>
> @@ -1957,8 +1952,8 @@ BlockDriver bdrv_file = {
> .bdrv_co_get_block_status = raw_co_get_block_status,
> .bdrv_co_pwrite_zeroes = raw_co_pwrite_zeroes,
>
> - .bdrv_aio_readv = raw_aio_readv,
> - .bdrv_aio_writev = raw_aio_writev,
> + .bdrv_co_readv = raw_co_readv,
> + .bdrv_co_writev = raw_co_writev,
> .bdrv_aio_flush = raw_aio_flush,
> .bdrv_aio_discard = raw_aio_discard,
The rest of this chunk doesn't try to align '='. Not worth a respin, but
something you may want to clean up on pull request.
> .bdrv_refresh_limits = raw_refresh_limits,
> @@ -2405,8 +2400,8 @@ static BlockDriver bdrv_host_device = {
> .create_opts = &raw_create_opts,
> .bdrv_co_pwrite_zeroes = hdev_co_pwrite_zeroes,
>
> - .bdrv_aio_readv = raw_aio_readv,
> - .bdrv_aio_writev = raw_aio_writev,
> + .bdrv_co_readv = raw_co_readv,
> + .bdrv_co_writev = raw_co_writev,
> .bdrv_aio_flush = raw_aio_flush,
> .bdrv_aio_discard = hdev_aio_discard,
> .bdrv_refresh_limits = raw_refresh_limits,
and this hunk is also inconsistent, but in a different way.
--
Eric Blake eblake redhat com +1-919-301-3266
Libvirt virtualization library http://libvirt.org
[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 604 bytes --]
^ permalink raw reply [flat|nested] 11+ messages in thread
* [Qemu-devel] [PATCH v2 5/6] raw-posix: Implement .bdrv_co_preadv/pwritev
2016-06-14 13:32 [Qemu-devel] [PATCH v2 0/6] block: Enable byte granularity I/O Kevin Wolf
` (3 preceding siblings ...)
2016-06-14 13:32 ` [Qemu-devel] [PATCH v2 4/6] raw-posix: Switch to bdrv_co_* interfaces Kevin Wolf
@ 2016-06-14 13:32 ` Kevin Wolf
2016-06-14 14:15 ` Eric Blake
2016-06-14 13:32 ` [Qemu-devel] [PATCH v2 6/6] block: Don't enforce 512 byte minimum alignment Kevin Wolf
2016-06-15 9:02 ` [Qemu-devel] [PATCH v2 0/6] block: Enable byte granularity I/O Stefan Hajnoczi
6 siblings, 1 reply; 11+ messages in thread
From: Kevin Wolf @ 2016-06-14 13:32 UTC (permalink / raw)
To: qemu-block; +Cc: kwolf, stefanha, famz, eblake, mreitz, qemu-devel
The raw-posix block driver actually supports byte-aligned requests now
on non-O_DIRECT images, like it already (and previously incorrectly)
claimed in bs->request_alignment.
For some block drivers this means that a RMW cycle can be avoided when
they write sub-sector metadata e.g. for cluster allocation.
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
block/linux-aio.c | 7 ++-----
block/raw-aio.h | 3 +--
block/raw-posix.c | 43 +++++++++++++++++++++++--------------------
3 files changed, 26 insertions(+), 27 deletions(-)
diff --git a/block/linux-aio.c b/block/linux-aio.c
index 74d9b33..e468960 100644
--- a/block/linux-aio.c
+++ b/block/linux-aio.c
@@ -273,15 +273,12 @@ static int laio_do_submit(int fd, struct qemu_laiocb *laiocb, off_t offset,
}
int coroutine_fn laio_co_submit(BlockDriverState *bs, LinuxAioState *s, int fd,
- int64_t sector_num, QEMUIOVector *qiov,
- int nb_sectors, int type)
+ uint64_t offset, QEMUIOVector *qiov, int type)
{
- off_t offset = sector_num * BDRV_SECTOR_SIZE;
int ret;
-
struct qemu_laiocb laiocb = {
.co = qemu_coroutine_self(),
- .nbytes = nb_sectors * BDRV_SECTOR_SIZE,
+ .nbytes = qiov->size,
.ctx = s,
.is_read = (type == QEMU_AIO_READ),
.qiov = qiov,
diff --git a/block/raw-aio.h b/block/raw-aio.h
index 03bbfba..a4cdbbf 100644
--- a/block/raw-aio.h
+++ b/block/raw-aio.h
@@ -40,8 +40,7 @@ typedef struct LinuxAioState LinuxAioState;
LinuxAioState *laio_init(void);
void laio_cleanup(LinuxAioState *s);
int coroutine_fn laio_co_submit(BlockDriverState *bs, LinuxAioState *s, int fd,
- int64_t sector_num, QEMUIOVector *qiov,
- int nb_sectors, int type);
+ uint64_t offset, QEMUIOVector *qiov, int type);
BlockAIOCB *laio_submit(BlockDriverState *bs, LinuxAioState *s, int fd,
int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
BlockCompletionFunc *cb, void *opaque, int type);
diff --git a/block/raw-posix.c b/block/raw-posix.c
index cb98769..aacf132 100644
--- a/block/raw-posix.c
+++ b/block/raw-posix.c
@@ -1325,8 +1325,8 @@ static BlockAIOCB *paio_submit(BlockDriverState *bs, int fd,
return thread_pool_submit_aio(pool, aio_worker, acb, cb, opaque);
}
-static int coroutine_fn raw_co_rw(BlockDriverState *bs, int64_t sector_num,
- int nb_sectors, QEMUIOVector *qiov, int type)
+static int coroutine_fn raw_co_prw(BlockDriverState *bs, uint64_t offset,
+ uint64_t bytes, QEMUIOVector *qiov, int type)
{
BDRVRawState *s = bs->opaque;
@@ -1344,26 +1344,28 @@ static int coroutine_fn raw_co_rw(BlockDriverState *bs, int64_t sector_num,
type |= QEMU_AIO_MISALIGNED;
#ifdef CONFIG_LINUX_AIO
} else if (s->use_aio) {
- return laio_co_submit(bs, s->aio_ctx, s->fd, sector_num, qiov,
- nb_sectors, type);
+ assert(qiov->size == bytes);
+ return laio_co_submit(bs, s->aio_ctx, s->fd, offset, qiov, type);
#endif
}
}
- return paio_submit_co(bs, s->fd, sector_num * BDRV_SECTOR_SIZE, qiov,
- nb_sectors * BDRV_SECTOR_SIZE, type);
+ return paio_submit_co(bs, s->fd, offset, qiov, bytes, type);
}
-static int coroutine_fn raw_co_readv(BlockDriverState *bs, int64_t sector_num,
- int nb_sectors, QEMUIOVector *qiov)
+static int coroutine_fn raw_co_preadv(BlockDriverState *bs, uint64_t offset,
+ uint64_t bytes, QEMUIOVector *qiov,
+ int flags)
{
- return raw_co_rw(bs, sector_num, nb_sectors, qiov, QEMU_AIO_READ);
+ return raw_co_prw(bs, offset, bytes, qiov, QEMU_AIO_READ);
}
-static int coroutine_fn raw_co_writev(BlockDriverState *bs, int64_t sector_num,
- int nb_sectors, QEMUIOVector *qiov)
+static int coroutine_fn raw_co_pwritev(BlockDriverState *bs, uint64_t offset,
+ uint64_t bytes, QEMUIOVector *qiov,
+ int flags)
{
- return raw_co_rw(bs, sector_num, nb_sectors, qiov, QEMU_AIO_WRITE);
+ assert(flags == 0);
+ return raw_co_prw(bs, offset, bytes, qiov, QEMU_AIO_WRITE);
}
static void raw_aio_plug(BlockDriverState *bs)
@@ -1952,8 +1954,8 @@ BlockDriver bdrv_file = {
.bdrv_co_get_block_status = raw_co_get_block_status,
.bdrv_co_pwrite_zeroes = raw_co_pwrite_zeroes,
- .bdrv_co_readv = raw_co_readv,
- .bdrv_co_writev = raw_co_writev,
+ .bdrv_co_preadv = raw_co_preadv,
+ .bdrv_co_pwritev = raw_co_pwritev,
.bdrv_aio_flush = raw_aio_flush,
.bdrv_aio_discard = raw_aio_discard,
.bdrv_refresh_limits = raw_refresh_limits,
@@ -2400,8 +2402,8 @@ static BlockDriver bdrv_host_device = {
.create_opts = &raw_create_opts,
.bdrv_co_pwrite_zeroes = hdev_co_pwrite_zeroes,
- .bdrv_co_readv = raw_co_readv,
- .bdrv_co_writev = raw_co_writev,
+ .bdrv_co_preadv = raw_co_preadv,
+ .bdrv_co_pwritev = raw_co_pwritev,
.bdrv_aio_flush = raw_aio_flush,
.bdrv_aio_discard = hdev_aio_discard,
.bdrv_refresh_limits = raw_refresh_limits,
@@ -2530,8 +2532,9 @@ static BlockDriver bdrv_host_cdrom = {
.bdrv_create = hdev_create,
.create_opts = &raw_create_opts,
- .bdrv_co_readv = raw_co_readv,
- .bdrv_co_writev = raw_co_writev,
+
+ .bdrv_co_preadv = raw_co_preadv,
+ .bdrv_co_pwritev = raw_co_pwritev,
.bdrv_aio_flush = raw_aio_flush,
.bdrv_refresh_limits = raw_refresh_limits,
.bdrv_io_plug = raw_aio_plug,
@@ -2665,8 +2668,8 @@ static BlockDriver bdrv_host_cdrom = {
.bdrv_create = hdev_create,
.create_opts = &raw_create_opts,
- .bdrv_co_readv = raw_co_readv,
- .bdrv_co_writev = raw_co_writev,
+ .bdrv_co_preadv = raw_co_preadv,
+ .bdrv_co_pwritev = raw_co_pwritev,
.bdrv_aio_flush = raw_aio_flush,
.bdrv_refresh_limits = raw_refresh_limits,
.bdrv_io_plug = raw_aio_plug,
--
1.8.3.1
^ permalink raw reply related [flat|nested] 11+ messages in thread
* Re: [Qemu-devel] [PATCH v2 5/6] raw-posix: Implement .bdrv_co_preadv/pwritev
2016-06-14 13:32 ` [Qemu-devel] [PATCH v2 5/6] raw-posix: Implement .bdrv_co_preadv/pwritev Kevin Wolf
@ 2016-06-14 14:15 ` Eric Blake
0 siblings, 0 replies; 11+ messages in thread
From: Eric Blake @ 2016-06-14 14:15 UTC (permalink / raw)
To: Kevin Wolf, qemu-block; +Cc: stefanha, famz, mreitz, qemu-devel
[-- Attachment #1: Type: text/plain, Size: 1985 bytes --]
On 06/14/2016 07:32 AM, Kevin Wolf wrote:
> The raw-posix block driver actually supports byte-aligned requests now
> on non-O_DIRECT images, like it already (and previously incorrectly)
> claimed in bs->request_alignment.
>
> For some block drivers this means that a RMW cycle can be avoided when
> they write sub-sector metadata e.g. for cluster allocation.
>
> Signed-off-by: Kevin Wolf <kwolf@redhat.com>
> ---
> block/linux-aio.c | 7 ++-----
> block/raw-aio.h | 3 +--
> block/raw-posix.c | 43 +++++++++++++++++++++++--------------------
> 3 files changed, 26 insertions(+), 27 deletions(-)
>
> -static int coroutine_fn raw_co_readv(BlockDriverState *bs, int64_t sector_num,
> - int nb_sectors, QEMUIOVector *qiov)
> +static int coroutine_fn raw_co_preadv(BlockDriverState *bs, uint64_t offset,
> + uint64_t bytes, QEMUIOVector *qiov,
> + int flags)
> {
> - return raw_co_rw(bs, sector_num, nb_sectors, qiov, QEMU_AIO_READ);
> + return raw_co_prw(bs, offset, bytes, qiov, QEMU_AIO_READ);
> }
>
> -static int coroutine_fn raw_co_writev(BlockDriverState *bs, int64_t sector_num,
> - int nb_sectors, QEMUIOVector *qiov)
> +static int coroutine_fn raw_co_pwritev(BlockDriverState *bs, uint64_t offset,
> + uint64_t bytes, QEMUIOVector *qiov,
> + int flags)
> {
> - return raw_co_rw(bs, sector_num, nb_sectors, qiov, QEMU_AIO_WRITE);
> + assert(flags == 0);
> + return raw_co_prw(bs, offset, bytes, qiov, QEMU_AIO_WRITE);
> }
Looks odd to assert !flags on pwritev but not on preadv. Minor enough
that you could fix on pull request.
Reviewed-by: Eric Blake <eblake@redhat.com>
--
Eric Blake eblake redhat com +1-919-301-3266
Libvirt virtualization library http://libvirt.org
[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 604 bytes --]
^ permalink raw reply [flat|nested] 11+ messages in thread
* [Qemu-devel] [PATCH v2 6/6] block: Don't enforce 512 byte minimum alignment
2016-06-14 13:32 [Qemu-devel] [PATCH v2 0/6] block: Enable byte granularity I/O Kevin Wolf
` (4 preceding siblings ...)
2016-06-14 13:32 ` [Qemu-devel] [PATCH v2 5/6] raw-posix: Implement .bdrv_co_preadv/pwritev Kevin Wolf
@ 2016-06-14 13:32 ` Kevin Wolf
2016-06-15 9:02 ` [Qemu-devel] [PATCH v2 0/6] block: Enable byte granularity I/O Stefan Hajnoczi
6 siblings, 0 replies; 11+ messages in thread
From: Kevin Wolf @ 2016-06-14 13:32 UTC (permalink / raw)
To: qemu-block; +Cc: kwolf, stefanha, famz, eblake, mreitz, qemu-devel
If block drivers say that they can do an alignment < 512 bytes, let's
just suppose they mean it. raw-posix used to be an offender with respect
to this, but it can actually deal with byte-aligned requests now.
The default is still 512 bytes for any drivers that only implement
sector-based interfaces, but it is 1 now for drivers that implement
.bdrv_co_preadv.
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
---
block.c | 2 +-
block/io.c | 8 +++-----
2 files changed, 4 insertions(+), 6 deletions(-)
diff --git a/block.c b/block.c
index f54bc25..3d850a2 100644
--- a/block.c
+++ b/block.c
@@ -937,7 +937,7 @@ static int bdrv_open_common(BlockDriverState *bs, BdrvChild *file,
goto fail_opts;
}
- bs->request_alignment = 512;
+ bs->request_alignment = drv->bdrv_co_preadv ? 1 : 512;
bs->zero_beyond_eof = true;
bs->read_only = !(bs->open_flags & BDRV_O_RDWR);
diff --git a/block/io.c b/block/io.c
index b261cc6..b3ff9be 100644
--- a/block/io.c
+++ b/block/io.c
@@ -1052,8 +1052,7 @@ int coroutine_fn bdrv_co_preadv(BlockDriverState *bs,
BlockDriver *drv = bs->drv;
BdrvTrackedRequest req;
- /* TODO Lift BDRV_SECTOR_SIZE restriction in BlockDriver interface */
- uint64_t align = MAX(BDRV_SECTOR_SIZE, bs->request_alignment);
+ uint64_t align = bs->request_alignment;
uint8_t *head_buf = NULL;
uint8_t *tail_buf = NULL;
QEMUIOVector local_qiov;
@@ -1305,7 +1304,7 @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BlockDriverState *bs,
uint8_t *buf = NULL;
QEMUIOVector local_qiov;
struct iovec iov;
- uint64_t align = MAX(BDRV_SECTOR_SIZE, bs->request_alignment);
+ uint64_t align = bs->request_alignment;
unsigned int head_padding_bytes, tail_padding_bytes;
int ret = 0;
@@ -1392,8 +1391,7 @@ int coroutine_fn bdrv_co_pwritev(BlockDriverState *bs,
BdrvRequestFlags flags)
{
BdrvTrackedRequest req;
- /* TODO Lift BDRV_SECTOR_SIZE restriction in BlockDriver interface */
- uint64_t align = MAX(BDRV_SECTOR_SIZE, bs->request_alignment);
+ uint64_t align = bs->request_alignment;
uint8_t *head_buf = NULL;
uint8_t *tail_buf = NULL;
QEMUIOVector local_qiov;
--
1.8.3.1
^ permalink raw reply related [flat|nested] 11+ messages in thread
* Re: [Qemu-devel] [PATCH v2 0/6] block: Enable byte granularity I/O
2016-06-14 13:32 [Qemu-devel] [PATCH v2 0/6] block: Enable byte granularity I/O Kevin Wolf
` (5 preceding siblings ...)
2016-06-14 13:32 ` [Qemu-devel] [PATCH v2 6/6] block: Don't enforce 512 byte minimum alignment Kevin Wolf
@ 2016-06-15 9:02 ` Stefan Hajnoczi
2016-06-15 10:33 ` Kevin Wolf
6 siblings, 1 reply; 11+ messages in thread
From: Stefan Hajnoczi @ 2016-06-15 9:02 UTC (permalink / raw)
To: Kevin Wolf; +Cc: qemu-block, famz, eblake, mreitz, qemu-devel
[-- Attachment #1: Type: text/plain, Size: 1640 bytes --]
On Tue, Jun 14, 2016 at 03:32:29PM +0200, Kevin Wolf wrote:
> Previous series have already converted some block drivers to byte-based rather
> than sector-based interfaces. However, the common I/O path as well as raw-posix
> still enforced a minimum alignment of 512 bytes because some sector-based logic
> was involved.
>
> This patch series removes these limitations and a sub-sector request actually
> ends up as a sub-sector syscall now.
>
> v2:
> - Updated trace-events for bdrv_co_do_copy_on_readv() [Eric]
> - Added some assertions [Eric]
> - Renamed laio_submit_co() -> laio_co_submit() and added coroutine_fn
> to its prototype [Stefan]
> - linux-aio: Include block/block.h and use BDRV_SECTOR_SIZE instead
> of 512 [Eric]
>
> Kevin Wolf (6):
> block: Byte-based bdrv_co_do_copy_on_readv()
> block: Prepare bdrv_aligned_preadv() for byte-aligned requests
> block: Prepare bdrv_aligned_pwritev() for byte-aligned requests
> raw-posix: Switch to bdrv_co_* interfaces
> raw-posix: Implement .bdrv_co_preadv/pwritev
> block: Don't enforce 512 byte minimum alignment
>
> block.c | 2 +-
> block/io.c | 128 ++++++++++++++++++++++++++------------------------
> block/linux-aio.c | 84 ++++++++++++++++++++++++---------
> block/mirror.c | 10 ++--
> block/raw-aio.h | 3 ++
> block/raw-posix.c | 62 ++++++++++++------------
> include/block/block.h | 10 ++--
> trace-events | 2 +-
> 8 files changed, 176 insertions(+), 125 deletions(-)
>
> --
> 1.8.3.1
>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 473 bytes --]
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [Qemu-devel] [PATCH v2 0/6] block: Enable byte granularity I/O
2016-06-15 9:02 ` [Qemu-devel] [PATCH v2 0/6] block: Enable byte granularity I/O Stefan Hajnoczi
@ 2016-06-15 10:33 ` Kevin Wolf
0 siblings, 0 replies; 11+ messages in thread
From: Kevin Wolf @ 2016-06-15 10:33 UTC (permalink / raw)
To: Stefan Hajnoczi; +Cc: qemu-block, famz, eblake, mreitz, qemu-devel
[-- Attachment #1: Type: text/plain, Size: 1791 bytes --]
Am 15.06.2016 um 11:02 hat Stefan Hajnoczi geschrieben:
> On Tue, Jun 14, 2016 at 03:32:29PM +0200, Kevin Wolf wrote:
> > Previous series have already converted some block drivers to byte-based rather
> > than sector-based interfaces. However, the common I/O path as well as raw-posix
> > still enforced a minimum alignment of 512 bytes because some sector-based logic
> > was involved.
> >
> > This patch series removes these limitations and a sub-sector request actually
> > ends up as a sub-sector syscall now.
> >
> > v2:
> > - Updated trace-events for bdrv_co_do_copy_on_readv() [Eric]
> > - Added some assertions [Eric]
> > - Renamed laio_submit_co() -> laio_co_submit() and added coroutine_fn
> > to its prototype [Stefan]
> > - linux-aio: Include block/block.h and use BDRV_SECTOR_SIZE instead
> > of 512 [Eric]
> >
> > Kevin Wolf (6):
> > block: Byte-based bdrv_co_do_copy_on_readv()
> > block: Prepare bdrv_aligned_preadv() for byte-aligned requests
> > block: Prepare bdrv_aligned_pwritev() for byte-aligned requests
> > raw-posix: Switch to bdrv_co_* interfaces
> > raw-posix: Implement .bdrv_co_preadv/pwritev
> > block: Don't enforce 512 byte minimum alignment
> >
> > block.c | 2 +-
> > block/io.c | 128 ++++++++++++++++++++++++++------------------------
> > block/linux-aio.c | 84 ++++++++++++++++++++++++---------
> > block/mirror.c | 10 ++--
> > block/raw-aio.h | 3 ++
> > block/raw-posix.c | 62 ++++++++++++------------
> > include/block/block.h | 10 ++--
> > trace-events | 2 +-
> > 8 files changed, 176 insertions(+), 125 deletions(-)
>
> Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Thanks. Applied to my block branch.
Kevin
[-- Attachment #2: Type: application/pgp-signature, Size: 836 bytes --]
^ permalink raw reply [flat|nested] 11+ messages in thread