* [Qemu-devel] [PATCH 0/6] block: Enable byte granularity I/O
@ 2016-06-08 14:10 Kevin Wolf
2016-06-08 14:10 ` [Qemu-devel] [PATCH 1/6] block: Byte-based bdrv_co_do_copy_on_readv() Kevin Wolf
` (6 more replies)
0 siblings, 7 replies; 24+ messages in thread
From: Kevin Wolf @ 2016-06-08 14:10 UTC (permalink / raw)
To: qemu-block; +Cc: kwolf, eblake, pbonzini, mreitz, stefanha, famz, qemu-devel
Previous series have already converted some block drivers to byte-based rather
than sector-based interfaces. However, the common I/O path as well as raw-posix
still enforced a minimum alignment of 512 bytes because some sector-based logic
was involved.
This patch series removes these limitations and a sub-sector request actually
ends up as a sub-sector syscall now.
Kevin Wolf (6):
block: Byte-based bdrv_co_do_copy_on_readv()
block: Prepare bdrv_aligned_preadv() for byte-aligned requests
block: Prepare bdrv_aligned_pwritev() for byte-aligned requests
raw-posix: Switch to bdrv_co_* interfaces
raw-posix: Implement .bdrv_co_preadv/pwritev
block: Don't enforce 512 byte minimum alignment
block.c | 2 +-
block/io.c | 125 +++++++++++++++++++++++++-------------------------
block/linux-aio.c | 83 ++++++++++++++++++++++++---------
block/mirror.c | 10 ++--
block/raw-aio.h | 2 +
block/raw-posix.c | 61 ++++++++++++------------
include/block/block.h | 10 ++--
7 files changed, 169 insertions(+), 124 deletions(-)
--
1.8.3.1
^ permalink raw reply [flat|nested] 24+ messages in thread
* [Qemu-devel] [PATCH 1/6] block: Byte-based bdrv_co_do_copy_on_readv()
2016-06-08 14:10 [Qemu-devel] [PATCH 0/6] block: Enable byte granularity I/O Kevin Wolf
@ 2016-06-08 14:10 ` Kevin Wolf
2016-06-08 14:25 ` Eric Blake
2016-06-14 12:09 ` Stefan Hajnoczi
2016-06-08 14:10 ` [Qemu-devel] [PATCH 2/6] block: Prepare bdrv_aligned_preadv() for byte-aligned requests Kevin Wolf
` (5 subsequent siblings)
6 siblings, 2 replies; 24+ messages in thread
From: Kevin Wolf @ 2016-06-08 14:10 UTC (permalink / raw)
To: qemu-block; +Cc: kwolf, eblake, pbonzini, mreitz, stefanha, famz, qemu-devel
In a first step to convert the common I/O path to work on bytes rather
than sectors, this converts the copy-on-read logic that is used by
bdrv_aligned_preadv().
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
block/io.c | 63 +++++++++++++++++++++++++++++++--------------------
block/mirror.c | 10 ++++----
include/block/block.h | 10 +++++---
3 files changed, 51 insertions(+), 32 deletions(-)
diff --git a/block/io.c b/block/io.c
index fb99a71..3b34f20 100644
--- a/block/io.c
+++ b/block/io.c
@@ -404,12 +404,12 @@ static void mark_request_serialising(BdrvTrackedRequest *req, uint64_t align)
}
/**
- * Round a region to cluster boundaries
+ * Round a region to cluster boundaries (sector-based)
*/
-void bdrv_round_to_clusters(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors,
- int64_t *cluster_sector_num,
- int *cluster_nb_sectors)
+void bdrv_round_sectors_to_clusters(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors,
+ int64_t *cluster_sector_num,
+ int *cluster_nb_sectors)
{
BlockDriverInfo bdi;
@@ -424,6 +424,26 @@ void bdrv_round_to_clusters(BlockDriverState *bs,
}
}
+/**
+ * Round a region to cluster boundaries
+ */
+void bdrv_round_to_clusters(BlockDriverState *bs,
+ int64_t offset, unsigned int bytes,
+ int64_t *cluster_offset,
+ unsigned int *cluster_bytes)
+{
+ BlockDriverInfo bdi;
+
+ if (bdrv_get_info(bs, &bdi) < 0 || bdi.cluster_size == 0) {
+ *cluster_offset = offset;
+ *cluster_bytes = bytes;
+ } else {
+ int64_t c = bdi.cluster_size;
+ *cluster_offset = QEMU_ALIGN_DOWN(offset, c);
+ *cluster_bytes = QEMU_ALIGN_UP(offset - *cluster_offset + bytes, c);
+ }
+}
+
static int bdrv_get_cluster_size(BlockDriverState *bs)
{
BlockDriverInfo bdi;
@@ -861,7 +881,7 @@ emulate_flags:
}
static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
+ int64_t offset, unsigned int bytes, QEMUIOVector *qiov)
{
/* Perform I/O through a temporary buffer so that users who scribble over
* their read buffer while the operation is in progress do not end up
@@ -873,21 +893,20 @@ static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs,
BlockDriver *drv = bs->drv;
struct iovec iov;
QEMUIOVector bounce_qiov;
- int64_t cluster_sector_num;
- int cluster_nb_sectors;
+ int64_t cluster_offset;
+ unsigned int cluster_bytes;
size_t skip_bytes;
int ret;
/* Cover entire cluster so no additional backing file I/O is required when
* allocating cluster in the image file.
*/
- bdrv_round_to_clusters(bs, sector_num, nb_sectors,
- &cluster_sector_num, &cluster_nb_sectors);
+ bdrv_round_to_clusters(bs, offset, bytes, &cluster_offset, &cluster_bytes);
- trace_bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors,
- cluster_sector_num, cluster_nb_sectors);
+ trace_bdrv_co_do_copy_on_readv(bs, offset, bytes,
+ cluster_offset, cluster_bytes);
- iov.iov_len = cluster_nb_sectors * BDRV_SECTOR_SIZE;
+ iov.iov_len = cluster_bytes;
iov.iov_base = bounce_buffer = qemu_try_blockalign(bs, iov.iov_len);
if (bounce_buffer == NULL) {
ret = -ENOMEM;
@@ -896,8 +915,7 @@ static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs,
qemu_iovec_init_external(&bounce_qiov, &iov, 1);
- ret = bdrv_driver_preadv(bs, cluster_sector_num * BDRV_SECTOR_SIZE,
- cluster_nb_sectors * BDRV_SECTOR_SIZE,
+ ret = bdrv_driver_preadv(bs, cluster_offset, cluster_bytes,
&bounce_qiov, 0);
if (ret < 0) {
goto err;
@@ -905,16 +923,12 @@ static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs,
if (drv->bdrv_co_pwrite_zeroes &&
buffer_is_zero(bounce_buffer, iov.iov_len)) {
- ret = bdrv_co_do_pwrite_zeroes(bs,
- cluster_sector_num * BDRV_SECTOR_SIZE,
- cluster_nb_sectors * BDRV_SECTOR_SIZE,
- 0);
+ ret = bdrv_co_do_pwrite_zeroes(bs, cluster_offset, cluster_bytes, 0);
} else {
/* This does not change the data on the disk, it is not necessary
* to flush even in cache=writethrough mode.
*/
- ret = bdrv_driver_pwritev(bs, cluster_sector_num * BDRV_SECTOR_SIZE,
- cluster_nb_sectors * BDRV_SECTOR_SIZE,
+ ret = bdrv_driver_pwritev(bs, cluster_offset, cluster_bytes,
&bounce_qiov, 0);
}
@@ -926,9 +940,8 @@ static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs,
goto err;
}
- skip_bytes = (sector_num - cluster_sector_num) * BDRV_SECTOR_SIZE;
- qemu_iovec_from_buf(qiov, 0, bounce_buffer + skip_bytes,
- nb_sectors * BDRV_SECTOR_SIZE);
+ skip_bytes = offset - cluster_offset;
+ qemu_iovec_from_buf(qiov, 0, bounce_buffer + skip_bytes, bytes);
err:
qemu_vfree(bounce_buffer);
@@ -977,7 +990,7 @@ static int coroutine_fn bdrv_aligned_preadv(BlockDriverState *bs,
}
if (!ret || pnum != nb_sectors) {
- ret = bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors, qiov);
+ ret = bdrv_co_do_copy_on_readv(bs, offset, bytes, qiov);
goto out;
}
}
diff --git a/block/mirror.c b/block/mirror.c
index 80fd3c7..fbbc496 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -186,8 +186,9 @@ static int mirror_cow_align(MirrorBlockJob *s,
need_cow |= !test_bit((*sector_num + *nb_sectors - 1) / chunk_sectors,
s->cow_bitmap);
if (need_cow) {
- bdrv_round_to_clusters(blk_bs(s->target), *sector_num, *nb_sectors,
- &align_sector_num, &align_nb_sectors);
+ bdrv_round_sectors_to_clusters(blk_bs(s->target), *sector_num,
+ *nb_sectors, &align_sector_num,
+ &align_nb_sectors);
}
if (align_nb_sectors > max_sectors) {
@@ -386,8 +387,9 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
} else if (ret >= 0 && !(ret & BDRV_BLOCK_DATA)) {
int64_t target_sector_num;
int target_nb_sectors;
- bdrv_round_to_clusters(blk_bs(s->target), sector_num, io_sectors,
- &target_sector_num, &target_nb_sectors);
+ bdrv_round_sectors_to_clusters(blk_bs(s->target), sector_num,
+ io_sectors, &target_sector_num,
+ &target_nb_sectors);
if (target_sector_num == sector_num &&
target_nb_sectors == io_sectors) {
mirror_method = ret & BDRV_BLOCK_ZERO ?
diff --git a/include/block/block.h b/include/block/block.h
index 54cca28..fb0078f 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -401,10 +401,14 @@ int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,
const uint8_t *buf, int nb_sectors);
int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi);
ImageInfoSpecific *bdrv_get_specific_info(BlockDriverState *bs);
+void bdrv_round_sectors_to_clusters(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors,
+ int64_t *cluster_sector_num,
+ int *cluster_nb_sectors);
void bdrv_round_to_clusters(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors,
- int64_t *cluster_sector_num,
- int *cluster_nb_sectors);
+ int64_t offset, unsigned int bytes,
+ int64_t *cluster_offset,
+ unsigned int *cluster_bytes);
const char *bdrv_get_encrypted_filename(BlockDriverState *bs);
void bdrv_get_backing_filename(BlockDriverState *bs,
--
1.8.3.1
^ permalink raw reply related [flat|nested] 24+ messages in thread
* [Qemu-devel] [PATCH 2/6] block: Prepare bdrv_aligned_preadv() for byte-aligned requests
2016-06-08 14:10 [Qemu-devel] [PATCH 0/6] block: Enable byte granularity I/O Kevin Wolf
2016-06-08 14:10 ` [Qemu-devel] [PATCH 1/6] block: Byte-based bdrv_co_do_copy_on_readv() Kevin Wolf
@ 2016-06-08 14:10 ` Kevin Wolf
2016-06-08 14:33 ` Eric Blake
2016-06-14 12:09 ` Stefan Hajnoczi
2016-06-08 14:10 ` [Qemu-devel] [PATCH 3/6] block: Prepare bdrv_aligned_pwritev() " Kevin Wolf
` (4 subsequent siblings)
6 siblings, 2 replies; 24+ messages in thread
From: Kevin Wolf @ 2016-06-08 14:10 UTC (permalink / raw)
To: qemu-block; +Cc: kwolf, eblake, pbonzini, mreitz, stefanha, famz, qemu-devel
This patch makes bdrv_aligned_preadv() ready to accept byte-aligned
requests. Note that this doesn't mean that such requests are actually
made. The caller still ensures that all requests are aligned to at least
512 bytes.
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
block/io.c | 41 +++++++++++++++++------------------------
1 file changed, 17 insertions(+), 24 deletions(-)
diff --git a/block/io.c b/block/io.c
index 3b34f20..2fd88cb 100644
--- a/block/io.c
+++ b/block/io.c
@@ -959,11 +959,6 @@ static int coroutine_fn bdrv_aligned_preadv(BlockDriverState *bs,
{
int ret;
- int64_t sector_num = offset >> BDRV_SECTOR_BITS;
- unsigned int nb_sectors = bytes >> BDRV_SECTOR_BITS;
-
- assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
- assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
assert(!qiov || bytes == qiov->size);
assert((bs->open_flags & BDRV_O_NO_IO) == 0);
@@ -982,9 +977,12 @@ static int coroutine_fn bdrv_aligned_preadv(BlockDriverState *bs,
}
if (flags & BDRV_REQ_COPY_ON_READ) {
+ int64_t start_sector = offset >> BDRV_SECTOR_BITS;
+ int64_t end_sector = DIV_ROUND_UP(offset + bytes, BDRV_SECTOR_SIZE);
+ unsigned int nb_sectors = end_sector - start_sector;
int pnum;
- ret = bdrv_is_allocated(bs, sector_num, nb_sectors, &pnum);
+ ret = bdrv_is_allocated(bs, start_sector, nb_sectors, &pnum);
if (ret < 0) {
goto out;
}
@@ -1000,28 +998,24 @@ static int coroutine_fn bdrv_aligned_preadv(BlockDriverState *bs,
ret = bdrv_driver_preadv(bs, offset, bytes, qiov, 0);
} else {
/* Read zeros after EOF */
- int64_t total_sectors, max_nb_sectors;
+ int64_t total_bytes, max_bytes;
- total_sectors = bdrv_nb_sectors(bs);
- if (total_sectors < 0) {
- ret = total_sectors;
+ total_bytes = bdrv_getlength(bs);
+ if (total_bytes < 0) {
+ ret = total_bytes;
goto out;
}
- max_nb_sectors = ROUND_UP(MAX(0, total_sectors - sector_num),
- align >> BDRV_SECTOR_BITS);
- if (nb_sectors < max_nb_sectors) {
+ max_bytes = ROUND_UP(MAX(0, total_bytes - offset), align);
+ if (bytes < max_bytes) {
ret = bdrv_driver_preadv(bs, offset, bytes, qiov, 0);
- } else if (max_nb_sectors > 0) {
+ } else if (max_bytes > 0) {
QEMUIOVector local_qiov;
qemu_iovec_init(&local_qiov, qiov->niov);
- qemu_iovec_concat(&local_qiov, qiov, 0,
- max_nb_sectors * BDRV_SECTOR_SIZE);
+ qemu_iovec_concat(&local_qiov, qiov, 0, max_bytes);
- ret = bdrv_driver_preadv(bs, offset,
- max_nb_sectors * BDRV_SECTOR_SIZE,
- &local_qiov, 0);
+ ret = bdrv_driver_preadv(bs, offset, max_bytes, &local_qiov, 0);
qemu_iovec_destroy(&local_qiov);
} else {
@@ -1029,11 +1023,10 @@ static int coroutine_fn bdrv_aligned_preadv(BlockDriverState *bs,
}
/* Reading beyond end of file is supposed to produce zeroes */
- if (ret == 0 && total_sectors < sector_num + nb_sectors) {
- uint64_t offset = MAX(0, total_sectors - sector_num);
- uint64_t bytes = (sector_num + nb_sectors - offset) *
- BDRV_SECTOR_SIZE;
- qemu_iovec_memset(qiov, offset * BDRV_SECTOR_SIZE, 0, bytes);
+ if (ret == 0 && total_bytes < offset + bytes) {
+ uint64_t zero_offset = MAX(0, total_bytes - offset);
+ uint64_t zero_bytes = offset + bytes - zero_offset;
+ qemu_iovec_memset(qiov, zero_offset, 0, zero_bytes);
}
}
--
1.8.3.1
^ permalink raw reply related [flat|nested] 24+ messages in thread
* [Qemu-devel] [PATCH 3/6] block: Prepare bdrv_aligned_pwritev() for byte-aligned requests
2016-06-08 14:10 [Qemu-devel] [PATCH 0/6] block: Enable byte granularity I/O Kevin Wolf
2016-06-08 14:10 ` [Qemu-devel] [PATCH 1/6] block: Byte-based bdrv_co_do_copy_on_readv() Kevin Wolf
2016-06-08 14:10 ` [Qemu-devel] [PATCH 2/6] block: Prepare bdrv_aligned_preadv() for byte-aligned requests Kevin Wolf
@ 2016-06-08 14:10 ` Kevin Wolf
2016-06-08 14:46 ` Eric Blake
2016-06-14 12:09 ` Stefan Hajnoczi
2016-06-08 14:10 ` [Qemu-devel] [PATCH 4/6] raw-posix: Switch to bdrv_co_* interfaces Kevin Wolf
` (3 subsequent siblings)
6 siblings, 2 replies; 24+ messages in thread
From: Kevin Wolf @ 2016-06-08 14:10 UTC (permalink / raw)
To: qemu-block; +Cc: kwolf, eblake, pbonzini, mreitz, stefanha, famz, qemu-devel
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
block/io.c | 13 +++++--------
1 file changed, 5 insertions(+), 8 deletions(-)
diff --git a/block/io.c b/block/io.c
index 2fd88cb..4af9c59 100644
--- a/block/io.c
+++ b/block/io.c
@@ -1241,11 +1241,9 @@ static int coroutine_fn bdrv_aligned_pwritev(BlockDriverState *bs,
bool waited;
int ret;
- int64_t sector_num = offset >> BDRV_SECTOR_BITS;
- unsigned int nb_sectors = bytes >> BDRV_SECTOR_BITS;
+ int64_t start_sector = offset >> BDRV_SECTOR_BITS;
+ int64_t end_sector = DIV_ROUND_UP(offset + bytes, BDRV_SECTOR_SIZE);
- assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
- assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
assert(!qiov || bytes == qiov->size);
assert((bs->open_flags & BDRV_O_NO_IO) == 0);
@@ -1269,22 +1267,21 @@ static int coroutine_fn bdrv_aligned_pwritev(BlockDriverState *bs,
/* Do nothing, write notifier decided to fail this request */
} else if (flags & BDRV_REQ_ZERO_WRITE) {
bdrv_debug_event(bs, BLKDBG_PWRITEV_ZERO);
- ret = bdrv_co_do_pwrite_zeroes(bs, sector_num << BDRV_SECTOR_BITS,
- nb_sectors << BDRV_SECTOR_BITS, flags);
+ ret = bdrv_co_do_pwrite_zeroes(bs, offset, bytes, flags);
} else {
bdrv_debug_event(bs, BLKDBG_PWRITEV);
ret = bdrv_driver_pwritev(bs, offset, bytes, qiov, flags);
}
bdrv_debug_event(bs, BLKDBG_PWRITEV_DONE);
- bdrv_set_dirty(bs, sector_num, nb_sectors);
+ bdrv_set_dirty(bs, start_sector, end_sector - start_sector);
if (bs->wr_highest_offset < offset + bytes) {
bs->wr_highest_offset = offset + bytes;
}
if (ret >= 0) {
- bs->total_sectors = MAX(bs->total_sectors, sector_num + nb_sectors);
+ bs->total_sectors = MAX(bs->total_sectors, end_sector);
}
return ret;
--
1.8.3.1
^ permalink raw reply related [flat|nested] 24+ messages in thread
* [Qemu-devel] [PATCH 4/6] raw-posix: Switch to bdrv_co_* interfaces
2016-06-08 14:10 [Qemu-devel] [PATCH 0/6] block: Enable byte granularity I/O Kevin Wolf
` (2 preceding siblings ...)
2016-06-08 14:10 ` [Qemu-devel] [PATCH 3/6] block: Prepare bdrv_aligned_pwritev() " Kevin Wolf
@ 2016-06-08 14:10 ` Kevin Wolf
2016-06-08 15:13 ` Eric Blake
2016-06-14 12:04 ` Stefan Hajnoczi
2016-06-08 14:10 ` [Qemu-devel] [PATCH 5/6] raw-posix: Implement .bdrv_co_preadv/pwritev Kevin Wolf
` (2 subsequent siblings)
6 siblings, 2 replies; 24+ messages in thread
From: Kevin Wolf @ 2016-06-08 14:10 UTC (permalink / raw)
To: qemu-block; +Cc: kwolf, eblake, pbonzini, mreitz, stefanha, famz, qemu-devel
In order to use the modern byte-based .bdrv_co_preadv/pwritev()
interface, this patch switches raw-posix to coroutine-based interfaces
as a first step. In terms of semantics and performance, it doesn't make
a difference with the existing code whether we go from a coroutine to a
callback-based interface already in block/io.c or only in linux-aio.c
As there have been concerns in the past that this change may be a step
in the wrong direction with respect to a possible AIO fast path, the
old callback-based interface for linux-aio is left around and can be
reactivated when a fast path (e.g. directly from virtio-blk dataplane,
bypassing the whole block layer) is implemented.
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
block/linux-aio.c | 85 +++++++++++++++++++++++++++++++++++++++++--------------
block/raw-aio.h | 2 ++
block/raw-posix.c | 59 ++++++++++++++++++--------------------
3 files changed, 92 insertions(+), 54 deletions(-)
diff --git a/block/linux-aio.c b/block/linux-aio.c
index 294b2bf..1a56543 100644
--- a/block/linux-aio.c
+++ b/block/linux-aio.c
@@ -13,6 +13,7 @@
#include "qemu/queue.h"
#include "block/raw-aio.h"
#include "qemu/event_notifier.h"
+#include "qemu/coroutine.h"
#include <libaio.h>
@@ -30,6 +31,7 @@
struct qemu_laiocb {
BlockAIOCB common;
+ Coroutine *co;
LinuxAioState *ctx;
struct iocb iocb;
ssize_t ret;
@@ -88,9 +90,14 @@ static void qemu_laio_process_completion(struct qemu_laiocb *laiocb)
}
}
}
- laiocb->common.cb(laiocb->common.opaque, ret);
- qemu_aio_unref(laiocb);
+ laiocb->ret = ret;
+ if (laiocb->co) {
+ qemu_coroutine_enter(laiocb->co, NULL);
+ } else {
+ laiocb->common.cb(laiocb->common.opaque, ret);
+ qemu_aio_unref(laiocb);
+ }
}
/* The completion BH fetches completed I/O requests and invokes their
@@ -232,22 +239,12 @@ void laio_io_unplug(BlockDriverState *bs, LinuxAioState *s)
}
}
-BlockAIOCB *laio_submit(BlockDriverState *bs, LinuxAioState *s, int fd,
- int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
- BlockCompletionFunc *cb, void *opaque, int type)
+static int laio_do_submit(int fd, struct qemu_laiocb *laiocb, off_t offset,
+ int type)
{
- struct qemu_laiocb *laiocb;
- struct iocb *iocbs;
- off_t offset = sector_num * 512;
-
- laiocb = qemu_aio_get(&laio_aiocb_info, bs, cb, opaque);
- laiocb->nbytes = nb_sectors * 512;
- laiocb->ctx = s;
- laiocb->ret = -EINPROGRESS;
- laiocb->is_read = (type == QEMU_AIO_READ);
- laiocb->qiov = qiov;
-
- iocbs = &laiocb->iocb;
+ LinuxAioState *s = laiocb->ctx;
+ struct iocb *iocbs = &laiocb->iocb;
+ QEMUIOVector *qiov = laiocb->qiov;
switch (type) {
case QEMU_AIO_WRITE:
@@ -260,7 +257,7 @@ BlockAIOCB *laio_submit(BlockDriverState *bs, LinuxAioState *s, int fd,
default:
fprintf(stderr, "%s: invalid AIO request type 0x%x.\n",
__func__, type);
- goto out_free_aiocb;
+ return -EIO;
}
io_set_eventfd(&laiocb->iocb, event_notifier_get_fd(&s->e));
@@ -270,11 +267,55 @@ BlockAIOCB *laio_submit(BlockDriverState *bs, LinuxAioState *s, int fd,
(!s->io_q.plugged || s->io_q.n >= MAX_QUEUED_IO)) {
ioq_submit(s);
}
- return &laiocb->common;
-out_free_aiocb:
- qemu_aio_unref(laiocb);
- return NULL;
+ return 0;
+}
+
+int laio_submit_co(BlockDriverState *bs, LinuxAioState *s, int fd,
+ int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, int type)
+{
+ off_t offset = sector_num * 512;
+ int ret;
+
+ struct qemu_laiocb laiocb = {
+ .co = qemu_coroutine_self(),
+ .nbytes = nb_sectors * 512,
+ .ctx = s,
+ .is_read = (type == QEMU_AIO_READ),
+ .qiov = qiov,
+ };
+
+ ret = laio_do_submit(fd, &laiocb, offset, type);
+ if (ret < 0) {
+ return ret;
+ }
+
+ qemu_coroutine_yield();
+ return laiocb.ret;
+}
+
+BlockAIOCB *laio_submit(BlockDriverState *bs, LinuxAioState *s, int fd,
+ int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+ BlockCompletionFunc *cb, void *opaque, int type)
+{
+ struct qemu_laiocb *laiocb;
+ off_t offset = sector_num * 512;
+ int ret;
+
+ laiocb = qemu_aio_get(&laio_aiocb_info, bs, cb, opaque);
+ laiocb->nbytes = nb_sectors * 512;
+ laiocb->ctx = s;
+ laiocb->ret = -EINPROGRESS;
+ laiocb->is_read = (type == QEMU_AIO_READ);
+ laiocb->qiov = qiov;
+
+ ret = laio_do_submit(fd, laiocb, offset, type);
+ if (ret < 0) {
+ qemu_aio_unref(laiocb);
+ return NULL;
+ }
+
+ return &laiocb->common;
}
void laio_detach_aio_context(LinuxAioState *s, AioContext *old_context)
diff --git a/block/raw-aio.h b/block/raw-aio.h
index 714714e..1037502 100644
--- a/block/raw-aio.h
+++ b/block/raw-aio.h
@@ -38,6 +38,8 @@
typedef struct LinuxAioState LinuxAioState;
LinuxAioState *laio_init(void);
void laio_cleanup(LinuxAioState *s);
+int laio_submit_co(BlockDriverState *bs, LinuxAioState *s, int fd,
+ int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, int type);
BlockAIOCB *laio_submit(BlockDriverState *bs, LinuxAioState *s, int fd,
int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
BlockCompletionFunc *cb, void *opaque, int type);
diff --git a/block/raw-posix.c b/block/raw-posix.c
index ce2e20f..af7f69f 100644
--- a/block/raw-posix.c
+++ b/block/raw-posix.c
@@ -1325,14 +1325,13 @@ static BlockAIOCB *paio_submit(BlockDriverState *bs, int fd,
return thread_pool_submit_aio(pool, aio_worker, acb, cb, opaque);
}
-static BlockAIOCB *raw_aio_submit(BlockDriverState *bs,
- int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
- BlockCompletionFunc *cb, void *opaque, int type)
+static int coroutine_fn raw_co_rw(BlockDriverState *bs, int64_t sector_num,
+ int nb_sectors, QEMUIOVector *qiov, int type)
{
BDRVRawState *s = bs->opaque;
if (fd_open(bs) < 0)
- return NULL;
+ return -EIO;
/*
* Check if the underlying device requires requests to be aligned,
@@ -1345,14 +1344,26 @@ static BlockAIOCB *raw_aio_submit(BlockDriverState *bs,
type |= QEMU_AIO_MISALIGNED;
#ifdef CONFIG_LINUX_AIO
} else if (s->use_aio) {
- return laio_submit(bs, s->aio_ctx, s->fd, sector_num, qiov,
- nb_sectors, cb, opaque, type);
+ return laio_submit_co(bs, s->aio_ctx, s->fd, sector_num, qiov,
+ nb_sectors, type);
#endif
}
}
- return paio_submit(bs, s->fd, sector_num, qiov, nb_sectors,
- cb, opaque, type);
+ return paio_submit_co(bs, s->fd, sector_num * BDRV_SECTOR_SIZE, qiov,
+ nb_sectors * BDRV_SECTOR_SIZE, type);
+}
+
+static int coroutine_fn raw_co_readv(BlockDriverState *bs, int64_t sector_num,
+ int nb_sectors, QEMUIOVector *qiov)
+{
+ return raw_co_rw(bs, sector_num, nb_sectors, qiov, QEMU_AIO_READ);
+}
+
+static int coroutine_fn raw_co_writev(BlockDriverState *bs, int64_t sector_num,
+ int nb_sectors, QEMUIOVector *qiov)
+{
+ return raw_co_rw(bs, sector_num, nb_sectors, qiov, QEMU_AIO_WRITE);
}
static void raw_aio_plug(BlockDriverState *bs)
@@ -1375,22 +1386,6 @@ static void raw_aio_unplug(BlockDriverState *bs)
#endif
}
-static BlockAIOCB *raw_aio_readv(BlockDriverState *bs,
- int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
- BlockCompletionFunc *cb, void *opaque)
-{
- return raw_aio_submit(bs, sector_num, qiov, nb_sectors,
- cb, opaque, QEMU_AIO_READ);
-}
-
-static BlockAIOCB *raw_aio_writev(BlockDriverState *bs,
- int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
- BlockCompletionFunc *cb, void *opaque)
-{
- return raw_aio_submit(bs, sector_num, qiov, nb_sectors,
- cb, opaque, QEMU_AIO_WRITE);
-}
-
static BlockAIOCB *raw_aio_flush(BlockDriverState *bs,
BlockCompletionFunc *cb, void *opaque)
{
@@ -1957,8 +1952,8 @@ BlockDriver bdrv_file = {
.bdrv_co_get_block_status = raw_co_get_block_status,
.bdrv_co_pwrite_zeroes = raw_co_pwrite_zeroes,
- .bdrv_aio_readv = raw_aio_readv,
- .bdrv_aio_writev = raw_aio_writev,
+ .bdrv_co_readv = raw_co_readv,
+ .bdrv_co_writev = raw_co_writev,
.bdrv_aio_flush = raw_aio_flush,
.bdrv_aio_discard = raw_aio_discard,
.bdrv_refresh_limits = raw_refresh_limits,
@@ -2405,8 +2400,8 @@ static BlockDriver bdrv_host_device = {
.create_opts = &raw_create_opts,
.bdrv_co_pwrite_zeroes = hdev_co_pwrite_zeroes,
- .bdrv_aio_readv = raw_aio_readv,
- .bdrv_aio_writev = raw_aio_writev,
+ .bdrv_co_readv = raw_co_readv,
+ .bdrv_co_writev = raw_co_writev,
.bdrv_aio_flush = raw_aio_flush,
.bdrv_aio_discard = hdev_aio_discard,
.bdrv_refresh_limits = raw_refresh_limits,
@@ -2535,8 +2530,8 @@ static BlockDriver bdrv_host_cdrom = {
.bdrv_create = hdev_create,
.create_opts = &raw_create_opts,
- .bdrv_aio_readv = raw_aio_readv,
- .bdrv_aio_writev = raw_aio_writev,
+ .bdrv_co_readv = raw_co_readv,
+ .bdrv_co_writev = raw_co_writev,
.bdrv_aio_flush = raw_aio_flush,
.bdrv_refresh_limits = raw_refresh_limits,
.bdrv_io_plug = raw_aio_plug,
@@ -2670,8 +2665,8 @@ static BlockDriver bdrv_host_cdrom = {
.bdrv_create = hdev_create,
.create_opts = &raw_create_opts,
- .bdrv_aio_readv = raw_aio_readv,
- .bdrv_aio_writev = raw_aio_writev,
+ .bdrv_co_readv = raw_co_readv,
+ .bdrv_co_writev = raw_co_writev,
.bdrv_aio_flush = raw_aio_flush,
.bdrv_refresh_limits = raw_refresh_limits,
.bdrv_io_plug = raw_aio_plug,
--
1.8.3.1
^ permalink raw reply related [flat|nested] 24+ messages in thread
* [Qemu-devel] [PATCH 5/6] raw-posix: Implement .bdrv_co_preadv/pwritev
2016-06-08 14:10 [Qemu-devel] [PATCH 0/6] block: Enable byte granularity I/O Kevin Wolf
` (3 preceding siblings ...)
2016-06-08 14:10 ` [Qemu-devel] [PATCH 4/6] raw-posix: Switch to bdrv_co_* interfaces Kevin Wolf
@ 2016-06-08 14:10 ` Kevin Wolf
2016-06-08 15:38 ` Eric Blake
2016-06-14 12:09 ` Stefan Hajnoczi
2016-06-08 14:10 ` [Qemu-devel] [PATCH 6/6] block: Don't enforce 512 byte minimum alignment Kevin Wolf
2016-06-13 13:27 ` [Qemu-devel] [PATCH 0/6] block: Enable byte granularity I/O Stefan Hajnoczi
6 siblings, 2 replies; 24+ messages in thread
From: Kevin Wolf @ 2016-06-08 14:10 UTC (permalink / raw)
To: qemu-block; +Cc: kwolf, eblake, pbonzini, mreitz, stefanha, famz, qemu-devel
The raw-posix block driver actually supports byte-aligned requests now
on non-O_DIRECT images, like it already (and previously incorrectly)
claimed in bs->request_alignment.
For some block drivers this means that a RMW cycle can be avoided when
they write sub-sector metadata e.g. for cluster allocation.
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
block/linux-aio.c | 6 ++----
block/raw-aio.h | 2 +-
block/raw-posix.c | 42 ++++++++++++++++++++++--------------------
3 files changed, 25 insertions(+), 25 deletions(-)
diff --git a/block/linux-aio.c b/block/linux-aio.c
index 1a56543..8dc34db 100644
--- a/block/linux-aio.c
+++ b/block/linux-aio.c
@@ -272,14 +272,12 @@ static int laio_do_submit(int fd, struct qemu_laiocb *laiocb, off_t offset,
}
int laio_submit_co(BlockDriverState *bs, LinuxAioState *s, int fd,
- int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, int type)
+ uint64_t offset, QEMUIOVector *qiov, int type)
{
- off_t offset = sector_num * 512;
int ret;
-
struct qemu_laiocb laiocb = {
.co = qemu_coroutine_self(),
- .nbytes = nb_sectors * 512,
+ .nbytes = qiov->size,
.ctx = s,
.is_read = (type == QEMU_AIO_READ),
.qiov = qiov,
diff --git a/block/raw-aio.h b/block/raw-aio.h
index 1037502..3f5b8bb 100644
--- a/block/raw-aio.h
+++ b/block/raw-aio.h
@@ -39,7 +39,7 @@ typedef struct LinuxAioState LinuxAioState;
LinuxAioState *laio_init(void);
void laio_cleanup(LinuxAioState *s);
int laio_submit_co(BlockDriverState *bs, LinuxAioState *s, int fd,
- int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, int type);
+ uint64_t offset, QEMUIOVector *qiov, int type);
BlockAIOCB *laio_submit(BlockDriverState *bs, LinuxAioState *s, int fd,
int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
BlockCompletionFunc *cb, void *opaque, int type);
diff --git a/block/raw-posix.c b/block/raw-posix.c
index af7f69f..0db7876 100644
--- a/block/raw-posix.c
+++ b/block/raw-posix.c
@@ -1325,8 +1325,8 @@ static BlockAIOCB *paio_submit(BlockDriverState *bs, int fd,
return thread_pool_submit_aio(pool, aio_worker, acb, cb, opaque);
}
-static int coroutine_fn raw_co_rw(BlockDriverState *bs, int64_t sector_num,
- int nb_sectors, QEMUIOVector *qiov, int type)
+static int coroutine_fn raw_co_prw(BlockDriverState *bs, uint64_t offset,
+ uint64_t bytes, QEMUIOVector *qiov, int type)
{
BDRVRawState *s = bs->opaque;
@@ -1344,26 +1344,27 @@ static int coroutine_fn raw_co_rw(BlockDriverState *bs, int64_t sector_num,
type |= QEMU_AIO_MISALIGNED;
#ifdef CONFIG_LINUX_AIO
} else if (s->use_aio) {
- return laio_submit_co(bs, s->aio_ctx, s->fd, sector_num, qiov,
- nb_sectors, type);
+ assert(qiov->size == bytes);
+ return laio_submit_co(bs, s->aio_ctx, s->fd, offset, qiov, type);
#endif
}
}
- return paio_submit_co(bs, s->fd, sector_num * BDRV_SECTOR_SIZE, qiov,
- nb_sectors * BDRV_SECTOR_SIZE, type);
+ return paio_submit_co(bs, s->fd, offset, qiov, bytes, type);
}
-static int coroutine_fn raw_co_readv(BlockDriverState *bs, int64_t sector_num,
- int nb_sectors, QEMUIOVector *qiov)
+static int coroutine_fn raw_co_preadv(BlockDriverState *bs, uint64_t offset,
+ uint64_t bytes, QEMUIOVector *qiov,
+ int flags)
{
- return raw_co_rw(bs, sector_num, nb_sectors, qiov, QEMU_AIO_READ);
+ return raw_co_prw(bs, offset, bytes, qiov, QEMU_AIO_READ);
}
-static int coroutine_fn raw_co_writev(BlockDriverState *bs, int64_t sector_num,
- int nb_sectors, QEMUIOVector *qiov)
+static int coroutine_fn raw_co_pwritev(BlockDriverState *bs, uint64_t offset,
+ uint64_t bytes, QEMUIOVector *qiov,
+ int flags)
{
- return raw_co_rw(bs, sector_num, nb_sectors, qiov, QEMU_AIO_WRITE);
+ return raw_co_prw(bs, offset, bytes, qiov, QEMU_AIO_WRITE);
}
static void raw_aio_plug(BlockDriverState *bs)
@@ -1952,8 +1953,8 @@ BlockDriver bdrv_file = {
.bdrv_co_get_block_status = raw_co_get_block_status,
.bdrv_co_pwrite_zeroes = raw_co_pwrite_zeroes,
- .bdrv_co_readv = raw_co_readv,
- .bdrv_co_writev = raw_co_writev,
+ .bdrv_co_preadv = raw_co_preadv,
+ .bdrv_co_pwritev = raw_co_pwritev,
.bdrv_aio_flush = raw_aio_flush,
.bdrv_aio_discard = raw_aio_discard,
.bdrv_refresh_limits = raw_refresh_limits,
@@ -2400,8 +2401,8 @@ static BlockDriver bdrv_host_device = {
.create_opts = &raw_create_opts,
.bdrv_co_pwrite_zeroes = hdev_co_pwrite_zeroes,
- .bdrv_co_readv = raw_co_readv,
- .bdrv_co_writev = raw_co_writev,
+ .bdrv_co_preadv = raw_co_preadv,
+ .bdrv_co_pwritev = raw_co_pwritev,
.bdrv_aio_flush = raw_aio_flush,
.bdrv_aio_discard = hdev_aio_discard,
.bdrv_refresh_limits = raw_refresh_limits,
@@ -2530,8 +2531,9 @@ static BlockDriver bdrv_host_cdrom = {
.bdrv_create = hdev_create,
.create_opts = &raw_create_opts,
- .bdrv_co_readv = raw_co_readv,
- .bdrv_co_writev = raw_co_writev,
+
+ .bdrv_co_preadv = raw_co_preadv,
+ .bdrv_co_pwritev = raw_co_pwritev,
.bdrv_aio_flush = raw_aio_flush,
.bdrv_refresh_limits = raw_refresh_limits,
.bdrv_io_plug = raw_aio_plug,
@@ -2665,8 +2667,8 @@ static BlockDriver bdrv_host_cdrom = {
.bdrv_create = hdev_create,
.create_opts = &raw_create_opts,
- .bdrv_co_readv = raw_co_readv,
- .bdrv_co_writev = raw_co_writev,
+ .bdrv_co_preadv = raw_co_preadv,
+ .bdrv_co_pwritev = raw_co_pwritev,
.bdrv_aio_flush = raw_aio_flush,
.bdrv_refresh_limits = raw_refresh_limits,
.bdrv_io_plug = raw_aio_plug,
--
1.8.3.1
^ permalink raw reply related [flat|nested] 24+ messages in thread
* [Qemu-devel] [PATCH 6/6] block: Don't enforce 512 byte minimum alignment
2016-06-08 14:10 [Qemu-devel] [PATCH 0/6] block: Enable byte granularity I/O Kevin Wolf
` (4 preceding siblings ...)
2016-06-08 14:10 ` [Qemu-devel] [PATCH 5/6] raw-posix: Implement .bdrv_co_preadv/pwritev Kevin Wolf
@ 2016-06-08 14:10 ` Kevin Wolf
2016-06-08 16:06 ` Eric Blake
2016-06-14 12:09 ` Stefan Hajnoczi
2016-06-13 13:27 ` [Qemu-devel] [PATCH 0/6] block: Enable byte granularity I/O Stefan Hajnoczi
6 siblings, 2 replies; 24+ messages in thread
From: Kevin Wolf @ 2016-06-08 14:10 UTC (permalink / raw)
To: qemu-block; +Cc: kwolf, eblake, pbonzini, mreitz, stefanha, famz, qemu-devel
If block drivers say that they can do an alignment < 512 bytes, let's
just suppose they mean it. raw-posix used to be an offender with respect
to this, but it can actually deal with byte-aligned requests now.
The default is still 512 bytes for any drivers that only implement
sector-based interfaces, but it is 1 now for drivers that implement
.bdrv_co_preadv.
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
block.c | 2 +-
block/io.c | 8 +++-----
2 files changed, 4 insertions(+), 6 deletions(-)
diff --git a/block.c b/block.c
index f54bc25..3d850a2 100644
--- a/block.c
+++ b/block.c
@@ -937,7 +937,7 @@ static int bdrv_open_common(BlockDriverState *bs, BdrvChild *file,
goto fail_opts;
}
- bs->request_alignment = 512;
+ bs->request_alignment = drv->bdrv_co_preadv ? 1 : 512;
bs->zero_beyond_eof = true;
bs->read_only = !(bs->open_flags & BDRV_O_RDWR);
diff --git a/block/io.c b/block/io.c
index 4af9c59..b3d6228 100644
--- a/block/io.c
+++ b/block/io.c
@@ -1044,8 +1044,7 @@ int coroutine_fn bdrv_co_preadv(BlockDriverState *bs,
BlockDriver *drv = bs->drv;
BdrvTrackedRequest req;
- /* TODO Lift BDRV_SECTOR_SIZE restriction in BlockDriver interface */
- uint64_t align = MAX(BDRV_SECTOR_SIZE, bs->request_alignment);
+ uint64_t align = bs->request_alignment;
uint8_t *head_buf = NULL;
uint8_t *tail_buf = NULL;
QEMUIOVector local_qiov;
@@ -1296,7 +1295,7 @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BlockDriverState *bs,
uint8_t *buf = NULL;
QEMUIOVector local_qiov;
struct iovec iov;
- uint64_t align = MAX(BDRV_SECTOR_SIZE, bs->request_alignment);
+ uint64_t align = bs->request_alignment;
unsigned int head_padding_bytes, tail_padding_bytes;
int ret = 0;
@@ -1383,8 +1382,7 @@ int coroutine_fn bdrv_co_pwritev(BlockDriverState *bs,
BdrvRequestFlags flags)
{
BdrvTrackedRequest req;
- /* TODO Lift BDRV_SECTOR_SIZE restriction in BlockDriver interface */
- uint64_t align = MAX(BDRV_SECTOR_SIZE, bs->request_alignment);
+ uint64_t align = bs->request_alignment;
uint8_t *head_buf = NULL;
uint8_t *tail_buf = NULL;
QEMUIOVector local_qiov;
--
1.8.3.1
^ permalink raw reply related [flat|nested] 24+ messages in thread
* Re: [Qemu-devel] [PATCH 1/6] block: Byte-based bdrv_co_do_copy_on_readv()
2016-06-08 14:10 ` [Qemu-devel] [PATCH 1/6] block: Byte-based bdrv_co_do_copy_on_readv() Kevin Wolf
@ 2016-06-08 14:25 ` Eric Blake
2016-06-14 12:09 ` Stefan Hajnoczi
1 sibling, 0 replies; 24+ messages in thread
From: Eric Blake @ 2016-06-08 14:25 UTC (permalink / raw)
To: Kevin Wolf, qemu-block; +Cc: pbonzini, mreitz, stefanha, famz, qemu-devel
[-- Attachment #1: Type: text/plain, Size: 1752 bytes --]
On 06/08/2016 08:10 AM, Kevin Wolf wrote:
> In a first step to convert the common I/O path to work on bytes rather
> than sectors, this converts the copy-on-read logic that is used by
> bdrv_aligned_preadv().
>
> Signed-off-by: Kevin Wolf <kwolf@redhat.com>
> ---
> block/io.c | 63 +++++++++++++++++++++++++++++++--------------------
> block/mirror.c | 10 ++++----
> include/block/block.h | 10 +++++---
> 3 files changed, 51 insertions(+), 32 deletions(-)
>
> @@ -873,21 +893,20 @@ static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs,
> BlockDriver *drv = bs->drv;
> struct iovec iov;
> QEMUIOVector bounce_qiov;
> - int64_t cluster_sector_num;
> - int cluster_nb_sectors;
> + int64_t cluster_offset;
> + unsigned int cluster_bytes;
> size_t skip_bytes;
> int ret;
>
> /* Cover entire cluster so no additional backing file I/O is required when
> * allocating cluster in the image file.
> */
> - bdrv_round_to_clusters(bs, sector_num, nb_sectors,
> - &cluster_sector_num, &cluster_nb_sectors);
> + bdrv_round_to_clusters(bs, offset, bytes, &cluster_offset, &cluster_bytes);
>
> - trace_bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors,
> - cluster_sector_num, cluster_nb_sectors);
> + trace_bdrv_co_do_copy_on_readv(bs, offset, bytes,
> + cluster_offset, cluster_bytes);
Missing patch to trace-events to advertise new semantics.
With that fixed,
Reviewed-by: Eric Blake <eblake@redhat.com>
--
Eric Blake eblake redhat com +1-919-301-3266
Libvirt virtualization library http://libvirt.org
[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 604 bytes --]
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [Qemu-devel] [PATCH 2/6] block: Prepare bdrv_aligned_preadv() for byte-aligned requests
2016-06-08 14:10 ` [Qemu-devel] [PATCH 2/6] block: Prepare bdrv_aligned_preadv() for byte-aligned requests Kevin Wolf
@ 2016-06-08 14:33 ` Eric Blake
2016-06-14 12:09 ` Stefan Hajnoczi
1 sibling, 0 replies; 24+ messages in thread
From: Eric Blake @ 2016-06-08 14:33 UTC (permalink / raw)
To: Kevin Wolf, qemu-block; +Cc: pbonzini, mreitz, stefanha, famz, qemu-devel
[-- Attachment #1: Type: text/plain, Size: 1871 bytes --]
On 06/08/2016 08:10 AM, Kevin Wolf wrote:
> This patch makes bdrv_aligned_preadv() ready to accept byte-aligned
> requests. Note that this doesn't mean that such requests are actually
> made. The caller still ensures that all requests are aligned to at least
> 512 bytes.
>
> Signed-off-by: Kevin Wolf <kwolf@redhat.com>
> ---
> block/io.c | 41 +++++++++++++++++------------------------
> 1 file changed, 17 insertions(+), 24 deletions(-)
>
> diff --git a/block/io.c b/block/io.c
> index 3b34f20..2fd88cb 100644
> --- a/block/io.c
> +++ b/block/io.c
> @@ -959,11 +959,6 @@ static int coroutine_fn bdrv_aligned_preadv(BlockDriverState *bs,
> {
> int ret;
>
> - int64_t sector_num = offset >> BDRV_SECTOR_BITS;
> - unsigned int nb_sectors = bytes >> BDRV_SECTOR_BITS;
> -
> - assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
> - assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
My recent patch proposal conflicts here; I can drop (or rebase) mine,
but I think you want to keep the asserts, after modifying them to:
assert((offset & (align - 1)) == 0);
assert((bytes & (align - 1)) == 0);
and maybe add 'assert(is_power_of_2(align)); for good measure.
>
> - max_nb_sectors = ROUND_UP(MAX(0, total_sectors - sector_num),
> - align >> BDRV_SECTOR_BITS);
> - if (nb_sectors < max_nb_sectors) {
> + max_bytes = ROUND_UP(MAX(0, total_bytes - offset), align);
> + if (bytes < max_bytes) {
Another one of my posted patches switches this to <= to avoid a harmless
off-by-one, also something I can rebase.
Whether you add the assertions, or leave it for me to (re-)add them in
my rebase,
Reviewed-by: Eric Blake <eblake@redhat.com>
--
Eric Blake eblake redhat com +1-919-301-3266
Libvirt virtualization library http://libvirt.org
[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 604 bytes --]
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [Qemu-devel] [PATCH 3/6] block: Prepare bdrv_aligned_pwritev() for byte-aligned requests
2016-06-08 14:10 ` [Qemu-devel] [PATCH 3/6] block: Prepare bdrv_aligned_pwritev() " Kevin Wolf
@ 2016-06-08 14:46 ` Eric Blake
2016-06-14 12:09 ` Stefan Hajnoczi
1 sibling, 0 replies; 24+ messages in thread
From: Eric Blake @ 2016-06-08 14:46 UTC (permalink / raw)
To: Kevin Wolf, qemu-block; +Cc: pbonzini, mreitz, stefanha, famz, qemu-devel
[-- Attachment #1: Type: text/plain, Size: 1665 bytes --]
On 06/08/2016 08:10 AM, Kevin Wolf wrote:
> Signed-off-by: Kevin Wolf <kwolf@redhat.com>
> ---
> block/io.c | 13 +++++--------
> 1 file changed, 5 insertions(+), 8 deletions(-)
>
> diff --git a/block/io.c b/block/io.c
> index 2fd88cb..4af9c59 100644
> --- a/block/io.c
> +++ b/block/io.c
> @@ -1241,11 +1241,9 @@ static int coroutine_fn bdrv_aligned_pwritev(BlockDriverState *bs,
> bool waited;
> int ret;
>
> - int64_t sector_num = offset >> BDRV_SECTOR_BITS;
> - unsigned int nb_sectors = bytes >> BDRV_SECTOR_BITS;
> + int64_t start_sector = offset >> BDRV_SECTOR_BITS;
> + int64_t end_sector = DIV_ROUND_UP(offset + bytes, BDRV_SECTOR_SIZE);
>
> - assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
> - assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
I wonder if we should add an 'unsigned int align' parameter to this
function, to mirror bdrv_aligned_preadv() - that way, we can assert that
any divisions we do based on 'align' are indeed aligned. If we do that,
then just as in the previous patch, I think we would want to keep
'assert((offset & (align - 1)) == 0)'. But at the moment, I don't see
any divisions based on align, so I think you are okay.
> if (ret >= 0) {
> - bs->total_sectors = MAX(bs->total_sectors, sector_num + nb_sectors);
> + bs->total_sectors = MAX(bs->total_sectors, end_sector);
Someday, we may want bs->total_bytes instead of total_sectors, but
unrelated to this patch.
Looks clean:
Reviewed-by: Eric Blake <eblake@redhat.com>
--
Eric Blake eblake redhat com +1-919-301-3266
Libvirt virtualization library http://libvirt.org
[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 604 bytes --]
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [Qemu-devel] [PATCH 4/6] raw-posix: Switch to bdrv_co_* interfaces
2016-06-08 14:10 ` [Qemu-devel] [PATCH 4/6] raw-posix: Switch to bdrv_co_* interfaces Kevin Wolf
@ 2016-06-08 15:13 ` Eric Blake
2016-06-14 12:04 ` Stefan Hajnoczi
1 sibling, 0 replies; 24+ messages in thread
From: Eric Blake @ 2016-06-08 15:13 UTC (permalink / raw)
To: Kevin Wolf, qemu-block; +Cc: pbonzini, mreitz, stefanha, famz, qemu-devel
[-- Attachment #1: Type: text/plain, Size: 3123 bytes --]
On 06/08/2016 08:10 AM, Kevin Wolf wrote:
> In order to use the modern byte-based .bdrv_co_preadv/pwritev()
> interface, this patch switches raw-posix to coroutine-based interfaces
> as a first step. In terms of semantics and performance, it doesn't make
> a difference with the existing code whether we go from a coroutine to a
> callback-based interface already in block/io.c or only in linux-aio.c
>
> As there have been concerns in the past that this change may be a step
> in the wrong direction with respect to a possible AIO fast path, the
> old callback-based interface for linux-aio is left around and can be
> reactivated when a fast path (e.g. directly from virtio-blk dataplane,
> bypassing the whole block layer) is implemented.
>
> Signed-off-by: Kevin Wolf <kwolf@redhat.com>
> ---
> block/linux-aio.c | 85 +++++++++++++++++++++++++++++++++++++++++--------------
> block/raw-aio.h | 2 ++
> block/raw-posix.c | 59 ++++++++++++++++++--------------------
> 3 files changed, 92 insertions(+), 54 deletions(-)
>
> +
> +int laio_submit_co(BlockDriverState *bs, LinuxAioState *s, int fd,
> + int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, int type)
> +{
> + off_t offset = sector_num * 512;
BDRV_SECTOR_SIZE instead of a magic number?
> + int ret;
> +
> + struct qemu_laiocb laiocb = {
> + .co = qemu_coroutine_self(),
> + .nbytes = nb_sectors * 512,
and again
> +
> +BlockAIOCB *laio_submit(BlockDriverState *bs, LinuxAioState *s, int fd,
> + int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
> + BlockCompletionFunc *cb, void *opaque, int type)
> +{
> + struct qemu_laiocb *laiocb;
> + off_t offset = sector_num * 512;
> + int ret;
> +
> + laiocb = qemu_aio_get(&laio_aiocb_info, bs, cb, opaque);
> + laiocb->nbytes = nb_sectors * 512;
and again
> @@ -1345,14 +1344,26 @@ static BlockAIOCB *raw_aio_submit(BlockDriverState *bs,
> type |= QEMU_AIO_MISALIGNED;
> #ifdef CONFIG_LINUX_AIO
> } else if (s->use_aio) {
> - return laio_submit(bs, s->aio_ctx, s->fd, sector_num, qiov,
> - nb_sectors, cb, opaque, type);
> + return laio_submit_co(bs, s->aio_ctx, s->fd, sector_num, qiov,
> + nb_sectors, type);
Indentation is now off
> @@ -1957,8 +1952,8 @@ BlockDriver bdrv_file = {
> .bdrv_co_get_block_status = raw_co_get_block_status,
> .bdrv_co_pwrite_zeroes = raw_co_pwrite_zeroes,
>
> - .bdrv_aio_readv = raw_aio_readv,
> - .bdrv_aio_writev = raw_aio_writev,
> + .bdrv_co_readv = raw_co_readv,
> + .bdrv_co_writev = raw_co_writev,
> .bdrv_aio_flush = raw_aio_flush,
Why bother with indented alignment of '=' when none of the neighbors do?
Findings are minor enough, so up to you whether to fix them or just mark
this:
Reviewed-by: Eric Blake <eblake@redhat.com>
--
Eric Blake eblake redhat com +1-919-301-3266
Libvirt virtualization library http://libvirt.org
[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 604 bytes --]
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [Qemu-devel] [PATCH 5/6] raw-posix: Implement .bdrv_co_preadv/pwritev
2016-06-08 14:10 ` [Qemu-devel] [PATCH 5/6] raw-posix: Implement .bdrv_co_preadv/pwritev Kevin Wolf
@ 2016-06-08 15:38 ` Eric Blake
2016-06-14 12:09 ` Stefan Hajnoczi
1 sibling, 0 replies; 24+ messages in thread
From: Eric Blake @ 2016-06-08 15:38 UTC (permalink / raw)
To: Kevin Wolf, qemu-block; +Cc: pbonzini, mreitz, stefanha, famz, qemu-devel
[-- Attachment #1: Type: text/plain, Size: 4098 bytes --]
On 06/08/2016 08:10 AM, Kevin Wolf wrote:
> The raw-posix block driver actually supports byte-aligned requests now
> on non-O_DIRECT images, like it already (and previously incorrectly)
> claimed in bs->request_alignment.
>
> For some block drivers this means that a RMW cycle can be avoided when
> they write sub-sector metadata e.g. for cluster allocation.
[well, there's still probably a RMW going on, but it's being done by the
kernel, rather than qemu - and choice of caching may let the kernel
optimize things... not worth cluttering the commit message with this,
though]
>
> Signed-off-by: Kevin Wolf <kwolf@redhat.com>
> ---
> +++ b/block/linux-aio.c
> @@ -272,14 +272,12 @@ static int laio_do_submit(int fd, struct qemu_laiocb *laiocb, off_t offset,
> }
>
> int laio_submit_co(BlockDriverState *bs, LinuxAioState *s, int fd,
> - int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, int type)
> + uint64_t offset, QEMUIOVector *qiov, int type)
> {
> - off_t offset = sector_num * 512;
> int ret;
> -
> struct qemu_laiocb laiocb = {
> .co = qemu_coroutine_self(),
> - .nbytes = nb_sectors * 512,
> + .nbytes = qiov->size,
So for this interface, we require non-NULL qiov and no duplicated
length; I guess it isn't used for write_zeroes. We may still want to do
some consistency sweep to decide what level of NULL-ness we want for
representing write_zeroes, rather than ad hoc decisions at each layer of
the call stack, but that's a task for another day.
> @@ -1344,26 +1344,27 @@ static int coroutine_fn raw_co_rw(BlockDriverState *bs, int64_t sector_num,
> type |= QEMU_AIO_MISALIGNED;
> #ifdef CONFIG_LINUX_AIO
> } else if (s->use_aio) {
> - return laio_submit_co(bs, s->aio_ctx, s->fd, sector_num, qiov,
> - nb_sectors, type);
> + assert(qiov->size == bytes);
Worth hoisting the assertion outside of the #ifdef?...
> + return laio_submit_co(bs, s->aio_ctx, s->fd, offset, qiov, type);
> #endif
> }
> }
>
> - return paio_submit_co(bs, s->fd, sector_num * BDRV_SECTOR_SIZE, qiov,
> - nb_sectors * BDRV_SECTOR_SIZE, type);
> + return paio_submit_co(bs, s->fd, offset, qiov, bytes, type);
...then again, paio_submit_co() also does the assert - and this is more
evidence of our inconsistency on whether we duplicate a separate bytes
parameter or reuse qiov->size.
>
> -static int coroutine_fn raw_co_readv(BlockDriverState *bs, int64_t sector_num,
> - int nb_sectors, QEMUIOVector *qiov)
> +static int coroutine_fn raw_co_preadv(BlockDriverState *bs, uint64_t offset,
> + uint64_t bytes, QEMUIOVector *qiov,
> + int flags)
> {
> - return raw_co_rw(bs, sector_num, nb_sectors, qiov, QEMU_AIO_READ);
> + return raw_co_prw(bs, offset, bytes, qiov, QEMU_AIO_READ);
We ignore flags, but that's not a change in semantics. (Maybe someday
we need .supported_read_flags)
> }
>
> -static int coroutine_fn raw_co_writev(BlockDriverState *bs, int64_t sector_num,
> - int nb_sectors, QEMUIOVector *qiov)
> +static int coroutine_fn raw_co_pwritev(BlockDriverState *bs, uint64_t offset,
> + uint64_t bytes, QEMUIOVector *qiov,
> + int flags)
> {
> - return raw_co_rw(bs, sector_num, nb_sectors, qiov, QEMU_AIO_WRITE);
> + return raw_co_prw(bs, offset, bytes, qiov, QEMU_AIO_WRITE);
And here, we could assert(!flags) (since we intentionally don't set
.supported_write_flags) - but I won't insist.
None of my comments require a code change, other than a possible added
assertion, so:
Reviewed-by: Eric Blake <eblake@redhat.com>
--
Eric Blake eblake redhat com +1-919-301-3266
Libvirt virtualization library http://libvirt.org
[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 604 bytes --]
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [Qemu-devel] [PATCH 6/6] block: Don't enforce 512 byte minimum alignment
2016-06-08 14:10 ` [Qemu-devel] [PATCH 6/6] block: Don't enforce 512 byte minimum alignment Kevin Wolf
@ 2016-06-08 16:06 ` Eric Blake
2016-06-14 12:09 ` Stefan Hajnoczi
1 sibling, 0 replies; 24+ messages in thread
From: Eric Blake @ 2016-06-08 16:06 UTC (permalink / raw)
To: Kevin Wolf, qemu-block; +Cc: pbonzini, mreitz, stefanha, famz, qemu-devel
[-- Attachment #1: Type: text/plain, Size: 750 bytes --]
On 06/08/2016 08:10 AM, Kevin Wolf wrote:
> If block drivers say that they can do an alignment < 512 bytes, let's
> just suppose they mean it. raw-posix used to be an offender with respect
> to this, but it can actually deal with byte-aligned requests now.
>
> The default is still 512 bytes for any drivers that only implement
> sector-based interfaces, but it is 1 now for drivers that implement
> .bdrv_co_preadv.
>
> Signed-off-by: Kevin Wolf <kwolf@redhat.com>
> ---
> block.c | 2 +-
> block/io.c | 8 +++-----
> 2 files changed, 4 insertions(+), 6 deletions(-)
>
Reviewed-by: Eric Blake <eblake@redhat.com>
--
Eric Blake eblake redhat com +1-919-301-3266
Libvirt virtualization library http://libvirt.org
[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 604 bytes --]
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [Qemu-devel] [PATCH 0/6] block: Enable byte granularity I/O
2016-06-08 14:10 [Qemu-devel] [PATCH 0/6] block: Enable byte granularity I/O Kevin Wolf
` (5 preceding siblings ...)
2016-06-08 14:10 ` [Qemu-devel] [PATCH 6/6] block: Don't enforce 512 byte minimum alignment Kevin Wolf
@ 2016-06-13 13:27 ` Stefan Hajnoczi
2016-06-13 13:43 ` Kevin Wolf
6 siblings, 1 reply; 24+ messages in thread
From: Stefan Hajnoczi @ 2016-06-13 13:27 UTC (permalink / raw)
To: Kevin Wolf; +Cc: qemu-block, eblake, pbonzini, mreitz, famz, qemu-devel
[-- Attachment #1: Type: text/plain, Size: 1370 bytes --]
On Wed, Jun 08, 2016 at 04:10:05PM +0200, Kevin Wolf wrote:
> Previous series have already converted some block drivers to byte-based rather
> than sector-based interfaces. However, the common I/O path as well as raw-posix
> still enforced a minimum alignment of 512 bytes because some sector-based logic
> was involved.
>
> This patch series removes these limitations and a sub-sector request actually
> ends up as a sub-sector syscall now.
>
> Kevin Wolf (6):
> block: Byte-based bdrv_co_do_copy_on_readv()
> block: Prepare bdrv_aligned_preadv() for byte-aligned requests
> block: Prepare bdrv_aligned_pwritev() for byte-aligned requests
> raw-posix: Switch to bdrv_co_* interfaces
> raw-posix: Implement .bdrv_co_preadv/pwritev
> block: Don't enforce 512 byte minimum alignment
>
> block.c | 2 +-
> block/io.c | 125 +++++++++++++++++++++++++-------------------------
> block/linux-aio.c | 83 ++++++++++++++++++++++++---------
> block/mirror.c | 10 ++--
> block/raw-aio.h | 2 +
> block/raw-posix.c | 61 ++++++++++++------------
> include/block/block.h | 10 ++--
> 7 files changed, 169 insertions(+), 124 deletions(-)
I've taken an initial look and it looks good. Will review next revision
in depth so it can be merged after Eric's comments have been addressed.
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 473 bytes --]
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [Qemu-devel] [PATCH 0/6] block: Enable byte granularity I/O
2016-06-13 13:27 ` [Qemu-devel] [PATCH 0/6] block: Enable byte granularity I/O Stefan Hajnoczi
@ 2016-06-13 13:43 ` Kevin Wolf
2016-06-14 8:57 ` Stefan Hajnoczi
2016-06-14 12:10 ` Stefan Hajnoczi
0 siblings, 2 replies; 24+ messages in thread
From: Kevin Wolf @ 2016-06-13 13:43 UTC (permalink / raw)
To: Stefan Hajnoczi; +Cc: qemu-block, eblake, pbonzini, mreitz, famz, qemu-devel
[-- Attachment #1: Type: text/plain, Size: 1738 bytes --]
Am 13.06.2016 um 15:27 hat Stefan Hajnoczi geschrieben:
> On Wed, Jun 08, 2016 at 04:10:05PM +0200, Kevin Wolf wrote:
> > Previous series have already converted some block drivers to byte-based rather
> > than sector-based interfaces. However, the common I/O path as well as raw-posix
> > still enforced a minimum alignment of 512 bytes because some sector-based logic
> > was involved.
> >
> > This patch series removes these limitations and a sub-sector request actually
> > ends up as a sub-sector syscall now.
> >
> > Kevin Wolf (6):
> > block: Byte-based bdrv_co_do_copy_on_readv()
> > block: Prepare bdrv_aligned_preadv() for byte-aligned requests
> > block: Prepare bdrv_aligned_pwritev() for byte-aligned requests
> > raw-posix: Switch to bdrv_co_* interfaces
> > raw-posix: Implement .bdrv_co_preadv/pwritev
> > block: Don't enforce 512 byte minimum alignment
> >
> > block.c | 2 +-
> > block/io.c | 125 +++++++++++++++++++++++++-------------------------
> > block/linux-aio.c | 83 ++++++++++++++++++++++++---------
> > block/mirror.c | 10 ++--
> > block/raw-aio.h | 2 +
> > block/raw-posix.c | 61 ++++++++++++------------
> > include/block/block.h | 10 ++--
> > 7 files changed, 169 insertions(+), 124 deletions(-)
>
> I've taken an initial look and it looks good. Will review next revision
> in depth so it can be merged after Eric's comments have been addressed.
Eric commented a lot, but only requested very few minor changes that
wouldn't strictly require resending the series. If you think it's
worthwhile to send a v2 for them, I can do that, but it shouldn't make a
big difference for your review.
Kevin
[-- Attachment #2: Type: application/pgp-signature, Size: 836 bytes --]
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [Qemu-devel] [PATCH 0/6] block: Enable byte granularity I/O
2016-06-13 13:43 ` Kevin Wolf
@ 2016-06-14 8:57 ` Stefan Hajnoczi
2016-06-14 12:10 ` Stefan Hajnoczi
1 sibling, 0 replies; 24+ messages in thread
From: Stefan Hajnoczi @ 2016-06-14 8:57 UTC (permalink / raw)
To: Kevin Wolf; +Cc: qemu-block, eblake, pbonzini, mreitz, famz, qemu-devel
[-- Attachment #1: Type: text/plain, Size: 1893 bytes --]
On Mon, Jun 13, 2016 at 03:43:06PM +0200, Kevin Wolf wrote:
> Am 13.06.2016 um 15:27 hat Stefan Hajnoczi geschrieben:
> > On Wed, Jun 08, 2016 at 04:10:05PM +0200, Kevin Wolf wrote:
> > > Previous series have already converted some block drivers to byte-based rather
> > > than sector-based interfaces. However, the common I/O path as well as raw-posix
> > > still enforced a minimum alignment of 512 bytes because some sector-based logic
> > > was involved.
> > >
> > > This patch series removes these limitations and a sub-sector request actually
> > > ends up as a sub-sector syscall now.
> > >
> > > Kevin Wolf (6):
> > > block: Byte-based bdrv_co_do_copy_on_readv()
> > > block: Prepare bdrv_aligned_preadv() for byte-aligned requests
> > > block: Prepare bdrv_aligned_pwritev() for byte-aligned requests
> > > raw-posix: Switch to bdrv_co_* interfaces
> > > raw-posix: Implement .bdrv_co_preadv/pwritev
> > > block: Don't enforce 512 byte minimum alignment
> > >
> > > block.c | 2 +-
> > > block/io.c | 125 +++++++++++++++++++++++++-------------------------
> > > block/linux-aio.c | 83 ++++++++++++++++++++++++---------
> > > block/mirror.c | 10 ++--
> > > block/raw-aio.h | 2 +
> > > block/raw-posix.c | 61 ++++++++++++------------
> > > include/block/block.h | 10 ++--
> > > 7 files changed, 169 insertions(+), 124 deletions(-)
> >
> > I've taken an initial look and it looks good. Will review next revision
> > in depth so it can be merged after Eric's comments have been addressed.
>
> Eric commented a lot, but only requested very few minor changes that
> wouldn't strictly require resending the series. If you think it's
> worthwhile to send a v2 for them, I can do that, but it shouldn't make a
> big difference for your review.
Okay, I'll review v1.
Stefan
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 473 bytes --]
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [Qemu-devel] [PATCH 4/6] raw-posix: Switch to bdrv_co_* interfaces
2016-06-08 14:10 ` [Qemu-devel] [PATCH 4/6] raw-posix: Switch to bdrv_co_* interfaces Kevin Wolf
2016-06-08 15:13 ` Eric Blake
@ 2016-06-14 12:04 ` Stefan Hajnoczi
1 sibling, 0 replies; 24+ messages in thread
From: Stefan Hajnoczi @ 2016-06-14 12:04 UTC (permalink / raw)
To: Kevin Wolf; +Cc: qemu-block, eblake, pbonzini, mreitz, famz, qemu-devel
[-- Attachment #1: Type: text/plain, Size: 606 bytes --]
On Wed, Jun 08, 2016 at 04:10:09PM +0200, Kevin Wolf wrote:
> diff --git a/block/raw-aio.h b/block/raw-aio.h
> index 714714e..1037502 100644
> --- a/block/raw-aio.h
> +++ b/block/raw-aio.h
> @@ -38,6 +38,8 @@
> typedef struct LinuxAioState LinuxAioState;
> LinuxAioState *laio_init(void);
> void laio_cleanup(LinuxAioState *s);
> +int laio_submit_co(BlockDriverState *bs, LinuxAioState *s, int fd,
> + int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, int type);
Please add coroutine_fn.
Please rename to laio_co_submit(), the naming convention is
bdrv_co_foo() instead of bdrv_foo_co().
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 473 bytes --]
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [Qemu-devel] [PATCH 6/6] block: Don't enforce 512 byte minimum alignment
2016-06-08 14:10 ` [Qemu-devel] [PATCH 6/6] block: Don't enforce 512 byte minimum alignment Kevin Wolf
2016-06-08 16:06 ` Eric Blake
@ 2016-06-14 12:09 ` Stefan Hajnoczi
2016-06-14 13:04 ` Kevin Wolf
1 sibling, 1 reply; 24+ messages in thread
From: Stefan Hajnoczi @ 2016-06-14 12:09 UTC (permalink / raw)
To: Kevin Wolf; +Cc: qemu-block, eblake, pbonzini, mreitz, famz, qemu-devel
[-- Attachment #1: Type: text/plain, Size: 483 bytes --]
On Wed, Jun 08, 2016 at 04:10:11PM +0200, Kevin Wolf wrote:
> diff --git a/block.c b/block.c
> index f54bc25..3d850a2 100644
> --- a/block.c
> +++ b/block.c
> @@ -937,7 +937,7 @@ static int bdrv_open_common(BlockDriverState *bs, BdrvChild *file,
> goto fail_opts;
> }
>
> - bs->request_alignment = 512;
> + bs->request_alignment = drv->bdrv_co_preadv ? 1 : 512;
What happens in the raw-posix.c AIO case? There we should still use
512.
Stefan
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 473 bytes --]
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [Qemu-devel] [PATCH 1/6] block: Byte-based bdrv_co_do_copy_on_readv()
2016-06-08 14:10 ` [Qemu-devel] [PATCH 1/6] block: Byte-based bdrv_co_do_copy_on_readv() Kevin Wolf
2016-06-08 14:25 ` Eric Blake
@ 2016-06-14 12:09 ` Stefan Hajnoczi
1 sibling, 0 replies; 24+ messages in thread
From: Stefan Hajnoczi @ 2016-06-14 12:09 UTC (permalink / raw)
To: Kevin Wolf; +Cc: qemu-block, eblake, pbonzini, mreitz, famz, qemu-devel
[-- Attachment #1: Type: text/plain, Size: 562 bytes --]
On Wed, Jun 08, 2016 at 04:10:06PM +0200, Kevin Wolf wrote:
> In a first step to convert the common I/O path to work on bytes rather
> than sectors, this converts the copy-on-read logic that is used by
> bdrv_aligned_preadv().
>
> Signed-off-by: Kevin Wolf <kwolf@redhat.com>
> ---
> block/io.c | 63 +++++++++++++++++++++++++++++++--------------------
> block/mirror.c | 10 ++++----
> include/block/block.h | 10 +++++---
> 3 files changed, 51 insertions(+), 32 deletions(-)
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 473 bytes --]
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [Qemu-devel] [PATCH 2/6] block: Prepare bdrv_aligned_preadv() for byte-aligned requests
2016-06-08 14:10 ` [Qemu-devel] [PATCH 2/6] block: Prepare bdrv_aligned_preadv() for byte-aligned requests Kevin Wolf
2016-06-08 14:33 ` Eric Blake
@ 2016-06-14 12:09 ` Stefan Hajnoczi
1 sibling, 0 replies; 24+ messages in thread
From: Stefan Hajnoczi @ 2016-06-14 12:09 UTC (permalink / raw)
To: Kevin Wolf; +Cc: qemu-block, eblake, pbonzini, mreitz, famz, qemu-devel
[-- Attachment #1: Type: text/plain, Size: 524 bytes --]
On Wed, Jun 08, 2016 at 04:10:07PM +0200, Kevin Wolf wrote:
> This patch makes bdrv_aligned_preadv() ready to accept byte-aligned
> requests. Note that this doesn't mean that such requests are actually
> made. The caller still ensures that all requests are aligned to at least
> 512 bytes.
>
> Signed-off-by: Kevin Wolf <kwolf@redhat.com>
> ---
> block/io.c | 41 +++++++++++++++++------------------------
> 1 file changed, 17 insertions(+), 24 deletions(-)
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 473 bytes --]
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [Qemu-devel] [PATCH 3/6] block: Prepare bdrv_aligned_pwritev() for byte-aligned requests
2016-06-08 14:10 ` [Qemu-devel] [PATCH 3/6] block: Prepare bdrv_aligned_pwritev() " Kevin Wolf
2016-06-08 14:46 ` Eric Blake
@ 2016-06-14 12:09 ` Stefan Hajnoczi
1 sibling, 0 replies; 24+ messages in thread
From: Stefan Hajnoczi @ 2016-06-14 12:09 UTC (permalink / raw)
To: Kevin Wolf; +Cc: qemu-block, eblake, pbonzini, mreitz, famz, qemu-devel
[-- Attachment #1: Type: text/plain, Size: 249 bytes --]
On Wed, Jun 08, 2016 at 04:10:08PM +0200, Kevin Wolf wrote:
> Signed-off-by: Kevin Wolf <kwolf@redhat.com>
> ---
> block/io.c | 13 +++++--------
> 1 file changed, 5 insertions(+), 8 deletions(-)
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 473 bytes --]
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [Qemu-devel] [PATCH 5/6] raw-posix: Implement .bdrv_co_preadv/pwritev
2016-06-08 14:10 ` [Qemu-devel] [PATCH 5/6] raw-posix: Implement .bdrv_co_preadv/pwritev Kevin Wolf
2016-06-08 15:38 ` Eric Blake
@ 2016-06-14 12:09 ` Stefan Hajnoczi
1 sibling, 0 replies; 24+ messages in thread
From: Stefan Hajnoczi @ 2016-06-14 12:09 UTC (permalink / raw)
To: Kevin Wolf; +Cc: qemu-block, eblake, pbonzini, mreitz, famz, qemu-devel
[-- Attachment #1: Type: text/plain, Size: 687 bytes --]
On Wed, Jun 08, 2016 at 04:10:10PM +0200, Kevin Wolf wrote:
> The raw-posix block driver actually supports byte-aligned requests now
> on non-O_DIRECT images, like it already (and previously incorrectly)
> claimed in bs->request_alignment.
>
> For some block drivers this means that a RMW cycle can be avoided when
> they write sub-sector metadata e.g. for cluster allocation.
>
> Signed-off-by: Kevin Wolf <kwolf@redhat.com>
> ---
> block/linux-aio.c | 6 ++----
> block/raw-aio.h | 2 +-
> block/raw-posix.c | 42 ++++++++++++++++++++++--------------------
> 3 files changed, 25 insertions(+), 25 deletions(-)
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 473 bytes --]
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [Qemu-devel] [PATCH 0/6] block: Enable byte granularity I/O
2016-06-13 13:43 ` Kevin Wolf
2016-06-14 8:57 ` Stefan Hajnoczi
@ 2016-06-14 12:10 ` Stefan Hajnoczi
1 sibling, 0 replies; 24+ messages in thread
From: Stefan Hajnoczi @ 2016-06-14 12:10 UTC (permalink / raw)
To: Kevin Wolf; +Cc: qemu-block, eblake, pbonzini, mreitz, famz, qemu-devel
[-- Attachment #1: Type: text/plain, Size: 1896 bytes --]
On Mon, Jun 13, 2016 at 03:43:06PM +0200, Kevin Wolf wrote:
> Am 13.06.2016 um 15:27 hat Stefan Hajnoczi geschrieben:
> > On Wed, Jun 08, 2016 at 04:10:05PM +0200, Kevin Wolf wrote:
> > > Previous series have already converted some block drivers to byte-based rather
> > > than sector-based interfaces. However, the common I/O path as well as raw-posix
> > > still enforced a minimum alignment of 512 bytes because some sector-based logic
> > > was involved.
> > >
> > > This patch series removes these limitations and a sub-sector request actually
> > > ends up as a sub-sector syscall now.
> > >
> > > Kevin Wolf (6):
> > > block: Byte-based bdrv_co_do_copy_on_readv()
> > > block: Prepare bdrv_aligned_preadv() for byte-aligned requests
> > > block: Prepare bdrv_aligned_pwritev() for byte-aligned requests
> > > raw-posix: Switch to bdrv_co_* interfaces
> > > raw-posix: Implement .bdrv_co_preadv/pwritev
> > > block: Don't enforce 512 byte minimum alignment
> > >
> > > block.c | 2 +-
> > > block/io.c | 125 +++++++++++++++++++++++++-------------------------
> > > block/linux-aio.c | 83 ++++++++++++++++++++++++---------
> > > block/mirror.c | 10 ++--
> > > block/raw-aio.h | 2 +
> > > block/raw-posix.c | 61 ++++++++++++------------
> > > include/block/block.h | 10 ++--
> > > 7 files changed, 169 insertions(+), 124 deletions(-)
> >
> > I've taken an initial look and it looks good. Will review next revision
> > in depth so it can be merged after Eric's comments have been addressed.
>
> Eric commented a lot, but only requested very few minor changes that
> wouldn't strictly require resending the series. If you think it's
> worthwhile to send a v2 for them, I can do that, but it shouldn't make a
> big difference for your review.
Done. Looks very close.
Stefan
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 473 bytes --]
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [Qemu-devel] [PATCH 6/6] block: Don't enforce 512 byte minimum alignment
2016-06-14 12:09 ` Stefan Hajnoczi
@ 2016-06-14 13:04 ` Kevin Wolf
0 siblings, 0 replies; 24+ messages in thread
From: Kevin Wolf @ 2016-06-14 13:04 UTC (permalink / raw)
To: Stefan Hajnoczi; +Cc: qemu-block, eblake, pbonzini, mreitz, famz, qemu-devel
[-- Attachment #1: Type: text/plain, Size: 701 bytes --]
Am 14.06.2016 um 14:09 hat Stefan Hajnoczi geschrieben:
> On Wed, Jun 08, 2016 at 04:10:11PM +0200, Kevin Wolf wrote:
> > diff --git a/block.c b/block.c
> > index f54bc25..3d850a2 100644
> > --- a/block.c
> > +++ b/block.c
> > @@ -937,7 +937,7 @@ static int bdrv_open_common(BlockDriverState *bs, BdrvChild *file,
> > goto fail_opts;
> > }
> >
> > - bs->request_alignment = 512;
> > + bs->request_alignment = drv->bdrv_co_preadv ? 1 : 512;
>
> What happens in the raw-posix.c AIO case? There we should still use
> 512.
I'm only changing the default value here. raw-posix already overrides
bs->request_alignment as needed, see raw_probe_alignment().
Kevin
[-- Attachment #2: Type: application/pgp-signature, Size: 836 bytes --]
^ permalink raw reply [flat|nested] 24+ messages in thread
end of thread, other threads:[~2016-06-14 13:04 UTC | newest]
Thread overview: 24+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-06-08 14:10 [Qemu-devel] [PATCH 0/6] block: Enable byte granularity I/O Kevin Wolf
2016-06-08 14:10 ` [Qemu-devel] [PATCH 1/6] block: Byte-based bdrv_co_do_copy_on_readv() Kevin Wolf
2016-06-08 14:25 ` Eric Blake
2016-06-14 12:09 ` Stefan Hajnoczi
2016-06-08 14:10 ` [Qemu-devel] [PATCH 2/6] block: Prepare bdrv_aligned_preadv() for byte-aligned requests Kevin Wolf
2016-06-08 14:33 ` Eric Blake
2016-06-14 12:09 ` Stefan Hajnoczi
2016-06-08 14:10 ` [Qemu-devel] [PATCH 3/6] block: Prepare bdrv_aligned_pwritev() " Kevin Wolf
2016-06-08 14:46 ` Eric Blake
2016-06-14 12:09 ` Stefan Hajnoczi
2016-06-08 14:10 ` [Qemu-devel] [PATCH 4/6] raw-posix: Switch to bdrv_co_* interfaces Kevin Wolf
2016-06-08 15:13 ` Eric Blake
2016-06-14 12:04 ` Stefan Hajnoczi
2016-06-08 14:10 ` [Qemu-devel] [PATCH 5/6] raw-posix: Implement .bdrv_co_preadv/pwritev Kevin Wolf
2016-06-08 15:38 ` Eric Blake
2016-06-14 12:09 ` Stefan Hajnoczi
2016-06-08 14:10 ` [Qemu-devel] [PATCH 6/6] block: Don't enforce 512 byte minimum alignment Kevin Wolf
2016-06-08 16:06 ` Eric Blake
2016-06-14 12:09 ` Stefan Hajnoczi
2016-06-14 13:04 ` Kevin Wolf
2016-06-13 13:27 ` [Qemu-devel] [PATCH 0/6] block: Enable byte granularity I/O Stefan Hajnoczi
2016-06-13 13:43 ` Kevin Wolf
2016-06-14 8:57 ` Stefan Hajnoczi
2016-06-14 12:10 ` Stefan Hajnoczi
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.