[Qemu-devel] [PATCH] virtio-blk: correctly dirty guest memory
From: Paolo Bonzini @ 2015-04-02 13:37 UTC
To: qemu-devel
Cc: kwolf, hangaohuai, zhang.zhanghailiang, lizhijian, mst, dgilbert,
    peter.huangpeng, arei.gonglei, stefanha, amit.shah, dgibson
After qemu_iovec_destroy, the QEMUIOVector's size is zeroed and
the zero size ultimately is used to compute virtqueue_push's len
argument. Therefore, reads from virtio-blk devices did not
migrate their results correctly. (Writes were okay).
Save the size in submit_requests, and use it when the request is
completed.
Based on a patch by Wen Congyang.
Signed-off-by: Wen Congyang <wency@cn.fujitsu.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
hw/block/dataplane/virtio-blk.c | 2 +-
hw/block/virtio-blk.c | 16 +++++++++++++++-
include/hw/virtio/virtio-blk.h | 1 +
3 files changed, 17 insertions(+), 2 deletions(-)
diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c
index cd41478..b37ede3 100644
--- a/hw/block/dataplane/virtio-blk.c
+++ b/hw/block/dataplane/virtio-blk.c
@@ -78,7 +78,7 @@ static void complete_request_vring(VirtIOBlockReq *req, unsigned char status)
stb_p(&req->in->status, status);
vring_push(s->vdev, &req->dev->dataplane->vring, &req->elem,
- req->qiov.size + sizeof(*req->in));
+ req->read_size + sizeof(*req->in));
/* Suppress notification to guest by BH and its scheduled
* flag because requests are completed as a batch after io
diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
index 000c38d..2f00dc4 100644
--- a/hw/block/virtio-blk.c
+++ b/hw/block/virtio-blk.c
@@ -33,6 +33,7 @@ VirtIOBlockReq *virtio_blk_alloc_request(VirtIOBlock *s)
VirtIOBlockReq *req = g_slice_new(VirtIOBlockReq);
req->dev = s;
req->qiov.size = 0;
+ req->read_size = 0;
req->next = NULL;
req->mr_next = NULL;
return req;
@@ -54,7 +55,7 @@ static void virtio_blk_complete_request(VirtIOBlockReq *req,
trace_virtio_blk_req_complete(req, status);
stb_p(&req->in->status, status);
- virtqueue_push(s->vq, &req->elem, req->qiov.size + sizeof(*req->in));
+ virtqueue_push(s->vq, &req->elem, req->read_size + sizeof(*req->in));
virtio_notify(vdev, s->vq);
}
@@ -102,6 +103,14 @@ static void virtio_blk_rw_complete(void *opaque, int ret)
if (ret) {
int p = virtio_ldl_p(VIRTIO_DEVICE(req->dev), &req->out.type);
bool is_read = !(p & VIRTIO_BLK_T_OUT);
+ /* Note that memory may be dirtied on read failure. If the
+ * virtio request is not completed here, as is the case for
+ * BLOCK_ERROR_ACTION_STOP, the memory may not be copied
+ * correctly during live migration. While this is ugly,
+ * it is acceptable because the device is free to write to
+ * the memory until the request is completed (which will
+ * happen on the other side of the migration).
+ */
if (virtio_blk_handle_rw_error(req, -ret, is_read)) {
continue;
}
@@ -348,9 +357,14 @@ static inline void submit_requests(BlockBackend *blk, MultiReqBuffer *mrb,
}
if (is_write) {
+ mrb->reqs[start]->read_size = 0;
blk_aio_writev(blk, sector_num, qiov, nb_sectors,
virtio_blk_rw_complete, mrb->reqs[start]);
} else {
+ /* Save old qiov->size, which will be used in
+ * virtio_blk_complete_request()
+ */
+ mrb->reqs[start]->read_size = qiov->size;
blk_aio_readv(blk, sector_num, qiov, nb_sectors,
virtio_blk_rw_complete, mrb->reqs[start]);
}
diff --git a/include/hw/virtio/virtio-blk.h b/include/hw/virtio/virtio-blk.h
index b3ffcd9..d73ec06 100644
--- a/include/hw/virtio/virtio-blk.h
+++ b/include/hw/virtio/virtio-blk.h
@@ -67,6 +67,7 @@ typedef struct VirtIOBlockReq {
struct virtio_blk_inhdr *in;
struct virtio_blk_outhdr out;
QEMUIOVector qiov;
+ size_t read_size;
struct VirtIOBlockReq *next;
struct VirtIOBlockReq *mr_next;
BlockAcctCookie acct;
--
2.3.4
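
[Editorial note: for readers following along, here is a minimal, self-contained sketch of the failure mode. This is a toy model, not QEMU code; every name in it (qiov_model, destroy_model, push_model) is an illustrative stand-in. The fact it models: virtqueue_push's len argument is the number of bytes the device wrote into the guest's in-buffers, and live migration relies on that count to mark guest memory dirty, so a zeroed qiov.size makes a completed read look like it wrote nothing.]

/* bug-sketch.c: toy model of the reported bug; not QEMU code, and
 * every name here is an illustrative stand-in. */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct qiov_model {
    void   *iov;    /* stand-in for the struct iovec array */
    size_t  size;   /* total bytes described by the vector */
};

/* stand-in for qemu_iovec_destroy(): frees the array and zeroes the
 * whole structure, size included */
static void destroy_model(struct qiov_model *q)
{
    free(q->iov);
    memset(q, 0, sizeof(*q));
}

/* stand-in for virtqueue_push(): len is how many bytes the device
 * wrote into guest memory; migration dirties exactly that much */
static void push_model(const char *label, size_t len)
{
    printf("%s: dirtying %zu bytes of guest memory\n", label, len);
}

int main(void)
{
    struct qiov_model qiov = { malloc(16), 4096 };
    size_t read_size = qiov.size;   /* the fix: save the size up front */

    destroy_model(&qiov);           /* merged-request cleanup */

    push_model("buggy, qiov.size", qiov.size);  /* prints 0    */
    push_model("fixed, read_size", read_size);  /* prints 4096 */
    return 0;
}

[Compiled with any C99 compiler, the toy prints 0 bytes dirtied for the buggy path and 4096 for the fixed one, which is exactly the difference the patch makes for reads.]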
Re: [Qemu-devel] [PATCH] virtio-blk: correctly dirty guest memory
From: Fam Zheng @ 2015-04-02 14:39 UTC
To: Paolo Bonzini
Cc: kwolf, hangaohuai, zhang.zhanghailiang, lizhijian, mst,
qemu-devel, dgilbert, arei.gonglei, stefanha, amit.shah, dgibson,
peter.huangpeng
On Thu, 04/02 15:37, Paolo Bonzini wrote:
> After qemu_iovec_destroy, the QEMUIOVector's size is zeroed and
> the zero size ultimately is used to compute virtqueue_push's len
> argument. Therefore, reads from virtio-blk devices did not
> migrate their results correctly. (Writes were okay).
Can't we move qemu_iovec_destroy to virtio_blk_free_request?
Fam
Re: [Qemu-devel] [PATCH] virtio-blk: correctly dirty guest memory
From: Paolo Bonzini @ 2015-04-02 14:51 UTC
To: Fam Zheng
Cc: kwolf, hangaohuai, zhang.zhanghailiang, lizhijian, mst,
qemu-devel, dgilbert, arei.gonglei, stefanha, amit.shah, dgibson,
peter.huangpeng
On 02/04/2015 16:39, Fam Zheng wrote:
> On Thu, 04/02 15:37, Paolo Bonzini wrote:
>> After qemu_iovec_destroy, the QEMUIOVector's size is zeroed and
>> the zero size ultimately is used to compute virtqueue_push's len
>> argument. Therefore, reads from virtio-blk devices did not
>> migrate their results correctly. (Writes were okay).
>
> Can't we move qemu_iovec_destroy to virtio_blk_free_request?
You would still have to add more code to differentiate reads and
writes---I think.
Paolo
Re: [Qemu-devel] [PATCH] virtio-blk: correctly dirty guest memory
From: Fam Zheng @ 2015-04-02 15:16 UTC
To: Paolo Bonzini
Cc: kwolf, hangaohuai, zhang.zhanghailiang, lizhijian, mst,
qemu-devel, dgilbert, arei.gonglei, stefanha, amit.shah, dgibson,
peter.huangpeng
On Thu, 04/02 16:51, Paolo Bonzini wrote:
>
>
> On 02/04/2015 16:39, Fam Zheng wrote:
> > On Thu, 04/02 15:37, Paolo Bonzini wrote:
> >> After qemu_iovec_destroy, the QEMUIOVector's size is zeroed and
> >> the zero size ultimately is used to compute virtqueue_push's len
> >> argument. Therefore, reads from virtio-blk devices did not
> >> migrate their results correctly. (Writes were okay).
> >
> > Can't we move qemu_iovec_destroy to virtio_blk_free_request?
>
> You would still have to add more code to differentiate reads and
> writes---I think.
Yeah, but the extra field will not be needed.
Fam
Re: [Qemu-devel] [PATCH] virtio-blk: correctly dirty guest memory
From: Paolo Bonzini @ 2015-04-02 15:21 UTC
To: Fam Zheng
Cc: kwolf, hangaohuai, zhang.zhanghailiang, lizhijian, mst,
qemu-devel, dgilbert, arei.gonglei, stefanha, amit.shah, dgibson,
peter.huangpeng
On 02/04/2015 17:16, Fam Zheng wrote:
> > > Can't we move qemu_iovec_destroy to virtio_blk_free_request?
> >
> > You would still have to add more code to differentiate reads and
> > writes---I think.
> Yeah, but the extra field will not be needed.
Can you post an alternative patch? One small complication is that
is_write is in mrb but not in mrb->reqs[x]. virtio_blk_rw_complete is
already doing
int p = virtio_ldl_p(VIRTIO_DEVICE(req->dev), &req->out.type);
bool is_read = !(p & VIRTIO_BLK_T_OUT);
but only in a slow path.
Paolo
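
[Editorial note: as a standalone illustration of the check Paolo quotes — a sketch assuming the standard virtio-blk request-type values (T_IN=0, T_OUT=1, T_FLUSH=4), with virtio_ldl_p's guest-endian conversion elided as a plain host load.]

/* is-read-sketch.c: standalone model of the slow-path check. */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define VIRTIO_BLK_T_IN    0u   /* read request  */
#define VIRTIO_BLK_T_OUT   1u   /* write request */
#define VIRTIO_BLK_T_FLUSH 4u   /* flush request */

static bool req_is_read(uint32_t type)
{
    /* anything without the OUT bit counts as "read", including
     * non-RW types such as flush */
    return !(type & VIRTIO_BLK_T_OUT);
}

int main(void)
{
    printf("T_IN    -> is_read=%d\n", req_is_read(VIRTIO_BLK_T_IN));
    printf("T_OUT   -> is_read=%d\n", req_is_read(VIRTIO_BLK_T_OUT));
    printf("T_FLUSH -> is_read=%d\n", req_is_read(VIRTIO_BLK_T_FLUSH));
    return 0;
}

[Because VIRTIO_BLK_T_OUT is a single bit, every non-write type, SCSI and flush included, lands on the is_read side of this test — which is why the uninitialized read_size in those paths matters, as Fam points out later in the thread.]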
Re: [Qemu-devel] [PATCH] virtio-blk: correctly dirty guest memory
From: Fam Zheng @ 2015-04-02 16:26 UTC
To: Paolo Bonzini
Cc: kwolf, hangaohuai, zhang.zhanghailiang, lizhijian, mst,
qemu-devel, dgilbert, arei.gonglei, stefanha, amit.shah, dgibson,
peter.huangpeng
On Thu, 04/02 17:21, Paolo Bonzini wrote:
> On 02/04/2015 17:16, Fam Zheng wrote:
> > Yeah, but the extra field will not be needed.
>
> Can you post an alternative patch? One small complication is that
> is_write is in mrb but not in mrb->reqs[x]. virtio_blk_rw_complete is
> already doing
>
>     int p = virtio_ldl_p(VIRTIO_DEVICE(req->dev), &req->out.type);
>     bool is_read = !(p & VIRTIO_BLK_T_OUT);
>
> but only in a slow path.
OK, so it looks like a new field is the simplest way to achieve this.
There is another problem with your patch - read_size is not initialized in
non-RW paths like scsi and flush.
I think the optimization for write is a separate thing, though. Shouldn't the
patch below already fix the migration issue?
diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
index 000c38d..ee6e198 100644
--- a/hw/block/virtio-blk.c
+++ b/hw/block/virtio-blk.c
@@ -92,13 +92,6 @@ static void virtio_blk_rw_complete(void *opaque, int ret)
next = req->mr_next;
trace_virtio_blk_rw_complete(req, ret);
- if (req->qiov.nalloc != -1) {
- /* If nalloc is != 1 req->qiov is a local copy of the original
- * external iovec. It was allocated in submit_merged_requests
- * to be able to merge requests. */
- qemu_iovec_destroy(&req->qiov);
- }
-
if (ret) {
int p = virtio_ldl_p(VIRTIO_DEVICE(req->dev), &req->out.type);
bool is_read = !(p & VIRTIO_BLK_T_OUT);
@@ -109,6 +102,13 @@ static void virtio_blk_rw_complete(void *opaque, int ret)
virtio_blk_req_complete(req, VIRTIO_BLK_S_OK);
block_acct_done(blk_get_stats(req->dev->blk), &req->acct);
+
+ if (req->qiov.nalloc != -1) {
+ /* This means req->qiov is a local copy of the original external
+ * iovec. It was allocated in virtio_blk_submit_multireq in order
+ * to merge requests. */
+ qemu_iovec_destroy(&req->qiov);
+ }
virtio_blk_free_request(req);
}
}
Re: [Qemu-devel] [PATCH] virtio-blk: correctly dirty guest memory
From: Paolo Bonzini @ 2015-04-02 16:36 UTC
To: Fam Zheng
Cc: kwolf, hangaohuai, zhang.zhanghailiang, lizhijian, mst,
qemu-devel, dgilbert, arei.gonglei, stefanha, amit.shah, dgibson,
peter.huangpeng
On 02/04/2015 18:26, Fam Zheng wrote:
> There is another problem with your patch - read_size is not initialized in
> non-RW paths like scsi and flush.
Right, but...
> I think the optimization for write is a separate thing, though. Shouldn't the
> patch below already fix the migration issue?
... it also doesn't cover SCSI, does it?
Paolo
Re: [Qemu-devel] [PATCH] virtio-blk: correctly dirty guest memory
From: Wen Congyang @ 2015-04-03 1:22 UTC
To: Fam Zheng, Paolo Bonzini
Cc: kwolf, hangaohuai, zhang.zhanghailiang, lizhijian, mst,
qemu-devel, dgilbert, arei.gonglei, stefanha, amit.shah, dgibson,
peter.huangpeng
On 04/03/2015 12:26 AM, Fam Zheng wrote:
> OK, so it looks like a new field is the simplest way to achieve this.
>
> There is another problem with your patch - read_size is not initialized in
> non-RW paths like scsi and flush.
>
> I think the optimization for write is a separate thing, though. Shouldn't the
> patch below already fix the migration issue?
>
> diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
> index 000c38d..ee6e198 100644
> --- a/hw/block/virtio-blk.c
> +++ b/hw/block/virtio-blk.c
> @@ -92,13 +92,6 @@ static void virtio_blk_rw_complete(void *opaque, int ret)
> next = req->mr_next;
> trace_virtio_blk_rw_complete(req, ret);
>
> - if (req->qiov.nalloc != -1) {
> - /* If nalloc is != 1 req->qiov is a local copy of the original
> - * external iovec. It was allocated in submit_merged_requests
> - * to be able to merge requests. */
> - qemu_iovec_destroy(&req->qiov);
> - }
> -
> if (ret) {
> int p = virtio_ldl_p(VIRTIO_DEVICE(req->dev), &req->out.type);
> bool is_read = !(p & VIRTIO_BLK_T_OUT);
> @@ -109,6 +102,13 @@ static void virtio_blk_rw_complete(void *opaque, int ret)
>
> virtio_blk_req_complete(req, VIRTIO_BLK_S_OK);
> block_acct_done(blk_get_stats(req->dev->blk), &req->acct);
> +
> + if (req->qiov.nalloc != -1) {
> + /* This means req->qiov is a local copy of the original external
> + * iovec. It was allocated in virtio_blk_submit_multireq in order
> + * to merge requests. */
> + qemu_iovec_destroy(&req->qiov);
> + }
We will not come here on I/O failure, so this will cause a memory leak.
Thanks
Wen Congyang
> virtio_blk_free_request(req);
> }
> }
Re: [Qemu-devel] [PATCH] virtio-blk: correctly dirty guest memory
From: Bin Wu @ 2015-04-03 2:47 UTC
To: Fam Zheng, Paolo Bonzini
Cc: kwolf, hangaohuai, zhang.zhanghailiang, lizhijian, mst,
qemu-devel, dgilbert, arei.gonglei, stefanha, amit.shah, dgibson,
peter.huangpeng
On 2015/4/3 0:26, Fam Zheng wrote:
> OK, so it looks like a new field is the simplest way to achieve this.
>
> There is another problem with your patch - read_size is not initialized in
> non-RW paths like scsi and flush.
>
> I think the optimization for write is a separate thing, though. Shouldn't the
> patch below already fix the migration issue?
Can we allocate a new request for the merged requests?
diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
index 000c38d..d39381f 100644
--- a/hw/block/virtio-blk.c
+++ b/hw/block/virtio-blk.c
@@ -92,11 +92,10 @@ static void virtio_blk_rw_complete(void *opaque, int ret)
next = req->mr_next;
trace_virtio_blk_rw_complete(req, ret);
- if (req->qiov.nalloc != -1) {
- /* If nalloc is != 1 req->qiov is a local copy of the original
- * external iovec. It was allocated in submit_merged_requests
- * to be able to merge requests. */
+ if (req->in == NULL) {
qemu_iovec_destroy(&req->qiov);
+ virtio_blk_free_request(req);
+ continue;
}
if (ret) {
@@ -313,29 +312,33 @@ static void virtio_blk_handle_scsi(VirtIOBlockReq *req)
static inline void submit_requests(BlockBackend *blk, MultiReqBuffer *mrb,
int start, int num_reqs, int niov)
{
- QEMUIOVector *qiov = &mrb->reqs[start]->qiov;
+ VirtIOBlockReq *merged_request;
+ QEMUIOVector *qiov;
int64_t sector_num = mrb->reqs[start]->sector_num;
- int nb_sectors = mrb->reqs[start]->qiov.size / BDRV_SECTOR_SIZE;
+ int nb_sectors = 0;
bool is_write = mrb->is_write;
if (num_reqs > 1) {
int i;
- struct iovec *tmp_iov = qiov->iov;
- int tmp_niov = qiov->niov;
- /* mrb->reqs[start]->qiov was initialized from external so we can't
- * modifiy it here. We need to initialize it locally and then add the
- * external iovecs. */
- qemu_iovec_init(qiov, niov);
+ merged_request = virtio_blk_alloc_request(mrb->reqs[start]->dev);
- for (i = 0; i < tmp_niov; i++) {
- qemu_iovec_add(qiov, tmp_iov[i].iov_base, tmp_iov[i].iov_len);
- }
+ /* use the 'in' field to judge whether the request is
+ a merged request */
+ merged_request->in = NULL;
+
+ qiov = &merged_request->qiov;
+ qemu_iovec_init(qiov, niov);
- for (i = start + 1; i < start + num_reqs; i++) {
+ for (i = start; i < start + num_reqs; i++) {
qemu_iovec_concat(qiov, &mrb->reqs[i]->qiov, 0,
mrb->reqs[i]->qiov.size);
- mrb->reqs[i - 1]->mr_next = mrb->reqs[i];
+ if (i > start) {
+ mrb->reqs[i - 1]->mr_next = mrb->reqs[i];
+ } else {
+ merged_request->mr_next = mrb->reqs[i];
+ }
+
nb_sectors += mrb->reqs[i]->qiov.size / BDRV_SECTOR_SIZE;
}
assert(nb_sectors == qiov->size / BDRV_SECTOR_SIZE);
@@ -345,14 +348,18 @@ static inline void submit_requests(BlockBackend *blk, MultiReqBuffer *mrb,
block_acct_merge_done(blk_get_stats(blk),
is_write ? BLOCK_ACCT_WRITE : BLOCK_ACCT_READ,
num_reqs - 1);
+ } else {
+ merged_request = mrb->reqs[start];
+ qiov = &mrb->reqs[start]->qiov;
+ nb_sectors = mrb->reqs[start]->qiov.size / BDRV_SECTOR_SIZE;
}
if (is_write) {
blk_aio_writev(blk, sector_num, qiov, nb_sectors,
- virtio_blk_rw_complete, mrb->reqs[start]);
+ virtio_blk_rw_complete, merged_request);
} else {
blk_aio_readv(blk, sector_num, qiov, nb_sectors,
- virtio_blk_rw_complete, mrb->reqs[start]);
+ virtio_blk_rw_complete, merged_request);
}
}
--
Bin Wu
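
[Editorial note: Bin Wu's proposal, reduced to a toy model rather than QEMU code. The idea: allocate a separate container request that owns the merged iovec, tag it with a sentinel (in == NULL), and let the completion path tear it down without pushing anything to the virtqueue. All names below are illustrative stand-ins.]

/* container-sketch.c: toy model of the sentinel scheme above. */
#include <stdio.h>
#include <stdlib.h>

struct req_model {
    struct req_model *mr_next;  /* chain of merged requests  */
    void             *in;       /* NULL marks the container  */
};

/* completion walks the chain: the container is only torn down,
 * while real guest requests would be pushed to the virtqueue */
static void complete_chain(struct req_model *req)
{
    while (req) {
        struct req_model *next = req->mr_next;
        if (req->in == NULL) {
            printf("container: destroy merged iovec and free\n");
        } else {
            printf("guest request: virtqueue_push + free\n");
        }
        free(req);
        req = next;
    }
}

int main(void)
{
    struct req_model *a = calloc(1, sizeof(*a));
    struct req_model *b = calloc(1, sizeof(*b));
    struct req_model *container = calloc(1, sizeof(*container));

    a->in = a;                 /* any non-NULL: a real request */
    b->in = b;
    container->in = NULL;      /* sentinel: merged container   */

    container->mr_next = a;    /* container -> a -> b          */
    a->mr_next = b;

    complete_chain(container);
    return 0;
}

[The trade-off relative to Paolo's patch appears to be one extra allocation per merged submission, in exchange for leaving each guest request's qiov untouched so that completion can keep using qiov.size directly.]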