* [Qemu-devel] [PATCH] virtio-blk: correctly dirty guest memory
@ 2015-04-02 13:37 Paolo Bonzini
  2015-04-02 14:39 ` Fam Zheng
  0 siblings, 1 reply; 9+ messages in thread
From: Paolo Bonzini @ 2015-04-02 13:37 UTC (permalink / raw)
  To: qemu-devel
  Cc: kwolf, hangaohuai, zhang.zhanghailiang, lizhijian, mst, dgilbert,
	peter.huangpeng, arei.gonglei, stefanha, amit.shah, dgibson

After qemu_iovec_destroy, the QEMUIOVector's size is zeroed and
the zero size ultimately is used to compute virtqueue_push's len
argument.  Therefore, reads from virtio-blk devices did not
migrate their results correctly.  (Writes were okay).

Save the size in submit_requests, and use it when the request is
completed.

Based on a patch by Wen Congyang.

Signed-off-by: Wen Congyang <wency@cn.fujitsu.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 hw/block/dataplane/virtio-blk.c |  2 +-
 hw/block/virtio-blk.c           | 16 +++++++++++++++-
 include/hw/virtio/virtio-blk.h  |  1 +
 3 files changed, 17 insertions(+), 2 deletions(-)

diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c
index cd41478..b37ede3 100644
--- a/hw/block/dataplane/virtio-blk.c
+++ b/hw/block/dataplane/virtio-blk.c
@@ -78,7 +78,7 @@ static void complete_request_vring(VirtIOBlockReq *req, unsigned char status)
     stb_p(&req->in->status, status);
 
     vring_push(s->vdev, &req->dev->dataplane->vring, &req->elem,
-               req->qiov.size + sizeof(*req->in));
+               req->read_size + sizeof(*req->in));
 
     /* Suppress notification to guest by BH and its scheduled
      * flag because requests are completed as a batch after io
diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
index 000c38d..2f00dc4 100644
--- a/hw/block/virtio-blk.c
+++ b/hw/block/virtio-blk.c
@@ -33,6 +33,7 @@ VirtIOBlockReq *virtio_blk_alloc_request(VirtIOBlock *s)
     VirtIOBlockReq *req = g_slice_new(VirtIOBlockReq);
     req->dev = s;
     req->qiov.size = 0;
+    req->read_size = 0;
     req->next = NULL;
     req->mr_next = NULL;
     return req;
@@ -54,7 +55,7 @@ static void virtio_blk_complete_request(VirtIOBlockReq *req,
     trace_virtio_blk_req_complete(req, status);
 
     stb_p(&req->in->status, status);
-    virtqueue_push(s->vq, &req->elem, req->qiov.size + sizeof(*req->in));
+    virtqueue_push(s->vq, &req->elem, req->read_size + sizeof(*req->in));
     virtio_notify(vdev, s->vq);
 }
 
@@ -102,6 +103,14 @@ static void virtio_blk_rw_complete(void *opaque, int ret)
         if (ret) {
             int p = virtio_ldl_p(VIRTIO_DEVICE(req->dev), &req->out.type);
             bool is_read = !(p & VIRTIO_BLK_T_OUT);
+            /* Note that memory may be dirtied on read failure.  If the
+             * virtio request is not completed here, as is the case for
+             * BLOCK_ERROR_ACTION_STOP, the memory may not be copied
+             * correctly during live migration.  While this is ugly,
+             * it is acceptable because the device is free to write to
+             * the memory until the request is completed (which will
+             * happen on the other side of the migration).
+             */
             if (virtio_blk_handle_rw_error(req, -ret, is_read)) {
                 continue;
             }
@@ -348,9 +357,14 @@ static inline void submit_requests(BlockBackend *blk, MultiReqBuffer *mrb,
     }
 
     if (is_write) {
+        mrb->reqs[start]->read_size = 0;
         blk_aio_writev(blk, sector_num, qiov, nb_sectors,
                        virtio_blk_rw_complete, mrb->reqs[start]);
     } else {
+        /* Save old qiov->size, which will be used in
+         * virtio_blk_complete_request()
+         */
+        mrb->reqs[start]->read_size = qiov->size;
         blk_aio_readv(blk, sector_num, qiov, nb_sectors,
                       virtio_blk_rw_complete, mrb->reqs[start]);
     }
diff --git a/include/hw/virtio/virtio-blk.h b/include/hw/virtio/virtio-blk.h
index b3ffcd9..d73ec06 100644
--- a/include/hw/virtio/virtio-blk.h
+++ b/include/hw/virtio/virtio-blk.h
@@ -67,6 +67,7 @@ typedef struct VirtIOBlockReq {
     struct virtio_blk_inhdr *in;
     struct virtio_blk_outhdr out;
     QEMUIOVector qiov;
+    size_t read_size;
     struct VirtIOBlockReq *next;
     struct VirtIOBlockReq *mr_next;
     BlockAcctCookie acct;
-- 
2.3.4
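
For context, the reason a zero len breaks migration is that virtqueue_push()
uses it to decide how many bytes of the guest-writable ("in") buffers are
marked dirty when they are unmapped.  Roughly, paraphrasing what
virtqueue_fill() did at the time (a sketch, not an exact excerpt; the
cpu_physical_memory_unmap(buffer, len, is_write, access_len) signature is
assumed from QEMU of that era):

    static void virtqueue_fill_sketch(VirtQueueElement *elem, unsigned int len)
    {
        unsigned int offset = 0;
        unsigned int i;

        for (i = 0; i < elem->in_num; i++) {
            /* A non-zero access_len is what dirties the pages the device
             * wrote to; with len == 0 the data returned by a read is never
             * re-sent to the migration destination. */
            size_t size = MIN(len - offset, elem->in_sg[i].iov_len);

            cpu_physical_memory_unmap(elem->in_sg[i].iov_base,
                                      elem->in_sg[i].iov_len, 1, size);
            offset += size;
        }
    }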


* Re: [Qemu-devel] [PATCH] virtio-blk: correctly dirty guest memory
  2015-04-02 13:37 [Qemu-devel] [PATCH] virtio-blk: correctly dirty guest memory Paolo Bonzini
@ 2015-04-02 14:39 ` Fam Zheng
  2015-04-02 14:51   ` Paolo Bonzini
  0 siblings, 1 reply; 9+ messages in thread
From: Fam Zheng @ 2015-04-02 14:39 UTC (permalink / raw)
  To: Paolo Bonzini
  Cc: kwolf, hangaohuai, zhang.zhanghailiang, lizhijian, mst,
	qemu-devel, dgilbert, arei.gonglei, stefanha, amit.shah, dgibson,
	peter.huangpeng

On Thu, 04/02 15:37, Paolo Bonzini wrote:
> After qemu_iovec_destroy, the QEMUIOVector's size is zeroed and
> the zero size ultimately is used to compute virtqueue_push's len
> argument.  Therefore, reads from virtio-blk devices did not
> migrate their results correctly.  (Writes were okay).

Can't we move qemu_iovec_destroy to virtio_blk_free_request?

Fam
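
A minimal sketch of what that could look like, assuming the shape
virtio_blk_free_request() had at the time (illustrative only, not a patch
posted in this thread):

    void virtio_blk_free_request(VirtIOBlockReq *req)
    {
        if (req) {
            if (req->qiov.nalloc != -1) {
                /* Only merged requests carry a locally allocated copy of the
                 * iovec; requests set up with qemu_iovec_init_external() have
                 * nalloc == -1 and own nothing to free. */
                qemu_iovec_destroy(&req->qiov);
            }
            g_slice_free(VirtIOBlockReq, req);
        }
    }

Destroying here, after virtqueue_push() has already run, would leave
req->qiov.size valid at completion time, which is the point of the suggestion.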

> 
> Save the size in submit_requests, and use it when the request is
> completed.
> 
> Based on a patch by Wen Congyang.
> 
> Signed-off-by: Wen Congyang <wency@cn.fujitsu.com>
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> ---
>  hw/block/dataplane/virtio-blk.c |  2 +-
>  hw/block/virtio-blk.c           | 16 +++++++++++++++-
>  include/hw/virtio/virtio-blk.h  |  1 +
>  3 files changed, 17 insertions(+), 2 deletions(-)
> 
> diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c
> index cd41478..b37ede3 100644
> --- a/hw/block/dataplane/virtio-blk.c
> +++ b/hw/block/dataplane/virtio-blk.c
> @@ -78,7 +78,7 @@ static void complete_request_vring(VirtIOBlockReq *req, unsigned char status)
>      stb_p(&req->in->status, status);
>  
>      vring_push(s->vdev, &req->dev->dataplane->vring, &req->elem,
> -               req->qiov.size + sizeof(*req->in));
> +               req->read_size + sizeof(*req->in));
>  
>      /* Suppress notification to guest by BH and its scheduled
>       * flag because requests are completed as a batch after io
> diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
> index 000c38d..2f00dc4 100644
> --- a/hw/block/virtio-blk.c
> +++ b/hw/block/virtio-blk.c
> @@ -33,6 +33,7 @@ VirtIOBlockReq *virtio_blk_alloc_request(VirtIOBlock *s)
>      VirtIOBlockReq *req = g_slice_new(VirtIOBlockReq);
>      req->dev = s;
>      req->qiov.size = 0;
> +    req->read_size = 0;
>      req->next = NULL;
>      req->mr_next = NULL;
>      return req;
> @@ -54,7 +55,7 @@ static void virtio_blk_complete_request(VirtIOBlockReq *req,
>      trace_virtio_blk_req_complete(req, status);
>  
>      stb_p(&req->in->status, status);
> -    virtqueue_push(s->vq, &req->elem, req->qiov.size + sizeof(*req->in));
> +    virtqueue_push(s->vq, &req->elem, req->read_size + sizeof(*req->in));
>      virtio_notify(vdev, s->vq);
>  }
>  
> @@ -102,6 +103,14 @@ static void virtio_blk_rw_complete(void *opaque, int ret)
>          if (ret) {
>              int p = virtio_ldl_p(VIRTIO_DEVICE(req->dev), &req->out.type);
>              bool is_read = !(p & VIRTIO_BLK_T_OUT);
> +            /* Note that memory may be dirtied on read failure.  If the
> +             * virtio request is not completed here, as is the case for
> +             * BLOCK_ERROR_ACTION_STOP, the memory may not be copied
> +             * correctly during live migration.  While this is ugly,
> +             * it is acceptable because the device is free to write to
> +             * the memory until the request is completed (which will
> +             * happen on the other side of the migration).
> +             */
>              if (virtio_blk_handle_rw_error(req, -ret, is_read)) {
>                  continue;
>              }
> @@ -348,9 +357,14 @@ static inline void submit_requests(BlockBackend *blk, MultiReqBuffer *mrb,
>      }
>  
>      if (is_write) {
> +        mrb->reqs[start]->read_size = 0;
>          blk_aio_writev(blk, sector_num, qiov, nb_sectors,
>                         virtio_blk_rw_complete, mrb->reqs[start]);
>      } else {
> +        /* Save old qiov->size, which will be used in
> +         * virtio_blk_complete_request()
> +         */
> +        mrb->reqs[start]->read_size = qiov->size;
>          blk_aio_readv(blk, sector_num, qiov, nb_sectors,
>                        virtio_blk_rw_complete, mrb->reqs[start]);
>      }
> diff --git a/include/hw/virtio/virtio-blk.h b/include/hw/virtio/virtio-blk.h
> index b3ffcd9..d73ec06 100644
> --- a/include/hw/virtio/virtio-blk.h
> +++ b/include/hw/virtio/virtio-blk.h
> @@ -67,6 +67,7 @@ typedef struct VirtIOBlockReq {
>      struct virtio_blk_inhdr *in;
>      struct virtio_blk_outhdr out;
>      QEMUIOVector qiov;
> +    size_t read_size;
>      struct VirtIOBlockReq *next;
>      struct VirtIOBlockReq *mr_next;
>      BlockAcctCookie acct;
> -- 
> 2.3.4
> 
> 


* Re: [Qemu-devel] [PATCH] virtio-blk: correctly dirty guest memory
  2015-04-02 14:39 ` Fam Zheng
@ 2015-04-02 14:51   ` Paolo Bonzini
  2015-04-02 15:16     ` Fam Zheng
  0 siblings, 1 reply; 9+ messages in thread
From: Paolo Bonzini @ 2015-04-02 14:51 UTC (permalink / raw)
  To: Fam Zheng
  Cc: kwolf, hangaohuai, zhang.zhanghailiang, lizhijian, mst,
	qemu-devel, dgilbert, arei.gonglei, stefanha, amit.shah, dgibson,
	peter.huangpeng



On 02/04/2015 16:39, Fam Zheng wrote:
> On Thu, 04/02 15:37, Paolo Bonzini wrote:
>> After qemu_iovec_destroy, the QEMUIOVector's size is zeroed and
>> the zero size ultimately is used to compute virtqueue_push's len
>> argument.  Therefore, reads from virtio-blk devices did not
>> migrate their results correctly.  (Writes were okay).
> 
> Can't we move qemu_iovec_destroy to virtio_blk_free_request?

You would still have to add more code to differentiate reads and
writes---I think.

Paolo

> Fam
> 
>>
>> Save the size in submit_requests, and use it when the request is
>> completed.
>>
>> Based on a patch by Wen Congyang.
>>
>> Signed-off-by: Wen Congyang <wency@cn.fujitsu.com>
>> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
>> ---
>>  hw/block/dataplane/virtio-blk.c |  2 +-
>>  hw/block/virtio-blk.c           | 16 +++++++++++++++-
>>  include/hw/virtio/virtio-blk.h  |  1 +
>>  3 files changed, 17 insertions(+), 2 deletions(-)
>>
>> diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c
>> index cd41478..b37ede3 100644
>> --- a/hw/block/dataplane/virtio-blk.c
>> +++ b/hw/block/dataplane/virtio-blk.c
>> @@ -78,7 +78,7 @@ static void complete_request_vring(VirtIOBlockReq *req, unsigned char status)
>>      stb_p(&req->in->status, status);
>>  
>>      vring_push(s->vdev, &req->dev->dataplane->vring, &req->elem,
>> -               req->qiov.size + sizeof(*req->in));
>> +               req->read_size + sizeof(*req->in));
>>  
>>      /* Suppress notification to guest by BH and its scheduled
>>       * flag because requests are completed as a batch after io
>> diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
>> index 000c38d..2f00dc4 100644
>> --- a/hw/block/virtio-blk.c
>> +++ b/hw/block/virtio-blk.c
>> @@ -33,6 +33,7 @@ VirtIOBlockReq *virtio_blk_alloc_request(VirtIOBlock *s)
>>      VirtIOBlockReq *req = g_slice_new(VirtIOBlockReq);
>>      req->dev = s;
>>      req->qiov.size = 0;
>> +    req->read_size = 0;
>>      req->next = NULL;
>>      req->mr_next = NULL;
>>      return req;
>> @@ -54,7 +55,7 @@ static void virtio_blk_complete_request(VirtIOBlockReq *req,
>>      trace_virtio_blk_req_complete(req, status);
>>  
>>      stb_p(&req->in->status, status);
>> -    virtqueue_push(s->vq, &req->elem, req->qiov.size + sizeof(*req->in));
>> +    virtqueue_push(s->vq, &req->elem, req->read_size + sizeof(*req->in));
>>      virtio_notify(vdev, s->vq);
>>  }
>>  
>> @@ -102,6 +103,14 @@ static void virtio_blk_rw_complete(void *opaque, int ret)
>>          if (ret) {
>>              int p = virtio_ldl_p(VIRTIO_DEVICE(req->dev), &req->out.type);
>>              bool is_read = !(p & VIRTIO_BLK_T_OUT);
>> +            /* Note that memory may be dirtied on read failure.  If the
>> +             * virtio request is not completed here, as is the case for
>> +             * BLOCK_ERROR_ACTION_STOP, the memory may not be copied
>> +             * correctly during live migration.  While this is ugly,
>> +             * it is acceptable because the device is free to write to
>> +             * the memory until the request is completed (which will
>> +             * happen on the other side of the migration).
>> +             */
>>              if (virtio_blk_handle_rw_error(req, -ret, is_read)) {
>>                  continue;
>>              }
>> @@ -348,9 +357,14 @@ static inline void submit_requests(BlockBackend *blk, MultiReqBuffer *mrb,
>>      }
>>  
>>      if (is_write) {
>> +        mrb->reqs[start]->read_size = 0;
>>          blk_aio_writev(blk, sector_num, qiov, nb_sectors,
>>                         virtio_blk_rw_complete, mrb->reqs[start]);
>>      } else {
>> +        /* Save old qiov->size, which will be used in
>> +         * virtio_blk_complete_request()
>> +         */
>> +        mrb->reqs[start]->read_size = qiov->size;
>>          blk_aio_readv(blk, sector_num, qiov, nb_sectors,
>>                        virtio_blk_rw_complete, mrb->reqs[start]);
>>      }
>> diff --git a/include/hw/virtio/virtio-blk.h b/include/hw/virtio/virtio-blk.h
>> index b3ffcd9..d73ec06 100644
>> --- a/include/hw/virtio/virtio-blk.h
>> +++ b/include/hw/virtio/virtio-blk.h
>> @@ -67,6 +67,7 @@ typedef struct VirtIOBlockReq {
>>      struct virtio_blk_inhdr *in;
>>      struct virtio_blk_outhdr out;
>>      QEMUIOVector qiov;
>> +    size_t read_size;
>>      struct VirtIOBlockReq *next;
>>      struct VirtIOBlockReq *mr_next;
>>      BlockAcctCookie acct;
>> -- 
>> 2.3.4
>>
>>


* Re: [Qemu-devel] [PATCH] virtio-blk: correctly dirty guest memory
  2015-04-02 14:51   ` Paolo Bonzini
@ 2015-04-02 15:16     ` Fam Zheng
  2015-04-02 15:21       ` Paolo Bonzini
  0 siblings, 1 reply; 9+ messages in thread
From: Fam Zheng @ 2015-04-02 15:16 UTC (permalink / raw)
  To: Paolo Bonzini
  Cc: kwolf, hangaohuai, zhang.zhanghailiang, lizhijian, mst,
	qemu-devel, dgilbert, arei.gonglei, stefanha, amit.shah, dgibson,
	peter.huangpeng

On Thu, 04/02 16:51, Paolo Bonzini wrote:
> 
> 
> On 02/04/2015 16:39, Fam Zheng wrote:
> > On Thu, 04/02 15:37, Paolo Bonzini wrote:
> >> After qemu_iovec_destroy, the QEMUIOVector's size is zeroed and
> >> the zero size ultimately is used to compute virtqueue_push's len
> >> argument.  Therefore, reads from virtio-blk devices did not
> >> migrate their results correctly.  (Writes were okay).
> > 
> > Can't we move qemu_iovec_destroy to virtio_blk_free_request?
> 
> You would still have to add more code to differentiate reads and
> writes---I think.

Yeah, but the extra field will not be needed.

Fam

> 
> Paolo
> 
> > Fam
> > 
> >>
> >> Save the size in submit_requests, and use it when the request is
> >> completed.
> >>
> >> Based on a patch by Wen Congyang.
> >>
> >> Signed-off-by: Wen Congyang <wency@cn.fujitsu.com>
> >> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> >> ---
> >>  hw/block/dataplane/virtio-blk.c |  2 +-
> >>  hw/block/virtio-blk.c           | 16 +++++++++++++++-
> >>  include/hw/virtio/virtio-blk.h  |  1 +
> >>  3 files changed, 17 insertions(+), 2 deletions(-)
> >>
> >> diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c
> >> index cd41478..b37ede3 100644
> >> --- a/hw/block/dataplane/virtio-blk.c
> >> +++ b/hw/block/dataplane/virtio-blk.c
> >> @@ -78,7 +78,7 @@ static void complete_request_vring(VirtIOBlockReq *req, unsigned char status)
> >>      stb_p(&req->in->status, status);
> >>  
> >>      vring_push(s->vdev, &req->dev->dataplane->vring, &req->elem,
> >> -               req->qiov.size + sizeof(*req->in));
> >> +               req->read_size + sizeof(*req->in));
> >>  
> >>      /* Suppress notification to guest by BH and its scheduled
> >>       * flag because requests are completed as a batch after io
> >> diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
> >> index 000c38d..2f00dc4 100644
> >> --- a/hw/block/virtio-blk.c
> >> +++ b/hw/block/virtio-blk.c
> >> @@ -33,6 +33,7 @@ VirtIOBlockReq *virtio_blk_alloc_request(VirtIOBlock *s)
> >>      VirtIOBlockReq *req = g_slice_new(VirtIOBlockReq);
> >>      req->dev = s;
> >>      req->qiov.size = 0;
> >> +    req->read_size = 0;
> >>      req->next = NULL;
> >>      req->mr_next = NULL;
> >>      return req;
> >> @@ -54,7 +55,7 @@ static void virtio_blk_complete_request(VirtIOBlockReq *req,
> >>      trace_virtio_blk_req_complete(req, status);
> >>  
> >>      stb_p(&req->in->status, status);
> >> -    virtqueue_push(s->vq, &req->elem, req->qiov.size + sizeof(*req->in));
> >> +    virtqueue_push(s->vq, &req->elem, req->read_size + sizeof(*req->in));
> >>      virtio_notify(vdev, s->vq);
> >>  }
> >>  
> >> @@ -102,6 +103,14 @@ static void virtio_blk_rw_complete(void *opaque, int ret)
> >>          if (ret) {
> >>              int p = virtio_ldl_p(VIRTIO_DEVICE(req->dev), &req->out.type);
> >>              bool is_read = !(p & VIRTIO_BLK_T_OUT);
> >> +            /* Note that memory may be dirtied on read failure.  If the
> >> +             * virtio request is not completed here, as is the case for
> >> +             * BLOCK_ERROR_ACTION_STOP, the memory may not be copied
> >> +             * correctly during live migration.  While this is ugly,
> >> +             * it is acceptable because the device is free to write to
> >> +             * the memory until the request is completed (which will
> >> +             * happen on the other side of the migration).
> >> +             */
> >>              if (virtio_blk_handle_rw_error(req, -ret, is_read)) {
> >>                  continue;
> >>              }
> >> @@ -348,9 +357,14 @@ static inline void submit_requests(BlockBackend *blk, MultiReqBuffer *mrb,
> >>      }
> >>  
> >>      if (is_write) {
> >> +        mrb->reqs[start]->read_size = 0;
> >>          blk_aio_writev(blk, sector_num, qiov, nb_sectors,
> >>                         virtio_blk_rw_complete, mrb->reqs[start]);
> >>      } else {
> >> +        /* Save old qiov->size, which will be used in
> >> +         * virtio_blk_complete_request()
> >> +         */
> >> +        mrb->reqs[start]->read_size = qiov->size;
> >>          blk_aio_readv(blk, sector_num, qiov, nb_sectors,
> >>                        virtio_blk_rw_complete, mrb->reqs[start]);
> >>      }
> >> diff --git a/include/hw/virtio/virtio-blk.h b/include/hw/virtio/virtio-blk.h
> >> index b3ffcd9..d73ec06 100644
> >> --- a/include/hw/virtio/virtio-blk.h
> >> +++ b/include/hw/virtio/virtio-blk.h
> >> @@ -67,6 +67,7 @@ typedef struct VirtIOBlockReq {
> >>      struct virtio_blk_inhdr *in;
> >>      struct virtio_blk_outhdr out;
> >>      QEMUIOVector qiov;
> >> +    size_t read_size;
> >>      struct VirtIOBlockReq *next;
> >>      struct VirtIOBlockReq *mr_next;
> >>      BlockAcctCookie acct;
> >> -- 
> >> 2.3.4
> >>
> >>


* Re: [Qemu-devel] [PATCH] virtio-blk: correctly dirty guest memory
  2015-04-02 15:16     ` Fam Zheng
@ 2015-04-02 15:21       ` Paolo Bonzini
  2015-04-02 16:26         ` Fam Zheng
  0 siblings, 1 reply; 9+ messages in thread
From: Paolo Bonzini @ 2015-04-02 15:21 UTC (permalink / raw)
  To: Fam Zheng
  Cc: kwolf, hangaohuai, zhang.zhanghailiang, lizhijian, mst,
	qemu-devel, dgilbert, arei.gonglei, stefanha, amit.shah, dgibson,
	peter.huangpeng



On 02/04/2015 17:16, Fam Zheng wrote:
>>>> > >> After qemu_iovec_destroy, the QEMUIOVector's size is zeroed and
>>>> > >> the zero size ultimately is used to compute virtqueue_push's len
>>>> > >> argument.  Therefore, reads from virtio-blk devices did not
>>>> > >> migrate their results correctly.  (Writes were okay).
>>> > > 
>>> > > Can't we move qemu_iovec_destroy to virtio_blk_free_request?
>> > 
>> > You would still have to add more code to differentiate reads and
>> > writes---I think.
> Yeah, but the extra field will not be needed.

Can you post an alternative patch?  One small complication is that
is_write is in mrb but not in mrb->reqs[x].  virtio_blk_rw_complete is
already doing

    int p = virtio_ldl_p(VIRTIO_DEVICE(req->dev), &req->out.type);
    bool is_read = !(p & VIRTIO_BLK_T_OUT);

but only in a slow path.

Paolo
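
A hypothetical helper makes the "more code to differentiate reads and writes"
concrete (the name and placement are illustrative, not something posted in the
thread; it only factors out the derivation quoted above):

    static bool virtio_blk_req_is_read(VirtIOBlockReq *req)
    {
        /* Same derivation as the slow path above, usable from the completion
         * path without storing an extra per-request field. */
        uint32_t type = virtio_ldl_p(VIRTIO_DEVICE(req->dev), &req->out.type);

        return !(type & VIRTIO_BLK_T_OUT);
    }

Whether doing this load on every completion is acceptable is exactly the
fast-path concern raised here.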


* Re: [Qemu-devel] [PATCH] virtio-blk: correctly dirty guest memory
  2015-04-02 15:21       ` Paolo Bonzini
@ 2015-04-02 16:26         ` Fam Zheng
  2015-04-02 16:36           ` Paolo Bonzini
                             ` (2 more replies)
  0 siblings, 3 replies; 9+ messages in thread
From: Fam Zheng @ 2015-04-02 16:26 UTC (permalink / raw)
  To: Paolo Bonzini
  Cc: kwolf, hangaohuai, zhang.zhanghailiang, lizhijian, mst,
	qemu-devel, dgilbert, arei.gonglei, stefanha, amit.shah, dgibson,
	peter.huangpeng

On Thu, 04/02 17:21, Paolo Bonzini wrote:
> 
> 
> On 02/04/2015 17:16, Fam Zheng wrote:
> >>>> > >> After qemu_iovec_destroy, the QEMUIOVector's size is zeroed and
> >>>> > >> the zero size ultimately is used to compute virtqueue_push's len
> >>>> > >> argument.  Therefore, reads from virtio-blk devices did not
> >>>> > >> migrate their results correctly.  (Writes were okay).
> >>> > > 
> >>> > > Can't we move qemu_iovec_destroy to virtio_blk_free_request?
> >> > 
> >> > You would still have to add more code to differentiate reads and
> >> > writes---I think.
> > Yeah, but the extra field will not be needed.
> 
> Can you post an alternative patch?  One small complication is that
> is_write is in mrb but not in mrb->reqs[x].  virtio_blk_rw_complete is
> already doing
> 
>     int p = virtio_ldl_p(VIRTIO_DEVICE(req->dev), &req->out.type);
>     bool is_read = !(p & VIRTIO_BLK_T_OUT);
> 
> but only in a slow path.

OK, so it looks like a new field is the simplest way to achieve this.

There is another problem with your patch - read_size is not initialized in
non-RW paths like scsi and flush.

I think the optimization for write is a separate thing, though. Shouldn't the
patch below already fix the migration issue?

diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
index 000c38d..ee6e198 100644
--- a/hw/block/virtio-blk.c
+++ b/hw/block/virtio-blk.c
@@ -92,13 +92,6 @@ static void virtio_blk_rw_complete(void *opaque, int ret)
         next = req->mr_next;
         trace_virtio_blk_rw_complete(req, ret);
 
-        if (req->qiov.nalloc != -1) {
-            /* If nalloc is != 1 req->qiov is a local copy of the original
-             * external iovec. It was allocated in submit_merged_requests
-             * to be able to merge requests. */
-            qemu_iovec_destroy(&req->qiov);
-        }
-
         if (ret) {
             int p = virtio_ldl_p(VIRTIO_DEVICE(req->dev), &req->out.type);
             bool is_read = !(p & VIRTIO_BLK_T_OUT);
@@ -109,6 +102,13 @@ static void virtio_blk_rw_complete(void *opaque, int ret)
 
         virtio_blk_req_complete(req, VIRTIO_BLK_S_OK);
         block_acct_done(blk_get_stats(req->dev->blk), &req->acct);
+
+        if (req->qiov.nalloc != -1) {
+            /* This means req->qiov is a local copy of the original external
+             * iovec. It was allocated in virtio_blk_submit_multireq in order
+             * to merge requests. */
+            qemu_iovec_destroy(&req->qiov);
+        }
         virtio_blk_free_request(req);
     }
 }


* Re: [Qemu-devel] [PATCH] virtio-blk: correctly dirty guest memory
  2015-04-02 16:26         ` Fam Zheng
@ 2015-04-02 16:36           ` Paolo Bonzini
  2015-04-03  1:22           ` Wen Congyang
  2015-04-03  2:47           ` Bin Wu
  2 siblings, 0 replies; 9+ messages in thread
From: Paolo Bonzini @ 2015-04-02 16:36 UTC (permalink / raw)
  To: Fam Zheng
  Cc: kwolf, hangaohuai, zhang.zhanghailiang, lizhijian, mst,
	qemu-devel, dgilbert, arei.gonglei, stefanha, amit.shah, dgibson,
	peter.huangpeng



On 02/04/2015 18:26, Fam Zheng wrote:
> There is another problem with your patch - read_size is not initialized in
> non-RW paths like scsi and flush.

Right, but...

> I think the optimization for write is a separate thing, though. Shouldn't below
> patch already fix the migration issue?

... it also doesn't cover SCSI, does it?

Paolo


* Re: [Qemu-devel] [PATCH] virtio-blk: correctly dirty guest memory
  2015-04-02 16:26         ` Fam Zheng
  2015-04-02 16:36           ` Paolo Bonzini
@ 2015-04-03  1:22           ` Wen Congyang
  2015-04-03  2:47           ` Bin Wu
  2 siblings, 0 replies; 9+ messages in thread
From: Wen Congyang @ 2015-04-03  1:22 UTC (permalink / raw)
  To: Fam Zheng, Paolo Bonzini
  Cc: kwolf, hangaohuai, zhang.zhanghailiang, lizhijian, mst,
	qemu-devel, dgilbert, arei.gonglei, stefanha, amit.shah, dgibson,
	peter.huangpeng

On 04/03/2015 12:26 AM, Fam Zheng wrote:
> On Thu, 04/02 17:21, Paolo Bonzini wrote:
>>
>>
>> On 02/04/2015 17:16, Fam Zheng wrote:
>>>>>>>>> After qemu_iovec_destroy, the QEMUIOVector's size is zeroed and
>>>>>>>>> the zero size ultimately is used to compute virtqueue_push's len
>>>>>>>>> argument.  Therefore, reads from virtio-blk devices did not
>>>>>>>>> migrate their results correctly.  (Writes were okay).
>>>>>>>
>>>>>>> Can't we move qemu_iovec_destroy to virtio_blk_free_request?
>>>>>
>>>>> You would still have to add more code to differentiate reads and
>>>>> writes---I think.
>>> Yeah, but the extra field will not be needed.
>>
>> Can you post an alternative patch?  One small complication is that
>> is_write is in mrb but not in mrb->reqs[x].  virtio_blk_rw_complete is
>> already doing
>>
>>     int p = virtio_ldl_p(VIRTIO_DEVICE(req->dev), &req->out.type);
>>     bool is_read = !(p & VIRTIO_BLK_T_OUT);
>>
>> but only in a slow path.
> 
> OK, so it looks like a new field is the simplest way to achieve this.
> 
> There is another problem with your patch - read_size is not initialized in
> non-RW paths like scsi and flush.
> 
> I think the optimization for write is a separate thing, though. Shouldn't the
> patch below already fix the migration issue?
> 
> diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
> index 000c38d..ee6e198 100644
> --- a/hw/block/virtio-blk.c
> +++ b/hw/block/virtio-blk.c
> @@ -92,13 +92,6 @@ static void virtio_blk_rw_complete(void *opaque, int ret)
>          next = req->mr_next;
>          trace_virtio_blk_rw_complete(req, ret);
>  
> -        if (req->qiov.nalloc != -1) {
> -            /* If nalloc is != 1 req->qiov is a local copy of the original
> -             * external iovec. It was allocated in submit_merged_requests
> -             * to be able to merge requests. */
> -            qemu_iovec_destroy(&req->qiov);
> -        }
> -
>          if (ret) {
>              int p = virtio_ldl_p(VIRTIO_DEVICE(req->dev), &req->out.type);
>              bool is_read = !(p & VIRTIO_BLK_T_OUT);
> @@ -109,6 +102,13 @@ static void virtio_blk_rw_complete(void *opaque, int ret)
>  
>          virtio_blk_req_complete(req, VIRTIO_BLK_S_OK);
>          block_acct_done(blk_get_stats(req->dev->blk), &req->acct);
> +
> +        if (req->qiov.nalloc != -1) {
> +            /* This means req->qiov is a local copy of the original external
> +             * iovec. It was allocated in virtio_blk_submit_multireq in order
> +             * to merge requests. */
> +            qemu_iovec_destroy(&req->qiov);
> +        }

We will not come here on I/O failure. It will cause a memory leak.

Thanks
Wen Congyang

>          virtio_blk_free_request(req);
>      }
>  }
> 
> 
> .
> 


* Re: [Qemu-devel] [PATCH] virtio-blk: correctly dirty guest memory
  2015-04-02 16:26         ` Fam Zheng
  2015-04-02 16:36           ` Paolo Bonzini
  2015-04-03  1:22           ` Wen Congyang
@ 2015-04-03  2:47           ` Bin Wu
  2 siblings, 0 replies; 9+ messages in thread
From: Bin Wu @ 2015-04-03  2:47 UTC (permalink / raw)
  To: Fam Zheng, Paolo Bonzini
  Cc: kwolf, hangaohuai, zhang.zhanghailiang, lizhijian, mst,
	qemu-devel, dgilbert, arei.gonglei, stefanha, amit.shah, dgibson,
	peter.huangpeng

On 2015/4/3 0:26, Fam Zheng wrote:
> On Thu, 04/02 17:21, Paolo Bonzini wrote:
>>
>>
>> On 02/04/2015 17:16, Fam Zheng wrote:
>>>>>>>>> After qemu_iovec_destroy, the QEMUIOVector's size is zeroed and
>>>>>>>>> the zero size ultimately is used to compute virtqueue_push's len
>>>>>>>>> argument.  Therefore, reads from virtio-blk devices did not
>>>>>>>>> migrate their results correctly.  (Writes were okay).
>>>>>>>
>>>>>>> Can't we move qemu_iovec_destroy to virtio_blk_free_request?
>>>>>
>>>>> You would still have to add more code to differentiate reads and
>>>>> writes---I think.
>>> Yeah, but the extra field will not be needed.
>>
>> Can you post an alternative patch?  One small complication is that
>> is_write is in mrb but not in mrb->reqs[x].  virtio_blk_rw_complete is
>> already doing
>>
>>     int p = virtio_ldl_p(VIRTIO_DEVICE(req->dev), &req->out.type);
>>     bool is_read = !(p & VIRTIO_BLK_T_OUT);
>>
>> but only in a slow path.
> 
> OK, so it looks like a new field is the simplest way to achieve this.
> 
> There is another problem with your patch - read_size is not initialized in
> non-RW paths like scsi and flush.
> 
> I think the optimization for write is a separate thing, though. Shouldn't the
> patch below already fix the migration issue?
> 
> diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
> index 000c38d..ee6e198 100644
> --- a/hw/block/virtio-blk.c
> +++ b/hw/block/virtio-blk.c
> @@ -92,13 +92,6 @@ static void virtio_blk_rw_complete(void *opaque, int ret)
>          next = req->mr_next;
>          trace_virtio_blk_rw_complete(req, ret);
>  
> -        if (req->qiov.nalloc != -1) {
> -            /* If nalloc is != 1 req->qiov is a local copy of the original
> -             * external iovec. It was allocated in submit_merged_requests
> -             * to be able to merge requests. */
> -            qemu_iovec_destroy(&req->qiov);
> -        }
> -
>          if (ret) {
>              int p = virtio_ldl_p(VIRTIO_DEVICE(req->dev), &req->out.type);
>              bool is_read = !(p & VIRTIO_BLK_T_OUT);
> @@ -109,6 +102,13 @@ static void virtio_blk_rw_complete(void *opaque, int ret)
>  
>          virtio_blk_req_complete(req, VIRTIO_BLK_S_OK);
>          block_acct_done(blk_get_stats(req->dev->blk), &req->acct);
> +
> +        if (req->qiov.nalloc != -1) {
> +            /* This means req->qiov is a local copy of the original external
> +             * iovec. It was allocated in virtio_blk_submit_multireq in order
> +             * to merge requests. */
> +            qemu_iovec_destroy(&req->qiov);
> +        }
>          virtio_blk_free_request(req);
>      }
>  }
> 
> 
> 
> .
> 

Can we allocate a new request for the merged requests?

diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
index 000c38d..d39381f 100644
--- a/hw/block/virtio-blk.c
+++ b/hw/block/virtio-blk.c
@@ -92,11 +92,10 @@ static void virtio_blk_rw_complete(void *opaque, int ret)
         next = req->mr_next;
         trace_virtio_blk_rw_complete(req, ret);

-        if (req->qiov.nalloc != -1) {
-            /* If nalloc is != 1 req->qiov is a local copy of the original
-             * external iovec. It was allocated in submit_merged_requests
-             * to be able to merge requests. */
+        if (req->in == NULL) {
             qemu_iovec_destroy(&req->qiov);
+            virtio_blk_free_request(req);
+            continue;
         }

         if (ret) {
@@ -313,29 +312,33 @@ static void virtio_blk_handle_scsi(VirtIOBlockReq *req)
 static inline void submit_requests(BlockBackend *blk, MultiReqBuffer *mrb,
                                    int start, int num_reqs, int niov)
 {
-    QEMUIOVector *qiov = &mrb->reqs[start]->qiov;
+    VirtIOBlockReq *merged_request;
+    QEMUIOVector *qiov;
     int64_t sector_num = mrb->reqs[start]->sector_num;
-    int nb_sectors = mrb->reqs[start]->qiov.size / BDRV_SECTOR_SIZE;
+    int nb_sectors = 0;
     bool is_write = mrb->is_write;

     if (num_reqs > 1) {
         int i;
-        struct iovec *tmp_iov = qiov->iov;
-        int tmp_niov = qiov->niov;

-        /* mrb->reqs[start]->qiov was initialized from external so we can't
-         * modifiy it here. We need to initialize it locally and then add the
-         * external iovecs. */
-        qemu_iovec_init(qiov, niov)
+        merged_request = virtio_blk_alloc_request(mrb->reqs[start]->dev);

-        for (i = 0; i < tmp_niov; i++) {
-            qemu_iovec_add(qiov, tmp_iov[i].iov_base, tmp_iov[i].iov_len);
-        }
+        /* use the 'in' field to judge whether the request is
+           a merged request */
+        merged_request->in = NULL;
+
+        qiov = &merged_request->qiov;
+        qemu_iovec_init(qiov, niov);

-        for (i = start + 1; i < start + num_reqs; i++) {
+        for (i = start; i < start + num_reqs; i++) {
             qemu_iovec_concat(qiov, &mrb->reqs[i]->qiov, 0,
                               mrb->reqs[i]->qiov.size);
-            mrb->reqs[i - 1]->mr_next = mrb->reqs[i];
+            if (i > start) {
+                mrb->reqs[i - 1]->mr_next = mrb->reqs[i];
+            } else {
+                merged_request->mr_next = mrb->reqs[i];
+            }
+
             nb_sectors += mrb->reqs[i]->qiov.size / BDRV_SECTOR_SIZE;
         }
         assert(nb_sectors == qiov->size / BDRV_SECTOR_SIZE);
@@ -345,14 +348,18 @@ static inline void submit_requests(BlockBackend *blk, MultiReqBuffer *mrb,
         block_acct_merge_done(blk_get_stats(blk),
                               is_write ? BLOCK_ACCT_WRITE : BLOCK_ACCT_READ,
                               num_reqs - 1);
+    } else {
+        merged_request = mrb->reqs[start];
+        qiov = &mrb->reqs[start]->qiov;
+        nb_sectors = mrb->reqs[start]->qiov.size / BDRV_SECTOR_SIZE;
     }

     if (is_write) {
         blk_aio_writev(blk, sector_num, qiov, nb_sectors,
-                       virtio_blk_rw_complete, mrb->reqs[start]);
+                       virtio_blk_rw_complete, merged_request);
     } else {
         blk_aio_readv(blk, sector_num, qiov, nb_sectors,
-                      virtio_blk_rw_complete, mrb->reqs[start]);
+                      virtio_blk_rw_complete, merged_request);
     }
 }


-- 
Bin Wu

