* [RFC PATCH] vhost-vdpa: Do not send empty IOTLB update batches
@ 2021-08-04 14:44 Eugenio Pérez
  2021-08-05  6:16 ` Jason Wang
  0 siblings, 1 reply; 5+ messages in thread
From: Eugenio Pérez @ 2021-08-04 14:44 UTC (permalink / raw)
  To: qemu-devel; +Cc: Jason Wang, Eli Cohen, Cindy Lu, Michael S. Tsirkin

With the introduction of batch hinting, meaningless batches with no IOTLB
updates can be created if the memory region was skipped by
vhost_vdpa_listener_skipped_section. This is the case for host notifier
memory regions, but others could fall into this category. This caused the
vdpa device to receive DMA mapping updates with no changes, a possibly
expensive operation for nothing.

To avoid that, the VHOST_IOTLB_BATCH_BEGIN hint is delayed until we have a
meaningful (not skipped) mapping or unmapping operation, and
VHOST_IOTLB_BATCH_END is not written unless at least one _UPDATE /
_INVALIDATE has been issued.

Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
---
 include/hw/virtio/vhost-vdpa.h |  1 +
 hw/virtio/vhost-vdpa.c         | 38 +++++++++++++++++++++++-----------
 2 files changed, 27 insertions(+), 12 deletions(-)

diff --git a/include/hw/virtio/vhost-vdpa.h b/include/hw/virtio/vhost-vdpa.h
index e98e327f12..0a7edbe4eb 100644
--- a/include/hw/virtio/vhost-vdpa.h
+++ b/include/hw/virtio/vhost-vdpa.h
@@ -23,6 +23,7 @@ typedef struct vhost_vdpa {
     int device_fd;
     int index;
     uint32_t msg_type;
+    size_t n_iotlb_sent_batch;
     MemoryListener listener;
     struct vhost_dev *dev;
     VhostVDPAHostNotifier notifier[VIRTIO_QUEUE_MAX];
diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
index 6ce94a1f4d..2517fc6103 100644
--- a/hw/virtio/vhost-vdpa.c
+++ b/hw/virtio/vhost-vdpa.c
@@ -89,19 +89,13 @@ static int vhost_vdpa_dma_unmap(struct vhost_vdpa *v, hwaddr iova,
     return ret;
 }
 
-static void vhost_vdpa_listener_begin(MemoryListener *listener)
+static void vhost_vdpa_listener_begin_batch(struct vhost_vdpa *v)
 {
-    struct vhost_vdpa *v = container_of(listener, struct vhost_vdpa, listener);
-    struct vhost_dev *dev = v->dev;
-    struct vhost_msg_v2 msg = {};
     int fd = v->device_fd;
-
-    if (!(dev->backend_cap & (0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH))) {
-        return;
-    }
-
-    msg.type = v->msg_type;
-    msg.iotlb.type = VHOST_IOTLB_BATCH_BEGIN;
+    struct vhost_msg_v2 msg = {
+        .type = v->msg_type,
+        .iotlb.type = VHOST_IOTLB_BATCH_BEGIN,
+    };
 
     if (write(fd, &msg, sizeof(msg)) != sizeof(msg)) {
         error_report("failed to write, fd=%d, errno=%d (%s)",
@@ -120,6 +114,11 @@ static void vhost_vdpa_listener_commit(MemoryListener *listener)
         return;
     }
 
+    if (v->n_iotlb_sent_batch == 0) {
+        return;
+    }
+
+    v->n_iotlb_sent_batch = 0;
     msg.type = v->msg_type;
     msg.iotlb.type = VHOST_IOTLB_BATCH_END;
 
@@ -170,6 +169,14 @@ static void vhost_vdpa_listener_region_add(MemoryListener *listener,
 
     llsize = int128_sub(llend, int128_make64(iova));
 
+    if (v->dev->backend_cap & (0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH)) {
+        if (v->n_iotlb_sent_batch == 0) {
+            vhost_vdpa_listener_begin_batch(v);
+        }
+
+        v->n_iotlb_sent_batch++;
+    }
+
     ret = vhost_vdpa_dma_map(v, iova, int128_get64(llsize),
                              vaddr, section->readonly);
     if (ret) {
@@ -221,6 +228,14 @@ static void vhost_vdpa_listener_region_del(MemoryListener *listener,
 
     llsize = int128_sub(llend, int128_make64(iova));
 
+    if (v->dev->backend_cap & (0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH)) {
+        if (v->n_iotlb_sent_batch == 0) {
+            vhost_vdpa_listener_begin_batch(v);
+        }
+
+        v->n_iotlb_sent_batch++;
+    }
+
     ret = vhost_vdpa_dma_unmap(v, iova, int128_get64(llsize));
     if (ret) {
         error_report("vhost_vdpa dma unmap error!");
@@ -234,7 +249,6 @@ static void vhost_vdpa_listener_region_del(MemoryListener *listener,
  * depends on the addnop().
  */
 static const MemoryListener vhost_vdpa_memory_listener = {
-    .begin = vhost_vdpa_listener_begin,
     .commit = vhost_vdpa_listener_commit,
     .region_add = vhost_vdpa_listener_region_add,
     .region_del = vhost_vdpa_listener_region_del,
-- 
2.27.0




* Re: [RFC PATCH] vhost-vdpa: Do not send empty IOTLB update batches
  2021-08-04 14:44 [RFC PATCH] vhost-vdpa: Do not send empty IOTLB update batches Eugenio Pérez
@ 2021-08-05  6:16 ` Jason Wang
  2021-08-05  7:04   ` Eugenio Perez Martin
  0 siblings, 1 reply; 5+ messages in thread
From: Jason Wang @ 2021-08-05  6:16 UTC (permalink / raw)
  To: Eugenio Pérez; +Cc: Eli Cohen, qemu-devel, Cindy Lu, Michael S. Tsirkin

On Wed, Aug 4, 2021 at 10:44 PM Eugenio Pérez <eperezma@redhat.com> wrote:
>
> With the introduction of the batch hinting, meaningless batches can be
> created with no IOTLB updates if the memory region was skipped by
> vhost_vdpa_listener_skipped_section. This is the case of host notifiers
> memory regions, but others could fall on this category. This caused the
> vdpa device to receive dma mapping settings with no changes, a possibly
> expensive operation for nothing.
>
> To avoid that, VHOST_IOTLB_BATCH_BEGIN hint is delayed until we have a
> meaningful (not skipped section) mapping or unmapping operation, and
> VHOST_IOTLB_BATCH_END is not written unless at least one of _UPDATE /
> _INVALIDATE has been issued.

Hi Eugenio:

This should work, but it looks to me that it's better to optimize the
kernel, e.g. to make sure we don't send set_map() if there is no real
update between batch start and end.

This makes sure that it can work for all kinds of userspace (not only for QEMU).
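
A minimal sketch of that kernel-side idea: track whether anything actually
changed inside a batch and skip set_map() otherwise. The struct and function
names below are only illustrative, not the real drivers/vhost/vdpa.c code:

#include <stdbool.h>

/* Illustrative only: skip the (possibly expensive) set_map() call
 * when an IOTLB batch turns out to be empty. */
struct iotlb_batch_state {
    bool in_batch;     /* between BATCH_BEGIN and BATCH_END */
    bool map_changed;  /* set by any UPDATE/INVALIDATE in the batch */
};

static void batch_begin(struct iotlb_batch_state *s)
{
    s->in_batch = true;
    s->map_changed = false;
}

static void iotlb_updated(struct iotlb_batch_state *s)
{
    /* called from the UPDATE and INVALIDATE handling paths */
    s->map_changed = true;
}

static void batch_end(struct iotlb_batch_state *s, void (*set_map)(void))
{
    s->in_batch = false;
    if (!s->map_changed) {
        return;    /* empty batch: nothing to push to the device */
    }
    set_map();
}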

Another optimization is to batch the memory region transactions by adding
memory_region_transaction_begin() and memory_region_transaction_commit() in
both vhost_vdpa_host_notifiers_init() and vhost_vdpa_host_notifiers_uninit().

This makes sure there is only one memory transaction when adding/removing
the host notifier regions.
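
A minimal sketch of that wrapping; the existing per-queue notifier loop is
left untouched and elided here, and vhost_vdpa_host_notifiers_uninit() would
be wrapped the same way:

static void vhost_vdpa_host_notifiers_init(struct vhost_dev *dev)
{
    /* One transaction around the whole loop, so the memory listener
     * sees a single update instead of one per queue. */
    memory_region_transaction_begin();

    /* ... existing vhost_vdpa_host_notifier_init() loop over the
     *     device's virtqueues, unchanged ... */

    memory_region_transaction_commit();
}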

Thanks

>
> Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
> ---
>  include/hw/virtio/vhost-vdpa.h |  1 +
>  hw/virtio/vhost-vdpa.c         | 38 +++++++++++++++++++++++-----------
>  2 files changed, 27 insertions(+), 12 deletions(-)
>
> diff --git a/include/hw/virtio/vhost-vdpa.h b/include/hw/virtio/vhost-vdpa.h
> index e98e327f12..0a7edbe4eb 100644
> --- a/include/hw/virtio/vhost-vdpa.h
> +++ b/include/hw/virtio/vhost-vdpa.h
> @@ -23,6 +23,7 @@ typedef struct vhost_vdpa {
>      int device_fd;
>      int index;
>      uint32_t msg_type;
> +    size_t n_iotlb_sent_batch;
>      MemoryListener listener;
>      struct vhost_dev *dev;
>      VhostVDPAHostNotifier notifier[VIRTIO_QUEUE_MAX];
> diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
> index 6ce94a1f4d..2517fc6103 100644
> --- a/hw/virtio/vhost-vdpa.c
> +++ b/hw/virtio/vhost-vdpa.c
> @@ -89,19 +89,13 @@ static int vhost_vdpa_dma_unmap(struct vhost_vdpa *v, hwaddr iova,
>      return ret;
>  }
>
> -static void vhost_vdpa_listener_begin(MemoryListener *listener)
> +static void vhost_vdpa_listener_begin_batch(struct vhost_vdpa *v)
>  {
> -    struct vhost_vdpa *v = container_of(listener, struct vhost_vdpa, listener);
> -    struct vhost_dev *dev = v->dev;
> -    struct vhost_msg_v2 msg = {};
>      int fd = v->device_fd;
> -
> -    if (!(dev->backend_cap & (0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH))) {
> -        return;
> -    }
> -
> -    msg.type = v->msg_type;
> -    msg.iotlb.type = VHOST_IOTLB_BATCH_BEGIN;
> +    struct vhost_msg_v2 msg = {
> +        .type = v->msg_type,
> +        .iotlb.type = VHOST_IOTLB_BATCH_BEGIN,
> +    };
>
>      if (write(fd, &msg, sizeof(msg)) != sizeof(msg)) {
>          error_report("failed to write, fd=%d, errno=%d (%s)",
> @@ -120,6 +114,11 @@ static void vhost_vdpa_listener_commit(MemoryListener *listener)
>          return;
>      }
>
> +    if (v->n_iotlb_sent_batch == 0) {
> +        return;
> +    }
> +
> +    v->n_iotlb_sent_batch = 0;
>      msg.type = v->msg_type;
>      msg.iotlb.type = VHOST_IOTLB_BATCH_END;
>
> @@ -170,6 +169,14 @@ static void vhost_vdpa_listener_region_add(MemoryListener *listener,
>
>      llsize = int128_sub(llend, int128_make64(iova));
>
> +    if (v->dev->backend_cap & (0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH)) {
> +        if (v->n_iotlb_sent_batch == 0) {
> +            vhost_vdpa_listener_begin_batch(v);
> +        }
> +
> +        v->n_iotlb_sent_batch++;
> +    }
> +
>      ret = vhost_vdpa_dma_map(v, iova, int128_get64(llsize),
>                               vaddr, section->readonly);
>      if (ret) {
> @@ -221,6 +228,14 @@ static void vhost_vdpa_listener_region_del(MemoryListener *listener,
>
>      llsize = int128_sub(llend, int128_make64(iova));
>
> +    if (v->dev->backend_cap & (0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH)) {
> +        if (v->n_iotlb_sent_batch == 0) {
> +            vhost_vdpa_listener_begin_batch(v);
> +        }
> +
> +        v->n_iotlb_sent_batch++;
> +    }
> +
>      ret = vhost_vdpa_dma_unmap(v, iova, int128_get64(llsize));
>      if (ret) {
>          error_report("vhost_vdpa dma unmap error!");
> @@ -234,7 +249,6 @@ static void vhost_vdpa_listener_region_del(MemoryListener *listener,
>   * depends on the addnop().
>   */
>  static const MemoryListener vhost_vdpa_memory_listener = {
> -    .begin = vhost_vdpa_listener_begin,
>      .commit = vhost_vdpa_listener_commit,
>      .region_add = vhost_vdpa_listener_region_add,
>      .region_del = vhost_vdpa_listener_region_del,
> --
> 2.27.0
>




* Re: [RFC PATCH] vhost-vdpa: Do not send empty IOTLB update batches
  2021-08-05  6:16 ` Jason Wang
@ 2021-08-05  7:04   ` Eugenio Perez Martin
  2021-08-05  7:10     ` Jason Wang
  0 siblings, 1 reply; 5+ messages in thread
From: Eugenio Perez Martin @ 2021-08-05  7:04 UTC (permalink / raw)
  To: Jason Wang; +Cc: Eli Cohen, qemu-devel, Cindy Lu, Michael S. Tsirkin

On Thu, Aug 5, 2021 at 8:16 AM Jason Wang <jasowang@redhat.com> wrote:
>
> On Wed, Aug 4, 2021 at 10:44 PM Eugenio Pérez <eperezma@redhat.com> wrote:
> >
> > With the introduction of the batch hinting, meaningless batches can be
> > created with no IOTLB updates if the memory region was skipped by
> > vhost_vdpa_listener_skipped_section. This is the case of host notifiers
> > memory regions, but others could fall on this category. This caused the
> > vdpa device to receive dma mapping settings with no changes, a possibly
> > expensive operation for nothing.
> >
> > To avoid that, VHOST_IOTLB_BATCH_BEGIN hint is delayed until we have a
> > meaningful (not skipped section) mapping or unmapping operation, and
> > VHOST_IOTLB_BATCH_END is not written unless at least one of _UPDATE /
> > _INVALIDATE has been issued.
>
> Hi Eugeni:
>
> This should work but it looks to me it's better to optimize the kernel.
>
> E.g to make sure we don't send set_map() if there is no real updating
> between batch start and end.
>

Hi Jason,

I think we should do both in parallel anyway. We also obtain a (not yet
measured) decrease in startup time for QEMU with vdpa this way, for
example. Of course, this particular RFC has room to improve or to change
completely.

I've made these changes in the kernel too, just counting the number of
memory updates and not calling set_map if no actual changes have been
made.

> This makes sure that it can work for all kinds of userspace (not only for Qemu).
>
> Another optimization is to batch the memory region transaction by adding:
>
> memory_region_transaction_begin() and memory_region_transaction_end() in
>
> both vhost_vdpa_host_notifiers_init() and vhost_vdpa_host_notifiers_uninit().
>
> This can make sure only at least one memory transactions when
> adding/removing host notifier regions.
>

That solves the updates of the memory regions, but it does not solve the
case where the updated memory regions are not interesting to the device.
In other words, where all the changes meet
vhost_vdpa_listener_skipped_section() == true. I did not spend a lot of
time trying to trigger these cases though; maybe it happens when
hot-plugging a device, for example?

We could abstract these changes in memory_region_transaction_begin() /
memory_region_transaction_commit() wrappers though.

Thanks!

> Thanks
>
> >
> > Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
> > ---
> >  include/hw/virtio/vhost-vdpa.h |  1 +
> >  hw/virtio/vhost-vdpa.c         | 38 +++++++++++++++++++++++-----------
> >  2 files changed, 27 insertions(+), 12 deletions(-)
> >
> > diff --git a/include/hw/virtio/vhost-vdpa.h b/include/hw/virtio/vhost-vdpa.h
> > index e98e327f12..0a7edbe4eb 100644
> > --- a/include/hw/virtio/vhost-vdpa.h
> > +++ b/include/hw/virtio/vhost-vdpa.h
> > @@ -23,6 +23,7 @@ typedef struct vhost_vdpa {
> >      int device_fd;
> >      int index;
> >      uint32_t msg_type;
> > +    size_t n_iotlb_sent_batch;
> >      MemoryListener listener;
> >      struct vhost_dev *dev;
> >      VhostVDPAHostNotifier notifier[VIRTIO_QUEUE_MAX];
> > diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
> > index 6ce94a1f4d..2517fc6103 100644
> > --- a/hw/virtio/vhost-vdpa.c
> > +++ b/hw/virtio/vhost-vdpa.c
> > @@ -89,19 +89,13 @@ static int vhost_vdpa_dma_unmap(struct vhost_vdpa *v, hwaddr iova,
> >      return ret;
> >  }
> >
> > -static void vhost_vdpa_listener_begin(MemoryListener *listener)
> > +static void vhost_vdpa_listener_begin_batch(struct vhost_vdpa *v)
> >  {
> > -    struct vhost_vdpa *v = container_of(listener, struct vhost_vdpa, listener);
> > -    struct vhost_dev *dev = v->dev;
> > -    struct vhost_msg_v2 msg = {};
> >      int fd = v->device_fd;
> > -
> > -    if (!(dev->backend_cap & (0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH))) {
> > -        return;
> > -    }
> > -
> > -    msg.type = v->msg_type;
> > -    msg.iotlb.type = VHOST_IOTLB_BATCH_BEGIN;
> > +    struct vhost_msg_v2 msg = {
> > +        .type = v->msg_type,
> > +        .iotlb.type = VHOST_IOTLB_BATCH_BEGIN,
> > +    };
> >
> >      if (write(fd, &msg, sizeof(msg)) != sizeof(msg)) {
> >          error_report("failed to write, fd=%d, errno=%d (%s)",
> > @@ -120,6 +114,11 @@ static void vhost_vdpa_listener_commit(MemoryListener *listener)
> >          return;
> >      }
> >
> > +    if (v->n_iotlb_sent_batch == 0) {
> > +        return;
> > +    }
> > +
> > +    v->n_iotlb_sent_batch = 0;
> >      msg.type = v->msg_type;
> >      msg.iotlb.type = VHOST_IOTLB_BATCH_END;
> >
> > @@ -170,6 +169,14 @@ static void vhost_vdpa_listener_region_add(MemoryListener *listener,
> >
> >      llsize = int128_sub(llend, int128_make64(iova));
> >
> > +    if (v->dev->backend_cap & (0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH)) {
> > +        if (v->n_iotlb_sent_batch == 0) {
> > +            vhost_vdpa_listener_begin_batch(v);
> > +        }
> > +
> > +        v->n_iotlb_sent_batch++;
> > +    }
> > +
> >      ret = vhost_vdpa_dma_map(v, iova, int128_get64(llsize),
> >                               vaddr, section->readonly);
> >      if (ret) {
> > @@ -221,6 +228,14 @@ static void vhost_vdpa_listener_region_del(MemoryListener *listener,
> >
> >      llsize = int128_sub(llend, int128_make64(iova));
> >
> > +    if (v->dev->backend_cap & (0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH)) {
> > +        if (v->n_iotlb_sent_batch == 0) {
> > +            vhost_vdpa_listener_begin_batch(v);
> > +        }
> > +
> > +        v->n_iotlb_sent_batch++;
> > +    }
> > +
> >      ret = vhost_vdpa_dma_unmap(v, iova, int128_get64(llsize));
> >      if (ret) {
> >          error_report("vhost_vdpa dma unmap error!");
> > @@ -234,7 +249,6 @@ static void vhost_vdpa_listener_region_del(MemoryListener *listener,
> >   * depends on the addnop().
> >   */
> >  static const MemoryListener vhost_vdpa_memory_listener = {
> > -    .begin = vhost_vdpa_listener_begin,
> >      .commit = vhost_vdpa_listener_commit,
> >      .region_add = vhost_vdpa_listener_region_add,
> >      .region_del = vhost_vdpa_listener_region_del,
> > --
> > 2.27.0
> >
>




* Re: [RFC PATCH] vhost-vdpa: Do not send empty IOTLB update batches
  2021-08-05  7:04   ` Eugenio Perez Martin
@ 2021-08-05  7:10     ` Jason Wang
  2021-08-11 16:40       ` Eugenio Perez Martin
  0 siblings, 1 reply; 5+ messages in thread
From: Jason Wang @ 2021-08-05  7:10 UTC (permalink / raw)
  To: Eugenio Perez Martin; +Cc: Eli Cohen, qemu-devel, Cindy Lu, Michael S. Tsirkin


On 2021/8/5 3:04 PM, Eugenio Perez Martin wrote:
> On Thu, Aug 5, 2021 at 8:16 AM Jason Wang <jasowang@redhat.com> wrote:
>> On Wed, Aug 4, 2021 at 10:44 PM Eugenio Pérez <eperezma@redhat.com> wrote:
>>> With the introduction of the batch hinting, meaningless batches can be
>>> created with no IOTLB updates if the memory region was skipped by
>>> vhost_vdpa_listener_skipped_section. This is the case of host notifiers
>>> memory regions, but others could fall on this category. This caused the
>>> vdpa device to receive dma mapping settings with no changes, a possibly
>>> expensive operation for nothing.
>>>
>>> To avoid that, VHOST_IOTLB_BATCH_BEGIN hint is delayed until we have a
>>> meaningful (not skipped section) mapping or unmapping operation, and
>>> VHOST_IOTLB_BATCH_END is not written unless at least one of _UPDATE /
>>> _INVALIDATE has been issued.
>> Hi Eugeni:
>>
>> This should work but it looks to me it's better to optimize the kernel.
>>
>> E.g to make sure we don't send set_map() if there is no real updating
>> between batch start and end.
>>
> Hi Jason,
>
> I think we should do both in parallel anyway.


Ok, I'm fine with this.


>   We also obtain an
> (unmeasured at this moment) decrease in startup time for qemu with
> vdpa this way, for example. I consider that this particular RFC has
> room to improve or change totally of course.
>
> I've made these changes in the kernel too, just counting the number of
> memory updates and not calling set_map if no actual changes have been
> made.


Right, that is what we want to have.


>
>> This makes sure that it can work for all kinds of userspace (not only for Qemu).
>>
>> Another optimization is to batch the memory region transaction by adding:
>>
>> memory_region_transaction_begin() and memory_region_transaction_end() in
>>
>> both vhost_vdpa_host_notifiers_init() and vhost_vdpa_host_notifiers_uninit().
>>
>> This can make sure only at least one memory transactions when
>> adding/removing host notifier regions.
>>
> That solves the updates about memory regions, but it does not solve
> the case where updating memory regions are not interesting to the
> device.


Kind of, I guess with this we only get one more set_map().


> In other words, where all the changes meet
> vhost_vdpa_listener_skipped_section() == true. I did not spend a lot
> of time trying to raise these though, maybe it happens when
> hot-plugging a device, for example?


Yes, so the transaction is a per-device optimization that can't help in this case.


>
> We could abstract these changes in memory_region_transaction_begin() /
> memory_region_transaction_end() wrappers though.
>
> Thanks!
>
>> Thanks
>>
>>> Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
>>> ---
>>>   include/hw/virtio/vhost-vdpa.h |  1 +
>>>   hw/virtio/vhost-vdpa.c         | 38 +++++++++++++++++++++++-----------
>>>   2 files changed, 27 insertions(+), 12 deletions(-)
>>>
>>> diff --git a/include/hw/virtio/vhost-vdpa.h b/include/hw/virtio/vhost-vdpa.h
>>> index e98e327f12..0a7edbe4eb 100644
>>> --- a/include/hw/virtio/vhost-vdpa.h
>>> +++ b/include/hw/virtio/vhost-vdpa.h
>>> @@ -23,6 +23,7 @@ typedef struct vhost_vdpa {
>>>       int device_fd;
>>>       int index;
>>>       uint32_t msg_type;
>>> +    size_t n_iotlb_sent_batch;


Not a native speaker, but we probably need a better name, e.g. "n_mr_updated"?


>>>       MemoryListener listener;
>>>       struct vhost_dev *dev;
>>>       VhostVDPAHostNotifier notifier[VIRTIO_QUEUE_MAX];
>>> diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
>>> index 6ce94a1f4d..2517fc6103 100644
>>> --- a/hw/virtio/vhost-vdpa.c
>>> +++ b/hw/virtio/vhost-vdpa.c
>>> @@ -89,19 +89,13 @@ static int vhost_vdpa_dma_unmap(struct vhost_vdpa *v, hwaddr iova,
>>>       return ret;
>>>   }
>>>
>>> -static void vhost_vdpa_listener_begin(MemoryListener *listener)
>>> +static void vhost_vdpa_listener_begin_batch(struct vhost_vdpa *v)
>>>   {
>>> -    struct vhost_vdpa *v = container_of(listener, struct vhost_vdpa, listener);
>>> -    struct vhost_dev *dev = v->dev;
>>> -    struct vhost_msg_v2 msg = {};
>>>       int fd = v->device_fd;
>>> -
>>> -    if (!(dev->backend_cap & (0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH))) {
>>> -        return;
>>> -    }
>>> -
>>> -    msg.type = v->msg_type;
>>> -    msg.iotlb.type = VHOST_IOTLB_BATCH_BEGIN;
>>> +    struct vhost_msg_v2 msg = {
>>> +        .type = v->msg_type,
>>> +        .iotlb.type = VHOST_IOTLB_BATCH_BEGIN,
>>> +    };
>>>
>>>       if (write(fd, &msg, sizeof(msg)) != sizeof(msg)) {
>>>           error_report("failed to write, fd=%d, errno=%d (%s)",
>>> @@ -120,6 +114,11 @@ static void vhost_vdpa_listener_commit(MemoryListener *listener)
>>>           return;
>>>       }
>>>
>>> +    if (v->n_iotlb_sent_batch == 0) {
>>> +        return;
>>> +    }
>>> +
>>> +    v->n_iotlb_sent_batch = 0;
>>>       msg.type = v->msg_type;
>>>       msg.iotlb.type = VHOST_IOTLB_BATCH_END;
>>>
>>> @@ -170,6 +169,14 @@ static void vhost_vdpa_listener_region_add(MemoryListener *listener,
>>>
>>>       llsize = int128_sub(llend, int128_make64(iova));
>>>
>>> +    if (v->dev->backend_cap & (0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH)) {
>>> +        if (v->n_iotlb_sent_batch == 0) {
>>> +            vhost_vdpa_listener_begin_batch(v);
>>> +        }
>>> +
>>> +        v->n_iotlb_sent_batch++;
>>> +    }


Let's abstract this as a helper to be reused by region_del.
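
For example, something along these lines (the helper name is just a
suggestion), called from both region_add and region_del right before
vhost_vdpa_dma_map()/vhost_vdpa_dma_unmap():

static void vhost_vdpa_iotlb_batch_begin_once(struct vhost_vdpa *v)
{
    if (!(v->dev->backend_cap & (0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH))) {
        return;
    }

    /* Only send VHOST_IOTLB_BATCH_BEGIN before the first real update. */
    if (v->n_iotlb_sent_batch == 0) {
        vhost_vdpa_listener_begin_batch(v);
    }

    v->n_iotlb_sent_batch++;
}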

The rest looks good.

Thanks


>>> +
>>>       ret = vhost_vdpa_dma_map(v, iova, int128_get64(llsize),
>>>                                vaddr, section->readonly);
>>>       if (ret) {
>>> @@ -221,6 +228,14 @@ static void vhost_vdpa_listener_region_del(MemoryListener *listener,
>>>
>>>       llsize = int128_sub(llend, int128_make64(iova));
>>>
>>> +    if (v->dev->backend_cap & (0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH)) {
>>> +        if (v->n_iotlb_sent_batch == 0) {
>>> +            vhost_vdpa_listener_begin_batch(v);
>>> +        }
>>> +
>>> +        v->n_iotlb_sent_batch++;
>>> +    }
>>> +
>>>       ret = vhost_vdpa_dma_unmap(v, iova, int128_get64(llsize));
>>>       if (ret) {
>>>           error_report("vhost_vdpa dma unmap error!");
>>> @@ -234,7 +249,6 @@ static void vhost_vdpa_listener_region_del(MemoryListener *listener,
>>>    * depends on the addnop().
>>>    */
>>>   static const MemoryListener vhost_vdpa_memory_listener = {
>>> -    .begin = vhost_vdpa_listener_begin,
>>>       .commit = vhost_vdpa_listener_commit,
>>>       .region_add = vhost_vdpa_listener_region_add,
>>>       .region_del = vhost_vdpa_listener_region_del,
>>> --
>>> 2.27.0
>>>




* Re: [RFC PATCH] vhost-vdpa: Do not send empty IOTLB update batches
  2021-08-05  7:10     ` Jason Wang
@ 2021-08-11 16:40       ` Eugenio Perez Martin
  0 siblings, 0 replies; 5+ messages in thread
From: Eugenio Perez Martin @ 2021-08-11 16:40 UTC (permalink / raw)
  To: Jason Wang; +Cc: Eli Cohen, qemu-devel, Cindy Lu, Michael S. Tsirkin

On Thu, Aug 5, 2021 at 9:10 AM Jason Wang <jasowang@redhat.com> wrote:
>
>
> On 2021/8/5 3:04 PM, Eugenio Perez Martin wrote:
> > On Thu, Aug 5, 2021 at 8:16 AM Jason Wang <jasowang@redhat.com> wrote:
> >> On Wed, Aug 4, 2021 at 10:44 PM Eugenio Pérez <eperezma@redhat.com> wrote:
> >>> With the introduction of the batch hinting, meaningless batches can be
> >>> created with no IOTLB updates if the memory region was skipped by
> >>> vhost_vdpa_listener_skipped_section. This is the case of host notifiers
> >>> memory regions, but others could fall on this category. This caused the
> >>> vdpa device to receive dma mapping settings with no changes, a possibly
> >>> expensive operation for nothing.
> >>>
> >>> To avoid that, VHOST_IOTLB_BATCH_BEGIN hint is delayed until we have a
> >>> meaningful (not skipped section) mapping or unmapping operation, and
> >>> VHOST_IOTLB_BATCH_END is not written unless at least one of _UPDATE /
> >>> _INVALIDATE has been issued.
> >> Hi Eugeni:
> >>
> >> This should work but it looks to me it's better to optimize the kernel.
> >>
> >> E.g to make sure we don't send set_map() if there is no real updating
> >> between batch start and end.
> >>
> > Hi Jason,
> >
> > I think we should do both in parallel anyway.
>
>
> Ok, I'm fine with this.
>
>
> >   We also obtain an
> > (unmeasured at this moment) decrease in startup time for qemu with
> > vdpa this way, for example. I consider that this particular RFC has
> > room to improve or change totally of course.
> >
> > I've made these changes in the kernel too, just counting the number of
> > memory updates and not calling set_map if no actual changes have been
> > made.
>
>
> Right, that is what we want to have.
>
>
> >
> >> This makes sure that it can work for all kinds of userspace (not only for Qemu).
> >>
> >> Another optimization is to batch the memory region transaction by adding:
> >>
> >> memory_region_transaction_begin() and memory_region_transaction_end() in
> >>
> >> both vhost_vdpa_host_notifiers_init() and vhost_vdpa_host_notifiers_uninit().
> >>
> >> This can make sure only at least one memory transactions when
> >> adding/removing host notifier regions.
> >>
> > That solves the updates about memory regions, but it does not solve
> > the case where updating memory regions are not interesting to the
> > device.
>
>
> Kind of, I guess with this we only get one more set_map().
>
>
> > In other words, where all the changes meet
> > vhost_vdpa_listener_skipped_section() == true. I did not spend a lot
> > of time trying to raise these though, maybe it happens when
> > hot-plugging a device, for example?
>
>
> Yes, so transaction is per device optimization that can't help in this case.
>

I've left it out, since we already get zero empty IOTLB update commits with
this approach. Let me know if you think it should be included.

>
> >
> > We could abstract these changes in memory_region_transaction_begin() /
> > memory_region_transaction_end() wrappers though.
> >
> > Thanks!
> >
> >> Thanks
> >>
> >>> Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
> >>> ---
> >>>   include/hw/virtio/vhost-vdpa.h |  1 +
> >>>   hw/virtio/vhost-vdpa.c         | 38 +++++++++++++++++++++++-----------
> >>>   2 files changed, 27 insertions(+), 12 deletions(-)
> >>>
> >>> diff --git a/include/hw/virtio/vhost-vdpa.h b/include/hw/virtio/vhost-vdpa.h
> >>> index e98e327f12..0a7edbe4eb 100644
> >>> --- a/include/hw/virtio/vhost-vdpa.h
> >>> +++ b/include/hw/virtio/vhost-vdpa.h
> >>> @@ -23,6 +23,7 @@ typedef struct vhost_vdpa {
> >>>       int device_fd;
> >>>       int index;
> >>>       uint32_t msg_type;
> >>> +    size_t n_iotlb_sent_batch;
>
>
> Not a native speaker but we probably need a better name, e.g "n_mr_updated?"
>

I totally agree.

>
> >>>       MemoryListener listener;
> >>>       struct vhost_dev *dev;
> >>>       VhostVDPAHostNotifier notifier[VIRTIO_QUEUE_MAX];
> >>> diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
> >>> index 6ce94a1f4d..2517fc6103 100644
> >>> --- a/hw/virtio/vhost-vdpa.c
> >>> +++ b/hw/virtio/vhost-vdpa.c
> >>> @@ -89,19 +89,13 @@ static int vhost_vdpa_dma_unmap(struct vhost_vdpa *v, hwaddr iova,
> >>>       return ret;
> >>>   }
> >>>
> >>> -static void vhost_vdpa_listener_begin(MemoryListener *listener)
> >>> +static void vhost_vdpa_listener_begin_batch(struct vhost_vdpa *v)
> >>>   {
> >>> -    struct vhost_vdpa *v = container_of(listener, struct vhost_vdpa, listener);
> >>> -    struct vhost_dev *dev = v->dev;
> >>> -    struct vhost_msg_v2 msg = {};
> >>>       int fd = v->device_fd;
> >>> -
> >>> -    if (!(dev->backend_cap & (0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH))) {
> >>> -        return;
> >>> -    }
> >>> -
> >>> -    msg.type = v->msg_type;
> >>> -    msg.iotlb.type = VHOST_IOTLB_BATCH_BEGIN;
> >>> +    struct vhost_msg_v2 msg = {
> >>> +        .type = v->msg_type,
> >>> +        .iotlb.type = VHOST_IOTLB_BATCH_BEGIN,
> >>> +    };
> >>>
> >>>       if (write(fd, &msg, sizeof(msg)) != sizeof(msg)) {
> >>>           error_report("failed to write, fd=%d, errno=%d (%s)",
> >>> @@ -120,6 +114,11 @@ static void vhost_vdpa_listener_commit(MemoryListener *listener)
> >>>           return;
> >>>       }
> >>>
> >>> +    if (v->n_iotlb_sent_batch == 0) {
> >>> +        return;
> >>> +    }
> >>> +
> >>> +    v->n_iotlb_sent_batch = 0;
> >>>       msg.type = v->msg_type;
> >>>       msg.iotlb.type = VHOST_IOTLB_BATCH_END;
> >>>
> >>> @@ -170,6 +169,14 @@ static void vhost_vdpa_listener_region_add(MemoryListener *listener,
> >>>
> >>>       llsize = int128_sub(llend, int128_make64(iova));
> >>>
> >>> +    if (v->dev->backend_cap & (0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH)) {
> >>> +        if (v->n_iotlb_sent_batch == 0) {
> >>> +            vhost_vdpa_listener_begin_batch(v);
> >>> +        }
> >>> +
> >>> +        v->n_iotlb_sent_batch++;
> >>> +    }
>
>
> Let abstract this as a helper to be reused by region_del.
>
> Other looks good.
>
> Thanks
>

I sent a PATCH v2 instead of a non-RFC v1 by mistake. Please let me
know if I should do something to change it.

Thanks!

>
> >>> +
> >>>       ret = vhost_vdpa_dma_map(v, iova, int128_get64(llsize),
> >>>                                vaddr, section->readonly);
> >>>       if (ret) {
> >>> @@ -221,6 +228,14 @@ static void vhost_vdpa_listener_region_del(MemoryListener *listener,
> >>>
> >>>       llsize = int128_sub(llend, int128_make64(iova));
> >>>
> >>> +    if (v->dev->backend_cap & (0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH)) {
> >>> +        if (v->n_iotlb_sent_batch == 0) {
> >>> +            vhost_vdpa_listener_begin_batch(v);
> >>> +        }
> >>> +
> >>> +        v->n_iotlb_sent_batch++;
> >>> +    }
> >>> +
> >>>       ret = vhost_vdpa_dma_unmap(v, iova, int128_get64(llsize));
> >>>       if (ret) {
> >>>           error_report("vhost_vdpa dma unmap error!");
> >>> @@ -234,7 +249,6 @@ static void vhost_vdpa_listener_region_del(MemoryListener *listener,
> >>>    * depends on the addnop().
> >>>    */
> >>>   static const MemoryListener vhost_vdpa_memory_listener = {
> >>> -    .begin = vhost_vdpa_listener_begin,
> >>>       .commit = vhost_vdpa_listener_commit,
> >>>       .region_add = vhost_vdpa_listener_region_add,
> >>>       .region_del = vhost_vdpa_listener_region_del,
> >>> --
> >>> 2.27.0
> >>>
>


