* [RFC 0/5] In virtio-spec 1.1, new feature bit VIRTIO_F_IN_ORDER was introduced.
@ 2022-07-21  8:43 Guo Zhi
  2022-07-21  8:43 ` [RFC 1/5] vhost: reorder used descriptors in a batch Guo Zhi
                   ` (5 more replies)
  0 siblings, 6 replies; 24+ messages in thread
From: Guo Zhi @ 2022-07-21  8:43 UTC (permalink / raw)
  To: eperezma, jasowang, sgarzare, mst
  Cc: netdev, linux-kernel, kvm, virtualization, Guo Zhi

When this feature has been negotiated, the virtio driver will use
descriptors in ring order: starting from offset 0 in the table and
wrapping around at the end of the table. Vhost devices will always use
descriptors in the same order in which they have been made available.
This can reduce the number of virtio accesses to the used ring.
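
As an illustration of the saved accesses, here is a minimal userspace C
sketch (hypothetical names, not code from this series): with IN_ORDER,
one used-ring entry whose id names the last buffer of a batch lets the
driver reclaim the whole contiguous range instead of reading one used
entry per buffer.

#include <stdint.h>
#include <stdio.h>

/* Reclaim descriptors [next_free, batch_last_id] in ring order,
 * assuming one-descriptor buffers: the device wrote a single used
 * entry for the whole batch rather than one entry per buffer.
 */
static uint16_t reclaim_batch(uint16_t next_free, uint16_t batch_last_id,
			      uint16_t qsize)
{
	for (;;) {
		printf("reclaim desc %u\n", next_free);	/* free it */
		if (next_free == batch_last_id)
			break;
		next_free = (next_free + 1) % qsize;
	}
	return (next_free + 1) % qsize;
}

int main(void)
{
	/* buffers 0..4 made available; the device used them as one
	 * batch and wrote a single used entry with id == 4
	 */
	uint16_t next = reclaim_batch(0, 4, 256);

	printf("next expected id: %u\n", next);	/* prints 5 */
	return 0;
}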

Based on the updated virtio spec, this series implements an IN_ORDER
prototype in the virtio driver and vhost.

Guo Zhi (5):
  vhost: reorder used descriptors in a batch
  vhost: announce VIRTIO_F_IN_ORDER support
  vhost_test: batch used buffer
  virtio: get desc id in order
  virtio: announce VIRTIO_F_IN_ORDER support

 drivers/vhost/test.c         | 15 +++++++++++-
 drivers/vhost/vhost.c        | 44 ++++++++++++++++++++++++++++++++++--
 drivers/vhost/vhost.h        |  4 ++++
 drivers/virtio/virtio_ring.c | 39 +++++++++++++++++++++++++-------
 4 files changed, 91 insertions(+), 11 deletions(-)

-- 
2.17.1



* [RFC 1/5] vhost: reorder used descriptors in a batch
  2022-07-21  8:43 [RFC 0/5] In virtio-spec 1.1, new feature bit VIRTIO_F_IN_ORDER was introduced Guo Zhi
@ 2022-07-21  8:43 ` Guo Zhi
  2022-07-22  7:07   ` Eugenio Perez Martin
  2022-07-26  7:36   ` Jason Wang
  2022-07-21  8:43 ` [RFC 2/5] vhost: announce VIRTIO_F_IN_ORDER support Guo Zhi
                   ` (4 subsequent siblings)
  5 siblings, 2 replies; 24+ messages in thread
From: Guo Zhi @ 2022-07-21  8:43 UTC (permalink / raw)
  To: eperezma, jasowang, sgarzare, mst
  Cc: netdev, linux-kernel, kvm, virtualization, Guo Zhi

Devices may not use descriptors in order; for example, NIC and SCSI
devices may not call __vhost_add_used_n with buffers in order.  It's
the task of __vhost_add_used_n to order them.  This commit reorders
the buffers using vq->heads: only when the batch begins at the
expected start point and is contiguous can the batch be exposed to
the driver.  Only a single used ring entry is written out for a batch
of descriptors, according to the VIRTIO 1.1 spec.

Signed-off-by: Guo Zhi <qtxuning1999@sjtu.edu.cn>
---
 drivers/vhost/vhost.c | 44 +++++++++++++++++++++++++++++++++++++++++--
 drivers/vhost/vhost.h |  3 +++
 2 files changed, 45 insertions(+), 2 deletions(-)

diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index 40097826c..e2e77e29f 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -317,6 +317,7 @@ static void vhost_vq_reset(struct vhost_dev *dev,
 	vq->used_flags = 0;
 	vq->log_used = false;
 	vq->log_addr = -1ull;
+	vq->next_used_head_idx = 0;
 	vq->private_data = NULL;
 	vq->acked_features = 0;
 	vq->acked_backend_features = 0;
@@ -398,6 +399,8 @@ static long vhost_dev_alloc_iovecs(struct vhost_dev *dev)
 					  GFP_KERNEL);
 		if (!vq->indirect || !vq->log || !vq->heads)
 			goto err_nomem;
+
+		memset(vq->heads, 0, sizeof(*vq->heads) * dev->iov_limit);
 	}
 	return 0;
 
@@ -2374,12 +2377,49 @@ static int __vhost_add_used_n(struct vhost_virtqueue *vq,
 			    unsigned count)
 {
 	vring_used_elem_t __user *used;
+	struct vring_desc desc;
 	u16 old, new;
 	int start;
+	int begin, end, i;
+	int copy_n = count;
+
+	if (vhost_has_feature(vq, VIRTIO_F_IN_ORDER)) {
+		/* calculate descriptor chain length for each used buffer */
+		for (i = 0; i < count; i++) {
+			begin = heads[i].id;
+			end = begin;
+			vq->heads[begin].len = 0;
+			do {
+				vq->heads[begin].len += 1;
+				if (unlikely(vhost_get_desc(vq, &desc, end))) {
+					vq_err(vq, "Failed to get descriptor: idx %d addr %p\n",
+					       end, vq->desc + end);
+					return -EFAULT;
+				}
+			} while ((end = next_desc(vq, &desc)) != -1);
+		}
+
+		count = 0;
+		/* sort and batch continuous used ring entry */
+		while (vq->heads[vq->next_used_head_idx].len != 0) {
+			count++;
+			i = vq->next_used_head_idx;
+			vq->next_used_head_idx = (vq->next_used_head_idx +
+						  vq->heads[vq->next_used_head_idx].len)
+						  % vq->num;
+			vq->heads[i].len = 0;
+		}
+		/* only write out a single used ring entry with the id corresponding
+		 * to the head entry of the descriptor chain describing the last buffer
+		 * in the batch.
+		 */
+		heads[0].id = i;
+		copy_n = 1;
+	}
 
 	start = vq->last_used_idx & (vq->num - 1);
 	used = vq->used->ring + start;
-	if (vhost_put_used(vq, heads, start, count)) {
+	if (vhost_put_used(vq, heads, start, copy_n)) {
 		vq_err(vq, "Failed to write used");
 		return -EFAULT;
 	}
@@ -2410,7 +2450,7 @@ int vhost_add_used_n(struct vhost_virtqueue *vq, struct vring_used_elem *heads,
 
 	start = vq->last_used_idx & (vq->num - 1);
 	n = vq->num - start;
-	if (n < count) {
+	if (n < count && !vhost_has_feature(vq, VIRTIO_F_IN_ORDER)) {
 		r = __vhost_add_used_n(vq, heads, n);
 		if (r < 0)
 			return r;
diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
index d9109107a..7b2c0fbb5 100644
--- a/drivers/vhost/vhost.h
+++ b/drivers/vhost/vhost.h
@@ -107,6 +107,9 @@ struct vhost_virtqueue {
 	bool log_used;
 	u64 log_addr;
 
+	/* Sort heads in order */
+	u16 next_used_head_idx;
+
 	struct iovec iov[UIO_MAXIOV];
 	struct iovec iotlb_iov[64];
 	struct iovec *indirect;
-- 
2.17.1



* [RFC 2/5] vhost: announce VIRTIO_F_IN_ORDER support
  2022-07-21  8:43 [RFC 0/5] In virtio-spec 1.1, new feature bit VIRTIO_F_IN_ORDER was introduced Guo Zhi
  2022-07-21  8:43 ` [RFC 1/5] vhost: reorder used descriptors in a batch Guo Zhi
@ 2022-07-21  8:43 ` Guo Zhi
  2022-07-21  8:43 ` [RFC 3/5] vhost_test: batch used buffer Guo Zhi
                   ` (3 subsequent siblings)
  5 siblings, 0 replies; 24+ messages in thread
From: Guo Zhi @ 2022-07-21  8:43 UTC (permalink / raw)
  To: eperezma, jasowang, sgarzare, mst
  Cc: netdev, linux-kernel, kvm, virtualization, Guo Zhi

The in-order feature is supported by default in vhost.

Signed-off-by: Guo Zhi <qtxuning1999@sjtu.edu.cn>
---
 drivers/vhost/vhost.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
index 7b2c0fbb5..b425c8f50 100644
--- a/drivers/vhost/vhost.h
+++ b/drivers/vhost/vhost.h
@@ -241,6 +241,7 @@ enum {
 			 (1ULL << VIRTIO_RING_F_EVENT_IDX) |
 			 (1ULL << VHOST_F_LOG_ALL) |
 			 (1ULL << VIRTIO_F_ANY_LAYOUT) |
+			 (1ULL << VIRTIO_F_IN_ORDER) |
 			 (1ULL << VIRTIO_F_VERSION_1)
 };
 
-- 
2.17.1



* [RFC 3/5] vhost_test: batch used buffer
  2022-07-21  8:43 [RFC 0/5] In virtio-spec 1.1, new feature bit VIRTIO_F_IN_ORDER was introduced Guo Zhi
  2022-07-21  8:43 ` [RFC 1/5] vhost: reorder used descriptors in a batch Guo Zhi
  2022-07-21  8:43 ` [RFC 2/5] vhost: announce VIRTIO_F_IN_ORDER support Guo Zhi
@ 2022-07-21  8:43 ` Guo Zhi
  2022-07-22  7:12   ` Eugenio Perez Martin
  2022-07-21  8:43 ` [RFC 4/5] virtio: get desc id in order Guo Zhi
                   ` (2 subsequent siblings)
  5 siblings, 1 reply; 24+ messages in thread
From: Guo Zhi @ 2022-07-21  8:43 UTC (permalink / raw)
  To: eperezma, jasowang, sgarzare, mst
  Cc: netdev, linux-kernel, kvm, virtualization, Guo Zhi

Only add buffers to the used ring when all buffers in a batch have been
used.  And if the in-order feature is negotiated, add randomness to the
order of the used buffers, to test vhost's ability to reorder a batched
buffer.

Signed-off-by: Guo Zhi <qtxuning1999@sjtu.edu.cn>
---
 drivers/vhost/test.c | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/drivers/vhost/test.c b/drivers/vhost/test.c
index bc8e7fb1e..1c9c40c11 100644
--- a/drivers/vhost/test.c
+++ b/drivers/vhost/test.c
@@ -43,6 +43,9 @@ struct vhost_test {
 static void handle_vq(struct vhost_test *n)
 {
 	struct vhost_virtqueue *vq = &n->vqs[VHOST_TEST_VQ];
+	struct vring_used_elem *heads = kmalloc(sizeof(*heads)
+			* vq->num, GFP_KERNEL);
+	int batch_idx = 0;
 	unsigned out, in;
 	int head;
 	size_t len, total_len = 0;
@@ -84,11 +87,21 @@ static void handle_vq(struct vhost_test *n)
 			vq_err(vq, "Unexpected 0 len for TX\n");
 			break;
 		}
-		vhost_add_used_and_signal(&n->dev, vq, head, 0);
+		heads[batch_idx].id = cpu_to_vhost32(vq, head);
+		heads[batch_idx++].len = cpu_to_vhost32(vq, len);
 		total_len += len;
 		if (unlikely(vhost_exceeds_weight(vq, 0, total_len)))
 			break;
 	}
+	if (batch_idx) {
+		if (vhost_has_feature(vq, VIRTIO_F_IN_ORDER) && batch_idx >= 2) {
+			vhost_add_used_and_signal_n(&n->dev, vq, &heads[batch_idx / 2],
+						    batch_idx - batch_idx / 2);
+			vhost_add_used_and_signal_n(&n->dev, vq, heads, batch_idx / 2);
+		} else {
+			vhost_add_used_and_signal_n(&n->dev, vq, heads, batch_idx);
+		}
+	}
 
 	mutex_unlock(&vq->mutex);
 }
-- 
2.17.1



* [RFC 4/5] virtio: get desc id in order
  2022-07-21  8:43 [RFC 0/5] In virtio-spec 1.1, new feature bit VIRTIO_F_IN_ORDER was introduced Guo Zhi
                   ` (2 preceding siblings ...)
  2022-07-21  8:43 ` [RFC 3/5] vhost_test: batch used buffer Guo Zhi
@ 2022-07-21  8:43 ` Guo Zhi
  2022-07-26  8:07   ` Jason Wang
  2022-07-21  8:43 ` [RFC 5/5] virtio: announce VIRTIO_F_IN_ORDER support Guo Zhi
  2022-07-21  9:17 ` [RFC 0/5] In virtio-spec 1.1, new feature bit VIRTIO_F_IN_ORDER was introduced Jason Wang
  5 siblings, 1 reply; 24+ messages in thread
From: Guo Zhi @ 2022-07-21  8:43 UTC (permalink / raw)
  To: eperezma, jasowang, sgarzare, mst
  Cc: netdev, linux-kernel, kvm, virtualization, Guo Zhi

If the in-order feature is negotiated, we can skip the used ring and
get each buffer's desc id sequentially.

Signed-off-by: Guo Zhi <qtxuning1999@sjtu.edu.cn>
---
 drivers/virtio/virtio_ring.c | 37 ++++++++++++++++++++++++++++--------
 1 file changed, 29 insertions(+), 8 deletions(-)

diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index a5ec724c0..4d57a4edc 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -144,6 +144,9 @@ struct vring_virtqueue {
 			/* DMA address and size information */
 			dma_addr_t queue_dma_addr;
 			size_t queue_size_in_bytes;
+
+			/* In order feature batch begin here */
+			u16 next_batch_desc_begin;
 		} split;
 
 		/* Available for packed ring */
@@ -700,8 +703,10 @@ static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head,
 	}
 
 	vring_unmap_one_split(vq, i);
-	vq->split.desc_extra[i].next = vq->free_head;
-	vq->free_head = head;
+	if (!virtio_has_feature(vq->vq.vdev, VIRTIO_F_IN_ORDER)) {
+		vq->split.desc_extra[i].next = vq->free_head;
+		vq->free_head = head;
+	}
 
 	/* Plus final descriptor */
 	vq->vq.num_free++;
@@ -743,7 +748,8 @@ static void *virtqueue_get_buf_ctx_split(struct virtqueue *_vq,
 {
 	struct vring_virtqueue *vq = to_vvq(_vq);
 	void *ret;
-	unsigned int i;
+	__virtio16 nextflag = cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_NEXT);
+	unsigned int i, j;
 	u16 last_used;
 
 	START_USE(vq);
@@ -762,11 +768,24 @@ static void *virtqueue_get_buf_ctx_split(struct virtqueue *_vq,
 	/* Only get used array entries after they have been exposed by host. */
 	virtio_rmb(vq->weak_barriers);
 
-	last_used = (vq->last_used_idx & (vq->split.vring.num - 1));
-	i = virtio32_to_cpu(_vq->vdev,
-			vq->split.vring.used->ring[last_used].id);
-	*len = virtio32_to_cpu(_vq->vdev,
-			vq->split.vring.used->ring[last_used].len);
+	if (virtio_has_feature(_vq->vdev, VIRTIO_F_IN_ORDER)) {
 +		/* Skip used ring and get used desc in order */
+		i = vq->split.next_batch_desc_begin;
+		j = i;
+		while (vq->split.vring.desc[j].flags & nextflag)
+			j = (j + 1) % vq->split.vring.num;
+		/* move to next */
+		j = (j + 1) % vq->split.vring.num;
+		vq->split.next_batch_desc_begin = j;
+
+		/* TODO: len of buffer */
+	} else {
+		last_used = (vq->last_used_idx & (vq->split.vring.num - 1));
+		i = virtio32_to_cpu(_vq->vdev,
+				    vq->split.vring.used->ring[last_used].id);
+		*len = virtio32_to_cpu(_vq->vdev,
+				       vq->split.vring.used->ring[last_used].len);
+	}
 
 	if (unlikely(i >= vq->split.vring.num)) {
 		BAD_RING(vq, "id %u out of range\n", i);
@@ -2234,6 +2253,8 @@ struct virtqueue *__vring_new_virtqueue(unsigned int index,
 	vq->split.avail_flags_shadow = 0;
 	vq->split.avail_idx_shadow = 0;
 
+	vq->split.next_batch_desc_begin = 0;
+
 	/* No callback?  Tell other side not to bother us. */
 	if (!callback) {
 		vq->split.avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT;
-- 
2.17.1



* [RFC 5/5] virtio: announce VIRTIO_F_IN_ORDER support
  2022-07-21  8:43 [RFC 0/5] In virtio-spec 1.1, new feature bit VIRTIO_F_IN_ORDER was introduced Guo Zhi
                   ` (3 preceding siblings ...)
  2022-07-21  8:43 ` [RFC 4/5] virtio: get desc id in order Guo Zhi
@ 2022-07-21  8:43 ` Guo Zhi
  2022-07-21  9:17 ` [RFC 0/5] In virtio-spec 1.1, new feature bit VIRTIO_F_IN_ORDER was introduced Jason Wang
  5 siblings, 0 replies; 24+ messages in thread
From: Guo Zhi @ 2022-07-21  8:43 UTC (permalink / raw)
  To: eperezma, jasowang, sgarzare, mst
  Cc: netdev, linux-kernel, kvm, virtualization, Guo Zhi

The in-order feature is supported by default in virtio.

Signed-off-by: Guo Zhi <qtxuning1999@sjtu.edu.cn>
---
 drivers/virtio/virtio_ring.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 4d57a4edc..458b57df3 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -2398,6 +2398,8 @@ void vring_transport_features(struct virtio_device *vdev)
 			break;
 		case VIRTIO_F_ORDER_PLATFORM:
 			break;
+		case VIRTIO_F_IN_ORDER:
+			break;
 		default:
 			/* We don't understand this bit. */
 			__virtio_clear_bit(vdev, i);
-- 
2.17.1



* Re: [RFC 0/5] In virtio-spec 1.1, new feature bit VIRTIO_F_IN_ORDER was introduced.
  2022-07-21  8:43 [RFC 0/5] In virtio-spec 1.1, new feature bit VIRTIO_F_IN_ORDER was introduced Guo Zhi
                   ` (4 preceding siblings ...)
  2022-07-21  8:43 ` [RFC 5/5] virtio: announce VIRTIO_F_IN_ORDER support Guo Zhi
@ 2022-07-21  9:17 ` Jason Wang
  2022-07-21 11:54   ` Guo Zhi
  5 siblings, 1 reply; 24+ messages in thread
From: Jason Wang @ 2022-07-21  9:17 UTC (permalink / raw)
  To: Guo Zhi
  Cc: eperezma, Stefano Garzarella, mst, netdev, linux-kernel, kvm,
	virtualization

On Thu, Jul 21, 2022 at 4:44 PM Guo Zhi <qtxuning1999@sjtu.edu.cn> wrote:
>
> When this feature has been negotiated, the virtio driver will use
> descriptors in ring order: starting from offset 0 in the table and
> wrapping around at the end of the table. Vhost devices will always use
> descriptors in the same order in which they have been made available.
> This can reduce the number of virtio accesses to the used ring.
>
> Based on the updated virtio spec, this series implements an IN_ORDER
> prototype in the virtio driver and vhost.

Thanks a lot for the series.

I wonder if you can share any performance numbers for this?

Thanks

>
> Guo Zhi (5):
>   vhost: reorder used descriptors in a batch
>   vhost: announce VIRTIO_F_IN_ORDER support
>   vhost_test: batch used buffer
>   virtio: get desc id in order
>   virtio: announce VIRTIO_F_IN_ORDER support
>
>  drivers/vhost/test.c         | 15 +++++++++++-
>  drivers/vhost/vhost.c        | 44 ++++++++++++++++++++++++++++++++++--
>  drivers/vhost/vhost.h        |  4 ++++
>  drivers/virtio/virtio_ring.c | 39 +++++++++++++++++++++++++-------
>  4 files changed, 91 insertions(+), 11 deletions(-)
>
> --
> 2.17.1
>



* Re: [RFC 0/5] In virtio-spec 1.1, new feature bit VIRTIO_F_IN_ORDER was introduced.
  2022-07-21  9:17 ` [RFC 0/5] In virtio-spec 1.1, new feature bit VIRTIO_F_IN_ORDER was introduced Jason Wang
@ 2022-07-21 11:54   ` Guo Zhi
  0 siblings, 0 replies; 24+ messages in thread
From: Guo Zhi @ 2022-07-21 11:54 UTC (permalink / raw)
  To: Jason Wang
  Cc: eperezma, Stefano Garzarella, mst, netdev, linux-kernel, kvm,
	virtualization

On 2022/7/21 17:17, Jason Wang wrote:
> On Thu, Jul 21, 2022 at 4:44 PM Guo Zhi <qtxuning1999@sjtu.edu.cn> wrote:
>> When this feature has been negotiated, the virtio driver will use
>> descriptors in ring order: starting from offset 0 in the table and
>> wrapping around at the end of the table. Vhost devices will always use
>> descriptors in the same order in which they have been made available.
>> This can reduce the number of virtio accesses to the used ring.
>>
>> Based on the updated virtio spec, this series implements an IN_ORDER
>> prototype in the virtio driver and vhost.
> Thanks a lot for the series.
>
> I wonder if you can share any performance numbers for this?
>
> Thanks

As an RFC series, the current prototype only supports virtio_test, and
its performance evaluation shows little difference between in-order and
the traditional path. We can focus on the prototype design at this
stage.

I will continue working to support a real network driver and device,
and then share more persuasive performance results.

Thanks.

>> Guo Zhi (5):
>>    vhost: reorder used descriptors in a batch
>>    vhost: announce VIRTIO_F_IN_ORDER support
>>    vhost_test: batch used buffer
>>    virtio: get desc id in order
>>    virtio: announce VIRTIO_F_IN_ORDER support
>>
>>   drivers/vhost/test.c         | 15 +++++++++++-
>>   drivers/vhost/vhost.c        | 44 ++++++++++++++++++++++++++++++++++--
>>   drivers/vhost/vhost.h        |  4 ++++
>>   drivers/virtio/virtio_ring.c | 39 +++++++++++++++++++++++++-------
>>   4 files changed, 91 insertions(+), 11 deletions(-)
>>
>> --
>> 2.17.1
>>



* Re: [RFC 1/5] vhost: reorder used descriptors in a batch
  2022-07-21  8:43 ` [RFC 1/5] vhost: reorder used descriptors in a batch Guo Zhi
@ 2022-07-22  7:07   ` Eugenio Perez Martin
  2022-08-02  3:30     ` Guo Zhi
  2022-07-26  7:36   ` Jason Wang
  1 sibling, 1 reply; 24+ messages in thread
From: Eugenio Perez Martin @ 2022-07-22  7:07 UTC (permalink / raw)
  To: Guo Zhi
  Cc: Jason Wang, Stefano Garzarella, Michael Tsirkin, netdev,
	linux-kernel, kvm list, virtualization

On Thu, Jul 21, 2022 at 10:44 AM Guo Zhi <qtxuning1999@sjtu.edu.cn> wrote:
>
> Devices may not use descriptors in order; for example, NIC and SCSI
> devices may not call __vhost_add_used_n with buffers in order.  It's
> the task of __vhost_add_used_n to order them.  This commit reorders
> the buffers using vq->heads: only when the batch begins at the
> expected start point and is contiguous can the batch be exposed to
> the driver.  Only a single used ring entry is written out for a batch
> of descriptors, according to the VIRTIO 1.1 spec.
>
> Signed-off-by: Guo Zhi <qtxuning1999@sjtu.edu.cn>
> ---
>  drivers/vhost/vhost.c | 44 +++++++++++++++++++++++++++++++++++++++++--
>  drivers/vhost/vhost.h |  3 +++
>  2 files changed, 45 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
> index 40097826c..e2e77e29f 100644
> --- a/drivers/vhost/vhost.c
> +++ b/drivers/vhost/vhost.c
> @@ -317,6 +317,7 @@ static void vhost_vq_reset(struct vhost_dev *dev,
>         vq->used_flags = 0;
>         vq->log_used = false;
>         vq->log_addr = -1ull;
> +       vq->next_used_head_idx = 0;
>         vq->private_data = NULL;
>         vq->acked_features = 0;
>         vq->acked_backend_features = 0;
> @@ -398,6 +399,8 @@ static long vhost_dev_alloc_iovecs(struct vhost_dev *dev)
>                                           GFP_KERNEL);
>                 if (!vq->indirect || !vq->log || !vq->heads)
>                         goto err_nomem;
> +
> +               memset(vq->heads, 0, sizeof(*vq->heads) * dev->iov_limit);
>         }
>         return 0;
>
> @@ -2374,12 +2377,49 @@ static int __vhost_add_used_n(struct vhost_virtqueue *vq,
>                             unsigned count)
>  {
>         vring_used_elem_t __user *used;
> +       struct vring_desc desc;
>         u16 old, new;
>         int start;
> +       int begin, end, i;
> +       int copy_n = count;
> +
> +       if (vhost_has_feature(vq, VIRTIO_F_IN_ORDER)) {
> +               /* calculate descriptor chain length for each used buffer */
> +               for (i = 0; i < count; i++) {
> +                       begin = heads[i].id;
> +                       end = begin;
> +                       vq->heads[begin].len = 0;
> +                       do {
> +                               vq->heads[begin].len += 1;
> +                               if (unlikely(vhost_get_desc(vq, &desc, end))) {
> +                                       vq_err(vq, "Failed to get descriptor: idx %d addr %p\n",
> +                                              end, vq->desc + end);
> +                                       return -EFAULT;
> +                               }
> +                       } while ((end = next_desc(vq, &desc)) != -1);
> +               }
> +
> +               count = 0;
> +               /* sort and batch continuous used ring entry */
> +               while (vq->heads[vq->next_used_head_idx].len != 0) {
> +                       count++;
> +                       i = vq->next_used_head_idx;
> +                       vq->next_used_head_idx = (vq->next_used_head_idx +
> +                                                 vq->heads[vq->next_used_head_idx].len)
> +                                                 % vq->num;
> +                       vq->heads[i].len = 0;
> +               }

You're iterating vq->heads with two different indexes here.

The first loop is working with indexes [0, count), which is fine if
heads is a "cache" and everything can be overwritten (as it used to be
before this patch).

The other loop trusts in vq->next_used_head_idx, which is saved between calls.

So both uses are going to conflict with each other.

A proposal for checking this is to push the data in the chains
incrementally at the virtio_test driver, and check that they are
returned properly. Like, the first buffer in the chain has the value
of N, the second one N+1, and so on.
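
A sketch of that check, with made-up names (the virtio_test plumbing is
elided):

#include <assert.h>
#include <stddef.h>
#include <stdint.h>

/* Hypothetical virtio_test-style verification: buffer i of a chain
 * pushed with base value n must come back carrying n + i, so any
 * reordering bug corrupts the sequence.
 */
static void check_chain(const uint8_t *bufs, size_t chain_len, uint8_t n)
{
	size_t i;

	for (i = 0; i < chain_len; i++)
		assert(bufs[i] == (uint8_t)(n + i));
}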

Let's split saving chains in its own patch.


> +               /* only write out a single used ring entry with the id corresponding
> +                * to the head entry of the descriptor chain describing the last buffer
> +                * in the batch.
> +                */

Let's delay the batching for now, we can add it as an optimization on
top in the case of devices.

My proposal is to define a new struct vring_used_elem_inorder:

struct vring_used_elem_inorder {
    uint16_t written;
    uint16_t num;
};

And create a per vq array of them, with vq->num size. Let's call it
used_inorder for example.

Everytime the device uses a buffer chain of N buffers, written L and
first descriptor id D, it stores vq->used_inorder[D] = { .written = L,
.num = N }. .num == 0 means the buffer is not available.

After storing that information, you have your next_used_head_idx. You
can check if vq->used_inorder[next_used_head_idx] is used (.num != 0).
In case is not, there is no need to perform any actions for now.

In case it is, you iterate vq->used_inorder. First you write as used
next_used_head_idx. After that, next_used_head_idx increments by .num,
and we need to clean .num. If vq->used_inorder[vq->next_used_head_idx]
is used too, repeat.

I think we could even squash vq->heads and vq->used_inorder with some
tricks, because a chain's length would always be greater than or equal
to the used descriptor one, but storing them in a different array would
be clearer.
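
A minimal sketch of that bookkeeping, under the assumptions above
(used_inorder and next_used_head_idx are the proposed fields, not
existing vhost code; the actual used-ring write is elided):

#include <stdint.h>

struct vring_used_elem_inorder {
	uint16_t written;
	uint16_t num;
};

struct vq_state {
	uint16_t num;			/* ring size */
	uint16_t next_used_head_idx;
	struct vring_used_elem_inorder *used_inorder; /* num entries */
};

/* The device finished the chain with head id, num descriptors and
 * written bytes: record it, then flush every chain that is now
 * contiguous with the expected head.
 */
static void record_and_flush(struct vq_state *vq, uint16_t id,
			     uint16_t num, uint16_t written)
{
	vq->used_inorder[id].written = written;
	vq->used_inorder[id].num = num;

	while (vq->used_inorder[vq->next_used_head_idx].num) {
		uint16_t i = vq->next_used_head_idx;

		/* ... write one used entry for chain i here ... */
		vq->next_used_head_idx =
			(i + vq->used_inorder[i].num) % vq->num;
		vq->used_inorder[i].num = 0;
	}
}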

> +               heads[0].id = i;
> +               copy_n = 1;

The device must not write anything to the used ring if the next
descriptor has not been used. I'm failing to trace how this works when
the second half of the batch in vhost/test.c is used here.

Thanks!


> +       }
>
>         start = vq->last_used_idx & (vq->num - 1);
>         used = vq->used->ring + start;
> -       if (vhost_put_used(vq, heads, start, count)) {
> +       if (vhost_put_used(vq, heads, start, copy_n)) {
>                 vq_err(vq, "Failed to write used");
>                 return -EFAULT;
>         }
> @@ -2410,7 +2450,7 @@ int vhost_add_used_n(struct vhost_virtqueue *vq, struct vring_used_elem *heads,
>
>         start = vq->last_used_idx & (vq->num - 1);
>         n = vq->num - start;
> -       if (n < count) {
> +       if (n < count && !vhost_has_feature(vq, VIRTIO_F_IN_ORDER)) {
>                 r = __vhost_add_used_n(vq, heads, n);
>                 if (r < 0)
>                         return r;
> diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
> index d9109107a..7b2c0fbb5 100644
> --- a/drivers/vhost/vhost.h
> +++ b/drivers/vhost/vhost.h
> @@ -107,6 +107,9 @@ struct vhost_virtqueue {
>         bool log_used;
>         u64 log_addr;
>
> +       /* Sort heads in order */
> +       u16 next_used_head_idx;
> +
>         struct iovec iov[UIO_MAXIOV];
>         struct iovec iotlb_iov[64];
>         struct iovec *indirect;
> --
> 2.17.1
>



* Re: [RFC 3/5] vhost_test: batch used buffer
  2022-07-21  8:43 ` [RFC 3/5] vhost_test: batch used buffer Guo Zhi
@ 2022-07-22  7:12   ` Eugenio Perez Martin
  2022-08-02  2:47     ` Guo Zhi
                       ` (2 more replies)
  0 siblings, 3 replies; 24+ messages in thread
From: Eugenio Perez Martin @ 2022-07-22  7:12 UTC (permalink / raw)
  To: Guo Zhi
  Cc: Jason Wang, Stefano Garzarella, Michael Tsirkin, netdev,
	linux-kernel, kvm list, virtualization

On Thu, Jul 21, 2022 at 10:44 AM Guo Zhi <qtxuning1999@sjtu.edu.cn> wrote:
>
> Only add buffers to the used ring when all buffers in a batch have been
> used.  And if the in-order feature is negotiated, add randomness to the
> order of the used buffers, to test vhost's ability to reorder a batched
> buffer.
>
> Signed-off-by: Guo Zhi <qtxuning1999@sjtu.edu.cn>
> ---
>  drivers/vhost/test.c | 15 ++++++++++++++-
>  1 file changed, 14 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/vhost/test.c b/drivers/vhost/test.c
> index bc8e7fb1e..1c9c40c11 100644
> --- a/drivers/vhost/test.c
> +++ b/drivers/vhost/test.c
> @@ -43,6 +43,9 @@ struct vhost_test {
>  static void handle_vq(struct vhost_test *n)
>  {
>         struct vhost_virtqueue *vq = &n->vqs[VHOST_TEST_VQ];
> +       struct vring_used_elem *heads = kmalloc(sizeof(*heads)
> +                       * vq->num, GFP_KERNEL);
> +       int batch_idx = 0;
>         unsigned out, in;
>         int head;
>         size_t len, total_len = 0;
> @@ -84,11 +87,21 @@ static void handle_vq(struct vhost_test *n)
>                         vq_err(vq, "Unexpected 0 len for TX\n");
>                         break;
>                 }
> -               vhost_add_used_and_signal(&n->dev, vq, head, 0);
> +               heads[batch_idx].id = cpu_to_vhost32(vq, head);
> +               heads[batch_idx++].len = cpu_to_vhost32(vq, len);
>                 total_len += len;
>                 if (unlikely(vhost_exceeds_weight(vq, 0, total_len)))
>                         break;
>         }
> +       if (batch_idx) {
> +               if (vhost_has_feature(vq, VIRTIO_F_IN_ORDER) && batch_idx >= 2) {

Maybe add a module parameter to test this? Instead of trusting in
feature negotiation, "unorder_used=1" or something like that.

vhost.c:vhost_add_used_and_signal_n should support receiving buffers
in order or out of order whether F_IN_ORDER is negotiated or not.
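
A sketch of such a knob for vhost/test.c, using the suggested name
(hypothetical, not existing code):

/* force out-of-order completion regardless of feature negotiation */
static bool unorder_used;
module_param(unorder_used, bool, 0444);
MODULE_PARM_DESC(unorder_used, "Complete used buffers out of order");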

Thanks!

> +                       vhost_add_used_and_signal_n(&n->dev, vq, &heads[batch_idx / 2],
> +                                                   batch_idx - batch_idx / 2);
> +                       vhost_add_used_and_signal_n(&n->dev, vq, heads, batch_idx / 2);
> +               } else {
> +                       vhost_add_used_and_signal_n(&n->dev, vq, heads, batch_idx);
> +               }
> +       }
>
>         mutex_unlock(&vq->mutex);
>  }
> --
> 2.17.1
>



* Re: [RFC 1/5] vhost: reorder used descriptors in a batch
  2022-07-21  8:43 ` [RFC 1/5] vhost: reorder used descriptors in a batch Guo Zhi
  2022-07-22  7:07   ` Eugenio Perez Martin
@ 2022-07-26  7:36   ` Jason Wang
       [not found]     ` <2a8838c4-2e6f-6de7-dcdc-572699ff3dc9@sjtu.edu.cn>
  2022-08-02 13:54     ` Guo Zhi
  1 sibling, 2 replies; 24+ messages in thread
From: Jason Wang @ 2022-07-26  7:36 UTC (permalink / raw)
  To: Guo Zhi, eperezma, sgarzare, mst
  Cc: netdev, linux-kernel, kvm, virtualization


On 2022/7/21 16:43, Guo Zhi wrote:
> Devices may not use descriptors in order; for example, NIC and SCSI
> devices may not call __vhost_add_used_n with buffers in order.  It's
> the task of __vhost_add_used_n to order them.


I'm not sure this is true. Having ooo descriptors is probably by design
to have better performance.

This might be obvious for devices that have elevator or QoS stuff.

I suspect the right thing to do here is: for the devices that can't
perform better in the case of IN_ORDER, let's simply not offer IN_ORDER
(zerocopy or scsi). And for the devices we know can perform better, the
non-zerocopy ethernet device, we can do that.
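
For instance, vhost-net could mask the bit when TX zerocopy is enabled;
a sketch only, assuming the existing VHOST_NET_FEATURES mask and the
experimental_zcopytx module parameter:

	u64 features = VHOST_NET_FEATURES;

	/* sketch: only offer IN_ORDER when TX zerocopy is disabled */
	if (experimental_zcopytx)
		features &= ~(1ULL << VIRTIO_F_IN_ORDER);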


>   This commit reorders the buffers using vq->heads: only when the
> batch begins at the expected start point and is contiguous can the
> batch be exposed to the driver.  Only a single used ring entry is
> written out for a batch of descriptors, according to the VIRTIO 1.1
> spec.


So this sounds more like a "workaround" for a device that can't consume
buffers in order, and I doubt it can help performance.

More below.


>
> Signed-off-by: Guo Zhi <qtxuning1999@sjtu.edu.cn>
> ---
>   drivers/vhost/vhost.c | 44 +++++++++++++++++++++++++++++++++++++++++--
>   drivers/vhost/vhost.h |  3 +++
>   2 files changed, 45 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
> index 40097826c..e2e77e29f 100644
> --- a/drivers/vhost/vhost.c
> +++ b/drivers/vhost/vhost.c
> @@ -317,6 +317,7 @@ static void vhost_vq_reset(struct vhost_dev *dev,
>   	vq->used_flags = 0;
>   	vq->log_used = false;
>   	vq->log_addr = -1ull;
> +	vq->next_used_head_idx = 0;
>   	vq->private_data = NULL;
>   	vq->acked_features = 0;
>   	vq->acked_backend_features = 0;
> @@ -398,6 +399,8 @@ static long vhost_dev_alloc_iovecs(struct vhost_dev *dev)
>   					  GFP_KERNEL);
>   		if (!vq->indirect || !vq->log || !vq->heads)
>   			goto err_nomem;
> +
> +		memset(vq->heads, 0, sizeof(*vq->heads) * dev->iov_limit);
>   	}
>   	return 0;
>   
> @@ -2374,12 +2377,49 @@ static int __vhost_add_used_n(struct vhost_virtqueue *vq,
>   			    unsigned count)
>   {
>   	vring_used_elem_t __user *used;
> +	struct vring_desc desc;
>   	u16 old, new;
>   	int start;
> +	int begin, end, i;
> +	int copy_n = count;
> +
> +	if (vhost_has_feature(vq, VIRTIO_F_IN_ORDER)) {


How do you guarantee that ids of heads are contiguous?


> +		/* calculate descriptor chain length for each used buffer */


I'm a little bit confused about this comment; we have heads[i].len for this?


> +		for (i = 0; i < count; i++) {
> +			begin = heads[i].id;
> +			end = begin;
> +			vq->heads[begin].len = 0;


Does this work for e.g. an RX virtqueue?


> +			do {
> +				vq->heads[begin].len += 1;
> +				if (unlikely(vhost_get_desc(vq, &desc, end))) {


Let's try hard to avoid more userspace copies here; they're a source of
performance regression.

Thanks


> +					vq_err(vq, "Failed to get descriptor: idx %d addr %p\n",
> +					       end, vq->desc + end);
> +					return -EFAULT;
> +				}
> +			} while ((end = next_desc(vq, &desc)) != -1);
> +		}
> +
> +		count = 0;
> +		/* sort and batch continuous used ring entry */
> +		while (vq->heads[vq->next_used_head_idx].len != 0) {
> +			count++;
> +			i = vq->next_used_head_idx;
> +			vq->next_used_head_idx = (vq->next_used_head_idx +
> +						  vq->heads[vq->next_used_head_idx].len)
> +						  % vq->num;
> +			vq->heads[i].len = 0;
> +		}
> +		/* only write out a single used ring entry with the id corresponding
> +		 * to the head entry of the descriptor chain describing the last buffer
> +		 * in the batch.
> +		 */
> +		heads[0].id = i;
> +		copy_n = 1;
> +	}
>   
>   	start = vq->last_used_idx & (vq->num - 1);
>   	used = vq->used->ring + start;
> -	if (vhost_put_used(vq, heads, start, count)) {
> +	if (vhost_put_used(vq, heads, start, copy_n)) {
>   		vq_err(vq, "Failed to write used");
>   		return -EFAULT;
>   	}
> @@ -2410,7 +2450,7 @@ int vhost_add_used_n(struct vhost_virtqueue *vq, struct vring_used_elem *heads,
>   
>   	start = vq->last_used_idx & (vq->num - 1);
>   	n = vq->num - start;
> -	if (n < count) {
> +	if (n < count && !vhost_has_feature(vq, VIRTIO_F_IN_ORDER)) {
>   		r = __vhost_add_used_n(vq, heads, n);
>   		if (r < 0)
>   			return r;
> diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
> index d9109107a..7b2c0fbb5 100644
> --- a/drivers/vhost/vhost.h
> +++ b/drivers/vhost/vhost.h
> @@ -107,6 +107,9 @@ struct vhost_virtqueue {
>   	bool log_used;
>   	u64 log_addr;
>   
> +	/* Sort heads in order */
> +	u16 next_used_head_idx;
> +
>   	struct iovec iov[UIO_MAXIOV];
>   	struct iovec iotlb_iov[64];
>   	struct iovec *indirect;



* Re: [RFC 4/5] virtio: get desc id in order
  2022-07-21  8:43 ` [RFC 4/5] virtio: get desc id in order Guo Zhi
@ 2022-07-26  8:07   ` Jason Wang
  2022-07-28  8:12     ` Guo Zhi
  2022-08-11  8:49     ` Guo Zhi
  0 siblings, 2 replies; 24+ messages in thread
From: Jason Wang @ 2022-07-26  8:07 UTC (permalink / raw)
  To: Guo Zhi, eperezma, sgarzare, mst
  Cc: netdev, linux-kernel, kvm, virtualization


On 2022/7/21 16:43, Guo Zhi wrote:
> If the in-order feature is negotiated, we can skip the used ring and
> get each buffer's desc id sequentially.


Let's rename the patch to something like "in order support for virtio_ring"


>
> Signed-off-by: Guo Zhi <qtxuning1999@sjtu.edu.cn>
> ---
>   drivers/virtio/virtio_ring.c | 37 ++++++++++++++++++++++++++++--------
>   1 file changed, 29 insertions(+), 8 deletions(-)


I don't see packed support in this patch, we need to implement that.


>
> diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
> index a5ec724c0..4d57a4edc 100644
> --- a/drivers/virtio/virtio_ring.c
> +++ b/drivers/virtio/virtio_ring.c
> @@ -144,6 +144,9 @@ struct vring_virtqueue {
>   			/* DMA address and size information */
>   			dma_addr_t queue_dma_addr;
>   			size_t queue_size_in_bytes;
> +
> +			/* In order feature batch begin here */
> +			u16 next_batch_desc_begin;
>   		} split;
>   
>   		/* Available for packed ring */
> @@ -700,8 +703,10 @@ static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head,
>   	}
>   
>   	vring_unmap_one_split(vq, i);
> -	vq->split.desc_extra[i].next = vq->free_head;
> -	vq->free_head = head;
> +	if (!virtio_has_feature(vq->vq.vdev, VIRTIO_F_IN_ORDER)) {
> +		vq->split.desc_extra[i].next = vq->free_head;
> +		vq->free_head = head;
> +	}


Let's add a comment to explain why we don't need anything if in order is
negotiated.


>   
>   	/* Plus final descriptor */
>   	vq->vq.num_free++;
> @@ -743,7 +748,8 @@ static void *virtqueue_get_buf_ctx_split(struct virtqueue *_vq,
>   {
>   	struct vring_virtqueue *vq = to_vvq(_vq);
>   	void *ret;
> -	unsigned int i;
> +	__virtio16 nextflag = cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_NEXT);
> +	unsigned int i, j;
>   	u16 last_used;
>   
>   	START_USE(vq);
> @@ -762,11 +768,24 @@ static void *virtqueue_get_buf_ctx_split(struct virtqueue *_vq,
>   	/* Only get used array entries after they have been exposed by host. */
>   	virtio_rmb(vq->weak_barriers);
>   
> -	last_used = (vq->last_used_idx & (vq->split.vring.num - 1));
> -	i = virtio32_to_cpu(_vq->vdev,
> -			vq->split.vring.used->ring[last_used].id);
> -	*len = virtio32_to_cpu(_vq->vdev,
> -			vq->split.vring.used->ring[last_used].len);
> +	if (virtio_has_feature(_vq->vdev, VIRTIO_F_IN_ORDER)) {
> +		/* Skip used ring and get used desc in order*/
> +		i = vq->split.next_batch_desc_begin;
> +		j = i;
> +		while (vq->split.vring.desc[j].flags & nextflag)


Let's not depend on the descriptor ring, which is under the control of
a potentially malicious hypervisor.

Let's use desc_extra, which is not visible to the hypervisor. More can
be seen in this commit:

72b5e8958738 ("virtio-ring: store DMA metadata in desc_extra for split 
virtqueue")


> +			j = (j + 1) % vq->split.vring.num;
> +		/* move to next */
> +		j = (j + 1) % vq->split.vring.num;
> +		vq->split.next_batch_desc_begin = j;


I'm not sure I get the logic here; basically I think we should check
the buffer instead of the descriptor here.

So if vring.used->ring[last_used].id != last_used, we know all 
[last_used, vring.used->ring[last_used].id] have been used in a batch?
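
If so, for single-descriptor buffers (where used slot index and id
coincide), the inference would look roughly like this sketch:

	/* sketch: one used entry covers the whole range last_used..id */
	u16 id = virtio32_to_cpu(vdev,
				 vq->split.vring.used->ring[last_used].id);
	u16 j = last_used;

	do {
		/* detach/free descriptor j here */
		j = (j + 1) & (vq->split.vring.num - 1);
	} while (j != ((id + 1) & (vq->split.vring.num - 1)));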


> +
> +		/* TODO: len of buffer */


So the spec said:

"The skipped buffers (for which no used ring entry was written) are
assumed to have been used (read or written) by the device completely."

Thanks


> +	} else {
> +		last_used = (vq->last_used_idx & (vq->split.vring.num - 1));
> +		i = virtio32_to_cpu(_vq->vdev,
> +				    vq->split.vring.used->ring[last_used].id);
> +		*len = virtio32_to_cpu(_vq->vdev,
> +				       vq->split.vring.used->ring[last_used].len);
> +	}
>   
>   	if (unlikely(i >= vq->split.vring.num)) {
>   		BAD_RING(vq, "id %u out of range\n", i);
> @@ -2234,6 +2253,8 @@ struct virtqueue *__vring_new_virtqueue(unsigned int index,
>   	vq->split.avail_flags_shadow = 0;
>   	vq->split.avail_idx_shadow = 0;
>   
> +	vq->split.next_batch_desc_begin = 0;
> +
>   	/* No callback?  Tell other side not to bother us. */
>   	if (!callback) {
>   		vq->split.avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT;



* Re: [RFC 4/5] virtio: get desc id in order
  2022-07-26  8:07   ` Jason Wang
@ 2022-07-28  8:12     ` Guo Zhi
  2022-08-11  8:49     ` Guo Zhi
  1 sibling, 0 replies; 24+ messages in thread
From: Guo Zhi @ 2022-07-28  8:12 UTC (permalink / raw)
  To: Jason Wang, eperezma, sgarzare, mst
  Cc: netdev, linux-kernel, kvm, virtualization

On 2022/7/26 16:07, Jason Wang wrote:
>
> On 2022/7/21 16:43, Guo Zhi wrote:
>> If the in-order feature is negotiated, we can skip the used ring and
>> get each buffer's desc id sequentially.
>
>
> Let's rename the patch to something like "in order support for 
> virtio_ring"
>
>
>>
>> Signed-off-by: Guo Zhi <qtxuning1999@sjtu.edu.cn>
>> ---
>>   drivers/virtio/virtio_ring.c | 37 ++++++++++++++++++++++++++++--------
>>   1 file changed, 29 insertions(+), 8 deletions(-)
>
>
> I don't see packed support in this patch, we need to implement that.
>
It will be implemented later.
>
>>
>> diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
>> index a5ec724c0..4d57a4edc 100644
>> --- a/drivers/virtio/virtio_ring.c
>> +++ b/drivers/virtio/virtio_ring.c
>> @@ -144,6 +144,9 @@ struct vring_virtqueue {
>>               /* DMA address and size information */
>>               dma_addr_t queue_dma_addr;
>>               size_t queue_size_in_bytes;
>> +
>> +            /* In order feature batch begin here */
>> +            u16 next_batch_desc_begin;
>>           } split;
>>             /* Available for packed ring */
>> @@ -700,8 +703,10 @@ static void detach_buf_split(struct 
>> vring_virtqueue *vq, unsigned int head,
>>       }
>>         vring_unmap_one_split(vq, i);
>> -    vq->split.desc_extra[i].next = vq->free_head;
>> -    vq->free_head = head;
>> +    if (!virtio_has_feature(vq->vq.vdev, VIRTIO_F_IN_ORDER)) {
>> +        vq->split.desc_extra[i].next = vq->free_head;
>> +        vq->free_head = head;
>> +    }
>
>
> Let's add a comment to explain why we don't need anything if in order
> is negotiated.
>
LGTM.
>
>>         /* Plus final descriptor */
>>       vq->vq.num_free++;
>> @@ -743,7 +748,8 @@ static void *virtqueue_get_buf_ctx_split(struct 
>> virtqueue *_vq,
>>   {
>>       struct vring_virtqueue *vq = to_vvq(_vq);
>>       void *ret;
>> -    unsigned int i;
>> +    __virtio16 nextflag = cpu_to_virtio16(vq->vq.vdev, 
>> VRING_DESC_F_NEXT);
>> +    unsigned int i, j;
>>       u16 last_used;
>>         START_USE(vq);
>> @@ -762,11 +768,24 @@ static void *virtqueue_get_buf_ctx_split(struct 
>> virtqueue *_vq,
>>       /* Only get used array entries after they have been exposed by 
>> host. */
>>       virtio_rmb(vq->weak_barriers);
>>   -    last_used = (vq->last_used_idx & (vq->split.vring.num - 1));
>> -    i = virtio32_to_cpu(_vq->vdev,
>> -            vq->split.vring.used->ring[last_used].id);
>> -    *len = virtio32_to_cpu(_vq->vdev,
>> -            vq->split.vring.used->ring[last_used].len);
>> +    if (virtio_has_feature(_vq->vdev, VIRTIO_F_IN_ORDER)) {
>> +        /* Skip used ring and get used desc in order */
>> +        i = vq->split.next_batch_desc_begin;
>> +        j = i;
>> +        while (vq->split.vring.desc[j].flags & nextflag)
>
>
> Let's not depend on the descriptor ring, which is under the control
> of a potentially malicious hypervisor.
>
> Let's use desc_extra, which is not visible to the hypervisor. More can
> be seen in this commit:
>
> 72b5e8958738 ("virtio-ring: store DMA metadata in desc_extra for split 
> virtqueue")
>
LGTM, I will use desc_extra in the new version of the patch.
>
>> +            j = (j + 1) % vq->split.vring.num;
>> +        /* move to next */
>> +        j = (j + 1) % vq->split.vring.num;
>> +        vq->split.next_batch_desc_begin = j;
>
>
> I'm not sure I get the logic here; basically I think we should check
> the buffer instead of the descriptor here.

Because vq->last_used_idx != vq->split.vring.used->idx, the virtio
driver knows there is at least one used descriptor. That descriptor's
id is vq->split.next_batch_desc_begin because of in-order. Then we have
to traverse the descriptor chain and point
vq->split.next_batch_desc_begin to the next used descriptor.

Thanks.

>
> So if vring.used->ring[last_used].id != last_used, we know all 
> [last_used, vring.used->ring[last_used].id] have been used in a batch?
>
>
>> +
>> +        /* TODO: len of buffer */
>
>
> So the spec said:
>
> "The skipped buffers (for which no used ring entry was written) are
> assumed to have been used (read or written) by the device completely."
>
> Thanks
>
The driver needs the len in the used ring to get the buffer size.
However, in-order will not write the len of each buffer in the used
ring, so I will try passing the len of the buffer in the device header.
>
>> +    } else {
>> +        last_used = (vq->last_used_idx & (vq->split.vring.num - 1));
>> +        i = virtio32_to_cpu(_vq->vdev,
>> + vq->split.vring.used->ring[last_used].id);
>> +        *len = virtio32_to_cpu(_vq->vdev,
>> + vq->split.vring.used->ring[last_used].len);
>> +    }
>>         if (unlikely(i >= vq->split.vring.num)) {
>>           BAD_RING(vq, "id %u out of range\n", i);
>> @@ -2234,6 +2253,8 @@ struct virtqueue 
>> *__vring_new_virtqueue(unsigned int index,
>>       vq->split.avail_flags_shadow = 0;
>>       vq->split.avail_idx_shadow = 0;
>>   +    vq->split.next_batch_desc_begin = 0;
>> +
>>       /* No callback?  Tell other side not to bother us. */
>>       if (!callback) {
>>           vq->split.avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT;
>



* Re: [RFC 1/5] vhost: reorder used descriptors in a batch
       [not found]     ` <2a8838c4-2e6f-6de7-dcdc-572699ff3dc9@sjtu.edu.cn>
@ 2022-07-29  7:32       ` Jason Wang
  2022-08-02  3:09         ` Guo Zhi
  2022-08-02 14:12         ` Guo Zhi
  0 siblings, 2 replies; 24+ messages in thread
From: Jason Wang @ 2022-07-29  7:32 UTC (permalink / raw)
  To: Guo Zhi
  Cc: eperezma, Stefano Garzarella, mst, netdev, linux-kernel, kvm,
	virtualization

On Thu, Jul 28, 2022 at 4:26 PM Guo Zhi <qtxuning1999@sjtu.edu.cn> wrote:
>
> On 2022/7/26 15:36, Jason Wang wrote:
>
>
> On 2022/7/21 16:43, Guo Zhi wrote:
>
> Devices may not use descriptors in order; for example, NIC and SCSI
> devices may not call __vhost_add_used_n with buffers in order.  It's
> the task of __vhost_add_used_n to order them.
>
>
>
> I'm not sure this is true. Having ooo descriptors is probably by design to have better performance.
>
> This might be obvious for devices that have elevator or QoS stuff.
>
> I suspect the right thing to do here is: for the devices that can't perform better in the case of IN_ORDER, let's simply not offer IN_ORDER (zerocopy or scsi). And for the devices we know can perform better, the non-zerocopy ethernet device, we can do that.
>
>
>   This commit reorders the buffers using vq->heads: only when the
> batch begins at the expected start point and is contiguous can the
> batch be exposed to the driver.  Only a single used ring entry is
> written out for a batch of descriptors, according to the VIRTIO 1.1
> spec.
>
>
>
> So this sounds more like a "workaround" for a device that can't consume buffers in order, and I doubt it can help performance.
>
> More below.
>
>
>
> Signed-off-by: Guo Zhi <qtxuning1999@sjtu.edu.cn>
> ---
>   drivers/vhost/vhost.c | 44 +++++++++++++++++++++++++++++++++++++++++--
>   drivers/vhost/vhost.h |  3 +++
>   2 files changed, 45 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
> index 40097826c..e2e77e29f 100644
> --- a/drivers/vhost/vhost.c
> +++ b/drivers/vhost/vhost.c
> @@ -317,6 +317,7 @@ static void vhost_vq_reset(struct vhost_dev *dev,
>       vq->used_flags = 0;
>       vq->log_used = false;
>       vq->log_addr = -1ull;
> +    vq->next_used_head_idx = 0;
>       vq->private_data = NULL;
>       vq->acked_features = 0;
>       vq->acked_backend_features = 0;
> @@ -398,6 +399,8 @@ static long vhost_dev_alloc_iovecs(struct vhost_dev *dev)
>                         GFP_KERNEL);
>           if (!vq->indirect || !vq->log || !vq->heads)
>               goto err_nomem;
> +
> +        memset(vq->heads, 0, sizeof(*vq->heads) * dev->iov_limit);
>       }
>       return 0;
>   @@ -2374,12 +2377,49 @@ static int __vhost_add_used_n(struct vhost_virtqueue *vq,
>                   unsigned count)
>   {
>       vring_used_elem_t __user *used;
> +    struct vring_desc desc;
>       u16 old, new;
>       int start;
> +    int begin, end, i;
> +    int copy_n = count;
> +
> +    if (vhost_has_feature(vq, VIRTIO_F_IN_ORDER)) {
>
>
>
> How do you guarantee that ids of heads are contiguous?
>
> The ids of the heads do not need to be contiguous.
>
> For example, I have three buffers { .id = 0, 15 }, { .id = 20, 30 }, { .id = 15, 20 } for vhost_add_used_n. Then I will set vq->heads[0].len = 15, vq->heads[15].len = 5, vq->heads[20].len = 10 as the reorder. Once I find there is no hole in the batched descriptors, I will expose them to the driver.

So spec said:

"If VIRTIO_F_IN_ORDER has been negotiated, driver uses descriptors in
ring order: starting from offset 0 in the table, and wrapping around
at the end of the table."

And

"VIRTIO_F_IN_ORDER(35)This feature indicates that all buffers are used
by the device in the same order in which they have been made
available."

This means your example is not an IN_ORDER device.

The driver should submit buffers (assuming each buffer have one
descriptor) in order {id = 0, 15}, {id = 1, 30} and {id = 2, 20}.

And even if it is submitted in order, we can not use a batch because:

"The skipped buffers (for which no used ring entry was written) are
assumed to have been used (read or written) by the device completely."

This means for TX we are probably ok, but for rx, unless we know the
buffers were written completely, we can't write them in a batch.

I'd suggest doing cross-testing for this series:

1) testing vhost IN_ORDER support with DPDK virtio PMD
2) testing virtio IN_ORDER with DPDK vhost-user via testpmd

Thanks


>
>
> +        /* calculate descriptor chain length for each used buffer */
>
>
>
> I'm a little bit confused about this comment; we have heads[i].len for this?
>
> Maybe I should not use vq->heads; it is somewhat misleading.
>
>
> +        for (i = 0; i < count; i++) {
> +            begin = heads[i].id;
> +            end = begin;
> +            vq->heads[begin].len = 0;
>
>
>
> Does this work for e.g. an RX virtqueue?
>
>
> +            do {
> +                vq->heads[begin].len += 1;
> +                if (unlikely(vhost_get_desc(vq, &desc, end))) {
>
>
>
> Let's try hard to avoid more userspace copies here; they're a source of performance regression.
>
> Thanks
>
>
> +                    vq_err(vq, "Failed to get descriptor: idx %d addr %p\n",
> +                           end, vq->desc + end);
> +                    return -EFAULT;
> +                }
> +            } while ((end = next_desc(vq, &desc)) != -1);
> +        }
> +
> +        count = 0;
> +        /* sort and batch continuous used ring entry */
> +        while (vq->heads[vq->next_used_head_idx].len != 0) {
> +            count++;
> +            i = vq->next_used_head_idx;
> +            vq->next_used_head_idx = (vq->next_used_head_idx +
> +                          vq->heads[vq->next_used_head_idx].len)
> +                          % vq->num;
> +            vq->heads[i].len = 0;
> +        }
> +        /* only write out a single used ring entry with the id corresponding
> +         * to the head entry of the descriptor chain describing the last buffer
> +         * in the batch.
> +         */
> +        heads[0].id = i;
> +        copy_n = 1;
> +    }
>         start = vq->last_used_idx & (vq->num - 1);
>       used = vq->used->ring + start;
> -    if (vhost_put_used(vq, heads, start, count)) {
> +    if (vhost_put_used(vq, heads, start, copy_n)) {
>           vq_err(vq, "Failed to write used");
>           return -EFAULT;
>       }
> @@ -2410,7 +2450,7 @@ int vhost_add_used_n(struct vhost_virtqueue *vq, struct vring_used_elem *heads,
>         start = vq->last_used_idx & (vq->num - 1);
>       n = vq->num - start;
> -    if (n < count) {
> +    if (n < count && !vhost_has_feature(vq, VIRTIO_F_IN_ORDER)) {
>           r = __vhost_add_used_n(vq, heads, n);
>           if (r < 0)
>               return r;
> diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
> index d9109107a..7b2c0fbb5 100644
> --- a/drivers/vhost/vhost.h
> +++ b/drivers/vhost/vhost.h
> @@ -107,6 +107,9 @@ struct vhost_virtqueue {
>       bool log_used;
>       u64 log_addr;
>   +    /* Sort heads in order */
> +    u16 next_used_head_idx;
> +
>       struct iovec iov[UIO_MAXIOV];
>       struct iovec iotlb_iov[64];
>       struct iovec *indirect;
>
>
>



* Re: [RFC 3/5] vhost_test: batch used buffer
  2022-07-22  7:12   ` Eugenio Perez Martin
@ 2022-08-02  2:47     ` Guo Zhi
  2022-08-02  3:08     ` Guo Zhi
       [not found]     ` <1D1ABF88-B503-4BE0-AC83-3326EAA62510@sjtu.edu.cn>
  2 siblings, 0 replies; 24+ messages in thread
From: Guo Zhi @ 2022-08-02  2:47 UTC (permalink / raw)
  To: eperezma
  Cc: jasowang, sgarzare, Michael Tsirkin, netdev, linux-kernel,
	kvm list, virtualization



----- Original Message -----
From: "eperezma" <eperezma@redhat.com>
To: "Guo Zhi" <qtxuning1999@sjtu.edu.cn>
Cc: "jasowang" <jasowang@redhat.com>, "sgarzare" <sgarzare@redhat.com>, "Michael Tsirkin" <mst@redhat.com>, "netdev" <netdev@vger.kernel.org>, "linux-kernel" <linux-kernel@vger.kernel.org>, "kvm list" <kvm@vger.kernel.org>, "virtualization" <virtualization@lists.linux-foundation.org>
Sent: Friday, July 22, 2022 3:12:47 PM
Subject: Re: [RFC 3/5] vhost_test: batch used buffer

On Thu, Jul 21, 2022 at 10:44 AM Guo Zhi <qtxuning1999@sjtu.edu.cn> wrote:
> 
> Only add buffers to the used ring when all buffers in a batch have been
> used.  And if the in-order feature is negotiated, add randomness to the
> order of the used buffers, to test vhost's ability to reorder a batched
> buffer.
> 
> Signed-off-by: Guo Zhi <qtxuning1999@sjtu.edu.cn>
> ---
> drivers/vhost/test.c | 15 ++++++++++++++-
> 1 file changed, 14 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/vhost/test.c b/drivers/vhost/test.c
> index bc8e7fb1e..1c9c40c11 100644
> --- a/drivers/vhost/test.c
> +++ b/drivers/vhost/test.c
> @@ -43,6 +43,9 @@ struct vhost_test {
> static void handle_vq(struct vhost_test *n)
> {
>        struct vhost_virtqueue *vq = &n->vqs[VHOST_TEST_VQ];
> +       struct vring_used_elem *heads = kmalloc(sizeof(*heads)
> +                       * vq->num, GFP_KERNEL);
> +       int batch_idx = 0;
>        unsigned out, in;
>        int head;
>        size_t len, total_len = 0;
> @@ -84,11 +87,21 @@ static void handle_vq(struct vhost_test *n)
>                        vq_err(vq, "Unexpected 0 len for TX\n");
>                        break;
>                }
> -               vhost_add_used_and_signal(&n->dev, vq, head, 0);
> +               heads[batch_idx].id = cpu_to_vhost32(vq, head);
> +               heads[batch_idx++].len = cpu_to_vhost32(vq, len);
>                total_len += len;
>                if (unlikely(vhost_exceeds_weight(vq, 0, total_len)))
>                        break;
>        }
> +       if (batch_idx) {
> +               if (vhost_has_feature(vq, VIRTIO_F_IN_ORDER) && batch_idx >= 2) {

Maybe add a module parameter to test this? Instead of trusting in
feature negotiation, "unorder_used=1" or something like that.

vhost.c:vhost_add_used_and_signal_n should support receiving buffers
in order or out of order whether F_IN_ORDER is negotiated or not.

Thanks!



> +                       vhost_add_used_and_signal_n(&n->dev, vq, &heads[batch_idx / 2],
> +                                                   batch_idx - batch_idx / 2);
> +                       vhost_add_used_and_signal_n(&n->dev, vq, heads, batch_idx / 2);
> +               } else {
> +                       vhost_add_used_and_signal_n(&n->dev, vq, heads, batch_idx);
> +               }
> +       }
> 
>        mutex_unlock(&vq->mutex);
> }
> --
> 2.17.1


* Re: [RFC 3/5] vhost_test: batch used buffer
  2022-07-22  7:12   ` Eugenio Perez Martin
  2022-08-02  2:47     ` Guo Zhi
@ 2022-08-02  3:08     ` Guo Zhi
       [not found]     ` <1D1ABF88-B503-4BE0-AC83-3326EAA62510@sjtu.edu.cn>
  2 siblings, 0 replies; 24+ messages in thread
From: Guo Zhi @ 2022-08-02  3:08 UTC (permalink / raw)
  To: eperezma
  Cc: jasowang, sgarzare, Michael Tsirkin, netdev, linux-kernel,
	kvm list, virtualization



----- Original Message -----
From: "eperezma" <eperezma@redhat.com>
To: "Guo Zhi" <qtxuning1999@sjtu.edu.cn>
Cc: "jasowang" <jasowang@redhat.com>, "sgarzare" <sgarzare@redhat.com>, "Michael Tsirkin" <mst@redhat.com>, "netdev" <netdev@vger.kernel.org>, "linux-kernel" <linux-kernel@vger.kernel.org>, "kvm list" <kvm@vger.kernel.org>, "virtualization" <virtualization@lists.linux-foundation.org>
Sent: Friday, July 22, 2022 3:12:47 PM
Subject: Re: [RFC 3/5] vhost_test: batch used buffer

On Thu, Jul 21, 2022 at 10:44 AM Guo Zhi <qtxuning1999@sjtu.edu.cn> wrote:
>
> Only add buffers to the used ring when all buffers in a batch have been
> used.  And if the in-order feature is negotiated, add randomness to the
> order of the used buffers, to test vhost's ability to reorder a batched
> buffer.
>
> Signed-off-by: Guo Zhi <qtxuning1999@sjtu.edu.cn>
> ---
>  drivers/vhost/test.c | 15 ++++++++++++++-
>  1 file changed, 14 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/vhost/test.c b/drivers/vhost/test.c
> index bc8e7fb1e..1c9c40c11 100644
> --- a/drivers/vhost/test.c
> +++ b/drivers/vhost/test.c
> @@ -43,6 +43,9 @@ struct vhost_test {
>  static void handle_vq(struct vhost_test *n)
>  {
>         struct vhost_virtqueue *vq = &n->vqs[VHOST_TEST_VQ];
> +       struct vring_used_elem *heads = kmalloc(sizeof(*heads)
> +                       * vq->num, GFP_KERNEL);
> +       int batch_idx = 0;
>         unsigned out, in;
>         int head;
>         size_t len, total_len = 0;
> @@ -84,11 +87,21 @@ static void handle_vq(struct vhost_test *n)
>                         vq_err(vq, "Unexpected 0 len for TX\n");
>                         break;
>                 }
> -               vhost_add_used_and_signal(&n->dev, vq, head, 0);
> +               heads[batch_idx].id = cpu_to_vhost32(vq, head);
> +               heads[batch_idx++].len = cpu_to_vhost32(vq, len);
>                 total_len += len;
>                 if (unlikely(vhost_exceeds_weight(vq, 0, total_len)))
>                         break;
>         }
> +       if (batch_idx) {
> +               if (vhost_has_feature(vq, VIRTIO_F_IN_ORDER) && batch_idx >= 2) {

Maybe add a module parameter to test this? Instead of trusting in
feature negotiation, "unorder_used=1" or something like that.

vhost.c:vhost_add_used_and_signal_n should support receiving buffers
in order or out of order whether F_IN_ORDER is negotiated or not.

Thanks!

> +                       vhost_add_used_and_signal_n(&n->dev, vq, &heads[batch_idx / 2],
> +                                                   batch_idx - batch_idx / 2);
> +                       vhost_add_used_and_signal_n(&n->dev, vq, heads, batch_idx / 2);
> +               } else {
> +                       vhost_add_used_and_signal_n(&n->dev, vq, heads, batch_idx);
> +               }
> +       }
>
>         mutex_unlock(&vq->mutex);
>  }
> --
> 2.17.1
>

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [RFC 1/5] vhost: reorder used descriptors in a batch
  2022-07-29  7:32       ` Jason Wang
@ 2022-08-02  3:09         ` Guo Zhi
  2022-08-02 14:12         ` Guo Zhi
  1 sibling, 0 replies; 24+ messages in thread
From: Guo Zhi @ 2022-08-02  3:09 UTC (permalink / raw)
  To: jasowang
  Cc: eperezma, sgarzare, Michael Tsirkin, netdev, linux-kernel,
	kvm list, virtualization



----- Original Message -----
From: "jasowang" <jasowang@redhat.com>
To: "Guo Zhi" <qtxuning1999@sjtu.edu.cn>
Cc: "eperezma" <eperezma@redhat.com>, "sgarzare" <sgarzare@redhat.com>, "Michael Tsirkin" <mst@redhat.com>, "netdev" <netdev@vger.kernel.org>, "linux-kernel" <linux-kernel@vger.kernel.org>, "kvm list" <kvm@vger.kernel.org>, "virtualization" <virtualization@lists.linux-foundation.org>
Sent: Friday, July 29, 2022 3:32:02 PM
Subject: Re: [RFC 1/5] vhost: reorder used descriptors in a batch

On Thu, Jul 28, 2022 at 4:26 PM Guo Zhi <qtxuning1999@sjtu.edu.cn> wrote:
>
> On 2022/7/26 15:36, Jason Wang wrote:
>
>
> On 2022/7/21 16:43, Guo Zhi wrote:
>
> Device may not use descriptors in order, for example, NIC and SCSI may
> not call __vhost_add_used_n with buffers in order.  It's the task of
> __vhost_add_used_n to order them.
>
>
>
> I'm not sure this is true. Having ooo descriptors is probably by design to have better performance.
>
> This might be obvious for devices that may have elevator or QoS stuffs.
>
> I suspect the right thing to do here is, for the device that can't perform better in the case of IN_ORDER, let's simply not offer IN_ORDER (zerocopy or scsi). And for the device we know it can perform better, a non-zerocopy ethernet device, we can do that.
>
>
>   This commit reorders the buffers using
> vq->heads; only when the batch begins from the expected start point and is
> continuous can the batch be exposed to the driver.  And only a single used
> ring entry is written out for a batch of descriptors, according to the
> VIRTIO 1.1 spec.
>
>
>
> So this sounds more like a "workaround" of the device that can't consume buffer in order, I suspect it can help in performance.
>
> More below.
>
>
>
> Signed-off-by: Guo Zhi <qtxuning1999@sjtu.edu.cn>
> ---
>   drivers/vhost/vhost.c | 44 +++++++++++++++++++++++++++++++++++++++++--
>   drivers/vhost/vhost.h |  3 +++
>   2 files changed, 45 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
> index 40097826c..e2e77e29f 100644
> --- a/drivers/vhost/vhost.c
> +++ b/drivers/vhost/vhost.c
> @@ -317,6 +317,7 @@ static void vhost_vq_reset(struct vhost_dev *dev,
>       vq->used_flags = 0;
>       vq->log_used = false;
>       vq->log_addr = -1ull;
> +    vq->next_used_head_idx = 0;
>       vq->private_data = NULL;
>       vq->acked_features = 0;
>       vq->acked_backend_features = 0;
> @@ -398,6 +399,8 @@ static long vhost_dev_alloc_iovecs(struct vhost_dev *dev)
>                         GFP_KERNEL);
>           if (!vq->indirect || !vq->log || !vq->heads)
>               goto err_nomem;
> +
> +        memset(vq->heads, 0, sizeof(*vq->heads) * dev->iov_limit);
>       }
>       return 0;
>   @@ -2374,12 +2377,49 @@ static int __vhost_add_used_n(struct vhost_virtqueue *vq,
>                   unsigned count)
>   {
>       vring_used_elem_t __user *used;
> +    struct vring_desc desc;
>       u16 old, new;
>       int start;
> +    int begin, end, i;
> +    int copy_n = count;
> +
> +    if (vhost_has_feature(vq, VIRTIO_F_IN_ORDER)) {
>
>
>
> How do you guarantee that ids of heads are contiguous?
>
> The ids of heads do not need to be contiguous.
>
> For example, I have three buffers { .id = 0, 15}, {.id = 20, 30}, {.id = 15, 20}
> passed to vhost_add_used_n. Then I will set vq->heads[0].len = 15,
> vq->heads[15].len = 5 and vq->heads[20].len = 10 as the reorder step. Once I
> find there is no hole in the batched descriptors, I will expose them to the driver.

So spec said:

"If VIRTIO_F_IN_ORDER has been negotiated, driver uses descriptors in
ring order: starting from offset 0 in the table, and wrapping around
at the end of the table."

And

"VIRTIO_F_IN_ORDER(35)This feature indicates that all buffers are used
by the device in the same order in which they have been made
available."

This means your example is not an IN_ORDER device.

The driver should submit buffers (assuming each buffer have one
descriptor) in order {id = 0, 15}, {id = 1, 30} and {id = 2, 20}.

And even if it is submitted in order, we can not use a batch because:

"The skipped buffers (for which no used ring entry was written) are
assumed to have been used (read or written) by the device completely."

This means for TX we are probably ok, but for rx, unless we know the
buffers were written completely, we can't write them in a batch.

I'd suggest to do cross testing for this series:

1) testing vhost IN_ORDER support with DPDK virtio PMD
2) testing virtio IN_ORDER with DPDK vhost-user via testpmd

Thanks



Hi, you can regard the reorder feature in vhost as a "workaround" solution for a device that
can't consume buffers in order; if the device supports the in-order feature, the reordering
in vhost will not be used.

Cross testing with DPDK will be done in the future. Thanks!
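
To make the example above concrete, reading the three buffers as descriptor chains
starting at ids 0, 15 and 20 (the chain lengths are the ones given above), a trace
of the reorder bookkeeping in __vhost_add_used_n would look like this:

	heads[] after the length-calculation loop:
	    vq->heads[0].len  = 15    (chain covering descriptors 0..14)
	    vq->heads[15].len = 5     (chain covering descriptors 15..19)
	    vq->heads[20].len = 10    (chain covering descriptors 20..29)

	walk from next_used_head_idx = 0:
	    i = 0,  advance by 15 -> 15
	    i = 15, advance by 5  -> 20
	    i = 20, advance by 10 -> 30 (mod vq->num); heads[30].len == 0, stop

	count = 3, and a single used entry is written with id = 20, the head
	of the last chain in the batch.  Had the chain at 15 not completed
	yet, the walk would stop at that hole and nothing would be exposed.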

>
>
> +        /* calculate descriptor chain length for each used buffer */
>
>
>
> I'm a little bit confused about this comment, we have heads[i].len for this?
>
> Maybe I should not use vq->heads; it is somewhat misleading.
>
>
> +        for (i = 0; i < count; i++) {
> +            begin = heads[i].id;
> +            end = begin;
> +            vq->heads[begin].len = 0;
>
>
>
> Does this work for e.g RX virtqueue?
>
>
> +            do {
> +                vq->heads[begin].len += 1;
> +                if (unlikely(vhost_get_desc(vq, &desc, end))) {
>
>
>
> Let's try hard to avoid more userspace copy here, it's the source of performance regression.
>
> Thanks
>
>
> +                    vq_err(vq, "Failed to get descriptor: idx %d addr %p\n",
> +                           end, vq->desc + end);
> +                    return -EFAULT;
> +                }
> +            } while ((end = next_desc(vq, &desc)) != -1);
> +        }
> +
> +        count = 0;
> +        /* sort and batch continuous used ring entry */
> +        while (vq->heads[vq->next_used_head_idx].len != 0) {
> +            count++;
> +            i = vq->next_used_head_idx;
> +            vq->next_used_head_idx = (vq->next_used_head_idx +
> +                          vq->heads[vq->next_used_head_idx].len)
> +                          % vq->num;
> +            vq->heads[i].len = 0;
> +        }
> +        /* only write out a single used ring entry with the id corresponding
> +         * to the head entry of the descriptor chain describing the last buffer
> +         * in the batch.
> +         */
> +        heads[0].id = i;
> +        copy_n = 1;
> +    }
>         start = vq->last_used_idx & (vq->num - 1);
>       used = vq->used->ring + start;
> -    if (vhost_put_used(vq, heads, start, count)) {
> +    if (vhost_put_used(vq, heads, start, copy_n)) {
>           vq_err(vq, "Failed to write used");
>           return -EFAULT;
>       }
> @@ -2410,7 +2450,7 @@ int vhost_add_used_n(struct vhost_virtqueue *vq, struct vring_used_elem *heads,
>         start = vq->last_used_idx & (vq->num - 1);
>       n = vq->num - start;
> -    if (n < count) {
> +    if (n < count && !vhost_has_feature(vq, VIRTIO_F_IN_ORDER)) {
>           r = __vhost_add_used_n(vq, heads, n);
>           if (r < 0)
>               return r;
> diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
> index d9109107a..7b2c0fbb5 100644
> --- a/drivers/vhost/vhost.h
> +++ b/drivers/vhost/vhost.h
> @@ -107,6 +107,9 @@ struct vhost_virtqueue {
>       bool log_used;
>       u64 log_addr;
>   +    /* Sort heads in order */
> +    u16 next_used_head_idx;
> +
>       struct iovec iov[UIO_MAXIOV];
>       struct iovec iotlb_iov[64];
>       struct iovec *indirect;
>
>
>

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [RFC 1/5] vhost: reorder used descriptors in a batch
  2022-07-22  7:07   ` Eugenio Perez Martin
@ 2022-08-02  3:30     ` Guo Zhi
  0 siblings, 0 replies; 24+ messages in thread
From: Guo Zhi @ 2022-08-02  3:30 UTC (permalink / raw)
  To: eperezma
  Cc: jasowang, sgarzare, Michael Tsirkin, netdev, linux-kernel,
	kvm list, virtualization



----- Original Message -----
> From: "eperezma" <eperezma@redhat.com>
> To: "Guo Zhi" <qtxuning1999@sjtu.edu.cn>
> Cc: "jasowang" <jasowang@redhat.com>, "sgarzare" <sgarzare@redhat.com>, "Michael Tsirkin" <mst@redhat.com>, "netdev"
> <netdev@vger.kernel.org>, "linux-kernel" <linux-kernel@vger.kernel.org>, "kvm list" <kvm@vger.kernel.org>,
> "virtualization" <virtualization@lists.linux-foundation.org>
> Sent: Friday, July 22, 2022 3:07:17 PM
> Subject: Re: [RFC 1/5] vhost: reorder used descriptors in a batch

> On Thu, Jul 21, 2022 at 10:44 AM Guo Zhi <qtxuning1999@sjtu.edu.cn> wrote:
>>
>> Device may not use descriptors in order, for example, NIC and SCSI may
>> not call __vhost_add_used_n with buffers in order.  It's the task of
>> __vhost_add_used_n to order them.  This commit reorders the buffers using
>> vq->heads; only when the batch begins from the expected start point and is
>> continuous can the batch be exposed to the driver.  And only a single used
>> ring entry is written out for a batch of descriptors, according to the
>> VIRTIO 1.1 spec.
>>
>> Signed-off-by: Guo Zhi <qtxuning1999@sjtu.edu.cn>
>> ---
>>  drivers/vhost/vhost.c | 44 +++++++++++++++++++++++++++++++++++++++++--
>>  drivers/vhost/vhost.h |  3 +++
>>  2 files changed, 45 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
>> index 40097826c..e2e77e29f 100644
>> --- a/drivers/vhost/vhost.c
>> +++ b/drivers/vhost/vhost.c
>> @@ -317,6 +317,7 @@ static void vhost_vq_reset(struct vhost_dev *dev,
>>         vq->used_flags = 0;
>>         vq->log_used = false;
>>         vq->log_addr = -1ull;
>> +       vq->next_used_head_idx = 0;
>>         vq->private_data = NULL;
>>         vq->acked_features = 0;
>>         vq->acked_backend_features = 0;
>> @@ -398,6 +399,8 @@ static long vhost_dev_alloc_iovecs(struct vhost_dev *dev)
>>                                           GFP_KERNEL);
>>                 if (!vq->indirect || !vq->log || !vq->heads)
>>                         goto err_nomem;
>> +
>> +               memset(vq->heads, 0, sizeof(*vq->heads) * dev->iov_limit);
>>         }
>>         return 0;
>>
>> @@ -2374,12 +2377,49 @@ static int __vhost_add_used_n(struct vhost_virtqueue
>> *vq,
>>                             unsigned count)
>>  {
>>         vring_used_elem_t __user *used;
>> +       struct vring_desc desc;
>>         u16 old, new;
>>         int start;
>> +       int begin, end, i;
>> +       int copy_n = count;
>> +
>> +       if (vhost_has_feature(vq, VIRTIO_F_IN_ORDER)) {
>> +               /* calculate descriptor chain length for each used buffer */
>> +               for (i = 0; i < count; i++) {
>> +                       begin = heads[i].id;
>> +                       end = begin;
>> +                       vq->heads[begin].len = 0;
>> +                       do {
>> +                               vq->heads[begin].len += 1;
>> +                               if (unlikely(vhost_get_desc(vq, &desc, end))) {
>> +                                       vq_err(vq, "Failed to get descriptor:
>> idx %d addr %p\n",
>> +                                              end, vq->desc + end);
>> +                                       return -EFAULT;
>> +                               }
>> +                       } while ((end = next_desc(vq, &desc)) != -1);
>> +               }
>> +
>> +               count = 0;
>> +               /* sort and batch continuous used ring entry */
>> +               while (vq->heads[vq->next_used_head_idx].len != 0) {
>> +                       count++;
>> +                       i = vq->next_used_head_idx;
>> +                       vq->next_used_head_idx = (vq->next_used_head_idx +
>> +
>> vq->heads[vq->next_used_head_idx].len)
>> +                                                 % vq->num;
>> +                       vq->heads[i].len = 0;
>> +               }
> 
> You're iterating vq->heads with two different indexes here.
> 
> The first loop is working with indexes [0, count), which is fine if
> heads is a "cache" and everything can be overwritten (as it used to be
> before this patch).
> 
> The other loop trusts in vq->next_used_head_idx, which is saved between calls.
> 
> So both uses are going to conflict with each other.
> 

The first loop calculates the chain length of each used buffer, and the second finds
the starting point of the next batch; the second loop consumes the lengths recorded by the first.

> A proposal for checking this is to push the data in the chains
> incrementally at the virtio_test driver, and check that they are
> returned properly. Like, the first buffer in the chain has the value
> of N, the second one N+1, and so on.
> 

LGTM. I'll try this to enhance the test.
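
A rough sketch of that check, assuming a virtio_test-style userspace loop; buf[],
chain_len, BUF_SIZE and the submission helper are all made up for illustration:

	/* fill each buffer of a chain with an increasing pattern */
	for (i = 0; i < chain_len; i++)
		memset(buf[i], (uint8_t)(seq + i), BUF_SIZE);
	seq += chain_len;
	submit_chain(buf, chain_len);

	/* ... when the chain comes back used, verify the pattern ... */
	for (i = 0; i < chain_len; i++)
		assert(((uint8_t *)buf[i])[0] == (uint8_t)(done_seq + i));
	done_seq += chain_len;

If the chains came back in a different order than they were made available,
done_seq would drift from the written pattern and the assert would fire.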

> Let's split saving chains in its own patch.
> 
> 
>> +               /* only write out a single used ring entry with the id
>> corresponding
>> +                * to the head entry of the descriptor chain describing the last
>> buffer
>> +                * in the batch.
>> +                */
> 
> Let's delay the batching for now, we can add it as an optimization on
> top in the case of devices.
> 
> My proposal is to define a new struct vring_used_elem_inorder:
> 
> struct vring_used_elem_inorder {
>    uint16_t written;
>    uint16_t num;
> };
> 
> And create a per vq array of them, with vq->num size. Let's call it
> used_inorder for example.
> 
> Everytime the device uses a buffer chain of N buffers, written L and
> first descriptor id D, it stores vq->used_inorder[D] = { .written = L,
> .num = N }. .num == 0 means the buffer is not available.
> 
> After storing that information, you have your next_used_head_idx. You
> can check if vq->used_inorder[next_used_head_idx] is used (.num != 0).
> In case is not, there is no need to perform any actions for now.
> 
> In case it is, you iterate vq->used_inorder. First you write as used
> next_used_head_idx. After that, next_used_head_idx increments by .num,
> and we need to clean .num. If vq->used_inorder[vq->next_used_head_idx]
> is used too, repeat.
> 
> I think we could even squash vq->heads and vq->used_inorder with some
> tricks, because a chain's length would always be bigger or equal than
> used descriptor one, but to store in a different array would be more
> clear.
> 

I think this algorithm is the same as the one in the patch. But it is better
to add a struct named vring_used_elem_inorder instead of reusing vq->heads, which
is clearer.
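
A sketch of that proposal, using the names from the message above (used_inorder
would be a new per-vq array of vq->num entries, next_used_head_idx already exists
in the RFC; write_used_entry() is a stand-in for the real copy to the used ring,
not an actual vhost helper):

	struct vring_used_elem_inorder {
		uint16_t written;
		uint16_t num;	/* 0 means the slot is not used yet */
	};

	/* Device finished a chain of @num descriptors, @written bytes,
	 * whose head id is @d.  Sketch only: no locking or error handling. */
	static void mark_used_inorder(struct vhost_virtqueue *vq,
				      u16 d, u16 num, u16 written)
	{
		vq->used_inorder[d].written = written;
		vq->used_inorder[d].num = num;

		/* Flush every chain that is now contiguous from the head. */
		while (vq->used_inorder[vq->next_used_head_idx].num) {
			u16 i = vq->next_used_head_idx;

			write_used_entry(vq, i, vq->used_inorder[i].written);
			vq->next_used_head_idx = (i + vq->used_inorder[i].num)
						 % vq->num;
			vq->used_inorder[i].num = 0;
		}
	}

This keeps the out-of-order bookkeeping separate from vq->heads, as suggested.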

>> +               heads[0].id = i;
>> +               copy_n = 1;
> 
> The device must not write anything to the used ring if the next
> descriptor has not been used. I'm failing to trace how this works when
> the second half of the batch in vhost/test.c is used here.
> 
> Thanks!
> 
> 

Sorry for my mistake, I forgot to add the check if (count == 0) in the patch.

>> +       }
>>
>>         start = vq->last_used_idx & (vq->num - 1);
>>         used = vq->used->ring + start;
>> -       if (vhost_put_used(vq, heads, start, count)) {
>> +       if (vhost_put_used(vq, heads, start, copy_n)) {
>>                 vq_err(vq, "Failed to write used");
>>                 return -EFAULT;
>>         }
>> @@ -2410,7 +2450,7 @@ int vhost_add_used_n(struct vhost_virtqueue *vq, struct
>> vring_used_elem *heads,
>>
>>         start = vq->last_used_idx & (vq->num - 1);
>>         n = vq->num - start;
>> -       if (n < count) {
>> +       if (n < count && !vhost_has_feature(vq, VIRTIO_F_IN_ORDER)) {
>>                 r = __vhost_add_used_n(vq, heads, n);
>>                 if (r < 0)
>>                         return r;
>> diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
>> index d9109107a..7b2c0fbb5 100644
>> --- a/drivers/vhost/vhost.h
>> +++ b/drivers/vhost/vhost.h
>> @@ -107,6 +107,9 @@ struct vhost_virtqueue {
>>         bool log_used;
>>         u64 log_addr;
>>
>> +       /* Sort heads in order */
>> +       u16 next_used_head_idx;
>> +
>>         struct iovec iov[UIO_MAXIOV];
>>         struct iovec iotlb_iov[64];
>>         struct iovec *indirect;
>> --
>> 2.17.1
>>

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [RFC 3/5] vhost_test: batch used buffer
       [not found]     ` <1D1ABF88-B503-4BE0-AC83-3326EAA62510@sjtu.edu.cn>
@ 2022-08-02  7:45       ` Stefano Garzarella
  0 siblings, 0 replies; 24+ messages in thread
From: Stefano Garzarella @ 2022-08-02  7:45 UTC (permalink / raw)
  To: Zhi Guo
  Cc: Eugenio Perez Martin, Jason Wang, Michael Tsirkin, netdev,
	linux-kernel, kvm list, virtualization

On Tue, Aug 2, 2022 at 4:45 AM Zhi Guo <qtxuning1999@sjtu.edu.cn> wrote:
>
>
>
> On Jul 22, 2022, at 3:12 PM, Eugenio Perez Martin <eperezma@redhat.com> wrote:
>
> On Thu, Jul 21, 2022 at 10:44 AM Guo Zhi <qtxuning1999@sjtu.edu.cn> wrote:
>
>
> Only add to the used ring when a batch of buffers have all been used.  And if
> the in-order feature is negotiated, add randomness to the used buffers' order to
> test the ability of vhost to reorder a batched buffer.
>
> Signed-off-by: Guo Zhi <qtxuning1999@sjtu.edu.cn>
> ---
> drivers/vhost/test.c | 15 ++++++++++++++-
> 1 file changed, 14 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/vhost/test.c b/drivers/vhost/test.c
> index bc8e7fb1e..1c9c40c11 100644
> --- a/drivers/vhost/test.c
> +++ b/drivers/vhost/test.c
> @@ -43,6 +43,9 @@ struct vhost_test {
> static void handle_vq(struct vhost_test *n)
> {
>        struct vhost_virtqueue *vq = &n->vqs[VHOST_TEST_VQ];
> +       struct vring_used_elem *heads = kmalloc(sizeof(*heads)
> +                       * vq->num, GFP_KERNEL);
> +       int batch_idx = 0;
>        unsigned out, in;
>        int head;
>        size_t len, total_len = 0;
> @@ -84,11 +87,21 @@ static void handle_vq(struct vhost_test *n)
>                        vq_err(vq, "Unexpected 0 len for TX\n");
>                        break;
>                }
> -               vhost_add_used_and_signal(&n->dev, vq, head, 0);
> +               heads[batch_idx].id = cpu_to_vhost32(vq, head);
> +               heads[batch_idx++].len = cpu_to_vhost32(vq, len);
>                total_len += len;
>                if (unlikely(vhost_exceeds_weight(vq, 0, total_len)))
>                        break;
>        }
> +       if (batch_idx) {
> +               if (vhost_has_feature(vq, VIRTIO_F_IN_ORDER) && batch_idx >= 2) {
>
>
> Maybe to add a module parameter to test this? Instead of trusting in
> feature negotiation, "unorder_used=1" or something like that.
>
> vhost.c:vhost_add_used_and_signal_n should support receiving buffers
> in order or out of order whether F_IN_ORDER is negotiated or not.
>
> Thanks!
>
> That's a good idea. The reorder feature in vhost is a "workaround" solution for a device that can't consume buffers in order;
> if the device supports the in-order feature, the reordering in vhost will not be used.
> So we can add a parameter in vhost_test to configure in-order or out-of-order usage of used descriptors.
> Would a global parameter in vhost_test.c be enough?

Maybe a module parameter is easier to use (or a sysfs file), so we
don't need to recompile the module every time to test.

In view of having a CI, it's definitely easier to set the module
parameter than to recompile it.
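
For what it's worth, a sketch of such a knob for drivers/vhost/test.c; the name
unorder_used follows Eugenio's suggestion, and where handle_vq() consults it is
an assumption:

	static bool unorder_used;
	module_param(unorder_used, bool, 0644);
	MODULE_PARM_DESC(unorder_used,
			 "Complete used buffers out of order to exercise reordering");

	/* in handle_vq(), keyed on the knob instead of feature negotiation: */
	if (unorder_used && batch_idx >= 2) {
		/* return the second half of the batch before the first */
		vhost_add_used_and_signal_n(&n->dev, vq, &heads[batch_idx / 2],
					    batch_idx - batch_idx / 2);
		vhost_add_used_and_signal_n(&n->dev, vq, heads, batch_idx / 2);
	} else {
		vhost_add_used_and_signal_n(&n->dev, vq, heads, batch_idx);
	}

With perms 0644 the value can also be flipped at runtime through
/sys/module/vhost_test/parameters/unorder_used, which is handy for a CI.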

Thanks,
Stefano


^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [RFC 1/5] vhost: reorder used descriptors in a batch
  2022-07-26  7:36   ` Jason Wang
       [not found]     ` <2a8838c4-2e6f-6de7-dcdc-572699ff3dc9@sjtu.edu.cn>
@ 2022-08-02 13:54     ` Guo Zhi
  1 sibling, 0 replies; 24+ messages in thread
From: Guo Zhi @ 2022-08-02 13:54 UTC (permalink / raw)
  To: jasowang
  Cc: eperezma, sgarzare, Michael Tsirkin, netdev, linux-kernel,
	kvm list, virtualization

----- Original Message -----
> From: "jasowang" <jasowang@redhat.com>
> To: "Guo Zhi" <qtxuning1999@sjtu.edu.cn>, "eperezma" <eperezma@redhat.com>, "sgarzare" <sgarzare@redhat.com>, "Michael
> Tsirkin" <mst@redhat.com>
> Cc: "netdev" <netdev@vger.kernel.org>, "linux-kernel" <linux-kernel@vger.kernel.org>, "kvm list" <kvm@vger.kernel.org>,
> "virtualization" <virtualization@lists.linux-foundation.org>
> Sent: Tuesday, July 26, 2022 3:36:01 PM
> Subject: Re: [RFC 1/5] vhost: reorder used descriptors in a batch

> On 2022/7/21 16:43, Guo Zhi wrote:
>> Device may not use descriptors in order, for example, NIC and SCSI may
>> not call __vhost_add_used_n with buffers in order.  It's the task of
>> __vhost_add_used_n to order them.
> 
> 
> I'm not sure this is true. Having ooo descriptors is probably by design
> to have better performance.
>
> This might be obvious for devices that may have elevator or QoS stuffs.
>
> I suspect the right thing to do here is, for the device that can't
> perform better in the case of IN_ORDER, let's simply not offer IN_ORDER
> (zerocopy or scsi). And for the device we know it can perform better, a
> non-zerocopy ethernet device, we can do that.
> 

Hi, it seems that you don't like defining the in-order feature as a transparent feature.

If we move the in_order treatment to the device-specific code (net.c, scsi.c, ...):

The in_order feature bit would be declared in net.c, not in vhost.c; only specific devices (e.g. net, vsock) would support the in-order feature and expose used descriptors in order.
The code of vhost.c would be untouched or almost untouched, and only the code in net.c, scsi.c, etc. would need to be modified; each device would do the batching job by itself.
This can achieve the best performance for the devices which use descriptors in order.

If this is better, I will send a new version of the patches for this RFC.
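
As a sketch of what that would mean for net: the current VHOST_NET_FEATURES
definition comes from drivers/vhost/net.c, and adding the bit there rather than
to VHOST_FEATURES in vhost.h is exactly the device-specific opt-in being proposed.

	enum {
		VHOST_NET_FEATURES = VHOST_FEATURES |
				     (1ULL << VHOST_NET_F_VIRTIO_NET_HDR) |
				     (1ULL << VIRTIO_NET_F_MRG_RXBUF) |
				     (1ULL << VIRTIO_F_ACCESS_PLATFORM) |
				     (1ULL << VIRTIO_F_IN_ORDER)
	};

vsock could extend VHOST_VSOCK_FEATURES the same way, while scsi and the
zerocopy paths would simply not offer the bit.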

> 
>>   This commit reorders the buffers using
>> vq->heads; only when the batch begins from the expected start point and is
>> continuous can the batch be exposed to the driver.  And only a single used
>> ring entry is written out for a batch of descriptors, according to the
>> VIRTIO 1.1 spec.
> 
> 
> So this sounds more like a "workaround" of the device that can't consume
> buffer in order, I suspect it can help in performance.
> 
> More below.
> 
> 
>>
>> Signed-off-by: Guo Zhi <qtxuning1999@sjtu.edu.cn>
>> ---
>>   drivers/vhost/vhost.c | 44 +++++++++++++++++++++++++++++++++++++++++--
>>   drivers/vhost/vhost.h |  3 +++
>>   2 files changed, 45 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
>> index 40097826c..e2e77e29f 100644
>> --- a/drivers/vhost/vhost.c
>> +++ b/drivers/vhost/vhost.c
>> @@ -317,6 +317,7 @@ static void vhost_vq_reset(struct vhost_dev *dev,
>>   	vq->used_flags = 0;
>>   	vq->log_used = false;
>>   	vq->log_addr = -1ull;
>> +	vq->next_used_head_idx = 0;
>>   	vq->private_data = NULL;
>>   	vq->acked_features = 0;
>>   	vq->acked_backend_features = 0;
>> @@ -398,6 +399,8 @@ static long vhost_dev_alloc_iovecs(struct vhost_dev *dev)
>>   					  GFP_KERNEL);
>>   		if (!vq->indirect || !vq->log || !vq->heads)
>>   			goto err_nomem;
>> +
>> +		memset(vq->heads, 0, sizeof(*vq->heads) * dev->iov_limit);
>>   	}
>>   	return 0;
>>   
>> @@ -2374,12 +2377,49 @@ static int __vhost_add_used_n(struct vhost_virtqueue
>> *vq,
>>   			    unsigned count)
>>   {
>>   	vring_used_elem_t __user *used;
>> +	struct vring_desc desc;
>>   	u16 old, new;
>>   	int start;
>> +	int begin, end, i;
>> +	int copy_n = count;
>> +
>> +	if (vhost_has_feature(vq, VIRTIO_F_IN_ORDER)) {
> 
> 
> How do you guarantee that ids of heads are contiguous?
> 
> 
>> +		/* calculate descriptor chain length for each used buffer */
> 
> 
> I'm a little bit confused about this comment, we have heads[i].len for this?
> 
> 
>> +		for (i = 0; i < count; i++) {
>> +			begin = heads[i].id;
>> +			end = begin;
>> +			vq->heads[begin].len = 0;
> 
> 
> Does this work for e.g RX virtqueue?
> 
> 
>> +			do {
>> +				vq->heads[begin].len += 1;
>> +				if (unlikely(vhost_get_desc(vq, &desc, end))) {
> 
> 
> Let's try hard to avoid more userspace copy here, it's the source of
> performance regression.
> 
> Thanks
> 
> 
>> +					vq_err(vq, "Failed to get descriptor: idx %d addr %p\n",
>> +					       end, vq->desc + end);
>> +					return -EFAULT;
>> +				}
>> +			} while ((end = next_desc(vq, &desc)) != -1);
>> +		}
>> +
>> +		count = 0;
>> +		/* sort and batch continuous used ring entry */
>> +		while (vq->heads[vq->next_used_head_idx].len != 0) {
>> +			count++;
>> +			i = vq->next_used_head_idx;
>> +			vq->next_used_head_idx = (vq->next_used_head_idx +
>> +						  vq->heads[vq->next_used_head_idx].len)
>> +						  % vq->num;
>> +			vq->heads[i].len = 0;
>> +		}
>> +		/* only write out a single used ring entry with the id corresponding
>> +		 * to the head entry of the descriptor chain describing the last buffer
>> +		 * in the batch.
>> +		 */
>> +		heads[0].id = i;
>> +		copy_n = 1;
>> +	}
>>   
>>   	start = vq->last_used_idx & (vq->num - 1);
>>   	used = vq->used->ring + start;
>> -	if (vhost_put_used(vq, heads, start, count)) {
>> +	if (vhost_put_used(vq, heads, start, copy_n)) {
>>   		vq_err(vq, "Failed to write used");
>>   		return -EFAULT;
>>   	}
>> @@ -2410,7 +2450,7 @@ int vhost_add_used_n(struct vhost_virtqueue *vq, struct
>> vring_used_elem *heads,
>>   
>>   	start = vq->last_used_idx & (vq->num - 1);
>>   	n = vq->num - start;
>> -	if (n < count) {
>> +	if (n < count && !vhost_has_feature(vq, VIRTIO_F_IN_ORDER)) {
>>   		r = __vhost_add_used_n(vq, heads, n);
>>   		if (r < 0)
>>   			return r;
>> diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
>> index d9109107a..7b2c0fbb5 100644
>> --- a/drivers/vhost/vhost.h
>> +++ b/drivers/vhost/vhost.h
>> @@ -107,6 +107,9 @@ struct vhost_virtqueue {
>>   	bool log_used;
>>   	u64 log_addr;
>>   
>> +	/* Sort heads in order */
>> +	u16 next_used_head_idx;
>> +
>>   	struct iovec iov[UIO_MAXIOV];
>>   	struct iovec iotlb_iov[64];
>>   	struct iovec *indirect;

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [RFC 1/5] vhost: reorder used descriptors in a batch
  2022-07-29  7:32       ` Jason Wang
  2022-08-02  3:09         ` Guo Zhi
@ 2022-08-02 14:12         ` Guo Zhi
  2022-08-04  5:04           ` Jason Wang
  1 sibling, 1 reply; 24+ messages in thread
From: Guo Zhi @ 2022-08-02 14:12 UTC (permalink / raw)
  To: jasowang
  Cc: eperezma, sgarzare, Michael Tsirkin, netdev, linux-kernel,
	kvm list, virtualization



----- Original Message -----
> From: "jasowang" <jasowang@redhat.com>
> To: "Guo Zhi" <qtxuning1999@sjtu.edu.cn>
> Cc: "eperezma" <eperezma@redhat.com>, "sgarzare" <sgarzare@redhat.com>, "Michael Tsirkin" <mst@redhat.com>, "netdev"
> <netdev@vger.kernel.org>, "linux-kernel" <linux-kernel@vger.kernel.org>, "kvm list" <kvm@vger.kernel.org>,
> "virtualization" <virtualization@lists.linux-foundation.org>
> Sent: Friday, July 29, 2022 3:32:02 PM
> Subject: Re: [RFC 1/5] vhost: reorder used descriptors in a batch

> On Thu, Jul 28, 2022 at 4:26 PM Guo Zhi <qtxuning1999@sjtu.edu.cn> wrote:
>>
>> On 2022/7/26 15:36, Jason Wang wrote:
>>
>>
>> On 2022/7/21 16:43, Guo Zhi wrote:
>>
>> Device may not use descriptors in order, for example, NIC and SCSI may
>> not call __vhost_add_used_n with buffers in order.  It's the task of
>> __vhost_add_used_n to order them.
>>
>>
>>
>> I'm not sure this is true. Having ooo descriptors is probably by design to have
>> better performance.
>>
>> This might be obvious for devices that may have elevator or QoS stuffs.
>>
>> I suspect the right thing to do here is, for the device that can't perform
>> better in the case of IN_ORDER, let's simply not offer IN_ORDER (zerocopy or
>> scsi). And for the device we know it can perform better, a non-zerocopy ethernet
>> device, we can do that.
>>
>>
>>   This commit reorders the buffers using
>> vq->heads; only when the batch begins from the expected start point and is
>> continuous can the batch be exposed to the driver.  And only a single used
>> ring entry is written out for a batch of descriptors, according to the
>> VIRTIO 1.1 spec.
>>
>>
>>
>> So this sounds more like a "workaround" of the device that can't consume buffer
>> in order, I suspect it can help in performance.
>>
>> More below.
>>
>>
>>
>> Signed-off-by: Guo Zhi <qtxuning1999@sjtu.edu.cn>
>> ---
>>   drivers/vhost/vhost.c | 44 +++++++++++++++++++++++++++++++++++++++++--
>>   drivers/vhost/vhost.h |  3 +++
>>   2 files changed, 45 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
>> index 40097826c..e2e77e29f 100644
>> --- a/drivers/vhost/vhost.c
>> +++ b/drivers/vhost/vhost.c
>> @@ -317,6 +317,7 @@ static void vhost_vq_reset(struct vhost_dev *dev,
>>       vq->used_flags = 0;
>>       vq->log_used = false;
>>       vq->log_addr = -1ull;
>> +    vq->next_used_head_idx = 0;
>>       vq->private_data = NULL;
>>       vq->acked_features = 0;
>>       vq->acked_backend_features = 0;
>> @@ -398,6 +399,8 @@ static long vhost_dev_alloc_iovecs(struct vhost_dev *dev)
>>                         GFP_KERNEL);
>>           if (!vq->indirect || !vq->log || !vq->heads)
>>               goto err_nomem;
>> +
>> +        memset(vq->heads, 0, sizeof(*vq->heads) * dev->iov_limit);
>>       }
>>       return 0;
>>   @@ -2374,12 +2377,49 @@ static int __vhost_add_used_n(struct vhost_virtqueue
>>   *vq,
>>                   unsigned count)
>>   {
>>       vring_used_elem_t __user *used;
>> +    struct vring_desc desc;
>>       u16 old, new;
>>       int start;
>> +    int begin, end, i;
>> +    int copy_n = count;
>> +
>> +    if (vhost_has_feature(vq, VIRTIO_F_IN_ORDER)) {
>>
>>
>>
>> How do you guarantee that ids of heads are contiguous?
>>
>> The ids of heads do not need to be contiguous.
>>
>> For example, I have three buffers { .id = 0, 15}, {.id = 20, 30}, {.id = 15, 20}
>> passed to vhost_add_used_n. Then I will set vq->heads[0].len = 15,
>> vq->heads[15].len = 5 and vq->heads[20].len = 10 as the reorder step. Once I
>> find there is no hole in the batched descriptors, I will expose them to the driver.
> 
> So spec said:
> 
> "If VIRTIO_F_IN_ORDER has been negotiated, driver uses descriptors in
> ring order: starting from offset 0 in the table, and wrapping around
> at the end of the table."
> 
> And
> 
> "VIRTIO_F_IN_ORDER(35)This feature indicates that all buffers are used
> by the device in the same order in which they have been made
> available."
> 
> This means your example is not an IN_ORDER device.
> 
> The driver should submit buffers (assuming each buffer have one
> descriptor) in order {id = 0, 15}, {id = 1, 30} and {id = 2, 20}.
> 
> And even if it is submitted in order, we can not use a batch because:
> 
> "The skipped buffers (for which no used ring entry was written) are
> assumed to have been used (read or written) by the device completely."
> 
> This means for TX we are probably ok, but for rx, unless we know the
> buffers were written completely, we can't write them in a batch.
> 
> I'd suggest to do cross testing for this series:
> 
> 1) testing vhost IN_ORDER support with DPDK virtio PMD
> 2) testing virtio IN_ORDER with DPDK vhost-user via testpmd
> 
> Thanks
> 
You are correct: for rx we can't do a batch because we have to let the driver know the length of each buffer.

I think these circumstances can support batching:
1. tx
2. rx with the MRG_RXBUF feature, which introduces a header for each received buffer

Considering that batching is not a mandatory requirement for the in-order feature according to the spec,
I'd like to let the current RFC patches focus on the in-order implementation, and send another
patch series to improve performance by batching in the above circumstances.

What's your opinion?

Thanks
> 
>>
>>
>> +        /* calculate descriptor chain length for each used buffer */
>>
>>
>>
>> I'm a little bit confused about this comment, we have heads[i].len for this?
>>
>> Maybe I should not use vq->heads; it is somewhat misleading.
>>
>>
>> +        for (i = 0; i < count; i++) {
>> +            begin = heads[i].id;
>> +            end = begin;
>> +            vq->heads[begin].len = 0;
>>
>>
>>
>> Does this work for e.g RX virtqueue?
>>
>>
>> +            do {
>> +                vq->heads[begin].len += 1;
>> +                if (unlikely(vhost_get_desc(vq, &desc, end))) {
>>
>>
>>
>> Let's try hard to avoid more userspace copy here, it's the source of performance
>> regression.
>>
>> Thanks
>>
>>
>> +                    vq_err(vq, "Failed to get descriptor: idx %d addr %p\n",
>> +                           end, vq->desc + end);
>> +                    return -EFAULT;
>> +                }
>> +            } while ((end = next_desc(vq, &desc)) != -1);
>> +        }
>> +
>> +        count = 0;
>> +        /* sort and batch continuous used ring entry */
>> +        while (vq->heads[vq->next_used_head_idx].len != 0) {
>> +            count++;
>> +            i = vq->next_used_head_idx;
>> +            vq->next_used_head_idx = (vq->next_used_head_idx +
>> +                          vq->heads[vq->next_used_head_idx].len)
>> +                          % vq->num;
>> +            vq->heads[i].len = 0;
>> +        }
>> +        /* only write out a single used ring entry with the id corresponding
>> +         * to the head entry of the descriptor chain describing the last buffer
>> +         * in the batch.
>> +         */
>> +        heads[0].id = i;
>> +        copy_n = 1;
>> +    }
>>         start = vq->last_used_idx & (vq->num - 1);
>>       used = vq->used->ring + start;
>> -    if (vhost_put_used(vq, heads, start, count)) {
>> +    if (vhost_put_used(vq, heads, start, copy_n)) {
>>           vq_err(vq, "Failed to write used");
>>           return -EFAULT;
>>       }
>> @@ -2410,7 +2450,7 @@ int vhost_add_used_n(struct vhost_virtqueue *vq, struct
>> vring_used_elem *heads,
>>         start = vq->last_used_idx & (vq->num - 1);
>>       n = vq->num - start;
>> -    if (n < count) {
>> +    if (n < count && !vhost_has_feature(vq, VIRTIO_F_IN_ORDER)) {
>>           r = __vhost_add_used_n(vq, heads, n);
>>           if (r < 0)
>>               return r;
>> diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
>> index d9109107a..7b2c0fbb5 100644
>> --- a/drivers/vhost/vhost.h
>> +++ b/drivers/vhost/vhost.h
>> @@ -107,6 +107,9 @@ struct vhost_virtqueue {
>>       bool log_used;
>>       u64 log_addr;
>>   +    /* Sort heads in order */
>> +    u16 next_used_head_idx;
>> +
>>       struct iovec iov[UIO_MAXIOV];
>>       struct iovec iotlb_iov[64];
>>       struct iovec *indirect;
>>
>>
>>

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [RFC 1/5] vhost: reorder used descriptors in a batch
  2022-08-02 14:12         ` Guo Zhi
@ 2022-08-04  5:04           ` Jason Wang
  2022-08-11  8:58             ` Guo Zhi
  0 siblings, 1 reply; 24+ messages in thread
From: Jason Wang @ 2022-08-04  5:04 UTC (permalink / raw)
  To: Guo Zhi
  Cc: eperezma, sgarzare, Michael Tsirkin, netdev, linux-kernel,
	kvm list, virtualization

On Tue, Aug 2, 2022 at 10:12 PM Guo Zhi <qtxuning1999@sjtu.edu.cn> wrote:
>
>
>
> ----- Original Message -----
> > From: "jasowang" <jasowang@redhat.com>
> > To: "Guo Zhi" <qtxuning1999@sjtu.edu.cn>
> > Cc: "eperezma" <eperezma@redhat.com>, "sgarzare" <sgarzare@redhat.com>, "Michael Tsirkin" <mst@redhat.com>, "netdev"
> > <netdev@vger.kernel.org>, "linux-kernel" <linux-kernel@vger.kernel.org>, "kvm list" <kvm@vger.kernel.org>,
> > "virtualization" <virtualization@lists.linux-foundation.org>
> > Sent: Friday, July 29, 2022 3:32:02 PM
> > Subject: Re: [RFC 1/5] vhost: reorder used descriptors in a batch
>
> > On Thu, Jul 28, 2022 at 4:26 PM Guo Zhi <qtxuning1999@sjtu.edu.cn> wrote:
> >>
> >> On 2022/7/26 15:36, Jason Wang wrote:
> >>
> >>
> >> On 2022/7/21 16:43, Guo Zhi wrote:
> >>
> >> Device may not use descriptors in order, for example, NIC and SCSI may
> >> not call __vhost_add_used_n with buffers in order.  It's the task of
> >> __vhost_add_used_n to order them.
> >>
> >>
> >>
> >> I'm not sure this is true. Having ooo descriptors is probably by design to have
> >> better performance.
> >>
> >> This might be obvious for devices that may have elevator or QoS stuffs.
> >>
> >> I suspect the right thing to do here is, for the device that can't perform
> >> better in the case of IN_ORDER, let's simply not offer IN_ORDER (zerocopy or
> >> scsi). And for the device we know it can perform better, a non-zerocopy ethernet
> >> device, we can do that.
> >>
> >>
> >>   This commit reorders the buffers using
> >> vq->heads; only when the batch begins from the expected start point and is
> >> continuous can the batch be exposed to the driver.  And only a single used
> >> ring entry is written out for a batch of descriptors, according to the
> >> VIRTIO 1.1 spec.
> >>
> >>
> >>
> >> So this sounds more like a "workaround" of the device that can't consume buffer
> >> in order, I suspect it can help in performance.
> >>
> >> More below.
> >>
> >>
> >>
> >> Signed-off-by: Guo Zhi <qtxuning1999@sjtu.edu.cn>
> >> ---
> >>   drivers/vhost/vhost.c | 44 +++++++++++++++++++++++++++++++++++++++++--
> >>   drivers/vhost/vhost.h |  3 +++
> >>   2 files changed, 45 insertions(+), 2 deletions(-)
> >>
> >> diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
> >> index 40097826c..e2e77e29f 100644
> >> --- a/drivers/vhost/vhost.c
> >> +++ b/drivers/vhost/vhost.c
> >> @@ -317,6 +317,7 @@ static void vhost_vq_reset(struct vhost_dev *dev,
> >>       vq->used_flags = 0;
> >>       vq->log_used = false;
> >>       vq->log_addr = -1ull;
> >> +    vq->next_used_head_idx = 0;
> >>       vq->private_data = NULL;
> >>       vq->acked_features = 0;
> >>       vq->acked_backend_features = 0;
> >> @@ -398,6 +399,8 @@ static long vhost_dev_alloc_iovecs(struct vhost_dev *dev)
> >>                         GFP_KERNEL);
> >>           if (!vq->indirect || !vq->log || !vq->heads)
> >>               goto err_nomem;
> >> +
> >> +        memset(vq->heads, 0, sizeof(*vq->heads) * dev->iov_limit);
> >>       }
> >>       return 0;
> >>   @@ -2374,12 +2377,49 @@ static int __vhost_add_used_n(struct vhost_virtqueue
> >>   *vq,
> >>                   unsigned count)
> >>   {
> >>       vring_used_elem_t __user *used;
> >> +    struct vring_desc desc;
> >>       u16 old, new;
> >>       int start;
> >> +    int begin, end, i;
> >> +    int copy_n = count;
> >> +
> >> +    if (vhost_has_feature(vq, VIRTIO_F_IN_ORDER)) {
> >>
> >>
> >>
> >> How do you guarantee that ids of heads are contiguous?
> >>
> >> The ids of heads do not need to be contiguous.
> >>
> >> For example, I have three buffers { .id = 0, 15}, {.id = 20, 30}, {.id = 15, 20}
> >> passed to vhost_add_used_n. Then I will set vq->heads[0].len = 15,
> >> vq->heads[15].len = 5 and vq->heads[20].len = 10 as the reorder step. Once I
> >> find there is no hole in the batched descriptors, I will expose them to the driver.
> >
> > So spec said:
> >
> > "If VIRTIO_F_IN_ORDER has been negotiated, driver uses descriptors in
> > ring order: starting from offset 0 in the table, and wrapping around
> > at the end of the table."
> >
> > And
> >
> > "VIRTIO_F_IN_ORDER(35)This feature indicates that all buffers are used
> > by the device in the same order in which they have been made
> > available."
> >
> > This means your example is not an IN_ORDER device.
> >
> > The driver should submit buffers (assuming each buffer have one
> > descriptor) in order {id = 0, 15}, {id = 1, 30} and {id = 2, 20}.
> >
> > And even if it is submitted in order, we can not use a batch because:
> >
> > "The skipped buffers (for which no used ring entry was written) are
> > assumed to have been used (read or written) by the device completely."
> >
> > This means for TX we are probably ok, but for rx, unless we know the
> > buffers were written completely, we can't write them in a batch.
> >
> > I'd suggest to do cross testing for this series:
> >
> > 1) testing vhost IN_ORDER support with DPDK virtio PMD
> > 2) testing virtio IN_ORDER with DPDK vhost-user via testpmd
> >
> > Thanks
> >
> You are correct: for rx we can't do a batch because we have to let the driver know the length of each buffer.

Note that we can do a batch for rx when we know all the buffers have
been fully written.

>
> I think these circumstances can support batching:
> 1. tx
> 2. rx with the MRG_RXBUF feature, which introduces a header for each received buffer
>
> Considering that batching is not a mandatory requirement for the in-order feature according to the spec,
> I'd like to let the current RFC patches focus on the in-order implementation, and send another
> patch series to improve performance by batching in the above circumstances.

That's fine, how about simply starting from the patch that offers
IN_ORDER when zerocopy is disabled?
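
A possible shape for that gating, as a sketch (experimental_zcopytx is the
existing zerocopy module parameter in drivers/vhost/net.c, but exactly where
the bit should be masked is an assumption):

	u64 features = VHOST_NET_FEATURES;

	/* zerocopy completes buffers out of order, so don't offer
	 * IN_ORDER together with it */
	if (experimental_zcopytx)
		features &= ~(1ULL << VIRTIO_F_IN_ORDER);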

Thanks

>
> What's your opinion?
>
> Thanks
> >
> >>
> >>
> >> +        /* calculate descriptor chain length for each used buffer */
> >>
> >>
> >>
> >> I'm a little bit confused about this comment, we have heads[i].len for this?
> >>
> >> Maybe I should not use vq->heads; it is somewhat misleading.
> >>
> >>
> >> +        for (i = 0; i < count; i++) {
> >> +            begin = heads[i].id;
> >> +            end = begin;
> >> +            vq->heads[begin].len = 0;
> >>
> >>
> >>
> >> Does this work for e.g RX virtqueue?
> >>
> >>
> >> +            do {
> >> +                vq->heads[begin].len += 1;
> >> +                if (unlikely(vhost_get_desc(vq, &desc, end))) {
> >>
> >>
> >>
> >> Let's try hard to avoid more userspace copy here, it's the source of performance
> >> regression.
> >>
> >> Thanks
> >>
> >>
> >> +                    vq_err(vq, "Failed to get descriptor: idx %d addr %p\n",
> >> +                           end, vq->desc + end);
> >> +                    return -EFAULT;
> >> +                }
> >> +            } while ((end = next_desc(vq, &desc)) != -1);
> >> +        }
> >> +
> >> +        count = 0;
> >> +        /* sort and batch continuous used ring entry */
> >> +        while (vq->heads[vq->next_used_head_idx].len != 0) {
> >> +            count++;
> >> +            i = vq->next_used_head_idx;
> >> +            vq->next_used_head_idx = (vq->next_used_head_idx +
> >> +                          vq->heads[vq->next_used_head_idx].len)
> >> +                          % vq->num;
> >> +            vq->heads[i].len = 0;
> >> +        }
> >> +        /* only write out a single used ring entry with the id corresponding
> >> +         * to the head entry of the descriptor chain describing the last buffer
> >> +         * in the batch.
> >> +         */
> >> +        heads[0].id = i;
> >> +        copy_n = 1;
> >> +    }
> >>         start = vq->last_used_idx & (vq->num - 1);
> >>       used = vq->used->ring + start;
> >> -    if (vhost_put_used(vq, heads, start, count)) {
> >> +    if (vhost_put_used(vq, heads, start, copy_n)) {
> >>           vq_err(vq, "Failed to write used");
> >>           return -EFAULT;
> >>       }
> >> @@ -2410,7 +2450,7 @@ int vhost_add_used_n(struct vhost_virtqueue *vq, struct
> >> vring_used_elem *heads,
> >>         start = vq->last_used_idx & (vq->num - 1);
> >>       n = vq->num - start;
> >> -    if (n < count) {
> >> +    if (n < count && !vhost_has_feature(vq, VIRTIO_F_IN_ORDER)) {
> >>           r = __vhost_add_used_n(vq, heads, n);
> >>           if (r < 0)
> >>               return r;
> >> diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
> >> index d9109107a..7b2c0fbb5 100644
> >> --- a/drivers/vhost/vhost.h
> >> +++ b/drivers/vhost/vhost.h
> >> @@ -107,6 +107,9 @@ struct vhost_virtqueue {
> >>       bool log_used;
> >>       u64 log_addr;
> >>   +    /* Sort heads in order */
> >> +    u16 next_used_head_idx;
> >> +
> >>       struct iovec iov[UIO_MAXIOV];
> >>       struct iovec iotlb_iov[64];
> >>       struct iovec *indirect;
> >>
> >>
> >>
>


^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [RFC 4/5] virtio: get desc id in order
  2022-07-26  8:07   ` Jason Wang
  2022-07-28  8:12     ` Guo Zhi
@ 2022-08-11  8:49     ` Guo Zhi
  1 sibling, 0 replies; 24+ messages in thread
From: Guo Zhi @ 2022-08-11  8:49 UTC (permalink / raw)
  To: jasowang
  Cc: eperezma, sgarzare, Michael Tsirkin, netdev, linux-kernel,
	kvm list, virtualization



----- Original Message -----
> From: "jasowang" <jasowang@redhat.com>
> To: "Guo Zhi" <qtxuning1999@sjtu.edu.cn>, "eperezma" <eperezma@redhat.com>, "sgarzare" <sgarzare@redhat.com>, "Michael
> Tsirkin" <mst@redhat.com>
> Cc: "netdev" <netdev@vger.kernel.org>, "linux-kernel" <linux-kernel@vger.kernel.org>, "kvm list" <kvm@vger.kernel.org>,
> "virtualization" <virtualization@lists.linux-foundation.org>
> Sent: Tuesday, July 26, 2022 4:07:46 PM
> Subject: Re: [RFC 4/5] virtio: get desc id in order

> On 2022/7/21 16:43, Guo Zhi wrote:
>> If the in-order feature is negotiated, we can skip the used ring and get
>> each buffer's desc id sequentially.
> 
> 
> Let's rename the patch to something like "in order support for virtio_ring"
> 
> 
>>
>> Signed-off-by: Guo Zhi <qtxuning1999@sjtu.edu.cn>
>> ---
>>   drivers/virtio/virtio_ring.c | 37 ++++++++++++++++++++++++++++--------
>>   1 file changed, 29 insertions(+), 8 deletions(-)
> 
> 
> I don't see packed support in this patch, we need to implement that.
> 
> 
>>
>> diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
>> index a5ec724c0..4d57a4edc 100644
>> --- a/drivers/virtio/virtio_ring.c
>> +++ b/drivers/virtio/virtio_ring.c
>> @@ -144,6 +144,9 @@ struct vring_virtqueue {
>>   			/* DMA address and size information */
>>   			dma_addr_t queue_dma_addr;
>>   			size_t queue_size_in_bytes;
>> +
>> +			/* In order feature batch begin here */
>> +			u16 next_batch_desc_begin;
>>   		} split;
>>   
>>   		/* Available for packed ring */
>> @@ -700,8 +703,10 @@ static void detach_buf_split(struct vring_virtqueue *vq,
>> unsigned int head,
>>   	}
>>   
>>   	vring_unmap_one_split(vq, i);
>> -	vq->split.desc_extra[i].next = vq->free_head;
>> -	vq->free_head = head;
>> +	if (!virtio_has_feature(vq->vq.vdev, VIRTIO_F_IN_ORDER)) {
>> +		vq->split.desc_extra[i].next = vq->free_head;
>> +		vq->free_head = head;
>> +	}
> 
> 
> Let's add a comment to explain why we don't need anything if in order is
> negotiated.
> 
> 
>>   
>>   	/* Plus final descriptor */
>>   	vq->vq.num_free++;
>> @@ -743,7 +748,8 @@ static void *virtqueue_get_buf_ctx_split(struct virtqueue
>> *_vq,
>>   {
>>   	struct vring_virtqueue *vq = to_vvq(_vq);
>>   	void *ret;
>> -	unsigned int i;
>> +	__virtio16 nextflag = cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_NEXT);
>> +	unsigned int i, j;
>>   	u16 last_used;
>>   
>>   	START_USE(vq);
>> @@ -762,11 +768,24 @@ static void *virtqueue_get_buf_ctx_split(struct virtqueue
>> *_vq,
>>   	/* Only get used array entries after they have been exposed by host. */
>>   	virtio_rmb(vq->weak_barriers);
>>   
>> -	last_used = (vq->last_used_idx & (vq->split.vring.num - 1));
>> -	i = virtio32_to_cpu(_vq->vdev,
>> -			vq->split.vring.used->ring[last_used].id);
>> -	*len = virtio32_to_cpu(_vq->vdev,
>> -			vq->split.vring.used->ring[last_used].len);
>> +	if (virtio_has_feature(_vq->vdev, VIRTIO_F_IN_ORDER)) {
>> +		/* Skip used ring and get used desc in order*/
>> +		i = vq->split.next_batch_desc_begin;
>> +		j = i;
>> +		while (vq->split.vring.desc[j].flags & nextflag)
> 
> 
> Let's not depend on the descriptor ring, which is under the control of
> a malicious hypervisor.
> 
> Let's use desc_extra that is not visible by the hypervisor. More can be
> seen in this commit:
> 
> 72b5e8958738 ("virtio-ring: store DMA metadata in desc_extra for split
> virtqueue")
> 
> 
>> +			j = (j + 1) % vq->split.vring.num;
>> +		/* move to next */
>> +		j = (j + 1) % vq->split.vring.num;
>> +		vq->split.next_batch_desc_begin = j;
> 
> 
> I'm not sure I get the logic here, basically I think we should check
> buffer instead of descriptor here.

I'm sorry, I don't understand this comment.
In-order means the device uses descriptors in the same order as they were made available.
So we should iterate the descriptor table and calculate the next descriptor which will be used,
because we don't use the used ring now.

> 
> So if vring.used->ring[last_used].id != last_used, we know all
> [last_used, vring.used->ring[last_used].id] have been used in a batch?
> 

We don't use the used ring for the in-order feature.
N buffers (descriptor chains) in the descriptor table, starting from vq->split.next_batch_desc_begin, have been used.
N is vq->split.vring.used->idx - vq->last_used_idx (ignoring wrap-around for brevity).
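
For reference, a sketch of that consumption loop against the split-ring fields
quoted above, following Jason's advice to walk desc_extra[] rather than the
guest-visible descriptor ring (wrap-around of the idx subtraction, *len and
error handling are all glossed over):

	u16 used_idx = virtio16_to_cpu(vq->vq.vdev, vq->split.vring.used->idx);
	u16 n = used_idx - vq->last_used_idx;	/* buffers used since last poll */

	while (n--) {
		u16 head = vq->split.next_batch_desc_begin;
		u16 i = head;

		/* in-order chains are contiguous, so stepping through
		 * desc_extra[] finds the end of this buffer's chain */
		while (vq->split.desc_extra[i].flags & VRING_DESC_F_NEXT)
			i = (i + 1) % vq->split.vring.num;

		vq->split.next_batch_desc_begin = (i + 1) % vq->split.vring.num;
		vq->last_used_idx++;
		detach_buf_split(vq, head, NULL);	/* existing helper in virtio_ring.c */
	}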

> 
>> +
>> +		/* TODO: len of buffer */
> 
> 
> So spec said:
> 
> "
> 
> The skipped buffers (for which no used ring entry was written) are
> assumed to have been used (read or written) by the device completely.
> 
> 
> "
> 
> Thanks
> 
> 
>> +	} else {
>> +		last_used = (vq->last_used_idx & (vq->split.vring.num - 1));
>> +		i = virtio32_to_cpu(_vq->vdev,
>> +				    vq->split.vring.used->ring[last_used].id);
>> +		*len = virtio32_to_cpu(_vq->vdev,
>> +				       vq->split.vring.used->ring[last_used].len);
>> +	}
>>   
>>   	if (unlikely(i >= vq->split.vring.num)) {
>>   		BAD_RING(vq, "id %u out of range\n", i);
>> @@ -2234,6 +2253,8 @@ struct virtqueue *__vring_new_virtqueue(unsigned int
>> index,
>>   	vq->split.avail_flags_shadow = 0;
>>   	vq->split.avail_idx_shadow = 0;
>>   
>> +	vq->split.next_batch_desc_begin = 0;
>> +
>>   	/* No callback?  Tell other side not to bother us. */
>>   	if (!callback) {
>>   		vq->split.avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT;

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [RFC 1/5] vhost: reorder used descriptors in a batch
  2022-08-04  5:04           ` Jason Wang
@ 2022-08-11  8:58             ` Guo Zhi
  0 siblings, 0 replies; 24+ messages in thread
From: Guo Zhi @ 2022-08-11  8:58 UTC (permalink / raw)
  To: jasowang
  Cc: eperezma, sgarzare, Michael Tsirkin, netdev, linux-kernel,
	kvm list, virtualization



----- Original Message -----
> From: "jasowang" <jasowang@redhat.com>
> To: "Guo Zhi" <qtxuning1999@sjtu.edu.cn>
> Cc: "eperezma" <eperezma@redhat.com>, "sgarzare" <sgarzare@redhat.com>, "Michael Tsirkin" <mst@redhat.com>, "netdev"
> <netdev@vger.kernel.org>, "linux-kernel" <linux-kernel@vger.kernel.org>, "kvm list" <kvm@vger.kernel.org>,
> "virtualization" <virtualization@lists.linux-foundation.org>
> Sent: Thursday, August 4, 2022 1:04:16 PM
> Subject: Re: [RFC 1/5] vhost: reorder used descriptors in a batch

> On Tue, Aug 2, 2022 at 10:12 PM Guo Zhi <qtxuning1999@sjtu.edu.cn> wrote:
>>
>>
>>
>> ----- Original Message -----
>> > From: "jasowang" <jasowang@redhat.com>
>> > To: "Guo Zhi" <qtxuning1999@sjtu.edu.cn>
>> > Cc: "eperezma" <eperezma@redhat.com>, "sgarzare" <sgarzare@redhat.com>, "Michael
>> > Tsirkin" <mst@redhat.com>, "netdev"
>> > <netdev@vger.kernel.org>, "linux-kernel" <linux-kernel@vger.kernel.org>, "kvm
>> > list" <kvm@vger.kernel.org>,
>> > "virtualization" <virtualization@lists.linux-foundation.org>
>> > Sent: Friday, July 29, 2022 3:32:02 PM
>> > Subject: Re: [RFC 1/5] vhost: reorder used descriptors in a batch
>>
>> > On Thu, Jul 28, 2022 at 4:26 PM Guo Zhi <qtxuning1999@sjtu.edu.cn> wrote:
>> >>
>> >> On 2022/7/26 15:36, Jason Wang wrote:
>> >>
>> >>
>> >> On 2022/7/21 16:43, Guo Zhi wrote:
>> >>
>> >> Device may not use descriptors in order, for example, NIC and SCSI may
>> >> not call __vhost_add_used_n with buffers in order.  It's the task of
>> >> __vhost_add_used_n to order them.
>> >>
>> >>
>> >>
>> >> I'm not sure this is true. Having ooo descriptors is probably by design to have
>> >> better performance.
>> >>
>> >> This might be obvious for devices that may have elevator or QoS stuffs.
>> >>
>> >> I suspect the right thing to do here is, for the device that can't perform
>> >> better in the case of IN_ORDER, let's simply not offer IN_ORDER (zerocopy or
>> >> scsi). And for the device we know it can perform better, a non-zerocopy ethernet
>> >> device, we can do that.
>> >>
>> >>
>> >>   This commit reorders the buffers using
>> >> vq->heads; only when the batch begins from the expected start point and is
>> >> continuous can the batch be exposed to the driver.  And only a single used
>> >> ring entry is written out for a batch of descriptors, according to the
>> >> VIRTIO 1.1 spec.
>> >>
>> >>
>> >>
>> >> So this sounds more like a "workaround" of the device that can't consume buffer
>> >> in order, I suspect it can help in performance.
>> >>
>> >> More below.
>> >>
>> >>
>> >>
>> >> Signed-off-by: Guo Zhi <qtxuning1999@sjtu.edu.cn>
>> >> ---
>> >>   drivers/vhost/vhost.c | 44 +++++++++++++++++++++++++++++++++++++++++--
>> >>   drivers/vhost/vhost.h |  3 +++
>> >>   2 files changed, 45 insertions(+), 2 deletions(-)
>> >>
>> >> diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
>> >> index 40097826c..e2e77e29f 100644
>> >> --- a/drivers/vhost/vhost.c
>> >> +++ b/drivers/vhost/vhost.c
>> >> @@ -317,6 +317,7 @@ static void vhost_vq_reset(struct vhost_dev *dev,
>> >>       vq->used_flags = 0;
>> >>       vq->log_used = false;
>> >>       vq->log_addr = -1ull;
>> >> +    vq->next_used_head_idx = 0;
>> >>       vq->private_data = NULL;
>> >>       vq->acked_features = 0;
>> >>       vq->acked_backend_features = 0;
>> >> @@ -398,6 +399,8 @@ static long vhost_dev_alloc_iovecs(struct vhost_dev *dev)
>> >>                         GFP_KERNEL);
>> >>           if (!vq->indirect || !vq->log || !vq->heads)
>> >>               goto err_nomem;
>> >> +
>> >> +        memset(vq->heads, 0, sizeof(*vq->heads) * dev->iov_limit);
>> >>       }
>> >>       return 0;
>> >>   @@ -2374,12 +2377,49 @@ static int __vhost_add_used_n(struct vhost_virtqueue
>> >>   *vq,
>> >>                   unsigned count)
>> >>   {
>> >>       vring_used_elem_t __user *used;
>> >> +    struct vring_desc desc;
>> >>       u16 old, new;
>> >>       int start;
>> >> +    int begin, end, i;
>> >> +    int copy_n = count;
>> >> +
>> >> +    if (vhost_has_feature(vq, VIRTIO_F_IN_ORDER)) {
>> >>
>> >>
>> >>
>> >> How do you guarantee that ids of heads are contiguous?
>> >>
>> >> The ids of heads do not need to be contiguous.
>> >>
>> >> For example, I have three buffers { .id = 0, 15}, {.id = 20, 30}, {.id = 15, 20}
>> >> passed to vhost_add_used_n. Then I will set vq->heads[0].len = 15,
>> >> vq->heads[15].len = 5 and vq->heads[20].len = 10 as the reorder step. Once I
>> >> find there is no hole in the batched descriptors, I will expose them to the driver.
>> >
>> > So spec said:
>> >
>> > "If VIRTIO_F_IN_ORDER has been negotiated, driver uses descriptors in
>> > ring order: starting from offset 0 in the table, and wrapping around
>> > at the end of the table."
>> >
>> > And
>> >
>> > "VIRTIO_F_IN_ORDER(35)This feature indicates that all buffers are used
>> > by the device in the same order in which they have been made
>> > available."
>> >
>> > This means your example is not an IN_ORDER device.
>> >
>> > The driver should submit buffers (assuming each buffer have one
>> > descriptor) in order {id = 0, 15}, {id = 1, 30} and {id = 2, 20}.
>> >
>> > And even if it is submitted in order, we can not use a batch because:
>> >
>> > "The skipped buffers (for which no used ring entry was written) are
>> > assumed to have been used (read or written) by the device completely."
>> >
>> > This means for TX we are probably ok, but for rx, unless we know the
>> > buffers were written completely, we can't write them in a batch.
>> >
>> > I'd suggest to do cross testing for this series:
>> >
>> > 1) testing vhost IN_ORDER support with DPDK virtio PMD
>> > 2) testing virtio IN_ORDER with DPDK vhost-user via testpmd
>> >
>> > Thanks
>> >
>> You are correct: for rx we can't do a batch because we have to let the driver
>> know the length of each buffer.
> 
> Note that we can do a batch for rx when we know all the buffers have
> been fully written.
> 
>>
>> I think these circumstances can support batching:
>> 1. tx
>> 2. rx with the MRG_RXBUF feature, which introduces a header for each received buffer
>>
>> Considering that batching is not a mandatory requirement for the in-order feature
>> according to the spec,
>> I'd like to let the current RFC patches focus on the in-order implementation, and send
>> another
>> patch series to improve performance by batching in the above circumstances.
> 
> That's fine, how about simply starting from the patch that offers
> IN_ORDER when zerocopy is disabled?
> 

Yeah, I'd like to start from the vsock device, which doesn't use zerocopy.

Thanks
> Thanks
> 
>>
>> What's your opinion?
>>
>> Thanks
>> >
>> >>
>> >>
>> >> +        /* calculate descriptor chain length for each used buffer */
>> >>
>> >>
>> >>
>> >> I'm a little bit confused by this comment; don't we already have heads[i].len for this?
>> >>
>> >> Maybe I should not use vq->heads; reusing it here is somewhat misleading.
>> >>
>> >>
>> >> +        for (i = 0; i < count; i++) {
>> >> +            begin = heads[i].id;
>> >> +            end = begin;
>> >> +            vq->heads[begin].len = 0;
>> >>
>> >>
>> >>
>> >> Does this work for e.g. the RX virtqueue?
>> >>
>> >>
>> >> +            do {
>> >> +                vq->heads[begin].len += 1;
>> >> +                if (unlikely(vhost_get_desc(vq, &desc, end))) {
>> >>
>> >>
>> >>
>> >> Let's try hard to avoid more userspace copies here; they are a source of
>> >> performance regressions (one alternative is sketched after this quoted diff).
>> >>
>> >> Thanks
>> >>
>> >>
>> >> +                    vq_err(vq, "Failed to get descriptor: idx %d addr %p\n",
>> >> +                           end, vq->desc + end);
>> >> +                    return -EFAULT;
>> >> +                }
>> >> +            } while ((end = next_desc(vq, &desc)) != -1);
>> >> +        }
>> >> +
>> >> +        count = 0;
>> >> +        /* sort and batch continuous used ring entry */
>> >> +        while (vq->heads[vq->next_used_head_idx].len != 0) {
>> >> +            count++;
>> >> +            i = vq->next_used_head_idx;
>> >> +            vq->next_used_head_idx = (vq->next_used_head_idx +
>> >> +                          vq->heads[vq->next_used_head_idx].len)
>> >> +                          % vq->num;
>> >> +            vq->heads[i].len = 0;
>> >> +        }
>> >> +        /* only write out a single used ring entry with the id corresponding
>> >> +         * to the head entry of the descriptor chain describing the last buffer
>> >> +         * in the batch.
>> >> +         */
>> >> +        heads[0].id = i;
>> >> +        copy_n = 1;
>> >> +    }
>> >>         start = vq->last_used_idx & (vq->num - 1);
>> >>       used = vq->used->ring + start;
>> >> -    if (vhost_put_used(vq, heads, start, count)) {
>> >> +    if (vhost_put_used(vq, heads, start, copy_n)) {
>> >>           vq_err(vq, "Failed to write used");
>> >>           return -EFAULT;
>> >>       }
>> >> @@ -2410,7 +2450,7 @@ int vhost_add_used_n(struct vhost_virtqueue *vq, struct
>> >> vring_used_elem *heads,
>> >>         start = vq->last_used_idx & (vq->num - 1);
>> >>       n = vq->num - start;
>> >> -    if (n < count) {
>> >> +    if (n < count && !vhost_has_feature(vq, VIRTIO_F_IN_ORDER)) {
>> >>           r = __vhost_add_used_n(vq, heads, n);
>> >>           if (r < 0)
>> >>               return r;
>> >> diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
>> >> index d9109107a..7b2c0fbb5 100644
>> >> --- a/drivers/vhost/vhost.h
>> >> +++ b/drivers/vhost/vhost.h
>> >> @@ -107,6 +107,9 @@ struct vhost_virtqueue {
>> >>       bool log_used;
>> >>       u64 log_addr;
>> >>   +    /* Sort heads in order */
>> >> +    u16 next_used_head_idx;
>> >> +
>> >>       struct iovec iov[UIO_MAXIOV];
>> >>       struct iovec iotlb_iov[64];
>> >>       struct iovec *indirect;
>> >>
>> >>
>> >>
>>
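On the "avoid more userspace copy" comment in the quoted diff above:
one possible direction, purely a sketch under assumed names rather than
anything this series does, is to record each chain's length once in the
avail path, which already walks the chain, so the used path never has
to re-read the guest descriptor table:

#include <stdint.h>

#define VQ_NUM 256

/* Filled by the avail path, which walks every chain anyway. */
static uint16_t chain_ndesc[VQ_NUM];

/* Hypothetical hook, e.g. called once a chain has been translated. */
static void record_chain(uint16_t head, uint16_t ndesc)
{
	chain_ndesc[head] = ndesc;
}

/* Used path: the chain length comes from the cache, so no
 * vhost_get_desc()/copy_from_user() against the descriptor table.
 */
static uint16_t chain_length(uint16_t head)
{
	return chain_ndesc[head];
}

The trade-off is one small per-virtqueue array in exchange for dropping
a guest-memory walk per completed chain.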

^ permalink raw reply	[flat|nested] 24+ messages in thread
