All of lore.kernel.org
 help / color / mirror / Atom feed
* [RFC 0/4] virtio helper: collect dma info from virtio core
@ 2023-11-14 11:31 Xuan Zhuo
  2023-11-14 11:31 ` [RFC 1/4] virtio_ring: introduce virtqueue_get_buf_ctx_dma() Xuan Zhuo
                   ` (3 more replies)
  0 siblings, 4 replies; 19+ messages in thread
From: Xuan Zhuo @ 2023-11-14 11:31 UTC (permalink / raw)
  To: virtualization; +Cc: Jason Wang, Michael S . Tsirkin

For the virtio net send queue, if it supports premapped mode,
then we need to collect the dma info of every desc of this buf
to do unmap inside the driver. The key is that one xmit may
contain multiple dma addresses. (A skb may contain 19 dma addresses.)

Here I introduce a new helper to resolve this. And we also need
to consider the virtqueue_resize and free the unused buffers.

To get on the same page about the API design before doing the other work,
I post this RFC patch set.

Thanks.


Xuan Zhuo (4):
  virtio_ring: introduce virtqueue_get_buf_ctx_dma()
  virtio_ring: virtqueue_disable_and_recycle let the callback detach
    bufs
  virtio_ring: introduce virtqueue_detach_unused_buf_premapped
  virtio_net: sq support premapped mode

 drivers/net/virtio/main.c       | 143 ++++++++++++++++++++----
 drivers/net/virtio/virtio_net.h |   2 +
 drivers/virtio/virtio_ring.c    | 187 ++++++++++++++++++++++++--------
 include/linux/virtio.h          |   7 +-
 4 files changed, 268 insertions(+), 71 deletions(-)

--
2.32.0.3.g01195cf9f


^ permalink raw reply	[flat|nested] 19+ messages in thread

* [RFC 1/4] virtio_ring: introduce virtqueue_get_buf_ctx_dma()
  2023-11-14 11:31 [RFC 0/4] virtio helper: collect dma info from virtio core Xuan Zhuo
@ 2023-11-14 11:31 ` Xuan Zhuo
  2023-11-16  8:11   ` Jason Wang
  2023-11-14 11:31 ` [RFC 2/4] virtio_ring: virtqueue_disable_and_recycle let the callback detach bufs Xuan Zhuo
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 19+ messages in thread
From: Xuan Zhuo @ 2023-11-14 11:31 UTC (permalink / raw)
  To: virtualization; +Cc: Jason Wang, Michael S . Tsirkin

introduce virtqueue_get_buf_ctx_dma() to collect the dma info when
getting a buf from the virtio core in premapped mode.

If the virtio queue is in premapped mode, the virtio-net send buf may
have many descs. Every desc dma address needs to be unmapped. So here we
introduce a new helper to collect the dma address of the buffer from
the virtio core.

Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
---
 drivers/virtio/virtio_ring.c | 148 ++++++++++++++++++++++++++---------
 include/linux/virtio.h       |   2 +
 2 files changed, 115 insertions(+), 35 deletions(-)

diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 51d8f3299c10..0b3caee4ef9d 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -362,6 +362,20 @@ static struct device *vring_dma_dev(const struct vring_virtqueue *vq)
 	return vq->dma_dev;
 }
 
+static void store_dma_to_sg(struct scatterlist **sgp, dma_addr_t addr, unsigned int length)
+{
+	struct scatterlist *sg;
+
+	sg = *sgp;
+
+	sg->dma_address = addr;
+	sg->length = length;
+
+	sg = sg_next(sg);
+
+	*sgp = sg;
+}
+
 /* Map one sg entry. */
 static int vring_map_one_sg(const struct vring_virtqueue *vq, struct scatterlist *sg,
 			    enum dma_data_direction direction, dma_addr_t *addr)
@@ -441,12 +455,18 @@ static void virtqueue_init(struct vring_virtqueue *vq, u32 num)
  */
 
 static void vring_unmap_one_split_indirect(const struct vring_virtqueue *vq,
-					   const struct vring_desc *desc)
+					   const struct vring_desc *desc,
+					   struct scatterlist **sg)
 {
 	u16 flags;
 
-	if (!vq->do_unmap)
+	if (!vq->do_unmap) {
+		if (*sg)
+			store_dma_to_sg(sg,
+					virtio64_to_cpu(vq->vq.vdev, desc->addr),
+					virtio32_to_cpu(vq->vq.vdev, desc->len));
 		return;
+	}
 
 	flags = virtio16_to_cpu(vq->vq.vdev, desc->flags);
 
@@ -458,7 +478,7 @@ static void vring_unmap_one_split_indirect(const struct vring_virtqueue *vq,
 }
 
 static unsigned int vring_unmap_one_split(const struct vring_virtqueue *vq,
-					  unsigned int i)
+					  unsigned int i, struct scatterlist **sg)
 {
 	struct vring_desc_extra *extra = vq->split.desc_extra;
 	u16 flags;
@@ -475,8 +495,11 @@ static unsigned int vring_unmap_one_split(const struct vring_virtqueue *vq,
 				 (flags & VRING_DESC_F_WRITE) ?
 				 DMA_FROM_DEVICE : DMA_TO_DEVICE);
 	} else {
-		if (!vq->do_unmap)
+		if (!vq->do_unmap) {
+			if (*sg)
+				store_dma_to_sg(sg, extra[i].addr, extra[i].len);
 			goto out;
+		}
 
 		dma_unmap_page(vring_dma_dev(vq),
 			       extra[i].addr,
@@ -717,10 +740,10 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,
 		if (i == err_idx)
 			break;
 		if (indirect) {
-			vring_unmap_one_split_indirect(vq, &desc[i]);
+			vring_unmap_one_split_indirect(vq, &desc[i], NULL);
 			i = virtio16_to_cpu(_vq->vdev, desc[i].next);
 		} else
-			i = vring_unmap_one_split(vq, i);
+			i = vring_unmap_one_split(vq, i, NULL);
 	}
 
 free_indirect:
@@ -763,7 +786,7 @@ static bool virtqueue_kick_prepare_split(struct virtqueue *_vq)
 }
 
 static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head,
-			     void **ctx)
+			     struct scatterlist *sg, void **ctx)
 {
 	unsigned int i, j;
 	__virtio16 nextflag = cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_NEXT);
@@ -775,12 +798,12 @@ static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head,
 	i = head;
 
 	while (vq->split.vring.desc[i].flags & nextflag) {
-		vring_unmap_one_split(vq, i);
+		vring_unmap_one_split(vq, i, &sg);
 		i = vq->split.desc_extra[i].next;
 		vq->vq.num_free++;
 	}
 
-	vring_unmap_one_split(vq, i);
+	vring_unmap_one_split(vq, i, &sg);
 	vq->split.desc_extra[i].next = vq->free_head;
 	vq->free_head = head;
 
@@ -794,7 +817,7 @@ static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head,
 
 		/* Free the indirect table, if any, now that it's unmapped. */
 		if (!indir_desc)
-			return;
+			goto end;
 
 		len = vq->split.desc_extra[head].len;
 
@@ -802,9 +825,10 @@ static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head,
 				VRING_DESC_F_INDIRECT));
 		BUG_ON(len == 0 || len % sizeof(struct vring_desc));
 
-		if (vq->do_unmap) {
+		if (vq->do_unmap || sg) {
 			for (j = 0; j < len / sizeof(struct vring_desc); j++)
-				vring_unmap_one_split_indirect(vq, &indir_desc[j]);
+				vring_unmap_one_split_indirect(vq, &indir_desc[j], &sg);
+
 		}
 
 		kfree(indir_desc);
@@ -812,6 +836,11 @@ static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head,
 	} else if (ctx) {
 		*ctx = vq->split.desc_state[head].indir_desc;
 	}
+
+end:
+	/* sg point to the next. So we mark the last one as the end. */
+	if (!vq->do_unmap && sg)
+		sg_mark_end(sg - 1);
 }
 
 static bool more_used_split(const struct vring_virtqueue *vq)
@@ -822,6 +851,7 @@ static bool more_used_split(const struct vring_virtqueue *vq)
 
 static void *virtqueue_get_buf_ctx_split(struct virtqueue *_vq,
 					 unsigned int *len,
+					 struct scatterlist *sg,
 					 void **ctx)
 {
 	struct vring_virtqueue *vq = to_vvq(_vq);
@@ -862,7 +892,7 @@ static void *virtqueue_get_buf_ctx_split(struct virtqueue *_vq,
 
 	/* detach_buf_split clears data, so grab it now. */
 	ret = vq->split.desc_state[i].data;
-	detach_buf_split(vq, i, ctx);
+	detach_buf_split(vq, i, sg, ctx);
 	vq->last_used_idx++;
 	/* If we expect an interrupt for the next entry, tell host
 	 * by writing event index and flush out the write before
@@ -984,7 +1014,7 @@ static void *virtqueue_detach_unused_buf_split(struct virtqueue *_vq)
 			continue;
 		/* detach_buf_split clears data, so grab it now. */
 		buf = vq->split.desc_state[i].data;
-		detach_buf_split(vq, i, NULL);
+		detach_buf_split(vq, i, NULL, NULL);
 		vq->split.avail_idx_shadow--;
 		vq->split.vring.avail->idx = cpu_to_virtio16(_vq->vdev,
 				vq->split.avail_idx_shadow);
@@ -1221,7 +1251,8 @@ static u16 packed_last_used(u16 last_used_idx)
 }
 
 static void vring_unmap_extra_packed(const struct vring_virtqueue *vq,
-				     const struct vring_desc_extra *extra)
+				     const struct vring_desc_extra *extra,
+				     struct scatterlist **sg)
 {
 	u16 flags;
 
@@ -1236,8 +1267,11 @@ static void vring_unmap_extra_packed(const struct vring_virtqueue *vq,
 				 (flags & VRING_DESC_F_WRITE) ?
 				 DMA_FROM_DEVICE : DMA_TO_DEVICE);
 	} else {
-		if (!vq->do_unmap)
+		if (!vq->do_unmap) {
+			if (*sg)
+				store_dma_to_sg(sg, extra->addr, extra->len);
 			return;
+		}
 
 		dma_unmap_page(vring_dma_dev(vq),
 			       extra->addr, extra->len,
@@ -1247,12 +1281,17 @@ static void vring_unmap_extra_packed(const struct vring_virtqueue *vq,
 }
 
 static void vring_unmap_desc_packed(const struct vring_virtqueue *vq,
-				    const struct vring_packed_desc *desc)
+				    const struct vring_packed_desc *desc,
+				    struct scatterlist **sg)
 {
 	u16 flags;
 
-	if (!vq->do_unmap)
+	if (!vq->do_unmap) {
+		if (*sg)
+			store_dma_to_sg(sg, le64_to_cpu(desc->addr),
+					le32_to_cpu(desc->len));
 		return;
+	}
 
 	flags = le16_to_cpu(desc->flags);
 
@@ -1389,7 +1428,7 @@ static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq,
 	err_idx = i;
 
 	for (i = 0; i < err_idx; i++)
-		vring_unmap_desc_packed(vq, &desc[i]);
+		vring_unmap_desc_packed(vq, &desc[i], NULL);
 
 free_desc:
 	kfree(desc);
@@ -1539,7 +1578,7 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq,
 	for (n = 0; n < total_sg; n++) {
 		if (i == err_idx)
 			break;
-		vring_unmap_extra_packed(vq, &vq->packed.desc_extra[curr]);
+		vring_unmap_extra_packed(vq, &vq->packed.desc_extra[curr], NULL);
 		curr = vq->packed.desc_extra[curr].next;
 		i++;
 		if (i >= vq->packed.vring.num)
@@ -1600,7 +1639,9 @@ static bool virtqueue_kick_prepare_packed(struct virtqueue *_vq)
 }
 
 static void detach_buf_packed(struct vring_virtqueue *vq,
-			      unsigned int id, void **ctx)
+			      unsigned int id,
+			      struct scatterlist *sg,
+			      void **ctx)
 {
 	struct vring_desc_state_packed *state = NULL;
 	struct vring_packed_desc *desc;
@@ -1615,13 +1656,10 @@ static void detach_buf_packed(struct vring_virtqueue *vq,
 	vq->free_head = id;
 	vq->vq.num_free += state->num;
 
-	if (unlikely(vq->do_unmap)) {
-		curr = id;
-		for (i = 0; i < state->num; i++) {
-			vring_unmap_extra_packed(vq,
-						 &vq->packed.desc_extra[curr]);
-			curr = vq->packed.desc_extra[curr].next;
-		}
+	curr = id;
+	for (i = 0; i < state->num; i++) {
+		vring_unmap_extra_packed(vq, &vq->packed.desc_extra[curr], &sg);
+		curr = vq->packed.desc_extra[curr].next;
 	}
 
 	if (vq->indirect) {
@@ -1630,19 +1668,24 @@ static void detach_buf_packed(struct vring_virtqueue *vq,
 		/* Free the indirect table, if any, now that it's unmapped. */
 		desc = state->indir_desc;
 		if (!desc)
-			return;
+			goto end;
 
-		if (vq->do_unmap) {
+		if (vq->do_unmap || sg) {
 			len = vq->packed.desc_extra[id].len;
 			for (i = 0; i < len / sizeof(struct vring_packed_desc);
 					i++)
-				vring_unmap_desc_packed(vq, &desc[i]);
+				vring_unmap_desc_packed(vq, &desc[i], &sg);
 		}
 		kfree(desc);
 		state->indir_desc = NULL;
 	} else if (ctx) {
 		*ctx = state->indir_desc;
 	}
+
+end:
+	/* sg point to the next. So we mark the last one as the end. */
+	if (!vq->do_unmap && sg)
+		sg_mark_end(sg - 1);
 }
 
 static inline bool is_used_desc_packed(const struct vring_virtqueue *vq,
@@ -1672,6 +1715,7 @@ static bool more_used_packed(const struct vring_virtqueue *vq)
 
 static void *virtqueue_get_buf_ctx_packed(struct virtqueue *_vq,
 					  unsigned int *len,
+					  struct scatterlist *sg,
 					  void **ctx)
 {
 	struct vring_virtqueue *vq = to_vvq(_vq);
@@ -1712,7 +1756,7 @@ static void *virtqueue_get_buf_ctx_packed(struct virtqueue *_vq,
 
 	/* detach_buf_packed clears data, so grab it now. */
 	ret = vq->packed.desc_state[id].data;
-	detach_buf_packed(vq, id, ctx);
+	detach_buf_packed(vq, id, sg, ctx);
 
 	last_used += vq->packed.desc_state[id].num;
 	if (unlikely(last_used >= vq->packed.vring.num)) {
@@ -1877,7 +1921,7 @@ static void *virtqueue_detach_unused_buf_packed(struct virtqueue *_vq)
 			continue;
 		/* detach_buf clears data, so grab it now. */
 		buf = vq->packed.desc_state[i].data;
-		detach_buf_packed(vq, i, NULL);
+		detach_buf_packed(vq, i, NULL, NULL);
 		END_USE(vq);
 		return buf;
 	}
@@ -2417,11 +2461,45 @@ void *virtqueue_get_buf_ctx(struct virtqueue *_vq, unsigned int *len,
 {
 	struct vring_virtqueue *vq = to_vvq(_vq);
 
-	return vq->packed_ring ? virtqueue_get_buf_ctx_packed(_vq, len, ctx) :
-				 virtqueue_get_buf_ctx_split(_vq, len, ctx);
+	return vq->packed_ring ? virtqueue_get_buf_ctx_packed(_vq, len, NULL, ctx) :
+				 virtqueue_get_buf_ctx_split(_vq, len, NULL, ctx);
 }
 EXPORT_SYMBOL_GPL(virtqueue_get_buf_ctx);
 
+/**
+ * virtqueue_get_buf_ctx_dma - get the next used buffer with the dma info
+ * @_vq: the struct virtqueue we're talking about.
+ * @len: the length written into the buffer
+ * @sg: scatterlist array to store the dma info
+ * @ctx: extra context for the token
+ *
+ * If the device wrote data into the buffer, @len will be set to the
+ * amount written.  This means you don't need to clear the buffer
+ * beforehand to ensure there's no data leakage in the case of short
+ * writes.
+ *
+ * Caller must ensure we don't call this with other virtqueue
+ * operations at the same time (except where noted).
+ *
+ * Only when the vq is in premapped mode and the sg is not null, we store the
+ * dma info of every descriptor of this buf to the sg array. The sg array must
+ * point to a scatterlist array, with the last element marked as the sg last.
+ * Once the function is done, we mark the last sg stored with dma info as the
+ * last one. If the sg array size is too small, some dma info may be missed.
+ *
+ * Returns NULL if there are no used buffers, or the "data" token
+ * handed to virtqueue_add_*().
+ */
+void *virtqueue_get_buf_ctx_dma(struct virtqueue *_vq, unsigned int *len,
+				struct scatterlist *sg, void **ctx)
+{
+	struct vring_virtqueue *vq = to_vvq(_vq);
+
+	return vq->packed_ring ? virtqueue_get_buf_ctx_packed(_vq, len, sg, ctx) :
+				 virtqueue_get_buf_ctx_split(_vq, len, sg, ctx);
+}
+EXPORT_SYMBOL_GPL(virtqueue_get_buf_ctx_dma);
+
 void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len)
 {
 	return virtqueue_get_buf_ctx(_vq, len, NULL);
diff --git a/include/linux/virtio.h b/include/linux/virtio.h
index 4cc614a38376..0b919786ade5 100644
--- a/include/linux/virtio.h
+++ b/include/linux/virtio.h
@@ -74,6 +74,8 @@ void *virtqueue_get_buf(struct virtqueue *vq, unsigned int *len);
 
 void *virtqueue_get_buf_ctx(struct virtqueue *vq, unsigned int *len,
 			    void **ctx);
+void *virtqueue_get_buf_ctx_dma(struct virtqueue *_vq, unsigned int *len,
+				struct scatterlist *sg, void **ctx);
 
 void virtqueue_disable_cb(struct virtqueue *vq);
 
-- 
2.32.0.3.g01195cf9f


^ permalink raw reply related	[flat|nested] 19+ messages in thread

* [RFC 2/4] virtio_ring: virtqueue_disable_and_recycle let the callback detach bufs
  2023-11-14 11:31 [RFC 0/4] virtio helper: collect dma info from virtio core Xuan Zhuo
  2023-11-14 11:31 ` [RFC 1/4] virtio_ring: introduce virtqueue_get_buf_ctx_dma() Xuan Zhuo
@ 2023-11-14 11:31 ` Xuan Zhuo
  2023-11-16  8:11   ` Jason Wang
  2023-11-14 11:31 ` [RFC 3/4] virtio_ring: introduce virtqueue_detach_unused_buf_premapped Xuan Zhuo
  2023-11-14 11:31 ` [RFC 4/4] virtio_net: sq support premapped mode Xuan Zhuo
  3 siblings, 1 reply; 19+ messages in thread
From: Xuan Zhuo @ 2023-11-14 11:31 UTC (permalink / raw)
  To: virtualization; +Cc: Jason Wang, Michael S . Tsirkin

Now, inside virtqueue_disable_and_recycle, the recycle() callback just has
two parameters (vq, buf) after the detach operation.

But if we are in premapped mode, we may need to get some dma info when
detach buf like virtqueue_get_buf_ctx_dma().

So we call recycle() directly; this callback detaches the bufs itself. It
should complete the work of detaching all the unused buffers.

Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
---
 drivers/net/virtio/main.c    | 31 ++++++++++++++++++++++---------
 drivers/virtio/virtio_ring.c | 10 ++++------
 include/linux/virtio.h       |  4 ++--
 3 files changed, 28 insertions(+), 17 deletions(-)

diff --git a/drivers/net/virtio/main.c b/drivers/net/virtio/main.c
index 46a8be0c609c..57cbd449637a 100644
--- a/drivers/net/virtio/main.c
+++ b/drivers/net/virtio/main.c
@@ -149,8 +149,8 @@ struct virtio_net_common_hdr {
 	};
 };
 
-static void virtnet_rq_free_unused_buf(struct virtqueue *vq, void *buf);
-static void virtnet_sq_free_unused_buf(struct virtqueue *vq, void *buf);
+static void virtnet_rq_free_unused_bufs(struct virtqueue *vq);
+static void virtnet_sq_free_unused_bufs(struct virtqueue *vq);
 
 static bool is_xdp_frame(void *ptr)
 {
@@ -2197,7 +2197,7 @@ static int virtnet_rx_resize(struct virtnet_info *vi,
 	if (running)
 		napi_disable(&rq->napi);
 
-	err = virtqueue_resize(rq->vq, ring_num, virtnet_rq_free_unused_buf);
+	err = virtqueue_resize(rq->vq, ring_num, virtnet_rq_free_unused_bufs);
 	if (err)
 		netdev_err(vi->dev, "resize rx fail: rx queue index: %d err: %d\n", qindex, err);
 
@@ -2236,7 +2236,7 @@ static int virtnet_tx_resize(struct virtnet_info *vi,
 
 	__netif_tx_unlock_bh(txq);
 
-	err = virtqueue_resize(sq->vq, ring_num, virtnet_sq_free_unused_buf);
+	err = virtqueue_resize(sq->vq, ring_num, virtnet_sq_free_unused_bufs);
 	if (err)
 		netdev_err(vi->dev, "resize tx fail: tx queue index: %d err: %d\n", qindex, err);
 
@@ -3850,22 +3850,35 @@ static void virtnet_rq_free_unused_buf(struct virtqueue *vq, void *buf)
 	virtnet_rq_free_buf(vi, rq, buf);
 }
 
-static void free_unused_bufs(struct virtnet_info *vi)
+static void virtnet_sq_free_unused_bufs(struct virtqueue *vq)
+{
+	void *buf;
+
+	while ((buf = virtqueue_detach_unused_buf(vq)) != NULL)
+		virtnet_sq_free_unused_buf(vq, buf);
+}
+
+static void virtnet_rq_free_unused_bufs(struct virtqueue *vq)
 {
 	void *buf;
+
+	while ((buf = virtqueue_detach_unused_buf(vq)) != NULL)
+		virtnet_rq_free_unused_buf(vq, buf);
+}
+
+static void free_unused_bufs(struct virtnet_info *vi)
+{
 	int i;
 
 	for (i = 0; i < vi->max_queue_pairs; i++) {
 		struct virtqueue *vq = vi->sq[i].vq;
-		while ((buf = virtqueue_detach_unused_buf(vq)) != NULL)
-			virtnet_sq_free_unused_buf(vq, buf);
+		virtnet_sq_free_unused_bufs(vq);
 		cond_resched();
 	}
 
 	for (i = 0; i < vi->max_queue_pairs; i++) {
 		struct virtqueue *vq = vi->rq[i].vq;
-		while ((buf = virtqueue_detach_unused_buf(vq)) != NULL)
-			virtnet_rq_free_unused_buf(vq, buf);
+		virtnet_rq_free_unused_bufs(vq);
 		cond_resched();
 	}
 }
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 0b3caee4ef9d..18f021095afd 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -2197,11 +2197,10 @@ static int virtqueue_resize_packed(struct virtqueue *_vq, u32 num)
 }
 
 static int virtqueue_disable_and_recycle(struct virtqueue *_vq,
-					 void (*recycle)(struct virtqueue *vq, void *buf))
+					 void (*recycle)(struct virtqueue *vq))
 {
 	struct vring_virtqueue *vq = to_vvq(_vq);
 	struct virtio_device *vdev = vq->vq.vdev;
-	void *buf;
 	int err;
 
 	if (!vq->we_own_ring)
@@ -2217,8 +2216,7 @@ static int virtqueue_disable_and_recycle(struct virtqueue *_vq,
 	if (err)
 		return err;
 
-	while ((buf = virtqueue_detach_unused_buf(_vq)) != NULL)
-		recycle(_vq, buf);
+	recycle(_vq);
 
 	return 0;
 }
@@ -2814,7 +2812,7 @@ EXPORT_SYMBOL_GPL(vring_create_virtqueue_dma);
  *
  */
 int virtqueue_resize(struct virtqueue *_vq, u32 num,
-		     void (*recycle)(struct virtqueue *vq, void *buf))
+		     void (*recycle)(struct virtqueue *vq))
 {
 	struct vring_virtqueue *vq = to_vvq(_vq);
 	int err;
@@ -2905,7 +2903,7 @@ EXPORT_SYMBOL_GPL(virtqueue_set_dma_premapped);
  * -EPERM: Operation not permitted
  */
 int virtqueue_reset(struct virtqueue *_vq,
-		    void (*recycle)(struct virtqueue *vq, void *buf))
+		    void (*recycle)(struct virtqueue *vq))
 {
 	struct vring_virtqueue *vq = to_vvq(_vq);
 	int err;
diff --git a/include/linux/virtio.h b/include/linux/virtio.h
index 0b919786ade5..225271940223 100644
--- a/include/linux/virtio.h
+++ b/include/linux/virtio.h
@@ -101,9 +101,9 @@ dma_addr_t virtqueue_get_avail_addr(const struct virtqueue *vq);
 dma_addr_t virtqueue_get_used_addr(const struct virtqueue *vq);
 
 int virtqueue_resize(struct virtqueue *vq, u32 num,
-		     void (*recycle)(struct virtqueue *vq, void *buf));
+		     void (*recycle)(struct virtqueue *vq));
 int virtqueue_reset(struct virtqueue *vq,
-		    void (*recycle)(struct virtqueue *vq, void *buf));
+		    void (*recycle)(struct virtqueue *vq));
 
 /**
  * struct virtio_device - representation of a device using virtio
-- 
2.32.0.3.g01195cf9f


^ permalink raw reply related	[flat|nested] 19+ messages in thread

* [RFC 3/4] virtio_ring: introduce virtqueue_detach_unused_buf_premapped
  2023-11-14 11:31 [RFC 0/4] virtio helper: collect dma info from virtio core Xuan Zhuo
  2023-11-14 11:31 ` [RFC 1/4] virtio_ring: introduce virtqueue_get_buf_ctx_dma() Xuan Zhuo
  2023-11-14 11:31 ` [RFC 2/4] virtio_ring: virtqueue_disable_and_recycle let the callback detach bufs Xuan Zhuo
@ 2023-11-14 11:31 ` Xuan Zhuo
  2023-11-14 11:31 ` [RFC 4/4] virtio_net: sq support premapped mode Xuan Zhuo
  3 siblings, 0 replies; 19+ messages in thread
From: Xuan Zhuo @ 2023-11-14 11:31 UTC (permalink / raw)
  To: virtualization; +Cc: Jason Wang, Michael S . Tsirkin

introduce virtqueue_detach_unused_buf_premapped() to collect the dma
info when getting a buf from the virtio core in premapped mode.

If the virtio queue is in premapped mode, the virtio-net send buf may
have many descs. Every desc dma address needs to be unmapped. So here we
introduce a new helper to collect the dma address of the buffer from
the virtio core.

Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
---
 drivers/virtio/virtio_ring.c | 33 +++++++++++++++++++++++++--------
 include/linux/virtio.h       |  1 +
 2 files changed, 26 insertions(+), 8 deletions(-)

diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 18f021095afd..60666b31d437 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -1001,7 +1001,7 @@ static bool virtqueue_enable_cb_delayed_split(struct virtqueue *_vq)
 	return true;
 }
 
-static void *virtqueue_detach_unused_buf_split(struct virtqueue *_vq)
+static void *virtqueue_detach_unused_buf_split(struct virtqueue *_vq, struct scatterlist *sg)
 {
 	struct vring_virtqueue *vq = to_vvq(_vq);
 	unsigned int i;
@@ -1014,7 +1014,7 @@ static void *virtqueue_detach_unused_buf_split(struct virtqueue *_vq)
 			continue;
 		/* detach_buf_split clears data, so grab it now. */
 		buf = vq->split.desc_state[i].data;
-		detach_buf_split(vq, i, NULL, NULL);
+		detach_buf_split(vq, i, sg, NULL);
 		vq->split.avail_idx_shadow--;
 		vq->split.vring.avail->idx = cpu_to_virtio16(_vq->vdev,
 				vq->split.avail_idx_shadow);
@@ -1908,7 +1908,7 @@ static bool virtqueue_enable_cb_delayed_packed(struct virtqueue *_vq)
 	return true;
 }
 
-static void *virtqueue_detach_unused_buf_packed(struct virtqueue *_vq)
+static void *virtqueue_detach_unused_buf_packed(struct virtqueue *_vq, struct scatterlist *sg)
 {
 	struct vring_virtqueue *vq = to_vvq(_vq);
 	unsigned int i;
@@ -1921,7 +1921,7 @@ static void *virtqueue_detach_unused_buf_packed(struct virtqueue *_vq)
 			continue;
 		/* detach_buf clears data, so grab it now. */
 		buf = vq->packed.desc_state[i].data;
-		detach_buf_packed(vq, i, NULL, NULL);
+		detach_buf_packed(vq, i, sg, NULL);
 		END_USE(vq);
 		return buf;
 	}
@@ -2614,19 +2614,36 @@ bool virtqueue_enable_cb_delayed(struct virtqueue *_vq)
 EXPORT_SYMBOL_GPL(virtqueue_enable_cb_delayed);
 
 /**
- * virtqueue_detach_unused_buf - detach first unused buffer
+ * virtqueue_detach_unused_buf_premapped - detach first unused buffer
  * @_vq: the struct virtqueue we're talking about.
+ * @sg: scatterlist array to store the dma info
+ *
+ * more see virtqueue_get_buf_ctx_dma()
  *
  * Returns NULL or the "data" token handed to virtqueue_add_*().
  * This is not valid on an active queue; it is useful for device
  * shutdown or the reset queue.
  */
-void *virtqueue_detach_unused_buf(struct virtqueue *_vq)
+void *virtqueue_detach_unused_buf_premapped(struct virtqueue *_vq, struct scatterlist *sg)
 {
 	struct vring_virtqueue *vq = to_vvq(_vq);
 
-	return vq->packed_ring ? virtqueue_detach_unused_buf_packed(_vq) :
-				 virtqueue_detach_unused_buf_split(_vq);
+	return vq->packed_ring ? virtqueue_detach_unused_buf_packed(_vq, sg) :
+				 virtqueue_detach_unused_buf_split(_vq, sg);
+}
+EXPORT_SYMBOL_GPL(virtqueue_detach_unused_buf_premapped);
+
+/**
+ * virtqueue_detach_unused_buf - detach first unused buffer
+ * @_vq: the struct virtqueue we're talking about.
+ *
+ * Returns NULL or the "data" token handed to virtqueue_add_*().
+ * This is not valid on an active queue; it is useful for device
+ * shutdown or the reset queue.
+ */
+void *virtqueue_detach_unused_buf(struct virtqueue *_vq)
+{
+	return virtqueue_detach_unused_buf_premapped(_vq, NULL);
 }
 EXPORT_SYMBOL_GPL(virtqueue_detach_unused_buf);
 
diff --git a/include/linux/virtio.h b/include/linux/virtio.h
index 225271940223..7e9b0ea6b6b2 100644
--- a/include/linux/virtio.h
+++ b/include/linux/virtio.h
@@ -90,6 +90,7 @@ bool virtqueue_poll(struct virtqueue *vq, unsigned);
 bool virtqueue_enable_cb_delayed(struct virtqueue *vq);
 
 void *virtqueue_detach_unused_buf(struct virtqueue *vq);
+void *virtqueue_detach_unused_buf_premapped(struct virtqueue *_vq, struct scatterlist *sg);
 
 unsigned int virtqueue_get_vring_size(const struct virtqueue *vq);
 
-- 
2.32.0.3.g01195cf9f


^ permalink raw reply related	[flat|nested] 19+ messages in thread

* [RFC 4/4] virtio_net: sq support premapped mode
  2023-11-14 11:31 [RFC 0/4] virtio helper: collect dma info from virtio core Xuan Zhuo
                   ` (2 preceding siblings ...)
  2023-11-14 11:31 ` [RFC 3/4] virtio_ring: introduce virtqueue_detach_unused_buf_premapped Xuan Zhuo
@ 2023-11-14 11:31 ` Xuan Zhuo
  2023-11-16  8:11   ` Jason Wang
  3 siblings, 1 reply; 19+ messages in thread
From: Xuan Zhuo @ 2023-11-14 11:31 UTC (permalink / raw)
  To: virtualization; +Cc: Jason Wang, Michael S . Tsirkin

If the xsk is enabled, the xsk tx will share the send queue.
But the xsk requires that the send queue use the premapped mode.
So the send queue must support premapped mode.

Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
---
 drivers/net/virtio/main.c       | 114 ++++++++++++++++++++++++++++----
 drivers/net/virtio/virtio_net.h |   2 +
 2 files changed, 102 insertions(+), 14 deletions(-)

diff --git a/drivers/net/virtio/main.c b/drivers/net/virtio/main.c
index 57cbd449637a..0766fed28afe 100644
--- a/drivers/net/virtio/main.c
+++ b/drivers/net/virtio/main.c
@@ -167,13 +167,46 @@ static struct xdp_frame *ptr_to_xdp(void *ptr)
 	return (struct xdp_frame *)((unsigned long)ptr & ~VIRTIO_XDP_FLAG);
 }
 
+static void virtnet_sq_unmap(struct virtnet_sq *sq)
+{
+	struct scatterlist *sg;
+
+	for (sg = sq->sg; ; sg = sg_next(sg)) {
+		virtqueue_dma_unmap_single_attrs(sq->vq,
+						 sg->dma_address,
+						 sg->length,
+						 DMA_TO_DEVICE, 0);
+
+		if (sg_is_last(sg)) {
+			/* clear the end mark setted by virtio core. */
+			sg_unmark_end(sg);
+			break;
+		}
+	}
+
+	/* mark the last one of sq->sg[] as the end for reuse by
+	 * virtqueue_get_buf_ctx_dma().
+	 */
+	sg_mark_end(&sq->sg[ARRAY_SIZE(sq->sg) - 1]);
+}
+
 static void __free_old_xmit(struct virtnet_sq *sq, bool in_napi,
 			    u64 *bytes, u64 *packets)
 {
+	struct scatterlist *sg = NULL;
 	unsigned int len;
 	void *ptr;
 
-	while ((ptr = virtqueue_get_buf(sq->vq, &len)) != NULL) {
+	/* clear the sg table */
+	if (sq->do_dma) {
+		sg_init_table(sq->sg, ARRAY_SIZE(sq->sg));
+		sg = sq->sg;
+	}
+
+	while ((ptr = virtqueue_get_buf_ctx_dma(sq->vq, &len, sg, NULL)) != NULL) {
+		if (sq->do_dma)
+			virtnet_sq_unmap(sq);
+
 		if (!is_xdp_frame(ptr)) {
 			struct sk_buff *skb = ptr;
 
@@ -567,22 +600,61 @@ static void *virtnet_rq_alloc(struct virtnet_rq *rq, u32 size, gfp_t gfp)
 	return buf;
 }
 
-static void virtnet_rq_set_premapped(struct virtnet_info *vi)
+static void virtnet_set_premapped(struct virtnet_info *vi)
 {
 	int i;
 
-	/* disable for big mode */
-	if (!vi->mergeable_rx_bufs && vi->big_packets)
-		return;
-
 	for (i = 0; i < vi->max_queue_pairs; i++) {
-		if (virtqueue_set_dma_premapped(vi->rq[i].vq))
-			continue;
+		if (!virtqueue_set_dma_premapped(vi->sq[i].vq))
+			vi->sq[i].do_dma = true;
 
-		vi->rq[i].do_dma = true;
+		/* disable for big mode */
+		if (vi->mergeable_rx_bufs || !vi->big_packets) {
+			if (!virtqueue_set_dma_premapped(vi->rq[i].vq))
+				vi->rq[i].do_dma = true;
+		}
 	}
 }
 
+static int virtnet_sq_map_sg(struct virtnet_sq *sq)
+{
+	struct scatterlist *sg;
+
+	for (sg = sq->sg; sg; sg = sg_next(sg)) {
+		sg->dma_address = virtqueue_dma_map_single_attrs(sq->vq, sg_virt(sg),
+								 sg->length,
+								 DMA_TO_DEVICE, 0);
+		if (virtqueue_dma_mapping_error(sq->vq, sg->dma_address))
+			goto err;
+	}
+
+	return 0;
+
+err:
+	if (sg != sq->sg) {
+		sg_mark_end(sg - 1);
+		virtnet_sq_unmap(sq);
+	}
+	return -ENOMEM;
+}
+
+static int virtnet_add_outbuf(struct virtnet_sq *sq, u32 num, void *data)
+{
+	int ret;
+
+	if (sq->do_dma) {
+		ret = virtnet_sq_map_sg(sq);
+		if (ret)
+			return -ENOMEM;
+	}
+
+	ret = virtqueue_add_outbuf(sq->vq, sq->sg, num, data, GFP_ATOMIC);
+	if (ret && sq->do_dma)
+		virtnet_sq_unmap(sq);
+
+	return ret;
+}
+
 static void free_old_xmit(struct virtnet_sq *sq, bool in_napi)
 {
 	u64 bytes, packets = 0;
@@ -686,8 +758,7 @@ static int __virtnet_xdp_xmit_one(struct virtnet_info *vi,
 			    skb_frag_size(frag), skb_frag_off(frag));
 	}
 
-	err = virtqueue_add_outbuf(sq->vq, sq->sg, nr_frags + 1,
-				   xdp_to_ptr(xdpf), GFP_ATOMIC);
+	err = virtnet_add_outbuf(sq, nr_frags + 1, xdp_to_ptr(xdpf));
 	if (unlikely(err))
 		return -ENOSPC; /* Caller handle free/refcnt */
 
@@ -2126,7 +2197,7 @@ static int xmit_skb(struct virtnet_sq *sq, struct sk_buff *skb)
 			return num_sg;
 		num_sg++;
 	}
-	return virtqueue_add_outbuf(sq->vq, sq->sg, num_sg, skb, GFP_ATOMIC);
+	return virtnet_add_outbuf(sq, num_sg, skb);
 }
 
 static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev)
@@ -3852,10 +3923,25 @@ static void virtnet_rq_free_unused_buf(struct virtqueue *vq, void *buf)
 
 static void virtnet_sq_free_unused_bufs(struct virtqueue *vq)
 {
+	struct virtnet_info *vi = vq->vdev->priv;
+	struct scatterlist *sg = NULL;
+	struct virtnet_sq *sq;
+	int i = vq2txq(vq);
 	void *buf;
 
-	while ((buf = virtqueue_detach_unused_buf(vq)) != NULL)
+	sq = &vi->sq[i];
+
+	if (sq->do_dma) {
+		sg_init_table(sq->sg, ARRAY_SIZE(sq->sg));
+		sg = sq->sg;
+	}
+
+	while ((buf = virtqueue_detach_unused_buf_premapped(vq, sg)) != NULL) {
+		if (sq->do_dma)
+			virtnet_sq_unmap(sq);
+
 		virtnet_sq_free_unused_buf(vq, buf);
+	}
 }
 
 static void virtnet_rq_free_unused_bufs(struct virtqueue *vq)
@@ -4052,7 +4138,7 @@ static int init_vqs(struct virtnet_info *vi)
 	if (ret)
 		goto err_free;
 
-	virtnet_rq_set_premapped(vi);
+	virtnet_set_premapped(vi);
 
 	cpus_read_lock();
 	virtnet_set_affinity(vi);
diff --git a/drivers/net/virtio/virtio_net.h b/drivers/net/virtio/virtio_net.h
index ebf9f344648a..870855a829a0 100644
--- a/drivers/net/virtio/virtio_net.h
+++ b/drivers/net/virtio/virtio_net.h
@@ -67,6 +67,8 @@ struct virtnet_sq {
 
 	/* Record whether sq is in reset state. */
 	bool reset;
+
+	bool do_dma;
 };
 
 /* Internal representation of a receive virtqueue */
-- 
2.32.0.3.g01195cf9f


^ permalink raw reply related	[flat|nested] 19+ messages in thread

* Re: [RFC 1/4] virtio_ring: introduce virtqueue_get_buf_ctx_dma()
  2023-11-14 11:31 ` [RFC 1/4] virtio_ring: introduce virtqueue_get_buf_ctx_dma() Xuan Zhuo
@ 2023-11-16  8:11   ` Jason Wang
  2023-11-21  6:12     ` Xuan Zhuo
  0 siblings, 1 reply; 19+ messages in thread
From: Jason Wang @ 2023-11-16  8:11 UTC (permalink / raw)
  To: Xuan Zhuo; +Cc: virtualization, Michael S . Tsirkin

On Tue, Nov 14, 2023 at 7:31 PM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
>
> introduce virtqueue_get_buf_ctx_dma() to collect the dma info when
> get buf from virtio core for premapped mode.
>
> If the virtio queue is premapped mode, the virtio-net send buf may
> have many desc. Every desc dma address need to be unmap. So here we
> introduce a new helper to collect the dma address of the buffer from
> the virtio core.

So looking at vring_desc_extra, what we have right now is:

struct vring_desc_extra {
        dma_addr_t addr;                /* Descriptor DMA addr. */
        u32 len;                        /* Descriptor length. */
        u16 flags;                      /* Descriptor flags. */
        u16 next;                       /* The next desc state in a list. */
};

And sg is

struct scatterlist {
        unsigned long   page_link;
unsigned int    offset;
        unsigned int    length;
        dma_addr_t      dma_address;
#ifdef CONFIG_NEED_SG_DMA_LENGTH
        unsigned int    dma_length;
#endif
#ifdef CONFIG_NEED_SG_DMA_FLAGS
        unsigned int    dma_flags;
#endif
};

Would it better just store sg?

More below

>
> Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
> ---
>  drivers/virtio/virtio_ring.c | 148 ++++++++++++++++++++++++++---------
>  include/linux/virtio.h       |   2 +
>  2 files changed, 115 insertions(+), 35 deletions(-)
>
> diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
> index 51d8f3299c10..0b3caee4ef9d 100644
> --- a/drivers/virtio/virtio_ring.c
> +++ b/drivers/virtio/virtio_ring.c
> @@ -362,6 +362,20 @@ static struct device *vring_dma_dev(const struct vring_virtqueue *vq)
>         return vq->dma_dev;
>  }
>
> +static void store_dma_to_sg(struct scatterlist **sgp, dma_addr_t addr, unsigned int length)
> +{
> +       struct scatterlist *sg;
> +
> +       sg = *sgp;
> +
> +       sg->dma_address = addr;
> +       sg->length = length;
> +
> +       sg = sg_next(sg);
> +
> +       *sgp = sg;
> +}
> +
>  /* Map one sg entry. */
>  static int vring_map_one_sg(const struct vring_virtqueue *vq, struct scatterlist *sg,
>                             enum dma_data_direction direction, dma_addr_t *addr)
> @@ -441,12 +455,18 @@ static void virtqueue_init(struct vring_virtqueue *vq, u32 num)
>   */
>
>  static void vring_unmap_one_split_indirect(const struct vring_virtqueue *vq,
> -                                          const struct vring_desc *desc)
> +                                          const struct vring_desc *desc,
> +                                          struct scatterlist **sg)
>  {
>         u16 flags;
>
> -       if (!vq->do_unmap)
> +       if (!vq->do_unmap) {
> +               if (*sg)

Can we simply move the

if (*sg) to store_dma_to_sg()?

> +                       store_dma_to_sg(sg,
> +                                       virtio64_to_cpu(vq->vq.vdev, desc->addr),
> +                                       virtio32_to_cpu(vq->vq.vdev, desc->len));
>                 return;
> +       }
>
>         flags = virtio16_to_cpu(vq->vq.vdev, desc->flags);
>
> @@ -458,7 +478,7 @@ static void vring_unmap_one_split_indirect(const struct vring_virtqueue *vq,
>  }
>
>  static unsigned int vring_unmap_one_split(const struct vring_virtqueue *vq,
> -                                         unsigned int i)
> +                                         unsigned int i, struct scatterlist **sg)
>  {
>         struct vring_desc_extra *extra = vq->split.desc_extra;
>         u16 flags;
> @@ -475,8 +495,11 @@ static unsigned int vring_unmap_one_split(const struct vring_virtqueue *vq,
>                                  (flags & VRING_DESC_F_WRITE) ?
>                                  DMA_FROM_DEVICE : DMA_TO_DEVICE);
>         } else {
> -               if (!vq->do_unmap)
> +               if (!vq->do_unmap) {

The:

else {
      if {
            if {
}

seems odd. I think I would prefer

if (flags & VRING_DESC_F_INDIRECT) {
} else if (!vq->do_unmap) {
} else {
}

here

Btw, I really think do_unmap is not a good name, we probably need to
rename it as "unmap_desc".


> +                       if (*sg)
> +                               store_dma_to_sg(sg, extra[i].addr, extra[i].len);

In which case we need to unmap by driver but we don't need a dma address?

>                         goto out;
> +               }
>
>                 dma_unmap_page(vring_dma_dev(vq),
>                                extra[i].addr,
> @@ -717,10 +740,10 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,
>                 if (i == err_idx)
>                         break;
>                 if (indirect) {
> -                       vring_unmap_one_split_indirect(vq, &desc[i]);
> +                       vring_unmap_one_split_indirect(vq, &desc[i], NULL);
>                         i = virtio16_to_cpu(_vq->vdev, desc[i].next);
>                 } else
> -                       i = vring_unmap_one_split(vq, i);
> +                       i = vring_unmap_one_split(vq, i, NULL);
>         }
>
>  free_indirect:
> @@ -763,7 +786,7 @@ static bool virtqueue_kick_prepare_split(struct virtqueue *_vq)
>  }
>
>  static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head,
> -                            void **ctx)
> +                            struct scatterlist *sg, void **ctx)
>  {
>         unsigned int i, j;
>         __virtio16 nextflag = cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_NEXT);
> @@ -775,12 +798,12 @@ static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head,
>         i = head;
>
>         while (vq->split.vring.desc[i].flags & nextflag) {
> -               vring_unmap_one_split(vq, i);
> +               vring_unmap_one_split(vq, i, &sg);
>                 i = vq->split.desc_extra[i].next;
>                 vq->vq.num_free++;
>         }
>
> -       vring_unmap_one_split(vq, i);
> +       vring_unmap_one_split(vq, i, &sg);
>         vq->split.desc_extra[i].next = vq->free_head;
>         vq->free_head = head;
>
> @@ -794,7 +817,7 @@ static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head,
>
>                 /* Free the indirect table, if any, now that it's unmapped. */
>                 if (!indir_desc)
> -                       return;
> +                       goto end;
>
>                 len = vq->split.desc_extra[head].len;
>
> @@ -802,9 +825,10 @@ static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head,
>                                 VRING_DESC_F_INDIRECT));
>                 BUG_ON(len == 0 || len % sizeof(struct vring_desc));
>
> -               if (vq->do_unmap) {
> +               if (vq->do_unmap || sg) {
>                         for (j = 0; j < len / sizeof(struct vring_desc); j++)
> -                               vring_unmap_one_split_indirect(vq, &indir_desc[j]);
> +                               vring_unmap_one_split_indirect(vq, &indir_desc[j], &sg);
> +
>                 }
>
>                 kfree(indir_desc);
> @@ -812,6 +836,11 @@ static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head,
>         } else if (ctx) {
>                 *ctx = vq->split.desc_state[head].indir_desc;
>         }
> +
> +end:
> +       /* sg point to the next. So we mark the last one as the end. */
> +       if (!vq->do_unmap && sg)
> +               sg_mark_end(sg - 1);
>  }
>
>  static bool more_used_split(const struct vring_virtqueue *vq)
> @@ -822,6 +851,7 @@ static bool more_used_split(const struct vring_virtqueue *vq)
>
>  static void *virtqueue_get_buf_ctx_split(struct virtqueue *_vq,
>                                          unsigned int *len,
> +                                        struct scatterlist *sg,
>                                          void **ctx)
>  {
>         struct vring_virtqueue *vq = to_vvq(_vq);
> @@ -862,7 +892,7 @@ static void *virtqueue_get_buf_ctx_split(struct virtqueue *_vq,
>
>         /* detach_buf_split clears data, so grab it now. */
>         ret = vq->split.desc_state[i].data;
> -       detach_buf_split(vq, i, ctx);
> +       detach_buf_split(vq, i, sg, ctx);
>         vq->last_used_idx++;
>         /* If we expect an interrupt for the next entry, tell host
>          * by writing event index and flush out the write before
> @@ -984,7 +1014,7 @@ static void *virtqueue_detach_unused_buf_split(struct virtqueue *_vq)
>                         continue;
>                 /* detach_buf_split clears data, so grab it now. */
>                 buf = vq->split.desc_state[i].data;
> -               detach_buf_split(vq, i, NULL);
> +               detach_buf_split(vq, i, NULL, NULL);
>                 vq->split.avail_idx_shadow--;
>                 vq->split.vring.avail->idx = cpu_to_virtio16(_vq->vdev,
>                                 vq->split.avail_idx_shadow);
> @@ -1221,7 +1251,8 @@ static u16 packed_last_used(u16 last_used_idx)
>  }
>
>  static void vring_unmap_extra_packed(const struct vring_virtqueue *vq,
> -                                    const struct vring_desc_extra *extra)
> +                                    const struct vring_desc_extra *extra,
> +                                    struct scatterlist **sg)
>  {
>         u16 flags;
>
> @@ -1236,8 +1267,11 @@ static void vring_unmap_extra_packed(const struct vring_virtqueue *vq,
>                                  (flags & VRING_DESC_F_WRITE) ?
>                                  DMA_FROM_DEVICE : DMA_TO_DEVICE);
>         } else {
> -               if (!vq->do_unmap)
> +               if (!vq->do_unmap) {
> +                       if (*sg)
> +                               store_dma_to_sg(sg, extra->addr, extra->len);
>                         return;
> +               }
>
>                 dma_unmap_page(vring_dma_dev(vq),
>                                extra->addr, extra->len,
> @@ -1247,12 +1281,17 @@ static void vring_unmap_extra_packed(const struct vring_virtqueue *vq,
>  }
>
>  static void vring_unmap_desc_packed(const struct vring_virtqueue *vq,
> -                                   const struct vring_packed_desc *desc)
> +                                   const struct vring_packed_desc *desc,
> +                                   struct scatterlist **sg)
>  {

Interesting, I think this is only needed for indirect descriptors?

If yes, why do we care about the dma addresses of indirect descriptors?
If not, it's a bug that we should use desc_extra, otherwise it's a
device-triggerable unmap which has security implications (we need to
use vring_extra in this case).

 >         u16 flags;
>
> -       if (!vq->do_unmap)
> +       if (!vq->do_unmap) {
> +               if (*sg)
> +                       store_dma_to_sg(sg, le64_to_cpu(desc->addr),
> +                                       le32_to_cpu(desc->len));
>                 return;
> +       }
>
>         flags = le16_to_cpu(desc->flags);
>
> @@ -1389,7 +1428,7 @@ static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq,
>         err_idx = i;
>
>         for (i = 0; i < err_idx; i++)
> -               vring_unmap_desc_packed(vq, &desc[i]);
> +               vring_unmap_desc_packed(vq, &desc[i], NULL);
>
>  free_desc:
>         kfree(desc);
> @@ -1539,7 +1578,7 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq,
>         for (n = 0; n < total_sg; n++) {
>                 if (i == err_idx)
>                         break;
> -               vring_unmap_extra_packed(vq, &vq->packed.desc_extra[curr]);
> +               vring_unmap_extra_packed(vq, &vq->packed.desc_extra[curr], NULL);
>                 curr = vq->packed.desc_extra[curr].next;
>                 i++;
>                 if (i >= vq->packed.vring.num)
> @@ -1600,7 +1639,9 @@ static bool virtqueue_kick_prepare_packed(struct virtqueue *_vq)
>  }
>
>  static void detach_buf_packed(struct vring_virtqueue *vq,
> -                             unsigned int id, void **ctx)
> +                             unsigned int id,
> +                             struct scatterlist *sg,
> +                             void **ctx)
>  {
>         struct vring_desc_state_packed *state = NULL;
>         struct vring_packed_desc *desc;
> @@ -1615,13 +1656,10 @@ static void detach_buf_packed(struct vring_virtqueue *vq,
>         vq->free_head = id;
>         vq->vq.num_free += state->num;
>
> -       if (unlikely(vq->do_unmap)) {
> -               curr = id;
> -               for (i = 0; i < state->num; i++) {
> -                       vring_unmap_extra_packed(vq,
> -                                                &vq->packed.desc_extra[curr]);
> -                       curr = vq->packed.desc_extra[curr].next;
> -               }
> +       curr = id;
> +       for (i = 0; i < state->num; i++) {
> +               vring_unmap_extra_packed(vq, &vq->packed.desc_extra[curr], &sg);
> +               curr = vq->packed.desc_extra[curr].next;
>         }

Looks like an independent fix or cleanup?


>
>         if (vq->indirect) {
> @@ -1630,19 +1668,24 @@ static void detach_buf_packed(struct vring_virtqueue *vq,
>                 /* Free the indirect table, if any, now that it's unmapped. */
>                 desc = state->indir_desc;
>                 if (!desc)
> -                       return;
> +                       goto end;
>
> -               if (vq->do_unmap) {
> +               if (vq->do_unmap || sg) {
>                         len = vq->packed.desc_extra[id].len;
>                         for (i = 0; i < len / sizeof(struct vring_packed_desc);
>                                         i++)
> -                               vring_unmap_desc_packed(vq, &desc[i]);
> +                               vring_unmap_desc_packed(vq, &desc[i], &sg);
>                 }
>                 kfree(desc);
>                 state->indir_desc = NULL;
>         } else if (ctx) {
>                 *ctx = state->indir_desc;
>         }
> +
> +end:
> +       /* sg point to the next. So we mark the last one as the end. */
> +       if (!vq->do_unmap && sg)
> +               sg_mark_end(sg - 1);
>  }
>
>  static inline bool is_used_desc_packed(const struct vring_virtqueue *vq,
> @@ -1672,6 +1715,7 @@ static bool more_used_packed(const struct vring_virtqueue *vq)
>
>  static void *virtqueue_get_buf_ctx_packed(struct virtqueue *_vq,
>                                           unsigned int *len,
> +                                         struct scatterlist *sg,
>                                           void **ctx)
>  {
>         struct vring_virtqueue *vq = to_vvq(_vq);
> @@ -1712,7 +1756,7 @@ static void *virtqueue_get_buf_ctx_packed(struct virtqueue *_vq,
>
>         /* detach_buf_packed clears data, so grab it now. */
>         ret = vq->packed.desc_state[id].data;
> -       detach_buf_packed(vq, id, ctx);
> +       detach_buf_packed(vq, id, sg, ctx);
>
>         last_used += vq->packed.desc_state[id].num;
>         if (unlikely(last_used >= vq->packed.vring.num)) {
> @@ -1877,7 +1921,7 @@ static void *virtqueue_detach_unused_buf_packed(struct virtqueue *_vq)
>                         continue;
>                 /* detach_buf clears data, so grab it now. */
>                 buf = vq->packed.desc_state[i].data;
> -               detach_buf_packed(vq, i, NULL);
> +               detach_buf_packed(vq, i, NULL, NULL);
>                 END_USE(vq);
>                 return buf;
>         }
> @@ -2417,11 +2461,45 @@ void *virtqueue_get_buf_ctx(struct virtqueue *_vq, unsigned int *len,
>  {
>         struct vring_virtqueue *vq = to_vvq(_vq);
>
> -       return vq->packed_ring ? virtqueue_get_buf_ctx_packed(_vq, len, ctx) :
> -                                virtqueue_get_buf_ctx_split(_vq, len, ctx);
> +       return vq->packed_ring ? virtqueue_get_buf_ctx_packed(_vq, len, NULL, ctx) :
> +                                virtqueue_get_buf_ctx_split(_vq, len, NULL, ctx);
>  }
>  EXPORT_SYMBOL_GPL(virtqueue_get_buf_ctx);
>
> +/**
> + * virtqueue_get_buf_ctx_dma - get the next used buffer with the dma info
> + * @_vq: the struct virtqueue we're talking about.
> + * @len: the length written into the buffer
> + * @sg: scatterlist array to store the dma info
> + * @ctx: extra context for the token
> + *
> + * If the device wrote data into the buffer, @len will be set to the
> + * amount written.  This means you don't need to clear the buffer
> + * beforehand to ensure there's no data leakage in the case of short
> + * writes.
> + *
> + * Caller must ensure we don't call this with other virtqueue
> + * operations at the same time (except where noted).
> + *
> + * Only when the vq is in premapped mode and the sg is not null,

So let's fail if the caller is not in those states?

> we store the
> + * dma info of every descriptor of this buf to the sg array. The sg array must
> + * point to a scatterlist array, with the last element marked as the sg last.
> + * Once the function is done, we mark the last sg stored with dma info as the
> + * last one. If the sg array size is too small, some dma info may be missed.
> + *
> + * Returns NULL if there are no used buffers, or the "data" token
> + * handed to virtqueue_add_*().
> + */
> +void *virtqueue_get_buf_ctx_dma(struct virtqueue *_vq, unsigned int *len,
> +                               struct scatterlist *sg, void **ctx)

Or maybe get_buf_ctx_sg() ?

Thanks


> +{
> +       struct vring_virtqueue *vq = to_vvq(_vq);
> +
> +       return vq->packed_ring ? virtqueue_get_buf_ctx_packed(_vq, len, sg, ctx) :
> +                                virtqueue_get_buf_ctx_split(_vq, len, sg, ctx);
> +}
> +EXPORT_SYMBOL_GPL(virtqueue_get_buf_ctx_dma);
> +
>  void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len)
>  {
>         return virtqueue_get_buf_ctx(_vq, len, NULL);
> diff --git a/include/linux/virtio.h b/include/linux/virtio.h
> index 4cc614a38376..0b919786ade5 100644
> --- a/include/linux/virtio.h
> +++ b/include/linux/virtio.h
> @@ -74,6 +74,8 @@ void *virtqueue_get_buf(struct virtqueue *vq, unsigned int *len);
>
>  void *virtqueue_get_buf_ctx(struct virtqueue *vq, unsigned int *len,
>                             void **ctx);
> +void *virtqueue_get_buf_ctx_dma(struct virtqueue *_vq, unsigned int *len,
> +                               struct scatterlist *sg, void **ctx);
>
>  void virtqueue_disable_cb(struct virtqueue *vq);
>
> --
> 2.32.0.3.g01195cf9f
>


^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [RFC 2/4] virtio_ring: virtqueue_disable_and_recycle let the callback detach bufs
  2023-11-14 11:31 ` [RFC 2/4] virtio_ring: virtqueue_disable_and_recycle let the callback detach bufs Xuan Zhuo
@ 2023-11-16  8:11   ` Jason Wang
  2023-11-21  4:10     ` Xuan Zhuo
  0 siblings, 1 reply; 19+ messages in thread
From: Jason Wang @ 2023-11-16  8:11 UTC (permalink / raw)
  To: Xuan Zhuo; +Cc: virtualization, Michael S . Tsirkin

On Tue, Nov 14, 2023 at 7:31 PM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
>
> Now, inside virtqueue_disable_and_recycle, the recycle() just has two
> parameters(vq, buf) after detach operate.
>
> But if we are in premapped mode, we may need to get some dma info when
> detach buf like virtqueue_get_buf_ctx_dma().
>
> So we call recycle directly, this callback detaches bufs self. It should
> complete the work of detaching all the unused buffers.
>
> Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
> ---
>  drivers/net/virtio/main.c    | 31 ++++++++++++++++++++++---------

Btw, is it better to do this before the restructure, or anything I missed?

Thanks


^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [RFC 4/4] virtio_net: sq support premapped mode
  2023-11-14 11:31 ` [RFC 4/4] virtio_net: sq support premapped mode Xuan Zhuo
@ 2023-11-16  8:11   ` Jason Wang
  0 siblings, 0 replies; 19+ messages in thread
From: Jason Wang @ 2023-11-16  8:11 UTC (permalink / raw)
  To: Xuan Zhuo; +Cc: virtualization, Michael S . Tsirkin

On Tue, Nov 14, 2023 at 7:31 PM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
>
> If the xsk is enabling, the xsk tx will share the send queue.
> But the xsk requires that the send queue use the premapped mode.
> So the send queue must support premapped mode.
>
> Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
> ---
>  drivers/net/virtio/main.c       | 114 ++++++++++++++++++++++++++++----
>  drivers/net/virtio/virtio_net.h |   2 +
>  2 files changed, 102 insertions(+), 14 deletions(-)
>
> diff --git a/drivers/net/virtio/main.c b/drivers/net/virtio/main.c
> index 57cbd449637a..0766fed28afe 100644
> --- a/drivers/net/virtio/main.c
> +++ b/drivers/net/virtio/main.c
> @@ -167,13 +167,46 @@ static struct xdp_frame *ptr_to_xdp(void *ptr)
>         return (struct xdp_frame *)((unsigned long)ptr & ~VIRTIO_XDP_FLAG);
>  }
>
> +static void virtnet_sq_unmap(struct virtnet_sq *sq)
> +{
> +       struct scatterlist *sg;
> +
> +       for (sg = sq->sg; ; sg = sg_next(sg)) {
> +               virtqueue_dma_unmap_single_attrs(sq->vq,
> +                                                sg->dma_address,
> +                                                sg->length,
> +                                                DMA_TO_DEVICE, 0);
> +
> +               if (sg_is_last(sg)) {
> +                       /* clear the end mark setted by virtio core. */
> +                       sg_unmark_end(sg);
> +                       break;
> +               }
> +       }
> +
> +       /* mark the last one of sq->sg[] as the end for reuse by
> +        * virtqueue_get_buf_ctx_dma().
> +        */
> +       sg_mark_end(&sq->sg[ARRAY_SIZE(sq->sg) - 1]);

This API seems to be odd: the flag is set by the core but cleared by the driver.

Can we simply follow the add API that returns the #sgs?

> +}
> +
>  static void __free_old_xmit(struct virtnet_sq *sq, bool in_napi,
>                             u64 *bytes, u64 *packets)
>  {
> +       struct scatterlist *sg = NULL;
>         unsigned int len;
>         void *ptr;
>
> -       while ((ptr = virtqueue_get_buf(sq->vq, &len)) != NULL) {
> +       /* clear the sg table */
> +       if (sq->do_dma) {
> +               sg_init_table(sq->sg, ARRAY_SIZE(sq->sg));
> +               sg = sq->sg;
> +       }
> +
> +       while ((ptr = virtqueue_get_buf_ctx_dma(sq->vq, &len, sg, NULL)) != NULL) {
> +               if (sq->do_dma)
> +                       virtnet_sq_unmap(sq);
> +
>                 if (!is_xdp_frame(ptr)) {
>                         struct sk_buff *skb = ptr;
>
> @@ -567,22 +600,61 @@ static void *virtnet_rq_alloc(struct virtnet_rq *rq, u32 size, gfp_t gfp)
>         return buf;
>  }
>
> -static void virtnet_rq_set_premapped(struct virtnet_info *vi)
> +static void virtnet_set_premapped(struct virtnet_info *vi)
>  {
>         int i;
>
> -       /* disable for big mode */
> -       if (!vi->mergeable_rx_bufs && vi->big_packets)
> -               return;
> -
>         for (i = 0; i < vi->max_queue_pairs; i++) {
> -               if (virtqueue_set_dma_premapped(vi->rq[i].vq))
> -                       continue;
> +               if (!virtqueue_set_dma_premapped(vi->sq[i].vq))
> +                       vi->sq[i].do_dma = true;
>
> -               vi->rq[i].do_dma = true;
> +               /* disable for big mode */
> +               if (vi->mergeable_rx_bufs || !vi->big_packets) {
> +                       if (!virtqueue_set_dma_premapped(vi->rq[i].vq))
> +                               vi->rq[i].do_dma = true;
> +               }
>         }
>  }
>
> +static int virtnet_sq_map_sg(struct virtnet_sq *sq)
> +{
> +       struct scatterlist *sg;
> +
> +       for (sg = sq->sg; sg; sg = sg_next(sg)) {
> +               sg->dma_address = virtqueue_dma_map_single_attrs(sq->vq, sg_virt(sg),
> +                                                                sg->length,
> +                                                                DMA_TO_DEVICE, 0);
> +               if (virtqueue_dma_mapping_error(sq->vq, sg->dma_address))
> +                       goto err;
> +       }
> +
> +       return 0;
> +
> +err:
> +       if (sg != sq->sg) {
> +               sg_mark_end(sg - 1);
> +               virtnet_sq_unmap(sq);
> +       }
> +       return -ENOMEM;
> +}
> +
> +static int virtnet_add_outbuf(struct virtnet_sq *sq, u32 num, void *data)
> +{
> +       int ret;
> +
> +       if (sq->do_dma) {
> +               ret = virtnet_sq_map_sg(sq);
> +               if (ret)
> +                       return -ENOMEM;
> +       }
> +
> +       ret = virtqueue_add_outbuf(sq->vq, sq->sg, num, data, GFP_ATOMIC);
> +       if (ret && sq->do_dma)
> +               virtnet_sq_unmap(sq);
> +
> +       return ret;
> +}
> +
>  static void free_old_xmit(struct virtnet_sq *sq, bool in_napi)
>  {
>         u64 bytes, packets = 0;
> @@ -686,8 +758,7 @@ static int __virtnet_xdp_xmit_one(struct virtnet_info *vi,
>                             skb_frag_size(frag), skb_frag_off(frag));
>         }
>
> -       err = virtqueue_add_outbuf(sq->vq, sq->sg, nr_frags + 1,
> -                                  xdp_to_ptr(xdpf), GFP_ATOMIC);
> +       err = virtnet_add_outbuf(sq, nr_frags + 1, xdp_to_ptr(xdpf));
>         if (unlikely(err))
>                 return -ENOSPC; /* Caller handle free/refcnt */
>
> @@ -2126,7 +2197,7 @@ static int xmit_skb(struct virtnet_sq *sq, struct sk_buff *skb)
>                         return num_sg;
>                 num_sg++;
>         }
> -       return virtqueue_add_outbuf(sq->vq, sq->sg, num_sg, skb, GFP_ATOMIC);
> +       return virtnet_add_outbuf(sq, num_sg, skb);
>  }
>
>  static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev)
> @@ -3852,10 +3923,25 @@ static void virtnet_rq_free_unused_buf(struct virtqueue *vq, void *buf)
>
>  static void virtnet_sq_free_unused_bufs(struct virtqueue *vq)
>  {
> +       struct virtnet_info *vi = vq->vdev->priv;
> +       struct scatterlist *sg = NULL;
> +       struct virtnet_sq *sq;
> +       int i = vq2txq(vq);
>         void *buf;
>
> -       while ((buf = virtqueue_detach_unused_buf(vq)) != NULL)
> +       sq = &vi->sq[i];
> +
> +       if (sq->do_dma) {
> +               sg_init_table(sq->sg, ARRAY_SIZE(sq->sg));

Btw, if we only care about dma_addr and length, using sg seems to be
expensive here.

Thanks


> +               sg = sq->sg;
> +       }
> +
> +       while ((buf = virtqueue_detach_unused_buf_premapped(vq, sg)) != NULL) {
> +               if (sq->do_dma)
> +                       virtnet_sq_unmap(sq);
> +
>                 virtnet_sq_free_unused_buf(vq, buf);
> +       }
>  }
>
>  static void virtnet_rq_free_unused_bufs(struct virtqueue *vq)
> @@ -4052,7 +4138,7 @@ static int init_vqs(struct virtnet_info *vi)
>         if (ret)
>                 goto err_free;
>
> -       virtnet_rq_set_premapped(vi);
> +       virtnet_set_premapped(vi);
>
>         cpus_read_lock();
>         virtnet_set_affinity(vi);
> diff --git a/drivers/net/virtio/virtio_net.h b/drivers/net/virtio/virtio_net.h
> index ebf9f344648a..870855a829a0 100644
> --- a/drivers/net/virtio/virtio_net.h
> +++ b/drivers/net/virtio/virtio_net.h
> @@ -67,6 +67,8 @@ struct virtnet_sq {
>
>         /* Record whether sq is in reset state. */
>         bool reset;
> +
> +       bool do_dma;
>  };
>
>  /* Internal representation of a receive virtqueue */
> --
> 2.32.0.3.g01195cf9f
>


^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [RFC 2/4] virtio_ring: virtqueue_disable_and_recycle let the callback detach bufs
  2023-11-16  8:11   ` Jason Wang
@ 2023-11-21  4:10     ` Xuan Zhuo
  0 siblings, 0 replies; 19+ messages in thread
From: Xuan Zhuo @ 2023-11-21  4:10 UTC (permalink / raw)
  To: Jason Wang; +Cc: virtualization, Michael S . Tsirkin

On Thu, 16 Nov 2023 16:11:13 +0800, Jason Wang <jasowang@redhat.com> wrote:
> On Tue, Nov 14, 2023 at 7:31 PM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> >
> > Now, inside virtqueue_disable_and_recycle, the recycle() just has two
> > parameters(vq, buf) after detach operate.
> >
> > But if we are in premapped mode, we may need to get some dma info when
> > detach buf like virtqueue_get_buf_ctx_dma().
> >
> > So we call recycle directly, this callback detaches bufs self. It should
> > complete the work of detaching all the unused buffers.
> >
> > Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
> > ---
> >  drivers/net/virtio/main.c    | 31 ++++++++++++++++++++++---------
>
> Btw, is it better to do this before the restructure, or anything I missed?


#1 is for get_buf from virtio core.
This is for the next patch. (get unused buf)

So this is #2.

Thanks.

>
> Thanks
>

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [RFC 1/4] virtio_ring: introduce virtqueue_get_buf_ctx_dma()
  2023-11-16  8:11   ` Jason Wang
@ 2023-11-21  6:12     ` Xuan Zhuo
  2023-11-22  6:03       ` Jason Wang
  0 siblings, 1 reply; 19+ messages in thread
From: Xuan Zhuo @ 2023-11-21  6:12 UTC (permalink / raw)
  To: Jason Wang; +Cc: virtualization, Michael S . Tsirkin

On Thu, 16 Nov 2023 16:11:11 +0800, Jason Wang <jasowang@redhat.com> wrote:
> On Tue, Nov 14, 2023 at 7:31 PM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> >
> > introduce virtqueue_get_buf_ctx_dma() to collect the dma info when
> > get buf from virtio core for premapped mode.
> >
> > If the virtio queue is premapped mode, the virtio-net send buf may
> > have many desc. Every desc dma address need to be unmap. So here we
> > introduce a new helper to collect the dma address of the buffer from
> > the virtio core.
>
> So looking at vring_desc_extra, what we have right now is:
>
> struct vring_desc_extra {
>         dma_addr_t addr;                /* Descriptor DMA addr. */
>         u32 len;                        /* Descriptor length. */
>         u16 flags;                      /* Descriptor flags. */
>         u16 next;                       /* The next desc state in a list. */
> };
>
> And sg is
>
> struct scatterlist {
>         unsigned long   page_link;
> unsigned int    offset;
>         unsigned int    length;
>         dma_addr_t      dma_address;
> #ifdef CONFIG_NEED_SG_DMA_LENGTH
>         unsigned int    dma_length;
> #endif
> #ifdef CONFIG_NEED_SG_DMA_FLAGS
>         unsigned int    dma_flags;
> #endif
> };
>
> Would it better just store sg?

Do you mean we expose the vring_desc_extra to the driver?

How about introducing such a new structure?

struct virtio_dma_item {
         dma_addr_t addr;
         u32 len;
};

struct virtio_dma_head {
	u32 num;
	u32 used;
	struct virtio_dma_item items[];
};

Then we just need to pass one pointer to the virtio.
num is used to pass the size of items
used is used to return the num used by virtio core.

>
> More below
>
> >
> > Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
> > ---
> >  drivers/virtio/virtio_ring.c | 148 ++++++++++++++++++++++++++---------
> >  include/linux/virtio.h       |   2 +
> >  2 files changed, 115 insertions(+), 35 deletions(-)
> >
> > diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
> > index 51d8f3299c10..0b3caee4ef9d 100644
> > --- a/drivers/virtio/virtio_ring.c
> > +++ b/drivers/virtio/virtio_ring.c
> > @@ -362,6 +362,20 @@ static struct device *vring_dma_dev(const struct vring_virtqueue *vq)
> >         return vq->dma_dev;
> >  }
> >
> > +static void store_dma_to_sg(struct scatterlist **sgp, dma_addr_t addr, unsigned int length)
> > +{
> > +       struct scatterlist *sg;
> > +
> > +       sg = *sgp;
> > +
> > +       sg->dma_address = addr;
> > +       sg->length = length;
> > +
> > +       sg = sg_next(sg);
> > +
> > +       *sgp = sg;
> > +}
> > +
> >  /* Map one sg entry. */
> >  static int vring_map_one_sg(const struct vring_virtqueue *vq, struct scatterlist *sg,
> >                             enum dma_data_direction direction, dma_addr_t *addr)
> > @@ -441,12 +455,18 @@ static void virtqueue_init(struct vring_virtqueue *vq, u32 num)
> >   */
> >
> >  static void vring_unmap_one_split_indirect(const struct vring_virtqueue *vq,
> > -                                          const struct vring_desc *desc)
> > +                                          const struct vring_desc *desc,
> > +                                          struct scatterlist **sg)
> >  {
> >         u16 flags;
> >
> > -       if (!vq->do_unmap)
> > +       if (!vq->do_unmap) {
> > +               if (*sg)
>
> Can we simply move the
>
> if (*sg) to store_dma_to_sg()?

OK


>
> > +                       store_dma_to_sg(sg,
> > +                                       virtio64_to_cpu(vq->vq.vdev, desc->addr),
> > +                                       virtio32_to_cpu(vq->vq.vdev, desc->len));
> >                 return;
> > +       }
> >
> >         flags = virtio16_to_cpu(vq->vq.vdev, desc->flags);
> >
> > @@ -458,7 +478,7 @@ static void vring_unmap_one_split_indirect(const struct vring_virtqueue *vq,
> >  }
> >
> >  static unsigned int vring_unmap_one_split(const struct vring_virtqueue *vq,
> > -                                         unsigned int i)
> > +                                         unsigned int i, struct scatterlist **sg)
> >  {
> >         struct vring_desc_extra *extra = vq->split.desc_extra;
> >         u16 flags;
> > @@ -475,8 +495,11 @@ static unsigned int vring_unmap_one_split(const struct vring_virtqueue *vq,
> >                                  (flags & VRING_DESC_F_WRITE) ?
> >                                  DMA_FROM_DEVICE : DMA_TO_DEVICE);
> >         } else {
> > -               if (!vq->do_unmap)
> > +               if (!vq->do_unmap) {
>
> The:
>
> else {
>       if {
>             if {
> }
>
> seems odd. I think I would prefer
>
> if (flags & VRING_DESC_F_INDIRECT) {
> } else if (!vq->do_unmap) {
> } else {
> }


Will fix.


>
> here
>
> Btw, I really think do_unmap is not a good name, we probably need to
> rename it as "unmap_desc".

How about a separate patch for this?

>
>
> > +                       if (*sg)
> > +                               store_dma_to_sg(sg, extra[i].addr, extra[i].len);
>
> In which case we need to unmap by driver but we don't need a dma address?

Sorry, do not get it.

>
> >                         goto out;
> > +               }
> >
> >                 dma_unmap_page(vring_dma_dev(vq),
> >                                extra[i].addr,
> > @@ -717,10 +740,10 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,
> >                 if (i == err_idx)
> >                         break;
> >                 if (indirect) {
> > -                       vring_unmap_one_split_indirect(vq, &desc[i]);
> > +                       vring_unmap_one_split_indirect(vq, &desc[i], NULL);
> >                         i = virtio16_to_cpu(_vq->vdev, desc[i].next);
> >                 } else
> > -                       i = vring_unmap_one_split(vq, i);
> > +                       i = vring_unmap_one_split(vq, i, NULL);
> >         }
> >
> >  free_indirect:
> > @@ -763,7 +786,7 @@ static bool virtqueue_kick_prepare_split(struct virtqueue *_vq)
> >  }
> >
> >  static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head,
> > -                            void **ctx)
> > +                            struct scatterlist *sg, void **ctx)
> >  {
> >         unsigned int i, j;
> >         __virtio16 nextflag = cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_NEXT);
> > @@ -775,12 +798,12 @@ static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head,
> >         i = head;
> >
> >         while (vq->split.vring.desc[i].flags & nextflag) {
> > -               vring_unmap_one_split(vq, i);
> > +               vring_unmap_one_split(vq, i, &sg);
> >                 i = vq->split.desc_extra[i].next;
> >                 vq->vq.num_free++;
> >         }
> >
> > -       vring_unmap_one_split(vq, i);
> > +       vring_unmap_one_split(vq, i, &sg);
> >         vq->split.desc_extra[i].next = vq->free_head;
> >         vq->free_head = head;
> >
> > @@ -794,7 +817,7 @@ static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head,
> >
> >                 /* Free the indirect table, if any, now that it's unmapped. */
> >                 if (!indir_desc)
> > -                       return;
> > +                       goto end;
> >
> >                 len = vq->split.desc_extra[head].len;
> >
> > @@ -802,9 +825,10 @@ static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head,
> >                                 VRING_DESC_F_INDIRECT));
> >                 BUG_ON(len == 0 || len % sizeof(struct vring_desc));
> >
> > -               if (vq->do_unmap) {
> > +               if (vq->do_unmap || sg) {
> >                         for (j = 0; j < len / sizeof(struct vring_desc); j++)
> > -                               vring_unmap_one_split_indirect(vq, &indir_desc[j]);
> > +                               vring_unmap_one_split_indirect(vq, &indir_desc[j], &sg);
> > +
> >                 }
> >
> >                 kfree(indir_desc);
> > @@ -812,6 +836,11 @@ static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head,
> >         } else if (ctx) {
> >                 *ctx = vq->split.desc_state[head].indir_desc;
> >         }
> > +
> > +end:
> > +       /* sg point to the next. So we mark the last one as the end. */
> > +       if (!vq->do_unmap && sg)
> > +               sg_mark_end(sg - 1);
> >  }
> >
> >  static bool more_used_split(const struct vring_virtqueue *vq)
> > @@ -822,6 +851,7 @@ static bool more_used_split(const struct vring_virtqueue *vq)
> >
> >  static void *virtqueue_get_buf_ctx_split(struct virtqueue *_vq,
> >                                          unsigned int *len,
> > +                                        struct scatterlist *sg,
> >                                          void **ctx)
> >  {
> >         struct vring_virtqueue *vq = to_vvq(_vq);
> > @@ -862,7 +892,7 @@ static void *virtqueue_get_buf_ctx_split(struct virtqueue *_vq,
> >
> >         /* detach_buf_split clears data, so grab it now. */
> >         ret = vq->split.desc_state[i].data;
> > -       detach_buf_split(vq, i, ctx);
> > +       detach_buf_split(vq, i, sg, ctx);
> >         vq->last_used_idx++;
> >         /* If we expect an interrupt for the next entry, tell host
> >          * by writing event index and flush out the write before
> > @@ -984,7 +1014,7 @@ static void *virtqueue_detach_unused_buf_split(struct virtqueue *_vq)
> >                         continue;
> >                 /* detach_buf_split clears data, so grab it now. */
> >                 buf = vq->split.desc_state[i].data;
> > -               detach_buf_split(vq, i, NULL);
> > +               detach_buf_split(vq, i, NULL, NULL);
> >                 vq->split.avail_idx_shadow--;
> >                 vq->split.vring.avail->idx = cpu_to_virtio16(_vq->vdev,
> >                                 vq->split.avail_idx_shadow);
> > @@ -1221,7 +1251,8 @@ static u16 packed_last_used(u16 last_used_idx)
> >  }
> >
> >  static void vring_unmap_extra_packed(const struct vring_virtqueue *vq,
> > -                                    const struct vring_desc_extra *extra)
> > +                                    const struct vring_desc_extra *extra,
> > +                                    struct scatterlist **sg)
> >  {
> >         u16 flags;
> >
> > @@ -1236,8 +1267,11 @@ static void vring_unmap_extra_packed(const struct vring_virtqueue *vq,
> >                                  (flags & VRING_DESC_F_WRITE) ?
> >                                  DMA_FROM_DEVICE : DMA_TO_DEVICE);
> >         } else {
> > -               if (!vq->do_unmap)
> > +               if (!vq->do_unmap) {
> > +                       if (*sg)
> > +                               store_dma_to_sg(sg, extra->addr, extra->len);
> >                         return;
> > +               }
> >
> >                 dma_unmap_page(vring_dma_dev(vq),
> >                                extra->addr, extra->len,
> > @@ -1247,12 +1281,17 @@ static void vring_unmap_extra_packed(const struct vring_virtqueue *vq,
> >  }
> >
> >  static void vring_unmap_desc_packed(const struct vring_virtqueue *vq,
> > -                                   const struct vring_packed_desc *desc)
> > +                                   const struct vring_packed_desc *desc,
> > +                                   struct scatterlist **sg)
> >  {
>
> Interesting, I think this is only needed for indirect descriptors?
>
> If yes, why do we care about the dma addresses of indirect descriptors?
> If not, it's a bug that we should use desc_extra, otherwise it's a
> device-triggerable unmap which has security implications (we need to
> use vring_extra in this case).

Sorry, I do not get.

                            indirect desc(alloc by virtio core)
       virtio-desc ------> |  | -------------------------------------> dma address of buffer
                           |  | -------------------------------------> dma address of buffer
                           |  | -------------------------------------> dma address of buffer
                           |  | -------------------------------------> dma address of buffer
                           |  | -------------------------------------> dma address of buffer
                           |  | -------------------------------------> dma address of buffer

For vring_unmap_desc_packed the desc is the indirect desc(alloc by virtio core,
not the virtio desc), which record the dma address of buffer that is passed by
the driver. Here we need to record the dma address to pass back to the driver.


>
>  >         u16 flags;
> >
> > -       if (!vq->do_unmap)
> > +       if (!vq->do_unmap) {
> > +               if (*sg)
> > +                       store_dma_to_sg(sg, le64_to_cpu(desc->addr),
> > +                                       le32_to_cpu(desc->len));
> >                 return;
> > +       }
> >
> >         flags = le16_to_cpu(desc->flags);
> >
> > @@ -1389,7 +1428,7 @@ static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq,
> >         err_idx = i;
> >
> >         for (i = 0; i < err_idx; i++)
> > -               vring_unmap_desc_packed(vq, &desc[i]);
> > +               vring_unmap_desc_packed(vq, &desc[i], NULL);
> >
> >  free_desc:
> >         kfree(desc);
> > @@ -1539,7 +1578,7 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq,
> >         for (n = 0; n < total_sg; n++) {
> >                 if (i == err_idx)
> >                         break;
> > -               vring_unmap_extra_packed(vq, &vq->packed.desc_extra[curr]);
> > +               vring_unmap_extra_packed(vq, &vq->packed.desc_extra[curr], NULL);
> >                 curr = vq->packed.desc_extra[curr].next;
> >                 i++;
> >                 if (i >= vq->packed.vring.num)
> > @@ -1600,7 +1639,9 @@ static bool virtqueue_kick_prepare_packed(struct virtqueue *_vq)
> >  }
> >
> >  static void detach_buf_packed(struct vring_virtqueue *vq,
> > -                             unsigned int id, void **ctx)
> > +                             unsigned int id,
> > +                             struct scatterlist *sg,
> > +                             void **ctx)
> >  {
> >         struct vring_desc_state_packed *state = NULL;
> >         struct vring_packed_desc *desc;
> > @@ -1615,13 +1656,10 @@ static void detach_buf_packed(struct vring_virtqueue *vq,
> >         vq->free_head = id;
> >         vq->vq.num_free += state->num;
> >
> > -       if (unlikely(vq->do_unmap)) {
> > -               curr = id;
> > -               for (i = 0; i < state->num; i++) {
> > -                       vring_unmap_extra_packed(vq,
> > -                                                &vq->packed.desc_extra[curr]);
> > -                       curr = vq->packed.desc_extra[curr].next;
> > -               }
> > +       curr = id;
> > +       for (i = 0; i < state->num; i++) {
> > +               vring_unmap_extra_packed(vq, &vq->packed.desc_extra[curr], &sg);
> > +               curr = vq->packed.desc_extra[curr].next;
> >         }
>
> Looks like an independent fix or cleanup?

Before this commit, if do_unmap is false, the loop is skipped.
Now, if do_unmap is false, we need to record the dma address.
So the loop is needed whatever do_unmap is true or false.

>
>
> >
> >         if (vq->indirect) {
> > @@ -1630,19 +1668,24 @@ static void detach_buf_packed(struct vring_virtqueue *vq,
> >                 /* Free the indirect table, if any, now that it's unmapped. */
> >                 desc = state->indir_desc;
> >                 if (!desc)
> > -                       return;
> > +                       goto end;
> >
> > -               if (vq->do_unmap) {
> > +               if (vq->do_unmap || sg) {
> >                         len = vq->packed.desc_extra[id].len;
> >                         for (i = 0; i < len / sizeof(struct vring_packed_desc);
> >                                         i++)
> > -                               vring_unmap_desc_packed(vq, &desc[i]);
> > +                               vring_unmap_desc_packed(vq, &desc[i], &sg);
> >                 }
> >                 kfree(desc);
> >                 state->indir_desc = NULL;
> >         } else if (ctx) {
> >                 *ctx = state->indir_desc;
> >         }
> > +
> > +end:
> > +       /* sg point to the next. So we mark the last one as the end. */
> > +       if (!vq->do_unmap && sg)
> > +               sg_mark_end(sg - 1);
> >  }
> >
> >  static inline bool is_used_desc_packed(const struct vring_virtqueue *vq,
> > @@ -1672,6 +1715,7 @@ static bool more_used_packed(const struct vring_virtqueue *vq)
> >
> >  static void *virtqueue_get_buf_ctx_packed(struct virtqueue *_vq,
> >                                           unsigned int *len,
> > +                                         struct scatterlist *sg,
> >                                           void **ctx)
> >  {
> >         struct vring_virtqueue *vq = to_vvq(_vq);
> > @@ -1712,7 +1756,7 @@ static void *virtqueue_get_buf_ctx_packed(struct virtqueue *_vq,
> >
> >         /* detach_buf_packed clears data, so grab it now. */
> >         ret = vq->packed.desc_state[id].data;
> > -       detach_buf_packed(vq, id, ctx);
> > +       detach_buf_packed(vq, id, sg, ctx);
> >
> >         last_used += vq->packed.desc_state[id].num;
> >         if (unlikely(last_used >= vq->packed.vring.num)) {
> > @@ -1877,7 +1921,7 @@ static void *virtqueue_detach_unused_buf_packed(struct virtqueue *_vq)
> >                         continue;
> >                 /* detach_buf clears data, so grab it now. */
> >                 buf = vq->packed.desc_state[i].data;
> > -               detach_buf_packed(vq, i, NULL);
> > +               detach_buf_packed(vq, i, NULL, NULL);
> >                 END_USE(vq);
> >                 return buf;
> >         }
> > @@ -2417,11 +2461,45 @@ void *virtqueue_get_buf_ctx(struct virtqueue *_vq, unsigned int *len,
> >  {
> >         struct vring_virtqueue *vq = to_vvq(_vq);
> >
> > -       return vq->packed_ring ? virtqueue_get_buf_ctx_packed(_vq, len, ctx) :
> > -                                virtqueue_get_buf_ctx_split(_vq, len, ctx);
> > +       return vq->packed_ring ? virtqueue_get_buf_ctx_packed(_vq, len, NULL, ctx) :
> > +                                virtqueue_get_buf_ctx_split(_vq, len, NULL, ctx);
> >  }
> >  EXPORT_SYMBOL_GPL(virtqueue_get_buf_ctx);
> >
> > +/**
> > + * virtqueue_get_buf_ctx_dma - get the next used buffer with the dma info
> > + * @_vq: the struct virtqueue we're talking about.
> > + * @len: the length written into the buffer
> > + * @sg: scatterlist array to store the dma info
> > + * @ctx: extra context for the token
> > + *
> > + * If the device wrote data into the buffer, @len will be set to the
> > + * amount written.  This means you don't need to clear the buffer
> > + * beforehand to ensure there's no data leakage in the case of short
> > + * writes.
> > + *
> > + * Caller must ensure we don't call this with other virtqueue
> > + * operations at the same time (except where noted).
> > + *
> > + * Only when the vq is in premapped mode and the sg is not null,
>
> So let's fail if the caller is not in those state?

OK.


>
> > we store the
> > + * dma info of every descriptor of this buf to the sg array. The sg array must
> > + * point to a scatterlist array, with the last element marked as the sg last.
> > + * Once the function is done, we mark the last sg stored with dma info as the
> > + * last one. If the sg array size is too small, some dma info may be missed.
> > + *
> > + * Returns NULL if there are no used buffers, or the "data" token
> > + * handed to virtqueue_add_*().
> > + */
> > +void *virtqueue_get_buf_ctx_dma(struct virtqueue *_vq, unsigned int *len,
> > +                               struct scatterlist *sg, void **ctx)
>
> Or maybe get_buf_ctx_sg() ?
>
> Thanks
>
>
> > +{
> > +       struct vring_virtqueue *vq = to_vvq(_vq);
> > +
> > +       return vq->packed_ring ? virtqueue_get_buf_ctx_packed(_vq, len, sg, ctx) :
> > +                                virtqueue_get_buf_ctx_split(_vq, len, sg, ctx);
> > +}
> > +EXPORT_SYMBOL_GPL(virtqueue_get_buf_ctx_dma);
> > +
> >  void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len)
> >  {
> >         return virtqueue_get_buf_ctx(_vq, len, NULL);
> > diff --git a/include/linux/virtio.h b/include/linux/virtio.h
> > index 4cc614a38376..0b919786ade5 100644
> > --- a/include/linux/virtio.h
> > +++ b/include/linux/virtio.h
> > @@ -74,6 +74,8 @@ void *virtqueue_get_buf(struct virtqueue *vq, unsigned int *len);
> >
> >  void *virtqueue_get_buf_ctx(struct virtqueue *vq, unsigned int *len,
> >                             void **ctx);
> > +void *virtqueue_get_buf_ctx_dma(struct virtqueue *_vq, unsigned int *len,
> > +                               struct scatterlist *sg, void **ctx);
> >
> >  void virtqueue_disable_cb(struct virtqueue *vq);
> >
> > --
> > 2.32.0.3.g01195cf9f
> >
>

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [RFC 1/4] virtio_ring: introduce virtqueue_get_buf_ctx_dma()
  2023-11-21  6:12     ` Xuan Zhuo
@ 2023-11-22  6:03       ` Jason Wang
  2023-11-22  6:29         ` Xuan Zhuo
  0 siblings, 1 reply; 19+ messages in thread
From: Jason Wang @ 2023-11-22  6:03 UTC (permalink / raw)
  To: Xuan Zhuo; +Cc: virtualization, Michael S . Tsirkin

On Tue, Nov 21, 2023 at 2:27 PM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
>
> On Thu, 16 Nov 2023 16:11:11 +0800, Jason Wang <jasowang@redhat.com> wrote:
> > On Tue, Nov 14, 2023 at 7:31 PM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> > >
> > > introduce virtqueue_get_buf_ctx_dma() to collect the dma info when
> > > get buf from virtio core for premapped mode.
> > >
> > > If the virtio queue is premapped mode, the virtio-net send buf may
> > > have many desc. Every desc dma address need to be unmap. So here we
> > > introduce a new helper to collect the dma address of the buffer from
> > > the virtio core.
> >
> > So looking at vring_desc_extra, what we have right now is:
> >
> > struct vring_desc_extra {
> >         dma_addr_t addr;                /* Descriptor DMA addr. */
> >         u32 len;                        /* Descriptor length. */
> >         u16 flags;                      /* Descriptor flags. */
> >         u16 next;                       /* The next desc state in a list. */
> > };
> >
> > And sg is
> >
> > struct scatterlist {
> >         unsigned long   page_link;
> > unsigned int    offset;
> >         unsigned int    length;
> >         dma_addr_t      dma_address;
> > #ifdef CONFIG_NEED_SG_DMA_LENGTH
> >         unsigned int    dma_length;
> > #endif
> > #ifdef CONFIG_NEED_SG_DMA_FLAGS
> >         unsigned int    dma_flags;
> > #endif
> > };
> >
> > Would it better just store sg?
>
> Do you mean we expose the vring_desc_extra to driver?

Maybe or not, (or just use sg for desc_extra).

Anyhow, any method needs to be benchmarked to see the performance impact.

>
> How about introducing such a new structure?
>
> struct virtio_dma_item {
>          dma_addr_t addr;
>          u32 len;
> };
>
> struct virtio_dma_head {
>         u32 num;
>         u32 used;
>         struct virtio_dma_item items[];
> };
>
> Then we just need to pass one pointer to the virtio.

Just to make sure I understand here, where did we store those arrays?

> num is used to pass the size of items
> used is used to return the num used by virtio core.
>
> >
> > More below
> >
> > >
> > > Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
> > > ---
> > >  drivers/virtio/virtio_ring.c | 148 ++++++++++++++++++++++++++---------
> > >  include/linux/virtio.h       |   2 +
> > >  2 files changed, 115 insertions(+), 35 deletions(-)
> > >
> > > diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
> > > index 51d8f3299c10..0b3caee4ef9d 100644
> > > --- a/drivers/virtio/virtio_ring.c
> > > +++ b/drivers/virtio/virtio_ring.c
> > > @@ -362,6 +362,20 @@ static struct device *vring_dma_dev(const struct vring_virtqueue *vq)
> > >         return vq->dma_dev;
> > >  }
> > >
> > > +static void store_dma_to_sg(struct scatterlist **sgp, dma_addr_t addr, unsigned int length)
> > > +{
> > > +       struct scatterlist *sg;
> > > +
> > > +       sg = *sgp;
> > > +
> > > +       sg->dma_address = addr;
> > > +       sg->length = length;
> > > +
> > > +       sg = sg_next(sg);
> > > +
> > > +       *sgp = sg;
> > > +}
> > > +
> > >  /* Map one sg entry. */
> > >  static int vring_map_one_sg(const struct vring_virtqueue *vq, struct scatterlist *sg,
> > >                             enum dma_data_direction direction, dma_addr_t *addr)
> > > @@ -441,12 +455,18 @@ static void virtqueue_init(struct vring_virtqueue *vq, u32 num)
> > >   */
> > >
> > >  static void vring_unmap_one_split_indirect(const struct vring_virtqueue *vq,
> > > -                                          const struct vring_desc *desc)
> > > +                                          const struct vring_desc *desc,
> > > +                                          struct scatterlist **sg)
> > >  {
> > >         u16 flags;
> > >
> > > -       if (!vq->do_unmap)
> > > +       if (!vq->do_unmap) {
> > > +               if (*sg)
> >
> > Can we simply move the
> >
> > if (*sg) to store_dma_to_sg()?
>
> OK
>
>
> >
> > > +                       store_dma_to_sg(sg,
> > > +                                       virtio64_to_cpu(vq->vq.vdev, desc->addr),
> > > +                                       virtio32_to_cpu(vq->vq.vdev, desc->len));
> > >                 return;
> > > +       }
> > >
> > >         flags = virtio16_to_cpu(vq->vq.vdev, desc->flags);
> > >
> > > @@ -458,7 +478,7 @@ static void vring_unmap_one_split_indirect(const struct vring_virtqueue *vq,
> > >  }
> > >
> > >  static unsigned int vring_unmap_one_split(const struct vring_virtqueue *vq,
> > > -                                         unsigned int i)
> > > +                                         unsigned int i, struct scatterlist **sg)
> > >  {
> > >         struct vring_desc_extra *extra = vq->split.desc_extra;
> > >         u16 flags;
> > > @@ -475,8 +495,11 @@ static unsigned int vring_unmap_one_split(const struct vring_virtqueue *vq,
> > >                                  (flags & VRING_DESC_F_WRITE) ?
> > >                                  DMA_FROM_DEVICE : DMA_TO_DEVICE);
> > >         } else {
> > > -               if (!vq->do_unmap)
> > > +               if (!vq->do_unmap) {
> >
> > The:
> >
> > else {
> >       if {
> >             if {
> > }
> >
> > seems odd. I think I would prefer
> >
> > if (flags & VRING_DESC_F_INDIRECT) {
> > } else if (!vq->do_unmap) {
> > } else {
> > }
>
>
> Will fix.
>
>
> >
> > here
> >
> > Btw, I really think do_unmap is not a good name, we probably need to
> > rename it as "unmap_desc".
>
> How about a separate patch for this?
>
> >
> >
> > > +                       if (*sg)
> > > +                               store_dma_to_sg(sg, extra[i].addr, extra[i].len);
> >
> > In which case we need to unmap by driver but we don't need a dma address?
>
> Sorry, do not get it.

I meant, I don't get how we can reach to this check.

>
> >
> > >                         goto out;
> > > +               }
> > >
> > >                 dma_unmap_page(vring_dma_dev(vq),
> > >                                extra[i].addr,
> > > @@ -717,10 +740,10 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,
> > >                 if (i == err_idx)
> > >                         break;
> > >                 if (indirect) {
> > > -                       vring_unmap_one_split_indirect(vq, &desc[i]);
> > > +                       vring_unmap_one_split_indirect(vq, &desc[i], NULL);
> > >                         i = virtio16_to_cpu(_vq->vdev, desc[i].next);
> > >                 } else
> > > -                       i = vring_unmap_one_split(vq, i);
> > > +                       i = vring_unmap_one_split(vq, i, NULL);
> > >         }
> > >
> > >  free_indirect:
> > > @@ -763,7 +786,7 @@ static bool virtqueue_kick_prepare_split(struct virtqueue *_vq)
> > >  }
> > >
> > >  static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head,
> > > -                            void **ctx)
> > > +                            struct scatterlist *sg, void **ctx)
> > >  {
> > >         unsigned int i, j;
> > >         __virtio16 nextflag = cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_NEXT);
> > > @@ -775,12 +798,12 @@ static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head,
> > >         i = head;
> > >
> > >         while (vq->split.vring.desc[i].flags & nextflag) {
> > > -               vring_unmap_one_split(vq, i);
> > > +               vring_unmap_one_split(vq, i, &sg);
> > >                 i = vq->split.desc_extra[i].next;
> > >                 vq->vq.num_free++;
> > >         }
> > >
> > > -       vring_unmap_one_split(vq, i);
> > > +       vring_unmap_one_split(vq, i, &sg);
> > >         vq->split.desc_extra[i].next = vq->free_head;
> > >         vq->free_head = head;
> > >
> > > @@ -794,7 +817,7 @@ static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head,
> > >
> > >                 /* Free the indirect table, if any, now that it's unmapped. */
> > >                 if (!indir_desc)
> > > -                       return;
> > > +                       goto end;
> > >
> > >                 len = vq->split.desc_extra[head].len;
> > >
> > > @@ -802,9 +825,10 @@ static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head,
> > >                                 VRING_DESC_F_INDIRECT));
> > >                 BUG_ON(len == 0 || len % sizeof(struct vring_desc));
> > >
> > > -               if (vq->do_unmap) {
> > > +               if (vq->do_unmap || sg) {
> > >                         for (j = 0; j < len / sizeof(struct vring_desc); j++)
> > > -                               vring_unmap_one_split_indirect(vq, &indir_desc[j]);
> > > +                               vring_unmap_one_split_indirect(vq, &indir_desc[j], &sg);
> > > +
> > >                 }
> > >
> > >                 kfree(indir_desc);
> > > @@ -812,6 +836,11 @@ static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head,
> > >         } else if (ctx) {
> > >                 *ctx = vq->split.desc_state[head].indir_desc;
> > >         }
> > > +
> > > +end:
> > > +       /* sg point to the next. So we mark the last one as the end. */
> > > +       if (!vq->do_unmap && sg)
> > > +               sg_mark_end(sg - 1);
> > >  }
> > >
> > >  static bool more_used_split(const struct vring_virtqueue *vq)
> > > @@ -822,6 +851,7 @@ static bool more_used_split(const struct vring_virtqueue *vq)
> > >
> > >  static void *virtqueue_get_buf_ctx_split(struct virtqueue *_vq,
> > >                                          unsigned int *len,
> > > +                                        struct scatterlist *sg,
> > >                                          void **ctx)
> > >  {
> > >         struct vring_virtqueue *vq = to_vvq(_vq);
> > > @@ -862,7 +892,7 @@ static void *virtqueue_get_buf_ctx_split(struct virtqueue *_vq,
> > >
> > >         /* detach_buf_split clears data, so grab it now. */
> > >         ret = vq->split.desc_state[i].data;
> > > -       detach_buf_split(vq, i, ctx);
> > > +       detach_buf_split(vq, i, sg, ctx);
> > >         vq->last_used_idx++;
> > >         /* If we expect an interrupt for the next entry, tell host
> > >          * by writing event index and flush out the write before
> > > @@ -984,7 +1014,7 @@ static void *virtqueue_detach_unused_buf_split(struct virtqueue *_vq)
> > >                         continue;
> > >                 /* detach_buf_split clears data, so grab it now. */
> > >                 buf = vq->split.desc_state[i].data;
> > > -               detach_buf_split(vq, i, NULL);
> > > +               detach_buf_split(vq, i, NULL, NULL);
> > >                 vq->split.avail_idx_shadow--;
> > >                 vq->split.vring.avail->idx = cpu_to_virtio16(_vq->vdev,
> > >                                 vq->split.avail_idx_shadow);
> > > @@ -1221,7 +1251,8 @@ static u16 packed_last_used(u16 last_used_idx)
> > >  }
> > >
> > >  static void vring_unmap_extra_packed(const struct vring_virtqueue *vq,
> > > -                                    const struct vring_desc_extra *extra)
> > > +                                    const struct vring_desc_extra *extra,
> > > +                                    struct scatterlist **sg)
> > >  {
> > >         u16 flags;
> > >
> > > @@ -1236,8 +1267,11 @@ static void vring_unmap_extra_packed(const struct vring_virtqueue *vq,
> > >                                  (flags & VRING_DESC_F_WRITE) ?
> > >                                  DMA_FROM_DEVICE : DMA_TO_DEVICE);
> > >         } else {
> > > -               if (!vq->do_unmap)
> > > +               if (!vq->do_unmap) {
> > > +                       if (*sg)
> > > +                               store_dma_to_sg(sg, extra->addr, extra->len);
> > >                         return;
> > > +               }
> > >
> > >                 dma_unmap_page(vring_dma_dev(vq),
> > >                                extra->addr, extra->len,
> > > @@ -1247,12 +1281,17 @@ static void vring_unmap_extra_packed(const struct vring_virtqueue *vq,
> > >  }
> > >
> > >  static void vring_unmap_desc_packed(const struct vring_virtqueue *vq,
> > > -                                   const struct vring_packed_desc *desc)
> > > +                                   const struct vring_packed_desc *desc,
> > > +                                   struct scatterlist **sg)
> > >  {
> >
> > Interesting, I think this is only needed for indirect descriptors?
> >
> > If yes, why do we care about the dma addresses of indirect descriptors?
> > If not, it's a bug that we should use desc_extra, otherwise it's a
> > device-triggerable unmap which has security implications (we need to
> > use vring_extra in this case).
>
> Sorry, I do not get.
>
>                             indirect desc(alloc by virtio core)
>        virtio-desc ------> |  | -------------------------------------> dma address of buffer
>                            |  | -------------------------------------> dma address of buffer
>                            |  | -------------------------------------> dma address of buffer
>                            |  | -------------------------------------> dma address of buffer
>                            |  | -------------------------------------> dma address of buffer
>                            |  | -------------------------------------> dma address of buffer
>
> For vring_unmap_desc_packed the desc is the indirect desc(alloc by virtio core,
> not the virtio desc), which record the dma address of buffer that is passed by
> the driver. Here we need to record the dma address to pass back to the driver.

I meant why should we return this to the driver? It is allocated by
the virtio core and should be transparent to the driver.

Thanks


^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [RFC 1/4] virtio_ring: introduce virtqueue_get_buf_ctx_dma()
  2023-11-22  6:03       ` Jason Wang
@ 2023-11-22  6:29         ` Xuan Zhuo
  2023-11-22  6:47           ` Jason Wang
  0 siblings, 1 reply; 19+ messages in thread
From: Xuan Zhuo @ 2023-11-22  6:29 UTC (permalink / raw)
  To: Jason Wang; +Cc: virtualization, Michael S . Tsirkin

On Wed, 22 Nov 2023 14:03:32 +0800, Jason Wang <jasowang@redhat.com> wrote:
> On Tue, Nov 21, 2023 at 2:27 PM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> >
> > On Thu, 16 Nov 2023 16:11:11 +0800, Jason Wang <jasowang@redhat.com> wrote:
> > > On Tue, Nov 14, 2023 at 7:31 PM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> > > >
> > > > introduce virtqueue_get_buf_ctx_dma() to collect the dma info when
> > > > get buf from virtio core for premapped mode.
> > > >
> > > > If the virtio queue is premapped mode, the virtio-net send buf may
> > > > have many desc. Every desc dma address need to be unmap. So here we
> > > > introduce a new helper to collect the dma address of the buffer from
> > > > the virtio core.
> > >
> > > So looking at vring_desc_extra, what we have right now is:
> > >
> > > struct vring_desc_extra {
> > >         dma_addr_t addr;                /* Descriptor DMA addr. */
> > >         u32 len;                        /* Descriptor length. */
> > >         u16 flags;                      /* Descriptor flags. */
> > >         u16 next;                       /* The next desc state in a list. */
> > > };
> > >
> > > And sg is
> > >
> > > struct scatterlist {
> > >         unsigned long   page_link;
> > > unsigned int    offset;
> > >         unsigned int    length;
> > >         dma_addr_t      dma_address;
> > > #ifdef CONFIG_NEED_SG_DMA_LENGTH
> > >         unsigned int    dma_length;
> > > #endif
> > > #ifdef CONFIG_NEED_SG_DMA_FLAGS
> > >         unsigned int    dma_flags;
> > > #endif
> > > };
> > >
> > > Would it better just store sg?
> >
> > Do you mean we expose the vring_desc_extra to dirver?
>
> Maybe or not, (or just us sg for desc_extra).
>
> Anyhow, any method needs to be benchmarked to see the performance impact.
>
> >
> > How about introducing such a new structure?
> >
> > struct virtio_dma_item {
> >          dma_addr_t addr;
> >          u32 len;
> > };
> >
> > struct virtio_dma_head {
> >         u32 num;
> >         u32 used;
> >         struct virtio_dma_item items[];
> > };
> >
> > Then we just need to pass one pointer to the virtio.
>
> Just to make sure I understand here, where did we store those arrays?


On the stack, or we can reuse the memory space of sq->sg.

dma = (struct virtio_dma_head*)sq->sg;


>
> > num is used to pass the size of items
> > used is used to return the num used by virtio core.
> >
> > >
> > > More below
> > >
> > > >
> > > > Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
> > > > ---
> > > >  drivers/virtio/virtio_ring.c | 148 ++++++++++++++++++++++++++---------
> > > >  include/linux/virtio.h       |   2 +
> > > >  2 files changed, 115 insertions(+), 35 deletions(-)
> > > >
> > > > diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
> > > > index 51d8f3299c10..0b3caee4ef9d 100644
> > > > --- a/drivers/virtio/virtio_ring.c
> > > > +++ b/drivers/virtio/virtio_ring.c
> > > > @@ -362,6 +362,20 @@ static struct device *vring_dma_dev(const struct vring_virtqueue *vq)
> > > >         return vq->dma_dev;
> > > >  }
> > > >
> > > > +static void store_dma_to_sg(struct scatterlist **sgp, dma_addr_t addr, unsigned int length)
> > > > +{
> > > > +       struct scatterlist *sg;
> > > > +
> > > > +       sg = *sgp;
> > > > +
> > > > +       sg->dma_address = addr;
> > > > +       sg->length = length;
> > > > +
> > > > +       sg = sg_next(sg);
> > > > +
> > > > +       *sgp = sg;
> > > > +}
> > > > +
> > > >  /* Map one sg entry. */
> > > >  static int vring_map_one_sg(const struct vring_virtqueue *vq, struct scatterlist *sg,
> > > >                             enum dma_data_direction direction, dma_addr_t *addr)
> > > > @@ -441,12 +455,18 @@ static void virtqueue_init(struct vring_virtqueue *vq, u32 num)
> > > >   */
> > > >
> > > >  static void vring_unmap_one_split_indirect(const struct vring_virtqueue *vq,
> > > > -                                          const struct vring_desc *desc)
> > > > +                                          const struct vring_desc *desc,
> > > > +                                          struct scatterlist **sg)
> > > >  {
> > > >         u16 flags;
> > > >
> > > > -       if (!vq->do_unmap)
> > > > +       if (!vq->do_unmap) {
> > > > +               if (*sg)
> > >
> > > Can we simply move the
> > >
> > > if (*sg) to store_dma_to_sg()?
> >
> > OK
> >
> >
> > >
> > > > +                       store_dma_to_sg(sg,
> > > > +                                       virtio64_to_cpu(vq->vq.vdev, desc->addr),
> > > > +                                       virtio32_to_cpu(vq->vq.vdev, desc->len));
> > > >                 return;
> > > > +       }
> > > >
> > > >         flags = virtio16_to_cpu(vq->vq.vdev, desc->flags);
> > > >
> > > > @@ -458,7 +478,7 @@ static void vring_unmap_one_split_indirect(const struct vring_virtqueue *vq,
> > > >  }
> > > >
> > > >  static unsigned int vring_unmap_one_split(const struct vring_virtqueue *vq,
> > > > -                                         unsigned int i)
> > > > +                                         unsigned int i, struct scatterlist **sg)
> > > >  {
> > > >         struct vring_desc_extra *extra = vq->split.desc_extra;
> > > >         u16 flags;
> > > > @@ -475,8 +495,11 @@ static unsigned int vring_unmap_one_split(const struct vring_virtqueue *vq,
> > > >                                  (flags & VRING_DESC_F_WRITE) ?
> > > >                                  DMA_FROM_DEVICE : DMA_TO_DEVICE);
> > > >         } else {
> > > > -               if (!vq->do_unmap)
> > > > +               if (!vq->do_unmap) {
> > >
> > > The:
> > >
> > > else {
> > >       if {
> > >             if {
> > > }
> > >
> > > seems odd. I think I would prefer
> > >
> > > if (flags & VRING_DESC_F_INDIRECT) {
> > > } else if (!vq->do_unmap) {
> > > } else {
> > > }
> >
> >
> > Will fix.
> >
> >
> > >
> > > here
> > >
> > > Btw, I really think do_unmap is not a good name, we probably need to
> > > rename it as "unmap_desc".
> >
> > How about a separate patch for this?
> >
> > >
> > >
> > > > +                       if (*sg)
> > > > +                               store_dma_to_sg(sg, extra[i].addr, extra[i].len);
> > >
> > > In which case we need to unmap by driver but we don't need a dma address?
> >
> > Sorry, do not get it.
>
> I meant, I don't get how we can reach to this check.

This is the main way. I think you miss something.


>
> >
> > >
> > > >                         goto out;
> > > > +               }
> > > >
> > > >                 dma_unmap_page(vring_dma_dev(vq),
> > > >                                extra[i].addr,
> > > > @@ -717,10 +740,10 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,
> > > >                 if (i == err_idx)
> > > >                         break;
> > > >                 if (indirect) {
> > > > -                       vring_unmap_one_split_indirect(vq, &desc[i]);
> > > > +                       vring_unmap_one_split_indirect(vq, &desc[i], NULL);
> > > >                         i = virtio16_to_cpu(_vq->vdev, desc[i].next);
> > > >                 } else
> > > > -                       i = vring_unmap_one_split(vq, i);
> > > > +                       i = vring_unmap_one_split(vq, i, NULL);
> > > >         }
> > > >
> > > >  free_indirect:
> > > > @@ -763,7 +786,7 @@ static bool virtqueue_kick_prepare_split(struct virtqueue *_vq)
> > > >  }
> > > >
> > > >  static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head,
> > > > -                            void **ctx)
> > > > +                            struct scatterlist *sg, void **ctx)
> > > >  {
> > > >         unsigned int i, j;
> > > >         __virtio16 nextflag = cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_NEXT);
> > > > @@ -775,12 +798,12 @@ static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head,
> > > >         i = head;
> > > >
> > > >         while (vq->split.vring.desc[i].flags & nextflag) {
> > > > -               vring_unmap_one_split(vq, i);
> > > > +               vring_unmap_one_split(vq, i, &sg);
> > > >                 i = vq->split.desc_extra[i].next;
> > > >                 vq->vq.num_free++;
> > > >         }
> > > >
> > > > -       vring_unmap_one_split(vq, i);
> > > > +       vring_unmap_one_split(vq, i, &sg);
> > > >         vq->split.desc_extra[i].next = vq->free_head;
> > > >         vq->free_head = head;
> > > >
> > > > @@ -794,7 +817,7 @@ static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head,
> > > >
> > > >                 /* Free the indirect table, if any, now that it's unmapped. */
> > > >                 if (!indir_desc)
> > > > -                       return;
> > > > +                       goto end;
> > > >
> > > >                 len = vq->split.desc_extra[head].len;
> > > >
> > > > @@ -802,9 +825,10 @@ static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head,
> > > >                                 VRING_DESC_F_INDIRECT));
> > > >                 BUG_ON(len == 0 || len % sizeof(struct vring_desc));
> > > >
> > > > -               if (vq->do_unmap) {
> > > > +               if (vq->do_unmap || sg) {
> > > >                         for (j = 0; j < len / sizeof(struct vring_desc); j++)
> > > > -                               vring_unmap_one_split_indirect(vq, &indir_desc[j]);
> > > > +                               vring_unmap_one_split_indirect(vq, &indir_desc[j], &sg);
> > > > +
> > > >                 }
> > > >
> > > >                 kfree(indir_desc);
> > > > @@ -812,6 +836,11 @@ static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head,
> > > >         } else if (ctx) {
> > > >                 *ctx = vq->split.desc_state[head].indir_desc;
> > > >         }
> > > > +
> > > > +end:
> > > > +       /* sg point to the next. So we mark the last one as the end. */
> > > > +       if (!vq->do_unmap && sg)
> > > > +               sg_mark_end(sg - 1);
> > > >  }
> > > >
> > > >  static bool more_used_split(const struct vring_virtqueue *vq)
> > > > @@ -822,6 +851,7 @@ static bool more_used_split(const struct vring_virtqueue *vq)
> > > >
> > > >  static void *virtqueue_get_buf_ctx_split(struct virtqueue *_vq,
> > > >                                          unsigned int *len,
> > > > +                                        struct scatterlist *sg,
> > > >                                          void **ctx)
> > > >  {
> > > >         struct vring_virtqueue *vq = to_vvq(_vq);
> > > > @@ -862,7 +892,7 @@ static void *virtqueue_get_buf_ctx_split(struct virtqueue *_vq,
> > > >
> > > >         /* detach_buf_split clears data, so grab it now. */
> > > >         ret = vq->split.desc_state[i].data;
> > > > -       detach_buf_split(vq, i, ctx);
> > > > +       detach_buf_split(vq, i, sg, ctx);
> > > >         vq->last_used_idx++;
> > > >         /* If we expect an interrupt for the next entry, tell host
> > > >          * by writing event index and flush out the write before
> > > > @@ -984,7 +1014,7 @@ static void *virtqueue_detach_unused_buf_split(struct virtqueue *_vq)
> > > >                         continue;
> > > >                 /* detach_buf_split clears data, so grab it now. */
> > > >                 buf = vq->split.desc_state[i].data;
> > > > -               detach_buf_split(vq, i, NULL);
> > > > +               detach_buf_split(vq, i, NULL, NULL);
> > > >                 vq->split.avail_idx_shadow--;
> > > >                 vq->split.vring.avail->idx = cpu_to_virtio16(_vq->vdev,
> > > >                                 vq->split.avail_idx_shadow);
> > > > @@ -1221,7 +1251,8 @@ static u16 packed_last_used(u16 last_used_idx)
> > > >  }
> > > >
> > > >  static void vring_unmap_extra_packed(const struct vring_virtqueue *vq,
> > > > -                                    const struct vring_desc_extra *extra)
> > > > +                                    const struct vring_desc_extra *extra,
> > > > +                                    struct scatterlist **sg)
> > > >  {
> > > >         u16 flags;
> > > >
> > > > @@ -1236,8 +1267,11 @@ static void vring_unmap_extra_packed(const struct vring_virtqueue *vq,
> > > >                                  (flags & VRING_DESC_F_WRITE) ?
> > > >                                  DMA_FROM_DEVICE : DMA_TO_DEVICE);
> > > >         } else {
> > > > -               if (!vq->do_unmap)
> > > > +               if (!vq->do_unmap) {
> > > > +                       if (*sg)
> > > > +                               store_dma_to_sg(sg, extra->addr, extra->len);
> > > >                         return;
> > > > +               }
> > > >
> > > >                 dma_unmap_page(vring_dma_dev(vq),
> > > >                                extra->addr, extra->len,
> > > > @@ -1247,12 +1281,17 @@ static void vring_unmap_extra_packed(const struct vring_virtqueue *vq,
> > > >  }
> > > >
> > > >  static void vring_unmap_desc_packed(const struct vring_virtqueue *vq,
> > > > -                                   const struct vring_packed_desc *desc)
> > > > +                                   const struct vring_packed_desc *desc,
> > > > +                                   struct scatterlist **sg)
> > > >  {
> > >
> > > Interesting, I think this is only needed for indirect descriptors?
> > >
> > > If yes, why do we care about the dma addresses of indirect descriptors?
> > > If not, it's a bug that we should use desc_extra, otherwise it's a
> > > device-triggerable unmap which has security implications (we need to
> > > use vring_extra in this case).
> >
> > Sorry, I do not get.
> >
> >                             indirect desc(alloc by virtio core)
> >        virtio-desc ------> |  | -------------------------------------> dma address of buffer
> >                            |  | -------------------------------------> dma address of buffer
> >                            |  | -------------------------------------> dma address of buffer
> >                            |  | -------------------------------------> dma address of buffer
> >                            |  | -------------------------------------> dma address of buffer
> >                            |  | -------------------------------------> dma address of buffer
> >
> > For vring_unmap_desc_packed the desc is the indirect desc(alloc by virtio core,
> > not the virtio desc), which record the dma address of buffer that is passed by
> > the driver. Here we need to record the dma address to back to the driver.
>
> I meant why should we return this to the driver? It is allocated by
> the virtio core and should be transparent to the driver.

Yes.

So here, we collect the buffer dma address not the address of the indirect desc
allocated by the virtio core.

I also think you miss something.

	static void detach_buf_packed(struct vring_virtqueue *vq,
				      unsigned int id, void **ctx)
	{
		struct vring_desc_state_packed *state = NULL;
		struct vring_packed_desc *desc;
		unsigned int i, curr;

		state = &vq->packed.desc_state[id];

		/* Clear data ptr. */
		state->data = NULL;

		vq->packed.desc_extra[state->last].next = vq->free_head;
		vq->free_head = id;
		vq->vq.num_free += state->num;

		if (unlikely(vq->do_unmap)) {
			curr = id;
			for (i = 0; i < state->num; i++) {
				vring_unmap_extra_packed(vq,
							 &vq->packed.desc_extra[curr]);
				curr = vq->packed.desc_extra[curr].next;
			}
		}

		if (vq->indirect) {
			u32 len;

			/* Free the indirect table, if any, now that it's unmapped. */
			desc = state->indir_desc;
			if (!desc)
				return;

			if (vq->do_unmap) {
				len = vq->packed.desc_extra[id].len;
				for (i = 0; i < len / sizeof(struct vring_packed_desc);
						i++)
-->					vring_unmap_desc_packed(vq, &desc[i]);
			}
			kfree(desc);
			state->indir_desc = NULL;
		} else if (ctx) {
			*ctx = state->indir_desc;
		}
	}

Thanks.

>
> Thanks
>

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [RFC 1/4] virtio_ring: introduce virtqueue_get_buf_ctx_dma()
  2023-11-22  6:29         ` Xuan Zhuo
@ 2023-11-22  6:47           ` Jason Wang
  2023-11-22  6:51             ` Xuan Zhuo
  0 siblings, 1 reply; 19+ messages in thread
From: Jason Wang @ 2023-11-22  6:47 UTC (permalink / raw)
  To: Xuan Zhuo; +Cc: virtualization, Michael S . Tsirkin

On Wed, Nov 22, 2023 at 2:34 PM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
>
> On Wed, 22 Nov 2023 14:03:32 +0800, Jason Wang <jasowang@redhat.com> wrote:
> > On Tue, Nov 21, 2023 at 2:27 PM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> > >
> > > On Thu, 16 Nov 2023 16:11:11 +0800, Jason Wang <jasowang@redhat.com> wrote:
> > > > On Tue, Nov 14, 2023 at 7:31 PM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> > > > >
> > > > > introduce virtqueue_get_buf_ctx_dma() to collect the dma info when
> > > > > get buf from virtio core for premapped mode.
> > > > >
> > > > > If the virtio queue is premapped mode, the virtio-net send buf may
> > > > > have many desc. Every desc dma address need to be unmap. So here we
> > > > > introduce a new helper to collect the dma address of the buffer from
> > > > > the virtio core.
> > > >
> > > > So looking at vring_desc_extra, what we have right now is:
> > > >
> > > > struct vring_desc_extra {
> > > >         dma_addr_t addr;                /* Descriptor DMA addr. */
> > > >         u32 len;                        /* Descriptor length. */
> > > >         u16 flags;                      /* Descriptor flags. */
> > > >         u16 next;                       /* The next desc state in a list. */
> > > > };
> > > >
> > > > And sg is
> > > >
> > > > struct scatterlist {
> > > >         unsigned long   page_link;
> > > > unsigned int    offset;
> > > >         unsigned int    length;
> > > >         dma_addr_t      dma_address;
> > > > #ifdef CONFIG_NEED_SG_DMA_LENGTH
> > > >         unsigned int    dma_length;
> > > > #endif
> > > > #ifdef CONFIG_NEED_SG_DMA_FLAGS
> > > >         unsigned int    dma_flags;
> > > > #endif
> > > > };
> > > >
> > > > Would it better just store sg?
> > >
> > > Do you mean we expose the vring_desc_extra to dirver?
> >
> > Maybe or not, (or just us sg for desc_extra).
> >
> > Anyhow, any method needs to be benchmarked to see the performance impact.
> >
> > >
> > > How about introducing such a new structure?
> > >
> > > struct virtio_dma_item {
> > >          dma_addr_t addr;
> > >          u32 len;
> > > };
> > >
> > > struct virtio_dma_head {
> > >         u32 num;
> > >         u32 used;
> > >         struct virtio_dma_item items[];
> > > };
> > >
> > > Then we just need to pass one pointer to the virtio.
> >
> > Just to make sure I understand here, where did we store those arrays?
>
>
> On the stack, or we can reuse the memory space of sq->sg.
>
> dma = (struct virtio_dma_head*)sq->sg;

Ok, we need some benchmark for sure.

>
>
> >
> > > num is used to pass the size of items
> > > used is used to return the num used by virtio core.
> > >
> > > >
> > > > More below
> > > >
> > > > >
> > > > > Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
> > > > > ---
> > > > >  drivers/virtio/virtio_ring.c | 148 ++++++++++++++++++++++++++---------
> > > > >  include/linux/virtio.h       |   2 +
> > > > >  2 files changed, 115 insertions(+), 35 deletions(-)
> > > > >
> > > > > diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
> > > > > index 51d8f3299c10..0b3caee4ef9d 100644
> > > > > --- a/drivers/virtio/virtio_ring.c
> > > > > +++ b/drivers/virtio/virtio_ring.c
> > > > > @@ -362,6 +362,20 @@ static struct device *vring_dma_dev(const struct vring_virtqueue *vq)
> > > > >         return vq->dma_dev;
> > > > >  }
> > > > >
> > > > > +static void store_dma_to_sg(struct scatterlist **sgp, dma_addr_t addr, unsigned int length)
> > > > > +{
> > > > > +       struct scatterlist *sg;
> > > > > +
> > > > > +       sg = *sgp;
> > > > > +
> > > > > +       sg->dma_address = addr;
> > > > > +       sg->length = length;
> > > > > +
> > > > > +       sg = sg_next(sg);
> > > > > +
> > > > > +       *sgp = sg;
> > > > > +}
> > > > > +
> > > > >  /* Map one sg entry. */
> > > > >  static int vring_map_one_sg(const struct vring_virtqueue *vq, struct scatterlist *sg,
> > > > >                             enum dma_data_direction direction, dma_addr_t *addr)
> > > > > @@ -441,12 +455,18 @@ static void virtqueue_init(struct vring_virtqueue *vq, u32 num)
> > > > >   */
> > > > >
> > > > >  static void vring_unmap_one_split_indirect(const struct vring_virtqueue *vq,
> > > > > -                                          const struct vring_desc *desc)
> > > > > +                                          const struct vring_desc *desc,
> > > > > +                                          struct scatterlist **sg)
> > > > >  {
> > > > >         u16 flags;
> > > > >
> > > > > -       if (!vq->do_unmap)
> > > > > +       if (!vq->do_unmap) {
> > > > > +               if (*sg)
> > > >
> > > > Can we simply move the
> > > >
> > > > if (*sg) to store_dma_to_sg()?
> > >
> > > OK
> > >
> > >
> > > >
> > > > > +                       store_dma_to_sg(sg,
> > > > > +                                       virtio64_to_cpu(vq->vq.vdev, desc->addr),
> > > > > +                                       virtio32_to_cpu(vq->vq.vdev, desc->len));
> > > > >                 return;
> > > > > +       }
> > > > >
> > > > >         flags = virtio16_to_cpu(vq->vq.vdev, desc->flags);
> > > > >
> > > > > @@ -458,7 +478,7 @@ static void vring_unmap_one_split_indirect(const struct vring_virtqueue *vq,
> > > > >  }
> > > > >
> > > > >  static unsigned int vring_unmap_one_split(const struct vring_virtqueue *vq,
> > > > > -                                         unsigned int i)
> > > > > +                                         unsigned int i, struct scatterlist **sg)
> > > > >  {
> > > > >         struct vring_desc_extra *extra = vq->split.desc_extra;
> > > > >         u16 flags;
> > > > > @@ -475,8 +495,11 @@ static unsigned int vring_unmap_one_split(const struct vring_virtqueue *vq,
> > > > >                                  (flags & VRING_DESC_F_WRITE) ?
> > > > >                                  DMA_FROM_DEVICE : DMA_TO_DEVICE);
> > > > >         } else {
> > > > > -               if (!vq->do_unmap)
> > > > > +               if (!vq->do_unmap) {
> > > >
> > > > The:
> > > >
> > > > else {
> > > >       if {
> > > >             if {
> > > > }
> > > >
> > > > seems odd. I think I would prefer
> > > >
> > > > if (flags & VRING_DESC_F_INDIRECT) {
> > > > } else if (!vq->do_unmap) {
> > > > } else {
> > > > }
> > >
> > >
> > > Will fix.
> > >
> > >
> > > >
> > > > here
> > > >
> > > > Btw, I really think do_unmap is not a good name, we probably need to
> > > > rename it as "unmap_desc".
> > >
> > > How about a separate patch for this?
> > >
> > > >
> > > >
> > > > > +                       if (*sg)
> > > > > +                               store_dma_to_sg(sg, extra[i].addr, extra[i].len);
> > > >
> > > > In which case we need to unmap by driver but we don't need a dma address?
> > >
> > > Sorry, do not get it.
> >
> > I meant, I don't get how we can reach to this check.
>
> This is the main way. I think you miss something.

Or maybe you can explain the case where vq->do_unmap is false but *sg is NULL?

>
>
> >
> > >
> > > >
> > > > >                         goto out;
> > > > > +               }
> > > > >
> > > > >                 dma_unmap_page(vring_dma_dev(vq),
> > > > >                                extra[i].addr,
> > > > > @@ -717,10 +740,10 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,
> > > > >                 if (i == err_idx)
> > > > >                         break;
> > > > >                 if (indirect) {
> > > > > -                       vring_unmap_one_split_indirect(vq, &desc[i]);
> > > > > +                       vring_unmap_one_split_indirect(vq, &desc[i], NULL);
> > > > >                         i = virtio16_to_cpu(_vq->vdev, desc[i].next);
> > > > >                 } else
> > > > > -                       i = vring_unmap_one_split(vq, i);
> > > > > +                       i = vring_unmap_one_split(vq, i, NULL);
> > > > >         }
> > > > >
> > > > >  free_indirect:
> > > > > @@ -763,7 +786,7 @@ static bool virtqueue_kick_prepare_split(struct virtqueue *_vq)
> > > > >  }
> > > > >
> > > > >  static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head,
> > > > > -                            void **ctx)
> > > > > +                            struct scatterlist *sg, void **ctx)
> > > > >  {
> > > > >         unsigned int i, j;
> > > > >         __virtio16 nextflag = cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_NEXT);
> > > > > @@ -775,12 +798,12 @@ static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head,
> > > > >         i = head;
> > > > >
> > > > >         while (vq->split.vring.desc[i].flags & nextflag) {
> > > > > -               vring_unmap_one_split(vq, i);
> > > > > +               vring_unmap_one_split(vq, i, &sg);
> > > > >                 i = vq->split.desc_extra[i].next;
> > > > >                 vq->vq.num_free++;
> > > > >         }
> > > > >
> > > > > -       vring_unmap_one_split(vq, i);
> > > > > +       vring_unmap_one_split(vq, i, &sg);
> > > > >         vq->split.desc_extra[i].next = vq->free_head;
> > > > >         vq->free_head = head;
> > > > >
> > > > > @@ -794,7 +817,7 @@ static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head,
> > > > >
> > > > >                 /* Free the indirect table, if any, now that it's unmapped. */
> > > > >                 if (!indir_desc)
> > > > > -                       return;
> > > > > +                       goto end;
> > > > >
> > > > >                 len = vq->split.desc_extra[head].len;
> > > > >
> > > > > @@ -802,9 +825,10 @@ static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head,
> > > > >                                 VRING_DESC_F_INDIRECT));
> > > > >                 BUG_ON(len == 0 || len % sizeof(struct vring_desc));
> > > > >
> > > > > -               if (vq->do_unmap) {
> > > > > +               if (vq->do_unmap || sg) {
> > > > >                         for (j = 0; j < len / sizeof(struct vring_desc); j++)
> > > > > -                               vring_unmap_one_split_indirect(vq, &indir_desc[j]);
> > > > > +                               vring_unmap_one_split_indirect(vq, &indir_desc[j], &sg);
> > > > > +
> > > > >                 }
> > > > >
> > > > >                 kfree(indir_desc);
> > > > > @@ -812,6 +836,11 @@ static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head,
> > > > >         } else if (ctx) {
> > > > >                 *ctx = vq->split.desc_state[head].indir_desc;
> > > > >         }
> > > > > +
> > > > > +end:
> > > > > +       /* sg point to the next. So we mark the last one as the end. */
> > > > > +       if (!vq->do_unmap && sg)
> > > > > +               sg_mark_end(sg - 1);
> > > > >  }
> > > > >
> > > > >  static bool more_used_split(const struct vring_virtqueue *vq)
> > > > > @@ -822,6 +851,7 @@ static bool more_used_split(const struct vring_virtqueue *vq)
> > > > >
> > > > >  static void *virtqueue_get_buf_ctx_split(struct virtqueue *_vq,
> > > > >                                          unsigned int *len,
> > > > > +                                        struct scatterlist *sg,
> > > > >                                          void **ctx)
> > > > >  {
> > > > >         struct vring_virtqueue *vq = to_vvq(_vq);
> > > > > @@ -862,7 +892,7 @@ static void *virtqueue_get_buf_ctx_split(struct virtqueue *_vq,
> > > > >
> > > > >         /* detach_buf_split clears data, so grab it now. */
> > > > >         ret = vq->split.desc_state[i].data;
> > > > > -       detach_buf_split(vq, i, ctx);
> > > > > +       detach_buf_split(vq, i, sg, ctx);
> > > > >         vq->last_used_idx++;
> > > > >         /* If we expect an interrupt for the next entry, tell host
> > > > >          * by writing event index and flush out the write before
> > > > > @@ -984,7 +1014,7 @@ static void *virtqueue_detach_unused_buf_split(struct virtqueue *_vq)
> > > > >                         continue;
> > > > >                 /* detach_buf_split clears data, so grab it now. */
> > > > >                 buf = vq->split.desc_state[i].data;
> > > > > -               detach_buf_split(vq, i, NULL);
> > > > > +               detach_buf_split(vq, i, NULL, NULL);
> > > > >                 vq->split.avail_idx_shadow--;
> > > > >                 vq->split.vring.avail->idx = cpu_to_virtio16(_vq->vdev,
> > > > >                                 vq->split.avail_idx_shadow);
> > > > > @@ -1221,7 +1251,8 @@ static u16 packed_last_used(u16 last_used_idx)
> > > > >  }
> > > > >
> > > > >  static void vring_unmap_extra_packed(const struct vring_virtqueue *vq,
> > > > > -                                    const struct vring_desc_extra *extra)
> > > > > +                                    const struct vring_desc_extra *extra,
> > > > > +                                    struct scatterlist **sg)
> > > > >  {
> > > > >         u16 flags;
> > > > >
> > > > > @@ -1236,8 +1267,11 @@ static void vring_unmap_extra_packed(const struct vring_virtqueue *vq,
> > > > >                                  (flags & VRING_DESC_F_WRITE) ?
> > > > >                                  DMA_FROM_DEVICE : DMA_TO_DEVICE);
> > > > >         } else {
> > > > > -               if (!vq->do_unmap)
> > > > > +               if (!vq->do_unmap) {
> > > > > +                       if (*sg)
> > > > > +                               store_dma_to_sg(sg, extra->addr, extra->len);
> > > > >                         return;
> > > > > +               }
> > > > >
> > > > >                 dma_unmap_page(vring_dma_dev(vq),
> > > > >                                extra->addr, extra->len,
> > > > > @@ -1247,12 +1281,17 @@ static void vring_unmap_extra_packed(const struct vring_virtqueue *vq,
> > > > >  }
> > > > >
> > > > >  static void vring_unmap_desc_packed(const struct vring_virtqueue *vq,
> > > > > -                                   const struct vring_packed_desc *desc)
> > > > > +                                   const struct vring_packed_desc *desc,
> > > > > +                                   struct scatterlist **sg)
> > > > >  {
> > > >
> > > > Interesting, I think this is only needed for indirect descriptors?
> > > >
> > > > If yes, why do we care about the dma addresses of indirect descriptors?
> > > > If not, it's a bug that we should use desc_extra, otherwise it's a
> > > > device-triggerable unmap which has security implications (we need to
> > > > use vring_extra in this case).
> > >
> > > Sorry, I do not get.
> > >
> > >                             indirect desc(alloc by virtio core)
> > >        virtio-desc ------> |  | -------------------------------------> dma address of buffer
> > >                            |  | -------------------------------------> dma address of buffer
> > >                            |  | -------------------------------------> dma address of buffer
> > >                            |  | -------------------------------------> dma address of buffer
> > >                            |  | -------------------------------------> dma address of buffer
> > >                            |  | -------------------------------------> dma address of buffer
> > >
> > > For vring_unmap_desc_packed the desc is the indirect desc(alloc by virtio core,
> > > not the virtio desc), which record the dma address of buffer that is passed by
> > > the driver. Here we need to record the dma address to back to the driver.
> >
> > I meant why should we return this to the driver? It is allocated by
> > the virtio core and should be transparent to the driver.
>
> Yes.
>
> So here, we collect the buffer dma address not the address of the indirect desc
> allocated by the virtio core.
>
> I also think you miss something.

Probably, I guess it works since it is only used for unmapping
indirect descriptors. So indirect descriptors use stream DMA mapping,
where we don't need vring_extra.

Renaming it to something like vring_unmap_packed_indirect() might be better.

Thanks

>
>         static void detach_buf_packed(struct vring_virtqueue *vq,
>                                       unsigned int id, void **ctx)
>         {
>                 struct vring_desc_state_packed *state = NULL;
>                 struct vring_packed_desc *desc;
>                 unsigned int i, curr;
>
>                 state = &vq->packed.desc_state[id];
>
>                 /* Clear data ptr. */
>                 state->data = NULL;
>
>                 vq->packed.desc_extra[state->last].next = vq->free_head;
>                 vq->free_head = id;
>                 vq->vq.num_free += state->num;
>
>                 if (unlikely(vq->do_unmap)) {
>                         curr = id;
>                         for (i = 0; i < state->num; i++) {
>                                 vring_unmap_extra_packed(vq,
>                                                          &vq->packed.desc_extra[curr]);
>                                 curr = vq->packed.desc_extra[curr].next;
>                         }
>                 }
>
>                 if (vq->indirect) {
>                         u32 len;
>
>                         /* Free the indirect table, if any, now that it's unmapped. */
>                         desc = state->indir_desc;
>                         if (!desc)
>                                 return;
>
>                         if (vq->do_unmap) {
>                                 len = vq->packed.desc_extra[id].len;
>                                 for (i = 0; i < len / sizeof(struct vring_packed_desc);
>                                                 i++)
> -->                                     vring_unmap_desc_packed(vq, &desc[i]);
>                         }
>                         kfree(desc);
>                         state->indir_desc = NULL;
>                 } else if (ctx) {
>                         *ctx = state->indir_desc;
>                 }
>         }
>
> Thanks.
>
> >
> > Thanks
> >
>


^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [RFC 1/4] virtio_ring: introduce virtqueue_get_buf_ctx_dma()
  2023-11-22  6:47           ` Jason Wang
@ 2023-11-22  6:51             ` Xuan Zhuo
  2023-11-24  4:14               ` Jason Wang
  0 siblings, 1 reply; 19+ messages in thread
From: Xuan Zhuo @ 2023-11-22  6:51 UTC (permalink / raw)
  To: Jason Wang; +Cc: virtualization, Michael S . Tsirkin

On Wed, 22 Nov 2023 14:47:02 +0800, Jason Wang <jasowang@redhat.com> wrote:
> On Wed, Nov 22, 2023 at 2:34 PM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> >
> > On Wed, 22 Nov 2023 14:03:32 +0800, Jason Wang <jasowang@redhat.com> wrote:
> > > On Tue, Nov 21, 2023 at 2:27 PM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> > > >
> > > > On Thu, 16 Nov 2023 16:11:11 +0800, Jason Wang <jasowang@redhat.com> wrote:
> > > > > On Tue, Nov 14, 2023 at 7:31 PM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> > > > > >
> > > > > > introduce virtqueue_get_buf_ctx_dma() to collect the dma info when
> > > > > > get buf from virtio core for premapped mode.
> > > > > >
> > > > > > If the virtio queue is premapped mode, the virtio-net send buf may
> > > > > > have many desc. Every desc dma address need to be unmap. So here we
> > > > > > introduce a new helper to collect the dma address of the buffer from
> > > > > > the virtio core.
> > > > >
> > > > > So looking at vring_desc_extra, what we have right now is:
> > > > >
> > > > > struct vring_desc_extra {
> > > > >         dma_addr_t addr;                /* Descriptor DMA addr. */
> > > > >         u32 len;                        /* Descriptor length. */
> > > > >         u16 flags;                      /* Descriptor flags. */
> > > > >         u16 next;                       /* The next desc state in a list. */
> > > > > };
> > > > >
> > > > > And sg is
> > > > >
> > > > > struct scatterlist {
> > > > >         unsigned long   page_link;
> > > > > unsigned int    offset;
> > > > >         unsigned int    length;
> > > > >         dma_addr_t      dma_address;
> > > > > #ifdef CONFIG_NEED_SG_DMA_LENGTH
> > > > >         unsigned int    dma_length;
> > > > > #endif
> > > > > #ifdef CONFIG_NEED_SG_DMA_FLAGS
> > > > >         unsigned int    dma_flags;
> > > > > #endif
> > > > > };
> > > > >
> > > > > Would it better just store sg?
> > > >
> > > > Do you mean we expose the vring_desc_extra to dirver?
> > >
> > > Maybe or not, (or just us sg for desc_extra).
> > >
> > > Anyhow, any method needs to be benchmarked to see the performance impact.
> > >
> > > >
> > > > How about introducing such a new structure?
> > > >
> > > > struct virtio_dma_item {
> > > >          dma_addr_t addr;
> > > >          u32 len;
> > > > };
> > > >
> > > > struct virtio_dma_head {
> > > >         u32 num;
> > > >         u32 used;
> > > >         struct virtio_dma_item items[];
> > > > };
> > > >
> > > > Then we just need to pass one pointer to the virtio.
> > >
> > > Just to make sure I understand here, where did we store those arrays?
> >
> >
> > On the stack, or we can reuse the memory space of sq->sg.
> >
> > dma = (struct virtio_dma_head*)sq->sg;
>
> Ok, we need some benchmark for sure.
>
> >
> >
> > >
> > > > num is used to pass the size of items
> > > > used is used to return the num used by virtio core.
> > > >
> > > > >
> > > > > More below
> > > > >
> > > > > >
> > > > > > Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
> > > > > > ---
> > > > > >  drivers/virtio/virtio_ring.c | 148 ++++++++++++++++++++++++++---------
> > > > > >  include/linux/virtio.h       |   2 +
> > > > > >  2 files changed, 115 insertions(+), 35 deletions(-)
> > > > > >
> > > > > > diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
> > > > > > index 51d8f3299c10..0b3caee4ef9d 100644
> > > > > > --- a/drivers/virtio/virtio_ring.c
> > > > > > +++ b/drivers/virtio/virtio_ring.c
> > > > > > @@ -362,6 +362,20 @@ static struct device *vring_dma_dev(const struct vring_virtqueue *vq)
> > > > > >         return vq->dma_dev;
> > > > > >  }
> > > > > >
> > > > > > +static void store_dma_to_sg(struct scatterlist **sgp, dma_addr_t addr, unsigned int length)
> > > > > > +{
> > > > > > +       struct scatterlist *sg;
> > > > > > +
> > > > > > +       sg = *sgp;
> > > > > > +
> > > > > > +       sg->dma_address = addr;
> > > > > > +       sg->length = length;
> > > > > > +
> > > > > > +       sg = sg_next(sg);
> > > > > > +
> > > > > > +       *sgp = sg;
> > > > > > +}
> > > > > > +
> > > > > >  /* Map one sg entry. */
> > > > > >  static int vring_map_one_sg(const struct vring_virtqueue *vq, struct scatterlist *sg,
> > > > > >                             enum dma_data_direction direction, dma_addr_t *addr)
> > > > > > @@ -441,12 +455,18 @@ static void virtqueue_init(struct vring_virtqueue *vq, u32 num)
> > > > > >   */
> > > > > >
> > > > > >  static void vring_unmap_one_split_indirect(const struct vring_virtqueue *vq,
> > > > > > -                                          const struct vring_desc *desc)
> > > > > > +                                          const struct vring_desc *desc,
> > > > > > +                                          struct scatterlist **sg)
> > > > > >  {
> > > > > >         u16 flags;
> > > > > >
> > > > > > -       if (!vq->do_unmap)
> > > > > > +       if (!vq->do_unmap) {
> > > > > > +               if (*sg)
> > > > >
> > > > > Can we simply move the
> > > > >
> > > > > if (*sg) to store_dma_to_sg()?
> > > >
> > > > OK
> > > >
> > > >
> > > > >
> > > > > > +                       store_dma_to_sg(sg,
> > > > > > +                                       virtio64_to_cpu(vq->vq.vdev, desc->addr),
> > > > > > +                                       virtio32_to_cpu(vq->vq.vdev, desc->len));
> > > > > >                 return;
> > > > > > +       }
> > > > > >
> > > > > >         flags = virtio16_to_cpu(vq->vq.vdev, desc->flags);
> > > > > >
> > > > > > @@ -458,7 +478,7 @@ static void vring_unmap_one_split_indirect(const struct vring_virtqueue *vq,
> > > > > >  }
> > > > > >
> > > > > >  static unsigned int vring_unmap_one_split(const struct vring_virtqueue *vq,
> > > > > > -                                         unsigned int i)
> > > > > > +                                         unsigned int i, struct scatterlist **sg)
> > > > > >  {
> > > > > >         struct vring_desc_extra *extra = vq->split.desc_extra;
> > > > > >         u16 flags;
> > > > > > @@ -475,8 +495,11 @@ static unsigned int vring_unmap_one_split(const struct vring_virtqueue *vq,
> > > > > >                                  (flags & VRING_DESC_F_WRITE) ?
> > > > > >                                  DMA_FROM_DEVICE : DMA_TO_DEVICE);
> > > > > >         } else {
> > > > > > -               if (!vq->do_unmap)
> > > > > > +               if (!vq->do_unmap) {
> > > > >
> > > > > The:
> > > > >
> > > > > else {
> > > > >       if {
> > > > >             if {
> > > > > }
> > > > >
> > > > > seems odd. I think I would prefer
> > > > >
> > > > > if (flags & VRING_DESC_F_INDIRECT) {
> > > > > } else if (!vq->do_unmap) {
> > > > > } else {
> > > > > }
> > > >
> > > >
> > > > Will fix.
> > > >
> > > >
> > > > >
> > > > > here
> > > > >
> > > > > Btw, I really think do_unmap is not a good name, we probably need to
> > > > > rename it as "unmap_desc".
> > > >
> > > > How about a separate patch for this?
> > > >
> > > > >
> > > > >
> > > > > > +                       if (*sg)
> > > > > > +                               store_dma_to_sg(sg, extra[i].addr, extra[i].len);
> > > > >
> > > > > In which case we need to unmap by driver but we don't need a dma address?
> > > >
> > > > Sorry, do not get it.
> > >
> > > I meant, I don't get how we can reach to this check.
> >
> > This is the main way. I think you miss something.
>
> Or maybe you can explain the case where vq->do_unmap is false but *sg is NULL?


If vq->do_unmap is false, that means premapped mode is in use or use_dma_api is false.
If the *sg is null, that means that the driver called the API
virtqueue_get_buf_ctx or virtqueue_get_buf.
If the *sg is not null, that means that the driver called the API

	void *virtqueue_get_buf_ctx_dma(struct virtqueue *_vq, unsigned int *len,
					struct scatterlist *sg, void **ctx);

Thanks.

>
> >
> >
> > >
> > > >
> > > > >
> > > > > >                         goto out;
> > > > > > +               }
> > > > > >
> > > > > >                 dma_unmap_page(vring_dma_dev(vq),
> > > > > >                                extra[i].addr,
> > > > > > @@ -717,10 +740,10 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,
> > > > > >                 if (i == err_idx)
> > > > > >                         break;
> > > > > >                 if (indirect) {
> > > > > > -                       vring_unmap_one_split_indirect(vq, &desc[i]);
> > > > > > +                       vring_unmap_one_split_indirect(vq, &desc[i], NULL);
> > > > > >                         i = virtio16_to_cpu(_vq->vdev, desc[i].next);
> > > > > >                 } else
> > > > > > -                       i = vring_unmap_one_split(vq, i);
> > > > > > +                       i = vring_unmap_one_split(vq, i, NULL);
> > > > > >         }
> > > > > >
> > > > > >  free_indirect:
> > > > > > @@ -763,7 +786,7 @@ static bool virtqueue_kick_prepare_split(struct virtqueue *_vq)
> > > > > >  }
> > > > > >
> > > > > >  static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head,
> > > > > > -                            void **ctx)
> > > > > > +                            struct scatterlist *sg, void **ctx)
> > > > > >  {
> > > > > >         unsigned int i, j;
> > > > > >         __virtio16 nextflag = cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_NEXT);
> > > > > > @@ -775,12 +798,12 @@ static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head,
> > > > > >         i = head;
> > > > > >
> > > > > >         while (vq->split.vring.desc[i].flags & nextflag) {
> > > > > > -               vring_unmap_one_split(vq, i);
> > > > > > +               vring_unmap_one_split(vq, i, &sg);
> > > > > >                 i = vq->split.desc_extra[i].next;
> > > > > >                 vq->vq.num_free++;
> > > > > >         }
> > > > > >
> > > > > > -       vring_unmap_one_split(vq, i);
> > > > > > +       vring_unmap_one_split(vq, i, &sg);
> > > > > >         vq->split.desc_extra[i].next = vq->free_head;
> > > > > >         vq->free_head = head;
> > > > > >
> > > > > > @@ -794,7 +817,7 @@ static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head,
> > > > > >
> > > > > >                 /* Free the indirect table, if any, now that it's unmapped. */
> > > > > >                 if (!indir_desc)
> > > > > > -                       return;
> > > > > > +                       goto end;
> > > > > >
> > > > > >                 len = vq->split.desc_extra[head].len;
> > > > > >
> > > > > > @@ -802,9 +825,10 @@ static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head,
> > > > > >                                 VRING_DESC_F_INDIRECT));
> > > > > >                 BUG_ON(len == 0 || len % sizeof(struct vring_desc));
> > > > > >
> > > > > > -               if (vq->do_unmap) {
> > > > > > +               if (vq->do_unmap || sg) {
> > > > > >                         for (j = 0; j < len / sizeof(struct vring_desc); j++)
> > > > > > -                               vring_unmap_one_split_indirect(vq, &indir_desc[j]);
> > > > > > +                               vring_unmap_one_split_indirect(vq, &indir_desc[j], &sg);
> > > > > > +
> > > > > >                 }
> > > > > >
> > > > > >                 kfree(indir_desc);
> > > > > > @@ -812,6 +836,11 @@ static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head,
> > > > > >         } else if (ctx) {
> > > > > >                 *ctx = vq->split.desc_state[head].indir_desc;
> > > > > >         }
> > > > > > +
> > > > > > +end:
> > > > > > +       /* sg point to the next. So we mark the last one as the end. */
> > > > > > +       if (!vq->do_unmap && sg)
> > > > > > +               sg_mark_end(sg - 1);
> > > > > >  }
> > > > > >
> > > > > >  static bool more_used_split(const struct vring_virtqueue *vq)
> > > > > > @@ -822,6 +851,7 @@ static bool more_used_split(const struct vring_virtqueue *vq)
> > > > > >
> > > > > >  static void *virtqueue_get_buf_ctx_split(struct virtqueue *_vq,
> > > > > >                                          unsigned int *len,
> > > > > > +                                        struct scatterlist *sg,
> > > > > >                                          void **ctx)
> > > > > >  {
> > > > > >         struct vring_virtqueue *vq = to_vvq(_vq);
> > > > > > @@ -862,7 +892,7 @@ static void *virtqueue_get_buf_ctx_split(struct virtqueue *_vq,
> > > > > >
> > > > > >         /* detach_buf_split clears data, so grab it now. */
> > > > > >         ret = vq->split.desc_state[i].data;
> > > > > > -       detach_buf_split(vq, i, ctx);
> > > > > > +       detach_buf_split(vq, i, sg, ctx);
> > > > > >         vq->last_used_idx++;
> > > > > >         /* If we expect an interrupt for the next entry, tell host
> > > > > >          * by writing event index and flush out the write before
> > > > > > @@ -984,7 +1014,7 @@ static void *virtqueue_detach_unused_buf_split(struct virtqueue *_vq)
> > > > > >                         continue;
> > > > > >                 /* detach_buf_split clears data, so grab it now. */
> > > > > >                 buf = vq->split.desc_state[i].data;
> > > > > > -               detach_buf_split(vq, i, NULL);
> > > > > > +               detach_buf_split(vq, i, NULL, NULL);
> > > > > >                 vq->split.avail_idx_shadow--;
> > > > > >                 vq->split.vring.avail->idx = cpu_to_virtio16(_vq->vdev,
> > > > > >                                 vq->split.avail_idx_shadow);
> > > > > > @@ -1221,7 +1251,8 @@ static u16 packed_last_used(u16 last_used_idx)
> > > > > >  }
> > > > > >
> > > > > >  static void vring_unmap_extra_packed(const struct vring_virtqueue *vq,
> > > > > > -                                    const struct vring_desc_extra *extra)
> > > > > > +                                    const struct vring_desc_extra *extra,
> > > > > > +                                    struct scatterlist **sg)
> > > > > >  {
> > > > > >         u16 flags;
> > > > > >
> > > > > > @@ -1236,8 +1267,11 @@ static void vring_unmap_extra_packed(const struct vring_virtqueue *vq,
> > > > > >                                  (flags & VRING_DESC_F_WRITE) ?
> > > > > >                                  DMA_FROM_DEVICE : DMA_TO_DEVICE);
> > > > > >         } else {
> > > > > > -               if (!vq->do_unmap)
> > > > > > +               if (!vq->do_unmap) {
> > > > > > +                       if (*sg)
> > > > > > +                               store_dma_to_sg(sg, extra->addr, extra->len);
> > > > > >                         return;
> > > > > > +               }
> > > > > >
> > > > > >                 dma_unmap_page(vring_dma_dev(vq),
> > > > > >                                extra->addr, extra->len,
> > > > > > @@ -1247,12 +1281,17 @@ static void vring_unmap_extra_packed(const struct vring_virtqueue *vq,
> > > > > >  }
> > > > > >
> > > > > >  static void vring_unmap_desc_packed(const struct vring_virtqueue *vq,
> > > > > > -                                   const struct vring_packed_desc *desc)
> > > > > > +                                   const struct vring_packed_desc *desc,
> > > > > > +                                   struct scatterlist **sg)
> > > > > >  {
> > > > >
> > > > > Interesting, I think this is only needed for indirect descriptors?
> > > > >
> > > > > If yes, why do we care about the dma addresses of indirect descriptors?
> > > > > If not, it's a bug that we should use desc_extra, otherwise it's a
> > > > > device-triggerable unmap which has security implications (we need to
> > > > > use vring_extra in this case).
> > > >
> > > > Sorry, I do not get.
> > > >
> > > >                             indirect desc(alloc by virtio core)
> > > >        virtio-desc ------> |  | -------------------------------------> dma address of buffer
> > > >                            |  | -------------------------------------> dma address of buffer
> > > >                            |  | -------------------------------------> dma address of buffer
> > > >                            |  | -------------------------------------> dma address of buffer
> > > >                            |  | -------------------------------------> dma address of buffer
> > > >                            |  | -------------------------------------> dma address of buffer
> > > >
> > > > For vring_unmap_desc_packed the desc is the indirect desc(alloc by virtio core,
> > > > not the virtio desc), which record the dma address of buffer that is passed by
> > > > the driver. Here we need to record the dma address to back to the driver.
> > >
> > > I meant why should we return this to the driver? It is allocated by
> > > the vitio core and should be transparent to the driver.
> >
> > Yes.
> >
> > So here, we collect the buffer dma address not the address of the indirect desc
> > allocated by the virtio core.
> >
> > I also think you miss something.
>
> Probably, I guess it works since it is only used for unmapping
> indirect descriptors. So indirect descriptors using stream DMA mapping
> where we don't need vring_extra.
>
> Renaming it to something like vring_unmap_packed_indirect() might be better.

YES.

Thanks.


>
> Thanks
>
> >
> >         static void detach_buf_packed(struct vring_virtqueue *vq,
> >                                       unsigned int id, void **ctx)
> >         {
> >                 struct vring_desc_state_packed *state = NULL;
> >                 struct vring_packed_desc *desc;
> >                 unsigned int i, curr;
> >
> >                 state = &vq->packed.desc_state[id];
> >
> >                 /* Clear data ptr. */
> >                 state->data = NULL;
> >
> >                 vq->packed.desc_extra[state->last].next = vq->free_head;
> >                 vq->free_head = id;
> >                 vq->vq.num_free += state->num;
> >
> >                 if (unlikely(vq->do_unmap)) {
> >                         curr = id;
> >                         for (i = 0; i < state->num; i++) {
> >                                 vring_unmap_extra_packed(vq,
> >                                                          &vq->packed.desc_extra[curr]);
> >                                 curr = vq->packed.desc_extra[curr].next;
> >                         }
> >                 }
> >
> >                 if (vq->indirect) {
> >                         u32 len;
> >
> >                         /* Free the indirect table, if any, now that it's unmapped. */
> >                         desc = state->indir_desc;
> >                         if (!desc)
> >                                 return;
> >
> >                         if (vq->do_unmap) {
> >                                 len = vq->packed.desc_extra[id].len;
> >                                 for (i = 0; i < len / sizeof(struct vring_packed_desc);
> >                                                 i++)
> > -->                                     vring_unmap_desc_packed(vq, &desc[i]);
> >                         }
> >                         kfree(desc);
> >                         state->indir_desc = NULL;
> >                 } else if (ctx) {
> >                         *ctx = state->indir_desc;
> >                 }
> >         }
> >
> > Thanks.
> >
> > >
> > > Thanks
> > >
> >
>

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [RFC 1/4] virtio_ring: introduce virtqueue_get_buf_ctx_dma()
  2023-11-22  6:51             ` Xuan Zhuo
@ 2023-11-24  4:14               ` Jason Wang
  2023-11-24  5:51                 ` Xuan Zhuo
  0 siblings, 1 reply; 19+ messages in thread
From: Jason Wang @ 2023-11-24  4:14 UTC (permalink / raw)
  To: Xuan Zhuo; +Cc: virtualization, Michael S . Tsirkin

On Wed, Nov 22, 2023 at 2:57 PM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
>
> On Wed, 22 Nov 2023 14:47:02 +0800, Jason Wang <jasowang@redhat.com> wrote:
> > On Wed, Nov 22, 2023 at 2:34 PM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> > >
> > > On Wed, 22 Nov 2023 14:03:32 +0800, Jason Wang <jasowang@redhat.com> wrote:
> > > > On Tue, Nov 21, 2023 at 2:27 PM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> > > > >
> > > > > On Thu, 16 Nov 2023 16:11:11 +0800, Jason Wang <jasowang@redhat.com> wrote:
> > > > > > On Tue, Nov 14, 2023 at 7:31 PM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> > > > > > >
> > > > > > > introduce virtqueue_get_buf_ctx_dma() to collect the dma info when
> > > > > > > get buf from virtio core for premapped mode.
> > > > > > >
> > > > > > > If the virtio queue is premapped mode, the virtio-net send buf may
> > > > > > > have many desc. Every desc dma address need to be unmap. So here we
> > > > > > > introduce a new helper to collect the dma address of the buffer from
> > > > > > > the virtio core.
> > > > > >
> > > > > > So looking at vring_desc_extra, what we have right now is:
> > > > > >
> > > > > > struct vring_desc_extra {
> > > > > >         dma_addr_t addr;                /* Descriptor DMA addr. */
> > > > > >         u32 len;                        /* Descriptor length. */
> > > > > >         u16 flags;                      /* Descriptor flags. */
> > > > > >         u16 next;                       /* The next desc state in a list. */
> > > > > > };
> > > > > >
> > > > > > And sg is
> > > > > >
> > > > > > struct scatterlist {
> > > > > >         unsigned long   page_link;
> > > > > > unsigned int    offset;
> > > > > >         unsigned int    length;
> > > > > >         dma_addr_t      dma_address;
> > > > > > #ifdef CONFIG_NEED_SG_DMA_LENGTH
> > > > > >         unsigned int    dma_length;
> > > > > > #endif
> > > > > > #ifdef CONFIG_NEED_SG_DMA_FLAGS
> > > > > >         unsigned int    dma_flags;
> > > > > > #endif
> > > > > > };
> > > > > >
> > > > > > Would it better just store sg?
> > > > >
> > > > > Do you mean we expose the vring_desc_extra to dirver?
> > > >
> > > > Maybe or not, (or just us sg for desc_extra).
> > > >
> > > > Anyhow, any method needs to be benchmarked to see the performance impact.
> > > >
> > > > >
> > > > > How about introducing such a new structure?
> > > > >
> > > > > struct virtio_dma_item {
> > > > >          dma_addr_t addr;
> > > > >          u32 len;
> > > > > };
> > > > >
> > > > > struct virtio_dma_head {
> > > > >         u32 num;
> > > > >         u32 used;
> > > > >         struct virtio_dma_item items[];
> > > > > };
> > > > >
> > > > > Then we just need to pass one pointer to the virtio.
> > > >
> > > > Just to make sure I understand here, where did we store those arrays?
> > >
> > >
> > > On the stack, or we can reuse the memory space of sq->sg.
> > >
> > > dma = (struct virtio_dma_head*)sq->sg;
> >
> > Ok, we need some benchmark for sure.
> >
> > >
> > >
> > > >
> > > > > num is used to pass the size of items
> > > > > used is used to return the num used by virtio core.
> > > > >
> > > > > >
> > > > > > More below
> > > > > >
> > > > > > >
> > > > > > > Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
> > > > > > > ---
> > > > > > >  drivers/virtio/virtio_ring.c | 148 ++++++++++++++++++++++++++---------
> > > > > > >  include/linux/virtio.h       |   2 +
> > > > > > >  2 files changed, 115 insertions(+), 35 deletions(-)
> > > > > > >
> > > > > > > diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
> > > > > > > index 51d8f3299c10..0b3caee4ef9d 100644
> > > > > > > --- a/drivers/virtio/virtio_ring.c
> > > > > > > +++ b/drivers/virtio/virtio_ring.c
> > > > > > > @@ -362,6 +362,20 @@ static struct device *vring_dma_dev(const struct vring_virtqueue *vq)
> > > > > > >         return vq->dma_dev;
> > > > > > >  }
> > > > > > >
> > > > > > > +static void store_dma_to_sg(struct scatterlist **sgp, dma_addr_t addr, unsigned int length)
> > > > > > > +{
> > > > > > > +       struct scatterlist *sg;
> > > > > > > +
> > > > > > > +       sg = *sgp;
> > > > > > > +
> > > > > > > +       sg->dma_address = addr;
> > > > > > > +       sg->length = length;
> > > > > > > +
> > > > > > > +       sg = sg_next(sg);
> > > > > > > +
> > > > > > > +       *sgp = sg;
> > > > > > > +}
> > > > > > > +
> > > > > > >  /* Map one sg entry. */
> > > > > > >  static int vring_map_one_sg(const struct vring_virtqueue *vq, struct scatterlist *sg,
> > > > > > >                             enum dma_data_direction direction, dma_addr_t *addr)
> > > > > > > @@ -441,12 +455,18 @@ static void virtqueue_init(struct vring_virtqueue *vq, u32 num)
> > > > > > >   */
> > > > > > >
> > > > > > >  static void vring_unmap_one_split_indirect(const struct vring_virtqueue *vq,
> > > > > > > -                                          const struct vring_desc *desc)
> > > > > > > +                                          const struct vring_desc *desc,
> > > > > > > +                                          struct scatterlist **sg)
> > > > > > >  {
> > > > > > >         u16 flags;
> > > > > > >
> > > > > > > -       if (!vq->do_unmap)
> > > > > > > +       if (!vq->do_unmap) {
> > > > > > > +               if (*sg)
> > > > > >
> > > > > > Can we simply move the
> > > > > >
> > > > > > if (*sg) to store_dma_to_sg()?
> > > > >
> > > > > OK
> > > > >
> > > > >
> > > > > >
> > > > > > > +                       store_dma_to_sg(sg,
> > > > > > > +                                       virtio64_to_cpu(vq->vq.vdev, desc->addr),
> > > > > > > +                                       virtio32_to_cpu(vq->vq.vdev, desc->len));
> > > > > > >                 return;
> > > > > > > +       }
> > > > > > >
> > > > > > >         flags = virtio16_to_cpu(vq->vq.vdev, desc->flags);
> > > > > > >
> > > > > > > @@ -458,7 +478,7 @@ static void vring_unmap_one_split_indirect(const struct vring_virtqueue *vq,
> > > > > > >  }
> > > > > > >
> > > > > > >  static unsigned int vring_unmap_one_split(const struct vring_virtqueue *vq,
> > > > > > > -                                         unsigned int i)
> > > > > > > +                                         unsigned int i, struct scatterlist **sg)
> > > > > > >  {
> > > > > > >         struct vring_desc_extra *extra = vq->split.desc_extra;
> > > > > > >         u16 flags;
> > > > > > > @@ -475,8 +495,11 @@ static unsigned int vring_unmap_one_split(const struct vring_virtqueue *vq,
> > > > > > >                                  (flags & VRING_DESC_F_WRITE) ?
> > > > > > >                                  DMA_FROM_DEVICE : DMA_TO_DEVICE);
> > > > > > >         } else {
> > > > > > > -               if (!vq->do_unmap)
> > > > > > > +               if (!vq->do_unmap) {
> > > > > >
> > > > > > The:
> > > > > >
> > > > > > else {
> > > > > >       if {
> > > > > >             if {
> > > > > > }
> > > > > >
> > > > > > seems odd. I think I would prefer
> > > > > >
> > > > > > if (flags & VRING_DESC_F_INDIRECT) {
> > > > > > } else if (!vq->do_unmap) {
> > > > > > } else {
> > > > > > }
> > > > >
> > > > >
> > > > > Will fix.
> > > > >
> > > > >
> > > > > >
> > > > > > here
> > > > > >
> > > > > > Btw, I really think do_unmap is not a good name, we probably need to
> > > > > > rename it as "unmap_desc".
> > > > >
> > > > > How about a separate patch for this?
> > > > >
> > > > > >
> > > > > >
> > > > > > > +                       if (*sg)
> > > > > > > +                               store_dma_to_sg(sg, extra[i].addr, extra[i].len);
> > > > > >
> > > > > > In which case we need to unmap by driver but we don't need a dma address?
> > > > >
> > > > > Sorry, do not get it.
> > > >
> > > > I meant, I don't get how we can reach to this check.
> > >
> > > This is the main way. I think you miss something.
> >
> > Or maybe you can explain the case where vq->do_unmap is false but *sg is NULL?
>
>
> If vq->do_unmap is false, that is premapped mode or use_dma_api is false.
> If the *sg is null, that means that the driver call the API
> virtqueue_get_buf_ctx or virtqueue_get_buf.
> If the *sg is not null, that means that the driver call the API
>
>         void *virtqueue_get_buf_ctx_dma(struct virtqueue *_vq, unsigned int *len,
>                                         struct scatterlist *sg, void **ctx);
>
> Thanks.

Ok, but my question still stands:

1) you explained that one possible way to reach here is that use_dma_api is
false and the driver doesn't call virtqueue_get_buf_ctx_dma().
2) what happens if the driver uses premapped mode but doesn't use
virtqueue_get_buf_ctx_dma()? Should we warn in this case?

Thanks


^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [RFC 1/4] virtio_ring: introduce virtqueue_get_buf_ctx_dma()
  2023-11-24  4:14               ` Jason Wang
@ 2023-11-24  5:51                 ` Xuan Zhuo
  2023-11-24  6:00                   ` Jason Wang
  0 siblings, 1 reply; 19+ messages in thread
From: Xuan Zhuo @ 2023-11-24  5:51 UTC (permalink / raw)
  To: Jason Wang; +Cc: virtualization, Michael S . Tsirkin

On Fri, 24 Nov 2023 12:14:57 +0800, Jason Wang <jasowang@redhat.com> wrote:
> On Wed, Nov 22, 2023 at 2:57 PM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> >
> > On Wed, 22 Nov 2023 14:47:02 +0800, Jason Wang <jasowang@redhat.com> wrote:
> > > On Wed, Nov 22, 2023 at 2:34 PM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> > > >
> > > > On Wed, 22 Nov 2023 14:03:32 +0800, Jason Wang <jasowang@redhat.com> wrote:
> > > > > On Tue, Nov 21, 2023 at 2:27 PM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> > > > > >
> > > > > > On Thu, 16 Nov 2023 16:11:11 +0800, Jason Wang <jasowang@redhat.com> wrote:
> > > > > > > On Tue, Nov 14, 2023 at 7:31 PM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> > > > > > > >
> > > > > > > > introduce virtqueue_get_buf_ctx_dma() to collect the dma info when
> > > > > > > > get buf from virtio core for premapped mode.
> > > > > > > >
> > > > > > > > If the virtio queue is premapped mode, the virtio-net send buf may
> > > > > > > > have many desc. Every desc dma address need to be unmap. So here we
> > > > > > > > introduce a new helper to collect the dma address of the buffer from
> > > > > > > > the virtio core.
> > > > > > >
> > > > > > > So looking at vring_desc_extra, what we have right now is:
> > > > > > >
> > > > > > > struct vring_desc_extra {
> > > > > > >         dma_addr_t addr;                /* Descriptor DMA addr. */
> > > > > > >         u32 len;                        /* Descriptor length. */
> > > > > > >         u16 flags;                      /* Descriptor flags. */
> > > > > > >         u16 next;                       /* The next desc state in a list. */
> > > > > > > };
> > > > > > >
> > > > > > > And sg is
> > > > > > >
> > > > > > > struct scatterlist {
> > > > > > >         unsigned long   page_link;
> > > > > > > unsigned int    offset;
> > > > > > >         unsigned int    length;
> > > > > > >         dma_addr_t      dma_address;
> > > > > > > #ifdef CONFIG_NEED_SG_DMA_LENGTH
> > > > > > >         unsigned int    dma_length;
> > > > > > > #endif
> > > > > > > #ifdef CONFIG_NEED_SG_DMA_FLAGS
> > > > > > >         unsigned int    dma_flags;
> > > > > > > #endif
> > > > > > > };
> > > > > > >
> > > > > > > Would it better just store sg?
> > > > > >
> > > > > > Do you mean we expose the vring_desc_extra to driver?
> > > > >
> > > > > Maybe or not, (or just use sg for desc_extra).
> > > > >
> > > > > Anyhow, any method needs to be benchmarked to see the performance impact.
> > > > >
> > > > > >
> > > > > > How about introducing such a new structure?
> > > > > >
> > > > > > struct virtio_dma_item {
> > > > > >          dma_addr_t addr;
> > > > > >          u32 len;
> > > > > > };
> > > > > >
> > > > > > struct virtio_dma_head {
> > > > > >         u32 num;
> > > > > >         u32 used;
> > > > > >         struct virtio_dma_item items[];
> > > > > > };
> > > > > >
> > > > > > Then we just need to pass one pointer to the virtio.
> > > > >
> > > > > Just to make sure I understand here, where did we store those arrays?
> > > >
> > > >
> > > > On the stack, or we can reuse the memory space of sq->sg.
> > > >
> > > > dma = (struct virtio_dma_head*)sq->sg;
> > >
> > > Ok, we need some benchmark for sure.
> > >
> > > >
> > > >
> > > > >
> > > > > > num is used to pass the size of items
> > > > > > used is used to return the num used by virtio core.
> > > > > >
> > > > > > >
> > > > > > > More below
> > > > > > >
> > > > > > > >
> > > > > > > > Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
> > > > > > > > ---
> > > > > > > >  drivers/virtio/virtio_ring.c | 148 ++++++++++++++++++++++++++---------
> > > > > > > >  include/linux/virtio.h       |   2 +
> > > > > > > >  2 files changed, 115 insertions(+), 35 deletions(-)
> > > > > > > >
> > > > > > > > diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
> > > > > > > > index 51d8f3299c10..0b3caee4ef9d 100644
> > > > > > > > --- a/drivers/virtio/virtio_ring.c
> > > > > > > > +++ b/drivers/virtio/virtio_ring.c
> > > > > > > > @@ -362,6 +362,20 @@ static struct device *vring_dma_dev(const struct vring_virtqueue *vq)
> > > > > > > >         return vq->dma_dev;
> > > > > > > >  }
> > > > > > > >
> > > > > > > > +static void store_dma_to_sg(struct scatterlist **sgp, dma_addr_t addr, unsigned int length)
> > > > > > > > +{
> > > > > > > > +       struct scatterlist *sg;
> > > > > > > > +
> > > > > > > > +       sg = *sgp;
> > > > > > > > +
> > > > > > > > +       sg->dma_address = addr;
> > > > > > > > +       sg->length = length;
> > > > > > > > +
> > > > > > > > +       sg = sg_next(sg);
> > > > > > > > +
> > > > > > > > +       *sgp = sg;
> > > > > > > > +}
> > > > > > > > +
> > > > > > > >  /* Map one sg entry. */
> > > > > > > >  static int vring_map_one_sg(const struct vring_virtqueue *vq, struct scatterlist *sg,
> > > > > > > >                             enum dma_data_direction direction, dma_addr_t *addr)
> > > > > > > > @@ -441,12 +455,18 @@ static void virtqueue_init(struct vring_virtqueue *vq, u32 num)
> > > > > > > >   */
> > > > > > > >
> > > > > > > >  static void vring_unmap_one_split_indirect(const struct vring_virtqueue *vq,
> > > > > > > > -                                          const struct vring_desc *desc)
> > > > > > > > +                                          const struct vring_desc *desc,
> > > > > > > > +                                          struct scatterlist **sg)
> > > > > > > >  {
> > > > > > > >         u16 flags;
> > > > > > > >
> > > > > > > > -       if (!vq->do_unmap)
> > > > > > > > +       if (!vq->do_unmap) {
> > > > > > > > +               if (*sg)
> > > > > > >
> > > > > > > Can we simply move the
> > > > > > >
> > > > > > > if (*sg) to store_dma_to_sg()?
> > > > > >
> > > > > > OK
> > > > > >
> > > > > >
> > > > > > >
> > > > > > > > +                       store_dma_to_sg(sg,
> > > > > > > > +                                       virtio64_to_cpu(vq->vq.vdev, desc->addr),
> > > > > > > > +                                       virtio32_to_cpu(vq->vq.vdev, desc->len));
> > > > > > > >                 return;
> > > > > > > > +       }
> > > > > > > >
> > > > > > > >         flags = virtio16_to_cpu(vq->vq.vdev, desc->flags);
> > > > > > > >
> > > > > > > > @@ -458,7 +478,7 @@ static void vring_unmap_one_split_indirect(const struct vring_virtqueue *vq,
> > > > > > > >  }
> > > > > > > >
> > > > > > > >  static unsigned int vring_unmap_one_split(const struct vring_virtqueue *vq,
> > > > > > > > -                                         unsigned int i)
> > > > > > > > +                                         unsigned int i, struct scatterlist **sg)
> > > > > > > >  {
> > > > > > > >         struct vring_desc_extra *extra = vq->split.desc_extra;
> > > > > > > >         u16 flags;
> > > > > > > > @@ -475,8 +495,11 @@ static unsigned int vring_unmap_one_split(const struct vring_virtqueue *vq,
> > > > > > > >                                  (flags & VRING_DESC_F_WRITE) ?
> > > > > > > >                                  DMA_FROM_DEVICE : DMA_TO_DEVICE);
> > > > > > > >         } else {
> > > > > > > > -               if (!vq->do_unmap)
> > > > > > > > +               if (!vq->do_unmap) {
> > > > > > >
> > > > > > > The:
> > > > > > >
> > > > > > > else {
> > > > > > >       if {
> > > > > > >             if {
> > > > > > > }
> > > > > > >
> > > > > > > seems odd. I think I would prefer
> > > > > > >
> > > > > > > if (flags & VRING_DESC_F_INDIRECT) {
> > > > > > > } else if (!vq->do_unmap) {
> > > > > > > } else {
> > > > > > > }
> > > > > >
> > > > > >
> > > > > > Will fix.
> > > > > >
> > > > > >
> > > > > > >
> > > > > > > here
> > > > > > >
> > > > > > > Btw, I really think do_unmap is not a good name, we probably need to
> > > > > > > rename it as "unmap_desc".
> > > > > >
> > > > > > How about a separate patch for this?
> > > > > >
> > > > > > >
> > > > > > >
> > > > > > > > +                       if (*sg)
> > > > > > > > +                               store_dma_to_sg(sg, extra[i].addr, extra[i].len);
> > > > > > >
> > > > > > > In which case we need to unmap by driver but we don't need a dma address?
> > > > > >
> > > > > > Sorry, do not get it.
> > > > >
> > > > > I meant, I don't get how we can reach to this check.
> > > >
> > > > This is the main way. I think you miss something.
> > >
> > > Or maybe you can explain the case where vq->do_unmap is false but *sg is NULL?
> >
> >
> > If vq->do_unmap is false, that is premapped mode or use_dma_api is false.
> > If the *sg is null, that means that the driver call the API
> > virtqueue_get_buf_ctx or virtqueue_get_buf.
> > If the *sg is not null, that means that the driver call the API
> >
> >         void *virtqueue_get_buf_ctx_dma(struct virtqueue *_vq, unsigned int *len,
> >                                         struct scatterlist *sg, void **ctx);
> >
> > Thanks.
>
> Ok, but my question still:
>
> 1) you explain one possible way here is use_dma_api is false and the
> driver doesn't call virtqueue_get_buf_ctx_dma().

If the use_dma_api is false, then we cannot enable premapped mode.
If the driver calls virtqueue_get_buf_ctx_dma(), that works as
virtqueue_get_buf_ctx().


> 2) what happens if the driver uses premapped mode but doesn't use
> virtqueue_get_buf_ctx_dma()? Should we warn in this case?

The developer should know that he needs to do the unmap, so I think
that is not needed.

Thanks.



>
> Thanks
>

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [RFC 1/4] virtio_ring: introduce virtqueue_get_buf_ctx_dma()
  2023-11-24  5:51                 ` Xuan Zhuo
@ 2023-11-24  6:00                   ` Jason Wang
  2023-11-24  6:03                     ` Xuan Zhuo
  2023-12-14  8:33                     ` Xuan Zhuo
  0 siblings, 2 replies; 19+ messages in thread
From: Jason Wang @ 2023-11-24  6:00 UTC (permalink / raw)
  To: Xuan Zhuo; +Cc: virtualization, Michael S . Tsirkin

On Fri, Nov 24, 2023 at 1:56 PM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
>
> On Fri, 24 Nov 2023 12:14:57 +0800, Jason Wang <jasowang@redhat.com> wrote:
> > On Wed, Nov 22, 2023 at 2:57 PM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> > >
> > > On Wed, 22 Nov 2023 14:47:02 +0800, Jason Wang <jasowang@redhat.com> wrote:
> > > > On Wed, Nov 22, 2023 at 2:34 PM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> > > > >
> > > > > On Wed, 22 Nov 2023 14:03:32 +0800, Jason Wang <jasowang@redhat.com> wrote:
> > > > > > On Tue, Nov 21, 2023 at 2:27 PM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> > > > > > >
> > > > > > > On Thu, 16 Nov 2023 16:11:11 +0800, Jason Wang <jasowang@redhat.com> wrote:
> > > > > > > > On Tue, Nov 14, 2023 at 7:31 PM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> > > > > > > > >
> > > > > > > > > introduce virtqueue_get_buf_ctx_dma() to collect the dma info when
> > > > > > > > > get buf from virtio core for premapped mode.
> > > > > > > > >
> > > > > > > > > If the virtio queue is premapped mode, the virtio-net send buf may
> > > > > > > > > have many desc. Every desc dma address need to be unmap. So here we
> > > > > > > > > introduce a new helper to collect the dma address of the buffer from
> > > > > > > > > the virtio core.
> > > > > > > >
> > > > > > > > So looking at vring_desc_extra, what we have right now is:
> > > > > > > >
> > > > > > > > struct vring_desc_extra {
> > > > > > > >         dma_addr_t addr;                /* Descriptor DMA addr. */
> > > > > > > >         u32 len;                        /* Descriptor length. */
> > > > > > > >         u16 flags;                      /* Descriptor flags. */
> > > > > > > >         u16 next;                       /* The next desc state in a list. */
> > > > > > > > };
> > > > > > > >
> > > > > > > > And sg is
> > > > > > > >
> > > > > > > > struct scatterlist {
> > > > > > > >         unsigned long   page_link;
> > > > > > > > unsigned int    offset;
> > > > > > > >         unsigned int    length;
> > > > > > > >         dma_addr_t      dma_address;
> > > > > > > > #ifdef CONFIG_NEED_SG_DMA_LENGTH
> > > > > > > >         unsigned int    dma_length;
> > > > > > > > #endif
> > > > > > > > #ifdef CONFIG_NEED_SG_DMA_FLAGS
> > > > > > > >         unsigned int    dma_flags;
> > > > > > > > #endif
> > > > > > > > };
> > > > > > > >
> > > > > > > > Would it better just store sg?
> > > > > > >
> > > > > > > Do you mean we expose the vring_desc_extra to driver?
> > > > > >
> > > > > > Maybe or not, (or just use sg for desc_extra).
> > > > > >
> > > > > > Anyhow, any method needs to be benchmarked to see the performance impact.
> > > > > >
> > > > > > >
> > > > > > > How about introducing such a new structure?
> > > > > > >
> > > > > > > struct virtio_dma_item {
> > > > > > >          dma_addr_t addr;
> > > > > > >          u32 len;
> > > > > > > };
> > > > > > >
> > > > > > > struct virtio_dma_head {
> > > > > > >         u32 num;
> > > > > > >         u32 used;
> > > > > > >         struct virtio_dma_item items[];
> > > > > > > };
> > > > > > >
> > > > > > > Then we just need to pass one pointer to the virtio.
> > > > > >
> > > > > > Just to make sure I understand here, where did we store those arrays?
> > > > >
> > > > >
> > > > > On the stack, or we can reuse the memory space of sq->sg.
> > > > >
> > > > > dma = (struct virtio_dma_head*)sq->sg;
> > > >
> > > > Ok, we need some benchmark for sure.
> > > >
> > > > >
> > > > >
> > > > > >
> > > > > > > num is used to pass the size of items
> > > > > > > used is used to return the num used by virtio core.
> > > > > > >
> > > > > > > >
> > > > > > > > More below
> > > > > > > >
> > > > > > > > >
> > > > > > > > > Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
> > > > > > > > > ---
> > > > > > > > >  drivers/virtio/virtio_ring.c | 148 ++++++++++++++++++++++++++---------
> > > > > > > > >  include/linux/virtio.h       |   2 +
> > > > > > > > >  2 files changed, 115 insertions(+), 35 deletions(-)
> > > > > > > > >
> > > > > > > > > diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
> > > > > > > > > index 51d8f3299c10..0b3caee4ef9d 100644
> > > > > > > > > --- a/drivers/virtio/virtio_ring.c
> > > > > > > > > +++ b/drivers/virtio/virtio_ring.c
> > > > > > > > > @@ -362,6 +362,20 @@ static struct device *vring_dma_dev(const struct vring_virtqueue *vq)
> > > > > > > > >         return vq->dma_dev;
> > > > > > > > >  }
> > > > > > > > >
> > > > > > > > > +static void store_dma_to_sg(struct scatterlist **sgp, dma_addr_t addr, unsigned int length)
> > > > > > > > > +{
> > > > > > > > > +       struct scatterlist *sg;
> > > > > > > > > +
> > > > > > > > > +       sg = *sgp;
> > > > > > > > > +
> > > > > > > > > +       sg->dma_address = addr;
> > > > > > > > > +       sg->length = length;
> > > > > > > > > +
> > > > > > > > > +       sg = sg_next(sg);
> > > > > > > > > +
> > > > > > > > > +       *sgp = sg;
> > > > > > > > > +}
> > > > > > > > > +
> > > > > > > > >  /* Map one sg entry. */
> > > > > > > > >  static int vring_map_one_sg(const struct vring_virtqueue *vq, struct scatterlist *sg,
> > > > > > > > >                             enum dma_data_direction direction, dma_addr_t *addr)
> > > > > > > > > @@ -441,12 +455,18 @@ static void virtqueue_init(struct vring_virtqueue *vq, u32 num)
> > > > > > > > >   */
> > > > > > > > >
> > > > > > > > >  static void vring_unmap_one_split_indirect(const struct vring_virtqueue *vq,
> > > > > > > > > -                                          const struct vring_desc *desc)
> > > > > > > > > +                                          const struct vring_desc *desc,
> > > > > > > > > +                                          struct scatterlist **sg)
> > > > > > > > >  {
> > > > > > > > >         u16 flags;
> > > > > > > > >
> > > > > > > > > -       if (!vq->do_unmap)
> > > > > > > > > +       if (!vq->do_unmap) {
> > > > > > > > > +               if (*sg)
> > > > > > > >
> > > > > > > > Can we simply move the
> > > > > > > >
> > > > > > > > if (*sg) to store_dma_to_sg()?
> > > > > > >
> > > > > > > OK
> > > > > > >
> > > > > > >
> > > > > > > >
> > > > > > > > > +                       store_dma_to_sg(sg,
> > > > > > > > > +                                       virtio64_to_cpu(vq->vq.vdev, desc->addr),
> > > > > > > > > +                                       virtio32_to_cpu(vq->vq.vdev, desc->len));
> > > > > > > > >                 return;
> > > > > > > > > +       }
> > > > > > > > >
> > > > > > > > >         flags = virtio16_to_cpu(vq->vq.vdev, desc->flags);
> > > > > > > > >
> > > > > > > > > @@ -458,7 +478,7 @@ static void vring_unmap_one_split_indirect(const struct vring_virtqueue *vq,
> > > > > > > > >  }
> > > > > > > > >
> > > > > > > > >  static unsigned int vring_unmap_one_split(const struct vring_virtqueue *vq,
> > > > > > > > > -                                         unsigned int i)
> > > > > > > > > +                                         unsigned int i, struct scatterlist **sg)
> > > > > > > > >  {
> > > > > > > > >         struct vring_desc_extra *extra = vq->split.desc_extra;
> > > > > > > > >         u16 flags;
> > > > > > > > > @@ -475,8 +495,11 @@ static unsigned int vring_unmap_one_split(const struct vring_virtqueue *vq,
> > > > > > > > >                                  (flags & VRING_DESC_F_WRITE) ?
> > > > > > > > >                                  DMA_FROM_DEVICE : DMA_TO_DEVICE);
> > > > > > > > >         } else {
> > > > > > > > > -               if (!vq->do_unmap)
> > > > > > > > > +               if (!vq->do_unmap) {
> > > > > > > >
> > > > > > > > The:
> > > > > > > >
> > > > > > > > else {
> > > > > > > >       if {
> > > > > > > >             if {
> > > > > > > > }
> > > > > > > >
> > > > > > > > seems odd. I think I would prefer
> > > > > > > >
> > > > > > > > if (flags & VRING_DESC_F_INDIRECT) {
> > > > > > > > } else if (!vq->do_unmap) {
> > > > > > > > } else {
> > > > > > > > }
> > > > > > >
> > > > > > >
> > > > > > > Will fix.
> > > > > > >
> > > > > > >
> > > > > > > >
> > > > > > > > here
> > > > > > > >
> > > > > > > > Btw, I really think do_unmap is not a good name, we probably need to
> > > > > > > > rename it as "unmap_desc".
> > > > > > >
> > > > > > > How about a separate patch for this?
> > > > > > >
> > > > > > > >
> > > > > > > >
> > > > > > > > > +                       if (*sg)
> > > > > > > > > +                               store_dma_to_sg(sg, extra[i].addr, extra[i].len);
> > > > > > > >
> > > > > > > > In which case we need to unmap by driver but we don't need a dma address?
> > > > > > >
> > > > > > > Sorry, do not get it.
> > > > > >
> > > > > > I meant, I don't get how we can reach to this check.
> > > > >
> > > > > This is the main way. I think you miss something.
> > > >
> > > > Or maybe you can explain the case where vq->do_unmap is false but *sg is NULL?
> > >
> > >
> > > If vq->do_unmap is false, that is premapped mode or use_dma_api is false.
> > > If the *sg is null, that means that the driver call the API
> > > virtqueue_get_buf_ctx or virtqueue_get_buf.
> > > If the *sg is not null, that means that the driver call the API
> > >
> > >         void *virtqueue_get_buf_ctx_dma(struct virtqueue *_vq, unsigned int *len,
> > >                                         struct scatterlist *sg, void **ctx);
> > >
> > > Thanks.
> >
> > Ok, but my question still:
> >
> > 1) you explain one possible way here is use_dma_api is false and the
> > driver doesn't call virtqueue_get_buf_ctx_dma().
>
> If the use_dma_api is false, then we cannot set to premapped mode.
> If the driver calls virtqueue_get_buf_ctx_dma(), that works as
> virtqueue_get_buf_ctx().
>
>
> > 2) what happens if the driver uses premapped mode but doesn't use
> > virtqueue_get_buf_ctx_dma()? Should we warn in this case?
>
> The developer should know that he need to do unmap, so I think
> that is not needed.

Well, that is kind of a layering violation. The driver may trust the
virtio core but not vice versa.

E.g there are several WARN_ON() in the virtio_ring.

Thanks

>
> Thanks.
>
>
>
> >
> > Thanks
> >
>


^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [RFC 1/4] virtio_ring: introduce virtqueue_get_buf_ctx_dma()
  2023-11-24  6:00                   ` Jason Wang
@ 2023-11-24  6:03                     ` Xuan Zhuo
  2023-12-14  8:33                     ` Xuan Zhuo
  1 sibling, 0 replies; 19+ messages in thread
From: Xuan Zhuo @ 2023-11-24  6:03 UTC (permalink / raw)
  To: Jason Wang; +Cc: virtualization, Michael S . Tsirkin

On Fri, 24 Nov 2023 14:00:51 +0800, Jason Wang <jasowang@redhat.com> wrote:
> On Fri, Nov 24, 2023 at 1:56 PM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> >
> > On Fri, 24 Nov 2023 12:14:57 +0800, Jason Wang <jasowang@redhat.com> wrote:
> > > On Wed, Nov 22, 2023 at 2:57 PM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> > > >
> > > > On Wed, 22 Nov 2023 14:47:02 +0800, Jason Wang <jasowang@redhat.com> wrote:
> > > > > On Wed, Nov 22, 2023 at 2:34 PM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> > > > > >
> > > > > > On Wed, 22 Nov 2023 14:03:32 +0800, Jason Wang <jasowang@redhat.com> wrote:
> > > > > > > On Tue, Nov 21, 2023 at 2:27 PM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> > > > > > > >
> > > > > > > > On Thu, 16 Nov 2023 16:11:11 +0800, Jason Wang <jasowang@redhat.com> wrote:
> > > > > > > > > On Tue, Nov 14, 2023 at 7:31 PM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> > > > > > > > > >
> > > > > > > > > > introduce virtqueue_get_buf_ctx_dma() to collect the dma info when
> > > > > > > > > > get buf from virtio core for premapped mode.
> > > > > > > > > >
> > > > > > > > > > If the virtio queue is premapped mode, the virtio-net send buf may
> > > > > > > > > > have many desc. Every desc dma address need to be unmap. So here we
> > > > > > > > > > introduce a new helper to collect the dma address of the buffer from
> > > > > > > > > > the virtio core.
> > > > > > > > >
> > > > > > > > > So looking at vring_desc_extra, what we have right now is:
> > > > > > > > >
> > > > > > > > > struct vring_desc_extra {
> > > > > > > > >         dma_addr_t addr;                /* Descriptor DMA addr. */
> > > > > > > > >         u32 len;                        /* Descriptor length. */
> > > > > > > > >         u16 flags;                      /* Descriptor flags. */
> > > > > > > > >         u16 next;                       /* The next desc state in a list. */
> > > > > > > > > };
> > > > > > > > >
> > > > > > > > > And sg is
> > > > > > > > >
> > > > > > > > > struct scatterlist {
> > > > > > > > >         unsigned long   page_link;
> > > > > > > > > unsigned int    offset;
> > > > > > > > >         unsigned int    length;
> > > > > > > > >         dma_addr_t      dma_address;
> > > > > > > > > #ifdef CONFIG_NEED_SG_DMA_LENGTH
> > > > > > > > >         unsigned int    dma_length;
> > > > > > > > > #endif
> > > > > > > > > #ifdef CONFIG_NEED_SG_DMA_FLAGS
> > > > > > > > >         unsigned int    dma_flags;
> > > > > > > > > #endif
> > > > > > > > > };
> > > > > > > > >
> > > > > > > > > Would it better just store sg?
> > > > > > > >
> > > > > > > > Do you mean we expose the vring_desc_extra to driver?
> > > > > > >
> > > > > > > Maybe or not, (or just use sg for desc_extra).
> > > > > > >
> > > > > > > Anyhow, any method needs to be benchmarked to see the performance impact.
> > > > > > >
> > > > > > > >
> > > > > > > > How about introducing such a new structure?
> > > > > > > >
> > > > > > > > struct virtio_dma_item {
> > > > > > > >          dma_addr_t addr;
> > > > > > > >          u32 len;
> > > > > > > > };
> > > > > > > >
> > > > > > > > struct virtio_dma_head {
> > > > > > > >         u32 num;
> > > > > > > >         u32 used;
> > > > > > > >         struct virtio_dma_item items[];
> > > > > > > > };
> > > > > > > >
> > > > > > > > Then we just need to pass one pointer to the virtio.
> > > > > > >
> > > > > > > Just to make sure I understand here, where did we store those arrays?
> > > > > >
> > > > > >
> > > > > > On the stack, or we can reuse the memory space of sq->sg.
> > > > > >
> > > > > > dma = (struct virtio_dma_head*)sq->sg;
> > > > >
> > > > > Ok, we need some benchmark for sure.
> > > > >
> > > > > >
> > > > > >
> > > > > > >
> > > > > > > > num is used to pass the size of items
> > > > > > > > used is used to return the num used by virtio core.
> > > > > > > >
> > > > > > > > >
> > > > > > > > > More below
> > > > > > > > >
> > > > > > > > > >
> > > > > > > > > > Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
> > > > > > > > > > ---
> > > > > > > > > >  drivers/virtio/virtio_ring.c | 148 ++++++++++++++++++++++++++---------
> > > > > > > > > >  include/linux/virtio.h       |   2 +
> > > > > > > > > >  2 files changed, 115 insertions(+), 35 deletions(-)
> > > > > > > > > >
> > > > > > > > > > diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
> > > > > > > > > > index 51d8f3299c10..0b3caee4ef9d 100644
> > > > > > > > > > --- a/drivers/virtio/virtio_ring.c
> > > > > > > > > > +++ b/drivers/virtio/virtio_ring.c
> > > > > > > > > > @@ -362,6 +362,20 @@ static struct device *vring_dma_dev(const struct vring_virtqueue *vq)
> > > > > > > > > >         return vq->dma_dev;
> > > > > > > > > >  }
> > > > > > > > > >
> > > > > > > > > > +static void store_dma_to_sg(struct scatterlist **sgp, dma_addr_t addr, unsigned int length)
> > > > > > > > > > +{
> > > > > > > > > > +       struct scatterlist *sg;
> > > > > > > > > > +
> > > > > > > > > > +       sg = *sgp;
> > > > > > > > > > +
> > > > > > > > > > +       sg->dma_address = addr;
> > > > > > > > > > +       sg->length = length;
> > > > > > > > > > +
> > > > > > > > > > +       sg = sg_next(sg);
> > > > > > > > > > +
> > > > > > > > > > +       *sgp = sg;
> > > > > > > > > > +}
> > > > > > > > > > +
> > > > > > > > > >  /* Map one sg entry. */
> > > > > > > > > >  static int vring_map_one_sg(const struct vring_virtqueue *vq, struct scatterlist *sg,
> > > > > > > > > >                             enum dma_data_direction direction, dma_addr_t *addr)
> > > > > > > > > > @@ -441,12 +455,18 @@ static void virtqueue_init(struct vring_virtqueue *vq, u32 num)
> > > > > > > > > >   */
> > > > > > > > > >
> > > > > > > > > >  static void vring_unmap_one_split_indirect(const struct vring_virtqueue *vq,
> > > > > > > > > > -                                          const struct vring_desc *desc)
> > > > > > > > > > +                                          const struct vring_desc *desc,
> > > > > > > > > > +                                          struct scatterlist **sg)
> > > > > > > > > >  {
> > > > > > > > > >         u16 flags;
> > > > > > > > > >
> > > > > > > > > > -       if (!vq->do_unmap)
> > > > > > > > > > +       if (!vq->do_unmap) {
> > > > > > > > > > +               if (*sg)
> > > > > > > > >
> > > > > > > > > Can we simply move the
> > > > > > > > >
> > > > > > > > > if (*sg) to store_dma_to_sg()?
> > > > > > > >
> > > > > > > > OK
> > > > > > > >
> > > > > > > >
> > > > > > > > >
> > > > > > > > > > +                       store_dma_to_sg(sg,
> > > > > > > > > > +                                       virtio64_to_cpu(vq->vq.vdev, desc->addr),
> > > > > > > > > > +                                       virtio32_to_cpu(vq->vq.vdev, desc->len));
> > > > > > > > > >                 return;
> > > > > > > > > > +       }
> > > > > > > > > >
> > > > > > > > > >         flags = virtio16_to_cpu(vq->vq.vdev, desc->flags);
> > > > > > > > > >
> > > > > > > > > > @@ -458,7 +478,7 @@ static void vring_unmap_one_split_indirect(const struct vring_virtqueue *vq,
> > > > > > > > > >  }
> > > > > > > > > >
> > > > > > > > > >  static unsigned int vring_unmap_one_split(const struct vring_virtqueue *vq,
> > > > > > > > > > -                                         unsigned int i)
> > > > > > > > > > +                                         unsigned int i, struct scatterlist **sg)
> > > > > > > > > >  {
> > > > > > > > > >         struct vring_desc_extra *extra = vq->split.desc_extra;
> > > > > > > > > >         u16 flags;
> > > > > > > > > > @@ -475,8 +495,11 @@ static unsigned int vring_unmap_one_split(const struct vring_virtqueue *vq,
> > > > > > > > > >                                  (flags & VRING_DESC_F_WRITE) ?
> > > > > > > > > >                                  DMA_FROM_DEVICE : DMA_TO_DEVICE);
> > > > > > > > > >         } else {
> > > > > > > > > > -               if (!vq->do_unmap)
> > > > > > > > > > +               if (!vq->do_unmap) {
> > > > > > > > >
> > > > > > > > > The:
> > > > > > > > >
> > > > > > > > > else {
> > > > > > > > >       if {
> > > > > > > > >             if {
> > > > > > > > > }
> > > > > > > > >
> > > > > > > > > seems odd. I think I would prefer
> > > > > > > > >
> > > > > > > > > if (flags & VRING_DESC_F_INDIRECT) {
> > > > > > > > > } else if (!vq->do_unmap) {
> > > > > > > > > } else {
> > > > > > > > > }
> > > > > > > >
> > > > > > > >
> > > > > > > > Will fix.
> > > > > > > >
> > > > > > > >
> > > > > > > > >
> > > > > > > > > here
> > > > > > > > >
> > > > > > > > > Btw, I really think do_unmap is not a good name, we probably need to
> > > > > > > > > rename it as "unmap_desc".
> > > > > > > >
> > > > > > > > How about a separate patch for this?
> > > > > > > >
> > > > > > > > >
> > > > > > > > >
> > > > > > > > > > +                       if (*sg)
> > > > > > > > > > +                               store_dma_to_sg(sg, extra[i].addr, extra[i].len);
> > > > > > > > >
> > > > > > > > > In which case we need to unmap by driver but we don't need a dma address?
> > > > > > > >
> > > > > > > > Sorry, do not get it.
> > > > > > >
> > > > > > > I meant, I don't get how we can reach to this check.
> > > > > >
> > > > > > This is the main way. I think you miss something.
> > > > >
> > > > > Or maybe you can explain the case where vq->do_unmap is false but *sg is NULL?
> > > >
> > > >
> > > > If vq->do_unmap is false, that is premapped mode or use_dma_api is false.
> > > > If the *sg is null, that means that the driver call the API
> > > > virtqueue_get_buf_ctx or virtqueue_get_buf.
> > > > If the *sg is not null, that means that the driver call the API
> > > >
> > > >         void *virtqueue_get_buf_ctx_dma(struct virtqueue *_vq, unsigned int *len,
> > > >                                         struct scatterlist *sg, void **ctx);
> > > >
> > > > Thanks.
> > >
> > > Ok, but my question still:
> > >
> > > 1) you explain one possible way here is use_dma_api is false and the
> > > driver doesn't call virtqueue_get_buf_ctx_dma().
> >
> > If the use_dma_api is false, then we cannot set to premapped mode.
> > If the driver calls virtqueue_get_buf_ctx_dma(), that works as
> > virtqueue_get_buf_ctx().
> >
> >
> > > 2) what happens if the driver uses premapped mode but doesn't use
> > > virtqueue_get_buf_ctx_dma()? Should we warn in this case?
> >
> > The developer should know that he need to do unmap, so I think
> > that is not needed.
>
> Well, kind of layer violation here. Driver may trust the virtio core
> but not verse vica.
>
> E.g there are several WARN_ON() in the virtio_ring.

If you think so, I am ok.

I will add WARN_ON() in next version.

Thanks.


>
> Thanks
>
> >
> > Thanks.
> >
> >
> >
> > >
> > > Thanks
> > >
> >
>
>

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [RFC 1/4] virtio_ring: introduce virtqueue_get_buf_ctx_dma()
  2023-11-24  6:00                   ` Jason Wang
  2023-11-24  6:03                     ` Xuan Zhuo
@ 2023-12-14  8:33                     ` Xuan Zhuo
  1 sibling, 0 replies; 19+ messages in thread
From: Xuan Zhuo @ 2023-12-14  8:33 UTC (permalink / raw)
  To: Jason Wang; +Cc: virtualization, Michael S . Tsirkin

On Fri, 24 Nov 2023 14:00:51 +0800, Jason Wang <jasowang@redhat.com> wrote:
> On Fri, Nov 24, 2023 at 1:56 PM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> >
> > On Fri, 24 Nov 2023 12:14:57 +0800, Jason Wang <jasowang@redhat.com> wrote:
> > > On Wed, Nov 22, 2023 at 2:57 PM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> > > >
> > > > On Wed, 22 Nov 2023 14:47:02 +0800, Jason Wang <jasowang@redhat.com> wrote:
> > > > > On Wed, Nov 22, 2023 at 2:34 PM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> > > > > >
> > > > > > On Wed, 22 Nov 2023 14:03:32 +0800, Jason Wang <jasowang@redhat.com> wrote:
> > > > > > > On Tue, Nov 21, 2023 at 2:27 PM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> > > > > > > >
> > > > > > > > On Thu, 16 Nov 2023 16:11:11 +0800, Jason Wang <jasowang@redhat.com> wrote:
> > > > > > > > > On Tue, Nov 14, 2023 at 7:31 PM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> > > > > > > > > >
> > > > > > > > > > introduce virtqueue_get_buf_ctx_dma() to collect the dma info when
> > > > > > > > > > get buf from virtio core for premapped mode.
> > > > > > > > > >
> > > > > > > > > > If the virtio queue is premapped mode, the virtio-net send buf may
> > > > > > > > > > have many desc. Every desc dma address need to be unmap. So here we
> > > > > > > > > > introduce a new helper to collect the dma address of the buffer from
> > > > > > > > > > the virtio core.
> > > > > > > > >
> > > > > > > > > So looking at vring_desc_extra, what we have right now is:
> > > > > > > > >
> > > > > > > > > struct vring_desc_extra {
> > > > > > > > >         dma_addr_t addr;                /* Descriptor DMA addr. */
> > > > > > > > >         u32 len;                        /* Descriptor length. */
> > > > > > > > >         u16 flags;                      /* Descriptor flags. */
> > > > > > > > >         u16 next;                       /* The next desc state in a list. */
> > > > > > > > > };
> > > > > > > > >
> > > > > > > > > And sg is
> > > > > > > > >
> > > > > > > > > struct scatterlist {
> > > > > > > > >         unsigned long   page_link;
> > > > > > > > > unsigned int    offset;
> > > > > > > > >         unsigned int    length;
> > > > > > > > >         dma_addr_t      dma_address;
> > > > > > > > > #ifdef CONFIG_NEED_SG_DMA_LENGTH
> > > > > > > > >         unsigned int    dma_length;
> > > > > > > > > #endif
> > > > > > > > > #ifdef CONFIG_NEED_SG_DMA_FLAGS
> > > > > > > > >         unsigned int    dma_flags;
> > > > > > > > > #endif
> > > > > > > > > };
> > > > > > > > >
> > > > > > > > > Would it better just store sg?
> > > > > > > >
> > > > > > > > Do you mean we expose the vring_desc_extra to dirver?
> > > > > > >
> > > > > > > Maybe or not, (or just us sg for desc_extra).
> > > > > > >
> > > > > > > Anyhow, any method needs to be benchmarked to see the performance impact.
> > > > > > >
> > > > > > > >
> > > > > > > > How about introducing such a new structure?
> > > > > > > >
> > > > > > > > struct virtio_dma_item {
> > > > > > > >          dma_addr_t addr;
> > > > > > > >          u32 len;
> > > > > > > > };
> > > > > > > >
> > > > > > > > struct virtio_dma_head {
> > > > > > > >         u32 num;
> > > > > > > >         u32 used;
> > > > > > > >         struct virtio_dma_item items[];
> > > > > > > > };
> > > > > > > >
> > > > > > > > Then we just need to pass one pointer to the virtio.
> > > > > > >
> > > > > > > Just to make sure I understand here, where did we store those arrays?
> > > > > >
> > > > > >
> > > > > > On the stack, or we can reuse the memory space of sq->sg.
> > > > > >
> > > > > > dma = (struct virtio_dma_head*)sq->sg;
> > > > >
> > > > > Ok, we need some benchmark for sure.
> > > > >
> > > > > >
> > > > > >
> > > > > > >
> > > > > > > > num is used to pass the size of items
> > > > > > > > used is used to return the num used by virtio core.
> > > > > > > >
> > > > > > > > >
> > > > > > > > > More below
> > > > > > > > >
> > > > > > > > > >
> > > > > > > > > > Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
> > > > > > > > > > ---
> > > > > > > > > >  drivers/virtio/virtio_ring.c | 148 ++++++++++++++++++++++++++---------
> > > > > > > > > >  include/linux/virtio.h       |   2 +
> > > > > > > > > >  2 files changed, 115 insertions(+), 35 deletions(-)
> > > > > > > > > >
> > > > > > > > > > diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
> > > > > > > > > > index 51d8f3299c10..0b3caee4ef9d 100644
> > > > > > > > > > --- a/drivers/virtio/virtio_ring.c
> > > > > > > > > > +++ b/drivers/virtio/virtio_ring.c
> > > > > > > > > > @@ -362,6 +362,20 @@ static struct device *vring_dma_dev(const struct vring_virtqueue *vq)
> > > > > > > > > >         return vq->dma_dev;
> > > > > > > > > >  }
> > > > > > > > > >
> > > > > > > > > > +static void store_dma_to_sg(struct scatterlist **sgp, dma_addr_t addr, unsigned int length)
> > > > > > > > > > +{
> > > > > > > > > > +       struct scatterlist *sg;
> > > > > > > > > > +
> > > > > > > > > > +       sg = *sgp;
> > > > > > > > > > +
> > > > > > > > > > +       sg->dma_address = addr;
> > > > > > > > > > +       sg->length = length;
> > > > > > > > > > +
> > > > > > > > > > +       sg = sg_next(sg);
> > > > > > > > > > +
> > > > > > > > > > +       *sgp = sg;
> > > > > > > > > > +}
> > > > > > > > > > +
> > > > > > > > > >  /* Map one sg entry. */
> > > > > > > > > >  static int vring_map_one_sg(const struct vring_virtqueue *vq, struct scatterlist *sg,
> > > > > > > > > >                             enum dma_data_direction direction, dma_addr_t *addr)
> > > > > > > > > > @@ -441,12 +455,18 @@ static void virtqueue_init(struct vring_virtqueue *vq, u32 num)
> > > > > > > > > >   */
> > > > > > > > > >
> > > > > > > > > >  static void vring_unmap_one_split_indirect(const struct vring_virtqueue *vq,
> > > > > > > > > > -                                          const struct vring_desc *desc)
> > > > > > > > > > +                                          const struct vring_desc *desc,
> > > > > > > > > > +                                          struct scatterlist **sg)
> > > > > > > > > >  {
> > > > > > > > > >         u16 flags;
> > > > > > > > > >
> > > > > > > > > > -       if (!vq->do_unmap)
> > > > > > > > > > +       if (!vq->do_unmap) {
> > > > > > > > > > +               if (*sg)
> > > > > > > > >
> > > > > > > > > Can we simply move the
> > > > > > > > >
> > > > > > > > > if (*sg) to store_dma_to_sg()?
> > > > > > > >
> > > > > > > > OK
> > > > > > > >
> > > > > > > >
> > > > > > > > >
> > > > > > > > > > +                       store_dma_to_sg(sg,
> > > > > > > > > > +                                       virtio64_to_cpu(vq->vq.vdev, desc->addr),
> > > > > > > > > > +                                       virtio32_to_cpu(vq->vq.vdev, desc->len));
> > > > > > > > > >                 return;
> > > > > > > > > > +       }
> > > > > > > > > >
> > > > > > > > > >         flags = virtio16_to_cpu(vq->vq.vdev, desc->flags);
> > > > > > > > > >
> > > > > > > > > > @@ -458,7 +478,7 @@ static void vring_unmap_one_split_indirect(const struct vring_virtqueue *vq,
> > > > > > > > > >  }
> > > > > > > > > >
> > > > > > > > > >  static unsigned int vring_unmap_one_split(const struct vring_virtqueue *vq,
> > > > > > > > > > -                                         unsigned int i)
> > > > > > > > > > +                                         unsigned int i, struct scatterlist **sg)
> > > > > > > > > >  {
> > > > > > > > > >         struct vring_desc_extra *extra = vq->split.desc_extra;
> > > > > > > > > >         u16 flags;
> > > > > > > > > > @@ -475,8 +495,11 @@ static unsigned int vring_unmap_one_split(const struct vring_virtqueue *vq,
> > > > > > > > > >                                  (flags & VRING_DESC_F_WRITE) ?
> > > > > > > > > >                                  DMA_FROM_DEVICE : DMA_TO_DEVICE);
> > > > > > > > > >         } else {
> > > > > > > > > > -               if (!vq->do_unmap)
> > > > > > > > > > +               if (!vq->do_unmap) {
> > > > > > > > >
> > > > > > > > > The:
> > > > > > > > >
> > > > > > > > > else {
> > > > > > > > >       if {
> > > > > > > > >             if {
> > > > > > > > > }
> > > > > > > > >
> > > > > > > > > seems odd. I think I would prefer
> > > > > > > > >
> > > > > > > > > if (flags & VRING_DESC_F_INDIRECT) {
> > > > > > > > > } else if (!vq->do_unmap) {
> > > > > > > > > } else {
> > > > > > > > > }
> > > > > > > >
> > > > > > > >
> > > > > > > > Will fix.
> > > > > > > >
> > > > > > > >
> > > > > > > > >
> > > > > > > > > here
> > > > > > > > >
> > > > > > > > > Btw, I really think do_unmap is not a good name, we probably need to
> > > > > > > > > rename it as "unmap_desc".
> > > > > > > >
> > > > > > > > How about a separate patch for this?
> > > > > > > >
> > > > > > > > >
> > > > > > > > >
> > > > > > > > > > +                       if (*sg)
> > > > > > > > > > +                               store_dma_to_sg(sg, extra[i].addr, extra[i].len);
> > > > > > > > >
> > > > > > > > > In which case we need to unmap by driver but we don't need a dma address?
> > > > > > > >
> > > > > > > > Sorry, do not get it.
> > > > > > >
> > > > > > > I meant, I don't get how we can reach to this check.
> > > > > >
> > > > > > This is the main way. I think you miss something.
> > > > >
> > > > > Or maybe you can explain the case where vq->do_unmap is false but *sg is NULL?
> > > >
> > > >
> > > > If vq->do_unmap is false, that is premapped mode or use_dma_api is false.
> > > > If the *sg is null, that means that the driver call the API
> > > > virtqueue_get_buf_ctx or virtqueue_get_buf.
> > > > If the *sg is not null, that means that the driver call the API
> > > >
> > > >         void *virtqueue_get_buf_ctx_dma(struct virtqueue *_vq, unsigned int *len,
> > > >                                         struct scatterlist *sg, void **ctx);
> > > >
> > > > Thanks.
> > >
> > > Ok, but my question still:
> > >
> > > 1) you explain one possible way here is use_dma_api is false and the
> > > driver doesn't call virtqueue_get_buf_ctx_dma().
> >
> > If the use_dma_api is false, then we cannot set to premapped mode.
> > If the driver calls virtqueue_get_buf_ctx_dma(), that works as
> > virtqueue_get_buf_ctx().
> >
> >
> > > 2) what happens if the driver uses a preamp but doesn't use
> > > virtqueue_get_buf_ctx_dma()? Should we warn in this case?


About this, maybe we should not add the warning.

In cases such as the virtio-net rq and AF_XDP, the driver will manage the dma metadata
itself, so the driver does not need to get the dma info from the virtio core.

Thanks.


> >
> > The developer should know that he need to do unmap, so I think
> > that is not needed.
>
> Well, kind of layer violation here. Driver may trust the virtio core
> but not verse vica.
>
> E.g there are several WARN_ON() in the virtio_ring.
>
> Thanks
>
> >
> > Thanks.
> >
> >
> >
> > >
> > > Thanks
> > >
> >
>
>

^ permalink raw reply	[flat|nested] 19+ messages in thread

end of thread, other threads:[~2023-12-14  8:35 UTC | newest]

Thread overview: 19+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-11-14 11:31 [RFC 0/4] virtio helper: collect dma info from virtio core Xuan Zhuo
2023-11-14 11:31 ` [RFC 1/4] virtio_ring: introduce virtqueue_get_buf_ctx_dma() Xuan Zhuo
2023-11-16  8:11   ` Jason Wang
2023-11-21  6:12     ` Xuan Zhuo
2023-11-22  6:03       ` Jason Wang
2023-11-22  6:29         ` Xuan Zhuo
2023-11-22  6:47           ` Jason Wang
2023-11-22  6:51             ` Xuan Zhuo
2023-11-24  4:14               ` Jason Wang
2023-11-24  5:51                 ` Xuan Zhuo
2023-11-24  6:00                   ` Jason Wang
2023-11-24  6:03                     ` Xuan Zhuo
2023-12-14  8:33                     ` Xuan Zhuo
2023-11-14 11:31 ` [RFC 2/4] virtio_ring: virtqueue_disable_and_recycle let the callback detach bufs Xuan Zhuo
2023-11-16  8:11   ` Jason Wang
2023-11-21  4:10     ` Xuan Zhuo
2023-11-14 11:31 ` [RFC 3/4] virtio_ring: introduce virtqueue_detach_unused_buf_premapped Xuan Zhuo
2023-11-14 11:31 ` [RFC 4/4] virtio_net: sq support premapped mode Xuan Zhuo
2023-11-16  8:11   ` Jason Wang

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.