All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH vhost 00/10] virtio core prepares for AF_XDP
@ 2023-02-14  7:26 Xuan Zhuo
  2023-02-14  7:26 ` [PATCH vhost 01/10] virtio_ring: split: refactor virtqueue_add_split() for premapped Xuan Zhuo
                   ` (10 more replies)
  0 siblings, 11 replies; 46+ messages in thread
From: Xuan Zhuo @ 2023-02-14  7:26 UTC (permalink / raw)
  To: virtualization; +Cc: Michael S. Tsirkin

XDP socket (AF_XDP) is an excellent kernel-bypass network framework. The zero
copy feature of xsk (XDP socket) needs to be supported by the driver. The
performance of zero copy is very good.

ENV: Qemu with vhost.

                   vhost cpu | Guest APP CPU |Guest Softirq CPU | PPS
-----------------------------|---------------|------------------|------------
xmit by sockperf:     90%    |   100%        |                  |  318967
xmit by xsk:          100%   |   30%         |   33%            | 1192064
recv by sockperf:     100%   |   68%         |   100%           |  692288
recv by xsk:          100%   |   33%         |   43%            |  771670

Before implementing this feature in virtio-net, we first have to let the
virtio core support these features:

1. virtio core support premapped
2. virtio core support reset per-queue
3. introduce DMA APIs to virtio core

Please review.

Thanks.

Xuan Zhuo (10):
  virtio_ring: split: refactor virtqueue_add_split() for premapped
  virtio_ring: packed: separate prepare code from
    virtuque_add_indirect_packed()
  virtio_ring: packed: refactor virtqueue_add_packed() for premapped
  virtio_ring: split: introduce virtqueue_add_split_premapped()
  virtio_ring: packed: introduce virtqueue_add_packed_premapped()
  virtio_ring: introduce virtqueue_add_inbuf_premapped()
  virtio_ring: add api virtio_dma_map() for advance dma
  virtio_ring: introduce dma sync api for virtio
  virtio_ring: correct the expression of the description of
    virtqueue_resize()
  virtio_ring: introduce virtqueue_reset()

 drivers/virtio/virtio_ring.c | 792 ++++++++++++++++++++++++++++-------
 include/linux/virtio.h       |  29 ++
 2 files changed, 659 insertions(+), 162 deletions(-)

--
2.32.0.3.g01195cf9f

_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply	[flat|nested] 46+ messages in thread

* [PATCH vhost 01/10] virtio_ring: split: refactor virtqueue_add_split() for premapped
  2023-02-14  7:26 [PATCH vhost 00/10] virtio core prepares for AF_XDP Xuan Zhuo
@ 2023-02-14  7:26 ` Xuan Zhuo
  2023-02-20  5:37   ` Jason Wang
  2023-02-14  7:26 ` [PATCH vhost 02/10] virtio_ring: packed: separate prepare code from virtuque_add_indirect_packed() Xuan Zhuo
                   ` (9 subsequent siblings)
  10 siblings, 1 reply; 46+ messages in thread
From: Xuan Zhuo @ 2023-02-14  7:26 UTC (permalink / raw)
  To: virtualization; +Cc: Michael S. Tsirkin

DMA-related logic is separated out of virtqueue_add_split() to prepare
for the subsequent support for premapped buffers.

Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
---
 drivers/virtio/virtio_ring.c | 219 ++++++++++++++++++++++++-----------
 1 file changed, 152 insertions(+), 67 deletions(-)

diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 41144b5246a8..560ee30d942c 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -520,29 +520,83 @@ static inline unsigned int virtqueue_add_desc_split(struct virtqueue *vq,
 	return next;
 }
 
-static inline int virtqueue_add_split(struct virtqueue *_vq,
-				      struct scatterlist *sgs[],
-				      unsigned int total_sg,
-				      unsigned int out_sgs,
-				      unsigned int in_sgs,
-				      void *data,
-				      void *ctx,
-				      gfp_t gfp)
+static int virtqueue_map_sgs(struct vring_virtqueue *vq,
+			     struct scatterlist *sgs[],
+			     unsigned int total_sg,
+			     unsigned int out_sgs,
+			     unsigned int in_sgs)
 {
-	struct vring_virtqueue *vq = to_vvq(_vq);
 	struct scatterlist *sg;
-	struct vring_desc *desc;
-	unsigned int i, n, avail, descs_used, prev, err_idx;
-	int head;
-	bool indirect;
+	unsigned int n;
 
-	START_USE(vq);
+	for (n = 0; n < out_sgs; n++) {
+		for (sg = sgs[n]; sg; sg = sg_next(sg)) {
+			dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_TO_DEVICE);
+
+			if (vring_mapping_error(vq, addr))
+				return -ENOMEM;
+
+			sg->dma_address = addr;
+		}
+	}
+	for (; n < (out_sgs + in_sgs); n++) {
+		for (sg = sgs[n]; sg; sg = sg_next(sg)) {
+			dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_FROM_DEVICE);
+
+			if (vring_mapping_error(vq, addr))
+				return -ENOMEM;
+
+			sg->dma_address = addr;
+		}
+	}
+
+	return 0;
+}
+
+static void virtqueue_unmap_sgs(struct vring_virtqueue *vq,
+				struct scatterlist *sgs[],
+				unsigned int total_sg,
+				unsigned int out_sgs,
+				unsigned int in_sgs)
+{
+	struct scatterlist *sg;
+	unsigned int n;
+
+	for (n = 0; n < out_sgs; n++) {
+		for (sg = sgs[n]; sg; sg = sg_next(sg)) {
+			if (!sg->dma_address)
+				return;
+
+			dma_unmap_single(vring_dma_dev(vq), sg->dma_address,
+					 sg->length, DMA_TO_DEVICE);
+		}
+	}
+	for (; n < (out_sgs + in_sgs); n++) {
+		for (sg = sgs[n]; sg; sg = sg_next(sg)) {
+			if (!sg->dma_address)
+				return;
+
+			dma_unmap_single(vring_dma_dev(vq), sg->dma_address,
+					 sg->length, DMA_FROM_DEVICE);
+		}
+	}
+}
+
+static inline int virtqueue_add_split_prepare(struct vring_virtqueue *vq,
+					      unsigned int total_sg,
+					      unsigned int out_sgs,
+					      void *data,
+					      void *ctx,
+					      gfp_t gfp,
+					      struct vring_desc **pdesc)
+{
+	struct vring_desc *desc;
+	unsigned int descs_used;
 
 	BUG_ON(data == NULL);
 	BUG_ON(ctx && vq->indirect);
 
 	if (unlikely(vq->broken)) {
-		END_USE(vq);
 		return -EIO;
 	}
 
@@ -550,27 +604,17 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,
 
 	BUG_ON(total_sg == 0);
 
-	head = vq->free_head;
-
 	if (virtqueue_use_indirect(vq, total_sg))
-		desc = alloc_indirect_split(_vq, total_sg, gfp);
+		desc = alloc_indirect_split(&vq->vq, total_sg, gfp);
 	else {
 		desc = NULL;
 		WARN_ON_ONCE(total_sg > vq->split.vring.num && !vq->indirect);
 	}
 
-	if (desc) {
-		/* Use a single buffer which doesn't continue */
-		indirect = true;
-		/* Set up rest to use this indirect table. */
-		i = 0;
+	if (desc)
 		descs_used = 1;
-	} else {
-		indirect = false;
-		desc = vq->split.vring.desc;
-		i = head;
+	else
 		descs_used = total_sg;
-	}
 
 	if (unlikely(vq->vq.num_free < descs_used)) {
 		pr_debug("Can't add buf len %i - avail = %i\n",
@@ -580,38 +624,64 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,
 		 * host should service the ring ASAP. */
 		if (out_sgs)
 			vq->notify(&vq->vq);
-		if (indirect)
-			kfree(desc);
-		END_USE(vq);
+		kfree(desc);
 		return -ENOSPC;
 	}
 
+	*pdesc = desc;
+
+	return 0;
+}
+
+static inline int virtqueue_add_split_vring(struct vring_virtqueue *vq,
+					    struct scatterlist *sgs[],
+					    unsigned int total_sg,
+					    unsigned int out_sgs,
+					    unsigned int in_sgs,
+					    struct vring_desc *desc)
+{
+	unsigned int n, i, avail, descs_used, prev;
+	struct virtqueue *_vq = &vq->vq;
+	struct scatterlist *sg;
+	bool indirect;
+	int head;
+
+	head = vq->free_head;
+
+	if (desc) {
+		/* Use a single buffer which doesn't continue */
+		indirect = true;
+		/* Set up rest to use this indirect table. */
+		i = 0;
+		descs_used = 1;
+	} else {
+		indirect = false;
+		desc = vq->split.vring.desc;
+		i = head;
+		descs_used = total_sg;
+	}
+
 	for (n = 0; n < out_sgs; n++) {
 		for (sg = sgs[n]; sg; sg = sg_next(sg)) {
-			dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_TO_DEVICE);
-			if (vring_mapping_error(vq, addr))
-				goto unmap_release;
-
 			prev = i;
 			/* Note that we trust indirect descriptor
 			 * table since it use stream DMA mapping.
 			 */
-			i = virtqueue_add_desc_split(_vq, desc, i, addr, sg->length,
+			i = virtqueue_add_desc_split(_vq, desc, i,
+						     sg->dma_address,
+						     sg->length,
 						     VRING_DESC_F_NEXT,
 						     indirect);
 		}
 	}
 	for (; n < (out_sgs + in_sgs); n++) {
 		for (sg = sgs[n]; sg; sg = sg_next(sg)) {
-			dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_FROM_DEVICE);
-			if (vring_mapping_error(vq, addr))
-				goto unmap_release;
-
 			prev = i;
 			/* Note that we trust indirect descriptor
 			 * table since it use stream DMA mapping.
 			 */
-			i = virtqueue_add_desc_split(_vq, desc, i, addr,
+			i = virtqueue_add_desc_split(_vq, desc, i,
+						     sg->dma_address,
 						     sg->length,
 						     VRING_DESC_F_NEXT |
 						     VRING_DESC_F_WRITE,
@@ -630,7 +700,7 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,
 			vq, desc, total_sg * sizeof(struct vring_desc),
 			DMA_TO_DEVICE);
 		if (vring_mapping_error(vq, addr))
-			goto unmap_release;
+			return -ENOMEM;
 
 		virtqueue_add_desc_split(_vq, vq->split.vring.desc,
 					 head, addr,
@@ -648,13 +718,6 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,
 	else
 		vq->free_head = i;
 
-	/* Store token and indirect buffer state. */
-	vq->split.desc_state[head].data = data;
-	if (indirect)
-		vq->split.desc_state[head].indir_desc = desc;
-	else
-		vq->split.desc_state[head].indir_desc = ctx;
-
 	/* Put entry in available array (but don't update avail->idx until they
 	 * do sync). */
 	avail = vq->split.avail_idx_shadow & (vq->split.vring.num - 1);
@@ -677,30 +740,52 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,
 		virtqueue_kick(_vq);
 
 	return 0;
+}
 
-unmap_release:
-	err_idx = i;
+static inline int virtqueue_add_split(struct virtqueue *_vq,
+				      struct scatterlist *sgs[],
+				      unsigned int total_sg,
+				      unsigned int out_sgs,
+				      unsigned int in_sgs,
+				      void *data,
+				      void *ctx,
+				      gfp_t gfp)
+{
+	struct vring_virtqueue *vq = to_vvq(_vq);
+	struct vring_desc *desc;
+	int head;
+	int err;
 
-	if (indirect)
-		i = 0;
-	else
-		i = head;
+	START_USE(vq);
 
-	for (n = 0; n < total_sg; n++) {
-		if (i == err_idx)
-			break;
-		if (indirect) {
-			vring_unmap_one_split_indirect(vq, &desc[i]);
-			i = virtio16_to_cpu(_vq->vdev, desc[i].next);
-		} else
-			i = vring_unmap_one_split(vq, i);
-	}
+	/* check vq state and try to alloc desc for indirect. */
+	err = virtqueue_add_split_prepare(vq, total_sg, out_sgs, data, ctx, gfp, &desc);
+	if (err)
+		goto end;
 
-	if (indirect)
-		kfree(desc);
+	err = virtqueue_map_sgs(vq, sgs, total_sg, out_sgs, in_sgs);
+	if (err)
+		goto err;
 
+	head = vq->free_head;
+	err = virtqueue_add_split_vring(vq, sgs, total_sg, out_sgs, in_sgs, desc);
+	if (err)
+		goto err;
+
+	/* Store token and indirect buffer state. */
+	vq->split.desc_state[head].data = data;
+	vq->split.desc_state[head].indir_desc = desc ? desc : ctx;
+
+	goto end;
+
+err:
+	virtqueue_unmap_sgs(vq, sgs, total_sg, out_sgs, in_sgs);
+
+	kfree(desc);
+
+end:
 	END_USE(vq);
-	return -ENOMEM;
+	return err;
 }
 
 static bool virtqueue_kick_prepare_split(struct virtqueue *_vq)
-- 
2.32.0.3.g01195cf9f

_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply related	[flat|nested] 46+ messages in thread

* [PATCH vhost 02/10] virtio_ring: packed: separate prepare code from virtuque_add_indirect_packed()
  2023-02-14  7:26 [PATCH vhost 00/10] virtio core prepares for AF_XDP Xuan Zhuo
  2023-02-14  7:26 ` [PATCH vhost 01/10] virtio_ring: split: refactor virtqueue_add_split() for premapped Xuan Zhuo
@ 2023-02-14  7:26 ` Xuan Zhuo
  2023-02-20  5:37   ` Jason Wang
  2023-02-14  7:26 ` [PATCH vhost 03/10] virtio_ring: packed: refactor virtqueue_add_packed() for premapped Xuan Zhuo
                   ` (8 subsequent siblings)
  10 siblings, 1 reply; 46+ messages in thread
From: Xuan Zhuo @ 2023-02-14  7:26 UTC (permalink / raw)
  To: virtualization; +Cc: Michael S. Tsirkin

Separate the logic of allocating an indirect desc and checking the queue
status into the upper-layer function.

The purpose of this is to make it convenient to refactor
virtqueue_add_packed() for premapped buffers.

Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
---
 drivers/virtio/virtio_ring.c | 29 ++++++++++++-----------------
 1 file changed, 12 insertions(+), 17 deletions(-)

diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 560ee30d942c..42b1ff87518e 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -1330,25 +1330,14 @@ static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq,
 					 unsigned int out_sgs,
 					 unsigned int in_sgs,
 					 void *data,
-					 gfp_t gfp)
+					 struct vring_packed_desc *desc)
 {
-	struct vring_packed_desc *desc;
 	struct scatterlist *sg;
 	unsigned int i, n, err_idx;
 	u16 head, id;
 	dma_addr_t addr;
 
 	head = vq->packed.next_avail_idx;
-	desc = alloc_indirect_packed(total_sg, gfp);
-	if (!desc)
-		return -ENOMEM;
-
-	if (unlikely(vq->vq.num_free < 1)) {
-		pr_debug("Can't add buf len 1 - avail = 0\n");
-		kfree(desc);
-		END_USE(vq);
-		return -ENOSPC;
-	}
 
 	i = 0;
 	id = vq->free_head;
@@ -1470,11 +1459,17 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq,
 	BUG_ON(total_sg == 0);
 
 	if (virtqueue_use_indirect(vq, total_sg)) {
-		err = virtqueue_add_indirect_packed(vq, sgs, total_sg, out_sgs,
-						    in_sgs, data, gfp);
-		if (err != -ENOMEM) {
-			END_USE(vq);
-			return err;
+		desc = alloc_indirect_packed(total_sg, gfp);
+		if (desc) {
+			if (unlikely(vq->vq.num_free < 1)) {
+				pr_debug("Can't add buf len 1 - avail = 0\n");
+				kfree(desc);
+				END_USE(vq);
+				return -ENOSPC;
+			}
+
+			return virtqueue_add_indirect_packed(vq, sgs, total_sg, out_sgs,
+							     in_sgs, data, desc);
 		}
 
 		/* fall back on direct */
-- 
2.32.0.3.g01195cf9f

_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply related	[flat|nested] 46+ messages in thread

* [PATCH vhost 03/10] virtio_ring: packed: refactor virtqueue_add_packed() for premapped
  2023-02-14  7:26 [PATCH vhost 00/10] virtio core prepares for AF_XDP Xuan Zhuo
  2023-02-14  7:26 ` [PATCH vhost 01/10] virtio_ring: split: refactor virtqueue_add_split() for premapped Xuan Zhuo
  2023-02-14  7:26 ` [PATCH vhost 02/10] virtio_ring: packed: separate prepare code from virtuque_add_indirect_packed() Xuan Zhuo
@ 2023-02-14  7:26 ` Xuan Zhuo
  2023-02-20  5:37   ` Jason Wang
  2023-02-14  7:26 ` [PATCH vhost 04/10] virtio_ring: split: introduce virtqueue_add_split_premapped() Xuan Zhuo
                   ` (7 subsequent siblings)
  10 siblings, 1 reply; 46+ messages in thread
From: Xuan Zhuo @ 2023-02-14  7:26 UTC (permalink / raw)
  To: virtualization; +Cc: Michael S. Tsirkin

DMA-related logic is separated out of virtqueue_add_packed() to prepare
for the subsequent support for premapped buffers.

Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
---
 drivers/virtio/virtio_ring.c | 150 ++++++++++++++++++-----------------
 1 file changed, 78 insertions(+), 72 deletions(-)

diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 42b1ff87518e..47b6f9152f9f 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -1329,7 +1329,6 @@ static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq,
 					 unsigned int total_sg,
 					 unsigned int out_sgs,
 					 unsigned int in_sgs,
-					 void *data,
 					 struct vring_packed_desc *desc)
 {
 	struct scatterlist *sg;
@@ -1345,14 +1344,9 @@ static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq,
 
 	for (n = 0; n < out_sgs + in_sgs; n++) {
 		for (sg = sgs[n]; sg; sg = sg_next(sg)) {
-			addr = vring_map_one_sg(vq, sg, n < out_sgs ?
-					DMA_TO_DEVICE : DMA_FROM_DEVICE);
-			if (vring_mapping_error(vq, addr))
-				goto unmap_release;
-
 			desc[i].flags = cpu_to_le16(n < out_sgs ?
 						0 : VRING_DESC_F_WRITE);
-			desc[i].addr = cpu_to_le64(addr);
+			desc[i].addr = cpu_to_le64(sg->dma_address);
 			desc[i].len = cpu_to_le32(sg->length);
 			i++;
 		}
@@ -1363,7 +1357,7 @@ static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq,
 			total_sg * sizeof(struct vring_packed_desc),
 			DMA_TO_DEVICE);
 	if (vring_mapping_error(vq, addr))
-		goto unmap_release;
+		return -ENOMEM;
 
 	vq->packed.vring.desc[head].addr = cpu_to_le64(addr);
 	vq->packed.vring.desc[head].len = cpu_to_le32(total_sg *
@@ -1404,53 +1398,30 @@ static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq,
 
 	/* Store token and indirect buffer state. */
 	vq->packed.desc_state[id].num = 1;
-	vq->packed.desc_state[id].data = data;
 	vq->packed.desc_state[id].indir_desc = desc;
 	vq->packed.desc_state[id].last = id;
 
 	vq->num_added += 1;
 
 	pr_debug("Added buffer head %i to %p\n", head, vq);
-	END_USE(vq);
 
 	return 0;
-
-unmap_release:
-	err_idx = i;
-
-	for (i = 0; i < err_idx; i++)
-		vring_unmap_desc_packed(vq, &desc[i]);
-
-	kfree(desc);
-
-	END_USE(vq);
-	return -ENOMEM;
 }
 
-static inline int virtqueue_add_packed(struct virtqueue *_vq,
-				       struct scatterlist *sgs[],
-				       unsigned int total_sg,
-				       unsigned int out_sgs,
-				       unsigned int in_sgs,
-				       void *data,
-				       void *ctx,
-				       gfp_t gfp)
+static inline int virtqueue_add_packed_prepare(struct vring_virtqueue *vq,
+					       unsigned int total_sg,
+					       void *data,
+					       void *ctx,
+					       struct vring_packed_desc **pdesc,
+					       gfp_t gfp)
 {
-	struct vring_virtqueue *vq = to_vvq(_vq);
 	struct vring_packed_desc *desc;
-	struct scatterlist *sg;
-	unsigned int i, n, c, descs_used, err_idx;
-	__le16 head_flags, flags;
-	u16 head, id, prev, curr, avail_used_flags;
-	int err;
-
-	START_USE(vq);
+	unsigned int descs_used;
 
 	BUG_ON(data == NULL);
 	BUG_ON(ctx && vq->indirect);
 
 	if (unlikely(vq->broken)) {
-		END_USE(vq);
 		return -EIO;
 	}
 
@@ -1458,39 +1429,55 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq,
 
 	BUG_ON(total_sg == 0);
 
+	desc = NULL;
+
 	if (virtqueue_use_indirect(vq, total_sg)) {
 		desc = alloc_indirect_packed(total_sg, gfp);
 		if (desc) {
 			if (unlikely(vq->vq.num_free < 1)) {
 				pr_debug("Can't add buf len 1 - avail = 0\n");
 				kfree(desc);
-				END_USE(vq);
 				return -ENOSPC;
 			}
 
-			return virtqueue_add_indirect_packed(vq, sgs, total_sg, out_sgs,
-							     in_sgs, data, desc);
+			return 0;
 		}
 
 		/* fall back on direct */
 	}
 
-	head = vq->packed.next_avail_idx;
-	avail_used_flags = vq->packed.avail_used_flags;
-
 	WARN_ON_ONCE(total_sg > vq->packed.vring.num && !vq->indirect);
 
-	desc = vq->packed.vring.desc;
-	i = head;
 	descs_used = total_sg;
 
 	if (unlikely(vq->vq.num_free < descs_used)) {
 		pr_debug("Can't add buf len %i - avail = %i\n",
 			 descs_used, vq->vq.num_free);
-		END_USE(vq);
 		return -ENOSPC;
 	}
 
+	*pdesc = desc;
+
+	return 0;
+}
+
+static void virtqueue_add_packed_vring(struct vring_virtqueue *vq,
+				       struct scatterlist *sgs[],
+				       unsigned int total_sg,
+				       unsigned int out_sgs,
+				       unsigned int in_sgs)
+{
+	struct vring_packed_desc *desc;
+	struct scatterlist *sg;
+	unsigned int i, n, c, descs_used;
+	__le16 head_flags, flags;
+	u16 head, id, prev, curr;
+
+	desc = vq->packed.vring.desc;
+	head = vq->packed.next_avail_idx;
+	i = head;
+	descs_used = total_sg;
+
 	id = vq->free_head;
 	BUG_ON(id == vq->packed.vring.num);
 
@@ -1498,11 +1485,6 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq,
 	c = 0;
 	for (n = 0; n < out_sgs + in_sgs; n++) {
 		for (sg = sgs[n]; sg; sg = sg_next(sg)) {
-			dma_addr_t addr = vring_map_one_sg(vq, sg, n < out_sgs ?
-					DMA_TO_DEVICE : DMA_FROM_DEVICE);
-			if (vring_mapping_error(vq, addr))
-				goto unmap_release;
-
 			flags = cpu_to_le16(vq->packed.avail_used_flags |
 				    (++c == total_sg ? 0 : VRING_DESC_F_NEXT) |
 				    (n < out_sgs ? 0 : VRING_DESC_F_WRITE));
@@ -1511,12 +1493,12 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq,
 			else
 				desc[i].flags = flags;
 
-			desc[i].addr = cpu_to_le64(addr);
+			desc[i].addr = cpu_to_le64(sg->dma_address);
 			desc[i].len = cpu_to_le32(sg->length);
 			desc[i].id = cpu_to_le16(id);
 
 			if (unlikely(vq->use_dma_api)) {
-				vq->packed.desc_extra[curr].addr = addr;
+				vq->packed.desc_extra[curr].addr = sg->dma_address;
 				vq->packed.desc_extra[curr].len = sg->length;
 				vq->packed.desc_extra[curr].flags =
 					le16_to_cpu(flags);
@@ -1545,8 +1527,6 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq,
 
 	/* Store token. */
 	vq->packed.desc_state[id].num = descs_used;
-	vq->packed.desc_state[id].data = data;
-	vq->packed.desc_state[id].indir_desc = ctx;
 	vq->packed.desc_state[id].last = prev;
 
 	/*
@@ -1559,29 +1539,55 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq,
 	vq->num_added += descs_used;
 
 	pr_debug("Added buffer head %i to %p\n", head, vq);
-	END_USE(vq);
+}
 
-	return 0;
+static inline int virtqueue_add_packed(struct virtqueue *_vq,
+				       struct scatterlist *sgs[],
+				       unsigned int total_sg,
+				       unsigned int out_sgs,
+				       unsigned int in_sgs,
+				       void *data,
+				       void *ctx,
+				       gfp_t gfp)
+{
+	struct vring_virtqueue *vq = to_vvq(_vq);
+	struct vring_packed_desc *desc;
+	u16 id;
+	int err;
 
-unmap_release:
-	err_idx = i;
-	i = head;
-	curr = vq->free_head;
+	START_USE(vq);
 
-	vq->packed.avail_used_flags = avail_used_flags;
+	/* check vq state and try to alloc desc for indirect. */
+	err = virtqueue_add_packed_prepare(vq, total_sg, data, ctx, &desc, gfp);
+	if (err)
+		goto end;
 
-	for (n = 0; n < total_sg; n++) {
-		if (i == err_idx)
-			break;
-		vring_unmap_extra_packed(vq, &vq->packed.desc_extra[curr]);
-		curr = vq->packed.desc_extra[curr].next;
-		i++;
-		if (i >= vq->packed.vring.num)
-			i = 0;
+	err = virtqueue_map_sgs(vq, sgs, total_sg, out_sgs, in_sgs);
+	if (err)
+		goto err;
+
+	id = vq->free_head;
+
+	if (desc) {
+		err = virtqueue_add_indirect_packed(vq, sgs, total_sg, out_sgs, in_sgs, desc);
+		if (err)
+			goto err;
+	} else {
+		virtqueue_add_packed_vring(vq, sgs, total_sg, out_sgs, in_sgs);
+		vq->packed.desc_state[id].indir_desc = ctx;
 	}
 
+	vq->packed.desc_state[id].data = data;
+
+	goto end;
+
+err:
+	virtqueue_unmap_sgs(vq, sgs, total_sg, out_sgs, in_sgs);
+	kfree(desc);
+
+end:
 	END_USE(vq);
-	return -EIO;
+	return err;
 }
 
 static bool virtqueue_kick_prepare_packed(struct virtqueue *_vq)
-- 
2.32.0.3.g01195cf9f

_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply related	[flat|nested] 46+ messages in thread

* [PATCH vhost 04/10] virtio_ring: split: introduce virtqueue_add_split_premapped()
  2023-02-14  7:26 [PATCH vhost 00/10] virtio core prepares for AF_XDP Xuan Zhuo
                   ` (2 preceding siblings ...)
  2023-02-14  7:26 ` [PATCH vhost 03/10] virtio_ring: packed: refactor virtqueue_add_packed() for premapped Xuan Zhuo
@ 2023-02-14  7:26 ` Xuan Zhuo
  2023-02-20  5:38   ` Jason Wang
  2023-02-14  7:26 ` [PATCH vhost 05/10] virtio_ring: packed: introduce virtqueue_add_packed_premapped() Xuan Zhuo
                   ` (6 subsequent siblings)
  10 siblings, 1 reply; 46+ messages in thread
From: Xuan Zhuo @ 2023-02-14  7:26 UTC (permalink / raw)
  To: virtualization; +Cc: Michael S. Tsirkin

virtqueue_add_split() only supports virtual addresses; DMA mapping is
performed inside virtqueue_add_split() itself.

In some scenarios (such as the AF_XDP scenario), the memory is allocated
and DMA-mapped in advance, so it is necessary for us to support passing
the DMA address to the virtio core.

Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
---
 drivers/virtio/virtio_ring.c | 100 +++++++++++++++++++++++++++++++++--
 include/linux/virtio.h       |   5 ++
 2 files changed, 100 insertions(+), 5 deletions(-)

diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 47b6f9152f9f..a31155abe101 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -70,6 +70,7 @@
 struct vring_desc_state_split {
 	void *data;			/* Data for callback. */
 	struct vring_desc *indir_desc;	/* Indirect descriptor, if any. */
+	bool premapped;
 };
 
 struct vring_desc_state_packed {
@@ -440,7 +441,7 @@ static void vring_unmap_one_split_indirect(const struct vring_virtqueue *vq,
 }
 
 static unsigned int vring_unmap_one_split(const struct vring_virtqueue *vq,
-					  unsigned int i)
+					  unsigned int i, bool premapped)
 {
 	struct vring_desc_extra *extra = vq->split.desc_extra;
 	u16 flags;
@@ -457,6 +458,9 @@ static unsigned int vring_unmap_one_split(const struct vring_virtqueue *vq,
 				 (flags & VRING_DESC_F_WRITE) ?
 				 DMA_FROM_DEVICE : DMA_TO_DEVICE);
 	} else {
+		if (premapped)
+			goto out;
+
 		dma_unmap_page(vring_dma_dev(vq),
 			       extra[i].addr,
 			       extra[i].len,
@@ -788,6 +792,47 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,
 	return err;
 }
 
+static inline int virtqueue_add_split_premapped(struct virtqueue *_vq,
+						struct scatterlist *sgs[],
+						unsigned int total_sg,
+						unsigned int out_sgs,
+						unsigned int in_sgs,
+						void *data,
+						void *ctx,
+						gfp_t gfp)
+{
+	struct vring_virtqueue *vq = to_vvq(_vq);
+	struct vring_desc *desc;
+	int head;
+	int err;
+
+	START_USE(vq);
+
+	/* check vq state and try to alloc desc for indirect. */
+	err = virtqueue_add_split_prepare(vq, total_sg, out_sgs, data, ctx, gfp, &desc);
+	if (err)
+		goto end;
+
+	head = vq->free_head;
+	err = virtqueue_add_split_vring(vq, sgs, total_sg, out_sgs, in_sgs, desc);
+	if (err)
+		goto err;
+
+	/* Store token and indirect buffer state. */
+	vq->split.desc_state[head].data = data;
+	vq->split.desc_state[head].indir_desc = desc ? desc : ctx;
+	vq->split.desc_state[head].premapped = true;
+
+	goto end;
+
+err:
+	kfree(desc);
+
+end:
+	END_USE(vq);
+	return err;
+}
+
 static bool virtqueue_kick_prepare_split(struct virtqueue *_vq)
 {
 	struct vring_virtqueue *vq = to_vvq(_vq);
@@ -824,20 +869,23 @@ static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head,
 {
 	unsigned int i, j;
 	__virtio16 nextflag = cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_NEXT);
+	bool premapped;
 
 	/* Clear data ptr. */
 	vq->split.desc_state[head].data = NULL;
 
+	premapped = vq->split.desc_state[head].premapped;
+
 	/* Put back on free list: unmap first-level descriptors and find end */
 	i = head;
 
 	while (vq->split.vring.desc[i].flags & nextflag) {
-		vring_unmap_one_split(vq, i);
+		vring_unmap_one_split(vq, i, premapped);
 		i = vq->split.desc_extra[i].next;
 		vq->vq.num_free++;
 	}
 
-	vring_unmap_one_split(vq, i);
+	vring_unmap_one_split(vq, i, premapped);
 	vq->split.desc_extra[i].next = vq->free_head;
 	vq->free_head = head;
 
@@ -859,8 +907,10 @@ static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head,
 				VRING_DESC_F_INDIRECT));
 		BUG_ON(len == 0 || len % sizeof(struct vring_desc));
 
-		for (j = 0; j < len / sizeof(struct vring_desc); j++)
-			vring_unmap_one_split_indirect(vq, &indir_desc[j]);
+		if (!premapped) {
+			for (j = 0; j < len / sizeof(struct vring_desc); j++)
+				vring_unmap_one_split_indirect(vq, &indir_desc[j]);
+		}
 
 		kfree(indir_desc);
 		vq->split.desc_state[head].indir_desc = NULL;
@@ -2204,6 +2254,21 @@ static inline int virtqueue_add(struct virtqueue *_vq,
 					out_sgs, in_sgs, data, ctx, gfp);
 }
 
+static inline int virtqueue_add_premapped(struct virtqueue *_vq,
+					  struct scatterlist *sgs[],
+					  unsigned int total_sg,
+					  unsigned int out_sgs,
+					  unsigned int in_sgs,
+					  void *data,
+					  void *ctx,
+					  gfp_t gfp)
+{
+	struct vring_virtqueue *vq = to_vvq(_vq);
+
+	return virtqueue_add_split_premapped(_vq, sgs, total_sg, out_sgs,
+					     in_sgs, data, ctx, gfp);
+}
+
 /**
  * virtqueue_add_sgs - expose buffers to other end
  * @_vq: the struct virtqueue we're talking about.
@@ -2261,6 +2326,31 @@ int virtqueue_add_outbuf(struct virtqueue *vq,
 }
 EXPORT_SYMBOL_GPL(virtqueue_add_outbuf);
 
+/**
+ * virtqueue_add_outbuf_premapped - expose output buffers to other end
+ * @vq: the struct virtqueue we're talking about.
+ * @sg: scatterlist (must be well-formed and terminated!)
+ * @num: the number of entries in @sg readable by other side
+ * @data: the token identifying the buffer.
+ * @gfp: how to do memory allocations (if necessary).
+ *
+ * Caller must ensure we don't call this with other virtqueue operations
+ * at the same time (except where noted).
+ *
+ * It is required that all addrs have completed DMA operations. And use
+ * sg->dma_address, sg->length to pass addr and length.
+ *
+ * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO).
+ */
+int virtqueue_add_outbuf_premapped(struct virtqueue *vq,
+				   struct scatterlist *sg, unsigned int num,
+				   void *data,
+				   gfp_t gfp)
+{
+	return virtqueue_add_premapped(vq, &sg, num, 1, 0, data, NULL, gfp);
+}
+EXPORT_SYMBOL_GPL(virtqueue_add_outbuf_premapped);
+
 /**
  * virtqueue_add_inbuf - expose input buffers to other end
  * @vq: the struct virtqueue we're talking about.
diff --git a/include/linux/virtio.h b/include/linux/virtio.h
index dcab9c7e8784..d8b472a7dcae 100644
--- a/include/linux/virtio.h
+++ b/include/linux/virtio.h
@@ -43,6 +43,11 @@ int virtqueue_add_outbuf(struct virtqueue *vq,
 			 void *data,
 			 gfp_t gfp);
 
+int virtqueue_add_outbuf_premapped(struct virtqueue *vq,
+				   struct scatterlist *sg, unsigned int num,
+				   void *data,
+				   gfp_t gfp);
+
 int virtqueue_add_inbuf(struct virtqueue *vq,
 			struct scatterlist sg[], unsigned int num,
 			void *data,
-- 
2.32.0.3.g01195cf9f

_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply related	[flat|nested] 46+ messages in thread

* [PATCH vhost 05/10] virtio_ring: packed: introduce virtqueue_add_packed_premapped()
  2023-02-14  7:26 [PATCH vhost 00/10] virtio core prepares for AF_XDP Xuan Zhuo
                   ` (3 preceding siblings ...)
  2023-02-14  7:26 ` [PATCH vhost 04/10] virtio_ring: split: introduce virtqueue_add_split_premapped() Xuan Zhuo
@ 2023-02-14  7:26 ` Xuan Zhuo
  2023-02-14  7:27 ` [PATCH vhost 06/10] virtio_ring: introduce virtqueue_add_inbuf_premapped() Xuan Zhuo
                   ` (5 subsequent siblings)
  10 siblings, 0 replies; 46+ messages in thread
From: Xuan Zhuo @ 2023-02-14  7:26 UTC (permalink / raw)
  To: virtualization; +Cc: Michael S. Tsirkin

virtqueue_add_packed() only supports virtual addresses; DMA mapping is
performed inside virtqueue_add_packed() itself.

In some scenarios (such as the AF_XDP scenario), the memory is allocated
and DMA-mapped in advance, so it is necessary for us to support passing
the DMA address to the virtio core.

Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
---
 drivers/virtio/virtio_ring.c | 67 ++++++++++++++++++++++++++++++++----
 1 file changed, 61 insertions(+), 6 deletions(-)

diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index a31155abe101..79244ccbae9e 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -78,6 +78,7 @@ struct vring_desc_state_packed {
 	struct vring_packed_desc *indir_desc; /* Indirect descriptor, if any. */
 	u16 num;			/* Descriptor list length. */
 	u16 last;			/* The last desc state in a list. */
+	bool premapped;
 };
 
 struct vring_desc_extra {
@@ -1318,7 +1319,8 @@ static inline u16 packed_last_used(u16 last_used_idx)
 }
 
 static void vring_unmap_extra_packed(const struct vring_virtqueue *vq,
-				     struct vring_desc_extra *extra)
+				     struct vring_desc_extra *extra,
+				     bool premapped)
 {
 	u16 flags;
 
@@ -1333,6 +1335,9 @@ static void vring_unmap_extra_packed(const struct vring_virtqueue *vq,
 				 (flags & VRING_DESC_F_WRITE) ?
 				 DMA_FROM_DEVICE : DMA_TO_DEVICE);
 	} else {
+		if (premapped)
+			return;
+
 		dma_unmap_page(vring_dma_dev(vq),
 			       extra->addr, extra->len,
 			       (flags & VRING_DESC_F_WRITE) ?
@@ -1382,7 +1387,7 @@ static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq,
 					 struct vring_packed_desc *desc)
 {
 	struct scatterlist *sg;
-	unsigned int i, n, err_idx;
+	unsigned int i, n;
 	u16 head, id;
 	dma_addr_t addr;
 
@@ -1640,6 +1645,51 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq,
 	return err;
 }
 
+static inline int virtqueue_add_packed_premapped(struct virtqueue *_vq,
+						 struct scatterlist *sgs[],
+						 unsigned int total_sg,
+						 unsigned int out_sgs,
+						 unsigned int in_sgs,
+						 void *data,
+						 void *ctx,
+						 gfp_t gfp)
+{
+	struct vring_virtqueue *vq = to_vvq(_vq);
+	struct vring_packed_desc *desc;
+	u16 id;
+	int err;
+
+	START_USE(vq);
+
+	/* check vq state and try to alloc desc for indirect. */
+	err = virtqueue_add_packed_prepare(vq, total_sg, data, ctx, &desc, gfp);
+	if (err)
+		goto end;
+
+	id = vq->free_head;
+
+	if (desc) {
+		err = virtqueue_add_indirect_packed(vq, sgs, total_sg, out_sgs, in_sgs, desc);
+		if (err)
+			goto err;
+	} else {
+		virtqueue_add_packed_vring(vq, sgs, total_sg, out_sgs, in_sgs);
+		vq->packed.desc_state[id].indir_desc = ctx;
+	}
+
+	vq->packed.desc_state[id].data = data;
+	vq->packed.desc_state[id].premapped = true;
+
+	goto end;
+
+err:
+	kfree(desc);
+
+end:
+	END_USE(vq);
+	return err;
+}
+
 static bool virtqueue_kick_prepare_packed(struct virtqueue *_vq)
 {
 	struct vring_virtqueue *vq = to_vvq(_vq);
@@ -1695,8 +1745,10 @@ static void detach_buf_packed(struct vring_virtqueue *vq,
 	struct vring_desc_state_packed *state = NULL;
 	struct vring_packed_desc *desc;
 	unsigned int i, curr;
+	bool premapped;
 
 	state = &vq->packed.desc_state[id];
+	premapped = state->premapped;
 
 	/* Clear data ptr. */
 	state->data = NULL;
@@ -1709,7 +1761,8 @@ static void detach_buf_packed(struct vring_virtqueue *vq,
 		curr = id;
 		for (i = 0; i < state->num; i++) {
 			vring_unmap_extra_packed(vq,
-						 &vq->packed.desc_extra[curr]);
+						 &vq->packed.desc_extra[curr],
+						 premapped);
 			curr = vq->packed.desc_extra[curr].next;
 		}
 	}
@@ -1722,7 +1775,7 @@ static void detach_buf_packed(struct vring_virtqueue *vq,
 		if (!desc)
 			return;
 
-		if (vq->use_dma_api) {
+		if (vq->use_dma_api && !premapped) {
 			len = vq->packed.desc_extra[id].len;
 			for (i = 0; i < len / sizeof(struct vring_packed_desc);
 					i++)
@@ -2265,8 +2318,10 @@ static inline int virtqueue_add_premapped(struct virtqueue *_vq,
 {
 	struct vring_virtqueue *vq = to_vvq(_vq);
 
-	return virtqueue_add_split_premapped(_vq, sgs, total_sg, out_sgs,
-					     in_sgs, data, ctx, gfp);
+	return vq->packed_ring ? virtqueue_add_packed_premapped(_vq, sgs, total_sg, out_sgs,
+								in_sgs, data, ctx, gfp) :
+				virtqueue_add_split_premapped(_vq, sgs, total_sg, out_sgs,
+							      in_sgs, data, ctx, gfp);
 }
 
 /**
-- 
2.32.0.3.g01195cf9f

_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply related	[flat|nested] 46+ messages in thread

* [PATCH vhost 06/10] virtio_ring: introduce virtqueue_add_inbuf_premapped()
  2023-02-14  7:26 [PATCH vhost 00/10] virtio core prepares for AF_XDP Xuan Zhuo
                   ` (4 preceding siblings ...)
  2023-02-14  7:26 ` [PATCH vhost 05/10] virtio_ring: packed: introduce virtqueue_add_packed_premapped() Xuan Zhuo
@ 2023-02-14  7:27 ` Xuan Zhuo
  2023-02-14  7:27 ` [PATCH vhost 07/10] virtio_ring: add api virtio_dma_map() for advance dma Xuan Zhuo
                   ` (4 subsequent siblings)
  10 siblings, 0 replies; 46+ messages in thread
From: Xuan Zhuo @ 2023-02-14  7:27 UTC (permalink / raw)
  To: virtualization; +Cc: Michael S. Tsirkin

Introduce virtqueue_add_inbuf_premapped() to submit premapped sgs.

Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
---
 drivers/virtio/virtio_ring.c | 25 +++++++++++++++++++++++++
 include/linux/virtio.h       |  5 +++++
 2 files changed, 30 insertions(+)

diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 79244ccbae9e..cd9364eb2345 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -2452,6 +2452,31 @@ int virtqueue_add_inbuf_ctx(struct virtqueue *vq,
 }
 EXPORT_SYMBOL_GPL(virtqueue_add_inbuf_ctx);
 
+/**
+ * virtqueue_add_inbuf_premapped - expose input buffers to other end
+ * @vq: the struct virtqueue we're talking about.
+ * @sg: scatterlist (must be well-formed and terminated!)
+ * @num: the number of entries in @sg writable by other side
+ * @data: the token identifying the buffer.
+ * @gfp: how to do memory allocations (if necessary).
+ *
+ * Caller must ensure we don't call this with other virtqueue operations
+ * at the same time (except where noted).
+ *
+ * It is required that all addrs have completed DMA operations. And use
+ * sg->dma_address, sg->length to pass addr and length.
+ *
+ * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO).
+ */
+int virtqueue_add_inbuf_premapped(struct virtqueue *vq,
+				  struct scatterlist *sg, unsigned int num,
+				  void *data,
+				  gfp_t gfp)
+{
+	return virtqueue_add_premapped(vq, &sg, num, 0, 1, data, NULL, gfp);
+}
+EXPORT_SYMBOL_GPL(virtqueue_add_inbuf_premapped);
+
 /**
  * virtqueue_kick_prepare - first half of split virtqueue_kick call.
  * @_vq: the struct virtqueue
diff --git a/include/linux/virtio.h b/include/linux/virtio.h
index d8b472a7dcae..3ebb346ebb7c 100644
--- a/include/linux/virtio.h
+++ b/include/linux/virtio.h
@@ -59,6 +59,11 @@ int virtqueue_add_inbuf_ctx(struct virtqueue *vq,
 			    void *ctx,
 			    gfp_t gfp);
 
+int virtqueue_add_inbuf_premapped(struct virtqueue *vq,
+				  struct scatterlist *sg, unsigned int num,
+				  void *data,
+				  gfp_t gfp);
+
 int virtqueue_add_sgs(struct virtqueue *vq,
 		      struct scatterlist *sgs[],
 		      unsigned int out_sgs,
-- 
2.32.0.3.g01195cf9f

_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply related	[flat|nested] 46+ messages in thread

* [PATCH vhost 07/10] virtio_ring: add api virtio_dma_map() for advance dma
  2023-02-14  7:26 [PATCH vhost 00/10] virtio core prepares for AF_XDP Xuan Zhuo
                   ` (5 preceding siblings ...)
  2023-02-14  7:27 ` [PATCH vhost 06/10] virtio_ring: introduce virtqueue_add_inbuf_premapped() Xuan Zhuo
@ 2023-02-14  7:27 ` Xuan Zhuo
  2023-02-20  5:38   ` Jason Wang
  2023-02-14  7:27 ` [PATCH vhost 08/10] virtio_ring: introduce dma sync api for virtio Xuan Zhuo
                   ` (3 subsequent siblings)
  10 siblings, 1 reply; 46+ messages in thread
From: Xuan Zhuo @ 2023-02-14  7:27 UTC (permalink / raw)
  To: virtualization; +Cc: Michael S. Tsirkin

Added virtio_dma_map() to map DMA addresses for virtual memory in
advance. The purpose is to keep memory mapped across multiple add/get
buf operations.

Added virtio_dma_unmap() to unmap a DMA address.

Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
---
 drivers/virtio/virtio_ring.c | 92 ++++++++++++++++++++++++++++++++++++
 include/linux/virtio.h       |  9 ++++
 2 files changed, 101 insertions(+)

diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index cd9364eb2345..855338609c7f 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -3172,4 +3172,96 @@ const struct vring *virtqueue_get_vring(struct virtqueue *vq)
 }
 EXPORT_SYMBOL_GPL(virtqueue_get_vring);
 
+/**
+ * virtio_dma_map_page - get the DMA addr of the memory for virtio device
+ * @dev: virtio device
+ * @page: the page of the memory to DMA
+ * @offset: the offset of the memory inside page
+ * @length: memory length
+ * @dir: DMA direction
+ *
+ * This API is only for pre-mapped buffers, for non premapped buffers virtio
+ * core handles DMA API internally.
+ *
+ * Returns the DMA addr. DMA_MAPPING_ERROR means error.
+ */
+dma_addr_t virtio_dma_map_page(struct device *dev, struct page *page, size_t offset,
+			       unsigned int length, enum dma_data_direction dir)
+{
+	struct virtio_device *vdev = dev_to_virtio(dev);
+
+	if (!vring_use_dma_api(vdev))
+		return page_to_phys(page) + offset;
+
+	return dma_map_page(vdev->dev.parent, page, offset, length, dir);
+}
+
+/**
+ * virtio_dma_map - get the DMA addr of the memory for virtio device
+ * @dev: virtio device
+ * @addr: the addr to DMA
+ * @length: memory length
+ * @dir: DMA direction
+ *
+ * This API is only for pre-mapped buffers, for non premapped buffers virtio
+ * core handles DMA API internally.
+ *
+ * Returns the DMA addr.
+ */
+dma_addr_t virtio_dma_map(struct device *dev, void *addr, unsigned int length,
+			  enum dma_data_direction dir)
+{
+	struct page *page;
+	size_t offset;
+
+	page = virt_to_page(addr);
+	offset = offset_in_page(addr);
+
+	return virtio_dma_map_page(dev, page, offset, length, dir);
+}
+EXPORT_SYMBOL_GPL(virtio_dma_map);
+
+/**
+ * virtio_dma_mapping_error - check dma address
+ * @dev: virtio device
+ * @addr: DMA address
+ *
+ * This API is only for pre-mapped buffers, for non premapped buffers virtio
+ * core handles DMA API internally.
+ *
+ * Returns 0 means dma valid. Other means invalid dma address.
+ */
+int virtio_dma_mapping_error(struct device *dev, dma_addr_t addr)
+{
+	struct virtio_device *vdev = dev_to_virtio(dev);
+
+	if (!vring_use_dma_api(vdev))
+		return 0;
+
+	return dma_mapping_error(vdev->dev.parent, addr);
+}
+EXPORT_SYMBOL_GPL(virtio_dma_mapping_error);
+
+/**
+ * virtio_dma_unmap - unmap DMA addr
+ * @dev: virtio device
+ * @dma: DMA address
+ * @length: memory length
+ * @dir: DMA direction
+ *
+ * This API is only for pre-mapped buffers, for non premapped buffers virtio
+ * core handles DMA API internally.
+ */
+void virtio_dma_unmap(struct device *dev, dma_addr_t dma, unsigned int length,
+		      enum dma_data_direction dir)
+{
+	struct virtio_device *vdev = dev_to_virtio(dev);
+
+	if (!vring_use_dma_api(vdev))
+		return;
+
+	dma_unmap_page(vdev->dev.parent, dma, length, dir);
+}
+EXPORT_SYMBOL_GPL(virtio_dma_unmap);
+
 MODULE_LICENSE("GPL");
diff --git a/include/linux/virtio.h b/include/linux/virtio.h
index 3ebb346ebb7c..b5fa71476737 100644
--- a/include/linux/virtio.h
+++ b/include/linux/virtio.h
@@ -9,6 +9,7 @@
 #include <linux/device.h>
 #include <linux/mod_devicetable.h>
 #include <linux/gfp.h>
+#include <linux/dma-mapping.h>
 
 /**
  * struct virtqueue - a queue to register buffers for sending or receiving.
@@ -216,4 +217,12 @@ void unregister_virtio_driver(struct virtio_driver *drv);
 #define module_virtio_driver(__virtio_driver) \
 	module_driver(__virtio_driver, register_virtio_driver, \
 			unregister_virtio_driver)
+
+dma_addr_t virtio_dma_map_page(struct device *dev, struct page *page, size_t offset,
+			       unsigned int length, enum dma_data_direction dir);
+dma_addr_t virtio_dma_map(struct device *dev, void *addr, unsigned int length,
+			  enum dma_data_direction dir);
+int virtio_dma_mapping_error(struct device *dev, dma_addr_t addr);
+void virtio_dma_unmap(struct device *dev, dma_addr_t dma, unsigned int length,
+		      enum dma_data_direction dir);
 #endif /* _LINUX_VIRTIO_H */
-- 
2.32.0.3.g01195cf9f

_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply related	[flat|nested] 46+ messages in thread

* [PATCH vhost 08/10] virtio_ring: introduce dma sync api for virtio
  2023-02-14  7:26 [PATCH vhost 00/10] virtio core prepares for AF_XDP Xuan Zhuo
                   ` (6 preceding siblings ...)
  2023-02-14  7:27 ` [PATCH vhost 07/10] virtio_ring: add api virtio_dma_map() for advance dma Xuan Zhuo
@ 2023-02-14  7:27 ` Xuan Zhuo
  2023-02-20  5:38   ` Jason Wang
  2023-02-14  7:27 ` [PATCH vhost 09/10] virtio_ring: correct the expression of the description of virtqueue_resize() Xuan Zhuo
                   ` (2 subsequent siblings)
  10 siblings, 1 reply; 46+ messages in thread
From: Xuan Zhuo @ 2023-02-14  7:27 UTC (permalink / raw)
  To: virtualization; +Cc: Michael S. Tsirkin

These APIs have been introduced:

* virtio_dma_need_sync
* virtio_dma_sync_single_range_for_cpu
* virtio_dma_sync_single_range_for_device

These APIs can be used together with the premapped mechanism to sync the
DMA address.

Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
---
 drivers/virtio/virtio_ring.c | 70 ++++++++++++++++++++++++++++++++++++
 include/linux/virtio.h       |  8 +++++
 2 files changed, 78 insertions(+)

diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 855338609c7f..84129b8c3e2a 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -3264,4 +3264,74 @@ void virtio_dma_unmap(struct device *dev, dma_addr_t dma, unsigned int length,
 }
 EXPORT_SYMBOL_GPL(virtio_dma_unmap);
 
+/**
+ * virtio_dma_need_sync - check a dma address needs sync
+ * @dev: virtio device
+ * @addr: DMA address
+ *
+ * This API is only for pre-mapped buffers, for non premapped buffers virtio
+ * core handles DMA API internally.
+ */
+bool virtio_dma_need_sync(struct device *dev, dma_addr_t addr)
+{
+	struct virtio_device *vdev = dev_to_virtio(dev);
+
+	if (!vring_use_dma_api(vdev))
+		return false;
+
+	return dma_need_sync(vdev->dev.parent, addr);
+}
+EXPORT_SYMBOL_GPL(virtio_dma_need_sync);
+
+/**
+ * virtio_dma_sync_single_range_for_cpu - dma sync for cpu
+ * @dev: virtio device
+ * @addr: DMA address
+ * @offset: DMA address offset
+ * @size: mem size for sync
+ * @dir: DMA direction
+ *
+ * Before calling this function, use virtio_dma_need_sync() to confirm that the
+ * DMA address really needs to be synchronized
+ *
+ * This API is only for pre-mapped buffers, for non premapped buffers virtio
+ * core handles DMA API internally.
+ */
+void virtio_dma_sync_single_range_for_cpu(struct device *dev, dma_addr_t addr,
+					  unsigned long offset, size_t size,
+					  enum dma_data_direction dir)
+{
+	struct virtio_device *vdev = dev_to_virtio(dev);
+
+	dma_sync_single_range_for_cpu(vdev->dev.parent, addr, offset,
+				      size, dir);
+}
+EXPORT_SYMBOL_GPL(virtio_dma_sync_single_range_for_cpu);
+
+/**
+ * virtio_dma_sync_single_range_for_device - dma sync for device
+ * @dev: virtio device
+ * @addr: DMA address
+ * @offset: DMA address offset
+ * @size: mem size for sync
+ * @dir: DMA direction
+ *
+ * Before calling this function, use virtio_dma_need_sync() to confirm that the
+ * DMA address really needs to be synchronized
+ *
+ * This API is only for pre-mapped buffers, for non premapped buffers virtio
+ * core handles DMA API internally.
+ */
+void virtio_dma_sync_single_range_for_device(struct device *dev,
+					     dma_addr_t addr,
+					     unsigned long offset, size_t size,
+					     enum dma_data_direction dir)
+{
+	struct virtio_device *vdev = dev_to_virtio(dev);
+
+	dma_sync_single_range_for_device(vdev->dev.parent, addr, offset,
+					 size, dir);
+}
+EXPORT_SYMBOL_GPL(virtio_dma_sync_single_range_for_device);
+
 MODULE_LICENSE("GPL");
diff --git a/include/linux/virtio.h b/include/linux/virtio.h
index b5fa71476737..d0e707d744a0 100644
--- a/include/linux/virtio.h
+++ b/include/linux/virtio.h
@@ -225,4 +225,12 @@ dma_addr_t virtio_dma_map(struct device *dev, void *addr, unsigned int length,
 int virtio_dma_mapping_error(struct device *dev, dma_addr_t addr);
 void virtio_dma_unmap(struct device *dev, dma_addr_t dma, unsigned int length,
 		      enum dma_data_direction dir);
+bool virtio_dma_need_sync(struct device *dev, dma_addr_t addr);
+void virtio_dma_sync_single_range_for_cpu(struct device *dev, dma_addr_t addr,
+					  unsigned long offset, size_t size,
+					  enum dma_data_direction dir);
+void virtio_dma_sync_single_range_for_device(struct device *dev,
+					     dma_addr_t addr,
+					     unsigned long offset, size_t size,
+					     enum dma_data_direction dir);
 #endif /* _LINUX_VIRTIO_H */
-- 
2.32.0.3.g01195cf9f

_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply related	[flat|nested] 46+ messages in thread

* [PATCH vhost 09/10] virtio_ring: correct the expression of the description of virtqueue_resize()
  2023-02-14  7:26 [PATCH vhost 00/10] virtio core prepares for AF_XDP Xuan Zhuo
                   ` (7 preceding siblings ...)
  2023-02-14  7:27 ` [PATCH vhost 08/10] virtio_ring: introduce dma sync api for virtio Xuan Zhuo
@ 2023-02-14  7:27 ` Xuan Zhuo
  2023-02-20  5:38   ` Jason Wang
  2023-02-14  7:27 ` [PATCH vhost 10/10] virtio_ring: introduce virtqueue_reset() Xuan Zhuo
  2023-02-16  5:27 ` [PATCH vhost 00/10] virtio core prepares for AF_XDP Jason Wang
  10 siblings, 1 reply; 46+ messages in thread
From: Xuan Zhuo @ 2023-02-14  7:27 UTC (permalink / raw)
  To: virtualization; +Cc: Michael S. Tsirkin

Replace the inaccurate word "useless" with the more accurate "unused".

Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
---
 drivers/virtio/virtio_ring.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 84129b8c3e2a..2ba60a14f557 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -2865,7 +2865,7 @@ EXPORT_SYMBOL_GPL(vring_create_virtqueue_dma);
  * virtqueue_resize - resize the vring of vq
  * @_vq: the struct virtqueue we're talking about.
  * @num: new ring num
- * @recycle: callback for recycle the useless buffer
+ * @recycle: callback to recycle unused buffers
  *
  * When it is really necessary to create a new vring, it will set the current vq
  * into the reset state. Then call the passed callback to recycle the buffer
-- 
2.32.0.3.g01195cf9f

_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply related	[flat|nested] 46+ messages in thread

* [PATCH vhost 10/10] virtio_ring: introduce virtqueue_reset()
  2023-02-14  7:26 [PATCH vhost 00/10] virtio core prepares for AF_XDP Xuan Zhuo
                   ` (8 preceding siblings ...)
  2023-02-14  7:27 ` [PATCH vhost 09/10] virtio_ring: correct the expression of the description of virtqueue_resize() Xuan Zhuo
@ 2023-02-14  7:27 ` Xuan Zhuo
  2023-02-20  5:38   ` Jason Wang
  2023-02-16  5:27 ` [PATCH vhost 00/10] virtio core prepares for AF_XDP Jason Wang
  10 siblings, 1 reply; 46+ messages in thread
From: Xuan Zhuo @ 2023-02-14  7:27 UTC (permalink / raw)
  To: virtualization; +Cc: Michael S. Tsirkin

Introduce virtqueue_reset() to detach and recycle all unused buffers inside a vq.

Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
---
 drivers/virtio/virtio_ring.c | 50 ++++++++++++++++++++++++++++++++++++
 include/linux/virtio.h       |  2 ++
 2 files changed, 52 insertions(+)

diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 2ba60a14f557..2750a365439a 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -2930,6 +2930,56 @@ int virtqueue_resize(struct virtqueue *_vq, u32 num,
 }
 EXPORT_SYMBOL_GPL(virtqueue_resize);
 
+/**
+ * virtqueue_reset - detach and recycle all unused buffers
+ * @_vq: the struct virtqueue we're talking about.
+ * @recycle: callback to recycle unused buffers
+ *
+ * Caller must ensure we don't call this with other virtqueue operations
+ * at the same time (except where noted).
+ *
+ * Returns zero or a negative error.
+ * 0: success.
+ * -EBUSY: Failed to sync with device, vq may not work properly
+ * -ENOENT: Transport or device not supported
+ * -EPERM: Operation not permitted
+ */
+int virtqueue_reset(struct virtqueue *_vq,
+		    void (*recycle)(struct virtqueue *vq, void *buf))
+{
+	struct vring_virtqueue *vq = to_vvq(_vq);
+	struct virtio_device *vdev = vq->vq.vdev;
+	void *buf;
+	int err;
+
+	if (!vq->we_own_ring)
+		return -EPERM;
+
+	if (!vdev->config->disable_vq_and_reset)
+		return -ENOENT;
+
+	if (!vdev->config->enable_vq_after_reset)
+		return -ENOENT;
+
+	err = vdev->config->disable_vq_and_reset(_vq);
+	if (err)
+		return err;
+
+	while ((buf = virtqueue_detach_unused_buf(_vq)) != NULL)
+		recycle(_vq, buf);
+
+	if (vq->packed_ring)
+		virtqueue_reinit_packed(vq);
+	else
+		virtqueue_reinit_split(vq);
+
+	if (vdev->config->enable_vq_after_reset(_vq))
+		return -EBUSY;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(virtqueue_reset);
+
 /* Only available for split ring */
 struct virtqueue *vring_new_virtqueue(unsigned int index,
 				      unsigned int num,
diff --git a/include/linux/virtio.h b/include/linux/virtio.h
index d0e707d744a0..cf4c157e4e75 100644
--- a/include/linux/virtio.h
+++ b/include/linux/virtio.h
@@ -106,6 +106,8 @@ dma_addr_t virtqueue_get_used_addr(struct virtqueue *vq);
 
 int virtqueue_resize(struct virtqueue *vq, u32 num,
 		     void (*recycle)(struct virtqueue *vq, void *buf));
+int virtqueue_reset(struct virtqueue *vq,
+		    void (*recycle)(struct virtqueue *vq, void *buf));
 
 /**
  * struct virtio_device - representation of a device using virtio
-- 
2.32.0.3.g01195cf9f

_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply related	[flat|nested] 46+ messages in thread

* Re: [PATCH vhost 00/10] virtio core prepares for AF_XDP
  2023-02-14  7:26 [PATCH vhost 00/10] virtio core prepares for AF_XDP Xuan Zhuo
                   ` (9 preceding siblings ...)
  2023-02-14  7:27 ` [PATCH vhost 10/10] virtio_ring: introduce virtqueue_reset() Xuan Zhuo
@ 2023-02-16  5:27 ` Jason Wang
  2023-02-16 11:46   ` Xuan Zhuo
  10 siblings, 1 reply; 46+ messages in thread
From: Jason Wang @ 2023-02-16  5:27 UTC (permalink / raw)
  To: Xuan Zhuo; +Cc: Michael S. Tsirkin, virtualization

On Tue, Feb 14, 2023 at 3:27 PM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
>
> XDP socket(AF_XDP) is an excellent bypass kernel network framework. The zero
> copy feature of xsk (XDP socket) needs to be supported by the driver. The
> performance of zero copy is very good.
>
> ENV: Qemu with vhost.
>
>                    vhost cpu | Guest APP CPU |Guest Softirq CPU | PPS
> -----------------------------|---------------|------------------|------------
> xmit by sockperf:     90%    |   100%        |                  |  318967
> xmit by xsk:          100%   |   30%         |   33%            | 1192064

What's the setup of this test?

CPU model/frequency, packet size, zerocopy enabled or not.

(I remember I can get better performance with my old laptop through
pktgen (about 2Mpps))

Thanks

> recv by sockperf:     100%   |   68%         |   100%           |  692288
> recv by xsk:          100%   |   33%         |   43%            |  771670
>
> Before achieving the function of Virtio-Net, we also have to let virtio core
> support these features:
>
> 1. virtio core support premapped
> 2. virtio core support reset per-queue
> 3. introduce DMA APIs to virtio core
>
> Please review.
>
> Thanks.
>
> Xuan Zhuo (10):
>   virtio_ring: split: refactor virtqueue_add_split() for premapped
>   virtio_ring: packed: separate prepare code from
>     virtuque_add_indirect_packed()
>   virtio_ring: packed: refactor virtqueue_add_packed() for premapped
>   virtio_ring: split: introduce virtqueue_add_split_premapped()
>   virtio_ring: packed: introduce virtqueue_add_packed_premapped()
>   virtio_ring: introduce virtqueue_add_inbuf_premapped()
>   virtio_ring: add api virtio_dma_map() for advance dma
>   virtio_ring: introduce dma sync api for virtio
>   virtio_ring: correct the expression of the description of
>     virtqueue_resize()
>   virtio_ring: introduce virtqueue_reset()
>
>  drivers/virtio/virtio_ring.c | 792 ++++++++++++++++++++++++++++-------
>  include/linux/virtio.h       |  29 ++
>  2 files changed, 659 insertions(+), 162 deletions(-)
>
> --
> 2.32.0.3.g01195cf9f
>

_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH vhost 00/10] virtio core prepares for AF_XDP
  2023-02-16  5:27 ` [PATCH vhost 00/10] virtio core prepares for AF_XDP Jason Wang
@ 2023-02-16 11:46   ` Xuan Zhuo
  2023-02-17  5:23     ` Jason Wang
  0 siblings, 1 reply; 46+ messages in thread
From: Xuan Zhuo @ 2023-02-16 11:46 UTC (permalink / raw)
  To: Jason Wang; +Cc: Michael S. Tsirkin, virtualization

On Thu, 16 Feb 2023 13:27:00 +0800, Jason Wang <jasowang@redhat.com> wrote:
> On Tue, Feb 14, 2023 at 3:27 PM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> >
> > XDP socket(AF_XDP) is an excellent bypass kernel network framework. The zero
> > copy feature of xsk (XDP socket) needs to be supported by the driver. The
> > performance of zero copy is very good.
> >
> > ENV: Qemu with vhost.
> >
> >                    vhost cpu | Guest APP CPU |Guest Softirq CPU | PPS
> > -----------------------------|---------------|------------------|------------
> > xmit by sockperf:     90%    |   100%        |                  |  318967
> > xmit by xsk:          100%   |   30%         |   33%            | 1192064
>
> What's the setup of this test?
>
> CPU model/frequency, packet size, zerocopy enabled or not.

Intel(R) Xeon(R) Platinum 8163 CPU @ 2.50GHz

zerocopy: enabled

size: 64


>
> (I remember I can get better performance with my old laptop through
> pktgen (about 2Mpps))

Let's compare sockperf just.

The result of the test on Alibaba Cloud was 3.5M+PPS/60%cpu.

Thanks.


>
> Thanks
>
> > recv by sockperf:     100%   |   68%         |   100%           |  692288
> > recv by xsk:          100%   |   33%         |   43%            |  771670
> >
> > Before achieving the function of Virtio-Net, we also have to let virtio core
> > support these features:
> >
> > 1. virtio core support premapped
> > 2. virtio core support reset per-queue
> > 3. introduce DMA APIs to virtio core
> >
> > Please review.
> >
> > Thanks.
> >
> > Xuan Zhuo (10):
> >   virtio_ring: split: refactor virtqueue_add_split() for premapped
> >   virtio_ring: packed: separate prepare code from
> >     virtuque_add_indirect_packed()
> >   virtio_ring: packed: refactor virtqueue_add_packed() for premapped
> >   virtio_ring: split: introduce virtqueue_add_split_premapped()
> >   virtio_ring: packed: introduce virtqueue_add_packed_premapped()
> >   virtio_ring: introduce virtqueue_add_inbuf_premapped()
> >   virtio_ring: add api virtio_dma_map() for advance dma
> >   virtio_ring: introduce dma sync api for virtio
> >   virtio_ring: correct the expression of the description of
> >     virtqueue_resize()
> >   virtio_ring: introduce virtqueue_reset()
> >
> >  drivers/virtio/virtio_ring.c | 792 ++++++++++++++++++++++++++++-------
> >  include/linux/virtio.h       |  29 ++
> >  2 files changed, 659 insertions(+), 162 deletions(-)
> >
> > --
> > 2.32.0.3.g01195cf9f
> >
>
_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH vhost 00/10] virtio core prepares for AF_XDP
  2023-02-16 11:46   ` Xuan Zhuo
@ 2023-02-17  5:23     ` Jason Wang
  2023-02-17  9:02       ` Xuan Zhuo
  0 siblings, 1 reply; 46+ messages in thread
From: Jason Wang @ 2023-02-17  5:23 UTC (permalink / raw)
  To: Xuan Zhuo; +Cc: Michael S. Tsirkin, virtualization

On Thu, Feb 16, 2023 at 7:50 PM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
>
> On Thu, 16 Feb 2023 13:27:00 +0800, Jason Wang <jasowang@redhat.com> wrote:
> > On Tue, Feb 14, 2023 at 3:27 PM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> > >
> > > XDP socket(AF_XDP) is an excellent bypass kernel network framework. The zero
> > > copy feature of xsk (XDP socket) needs to be supported by the driver. The
> > > performance of zero copy is very good.
> > >
> > > ENV: Qemu with vhost.
> > >
> > >                    vhost cpu | Guest APP CPU |Guest Softirq CPU | PPS
> > > -----------------------------|---------------|------------------|------------
> > > xmit by sockperf:     90%    |   100%        |                  |  318967
> > > xmit by xsk:          100%   |   30%         |   33%            | 1192064
> >
> > What's the setup of this test?
> >
> > CPU model/frequency, packet size, zerocopy enabled or not.
>
> Intel(R) Xeon(R) Platinum 8163 CPU @ 2.50GHz
>
> zerocopy: enabled
>
> size: 64
>
>
> >
> > (I remember I can get better performance with my old laptop through
> > pktgen (about 2Mpps))
>
> Let's compare sockperf just.
>
> The result of the test on Alibaba Cloud was 3.5M+PPS/60%cpu.

Just to make sure I understand here, the above said:

 xmit by sockperf:     90%    |   100%        |                  |  318967

It's 0.3 Mpps, what's the difference between those two?

Thanks

>
> Thanks.
>
>
> >
> > Thanks
> >
> > > recv by sockperf:     100%   |   68%         |   100%           |  692288
> > > recv by xsk:          100%   |   33%         |   43%            |  771670
> > >
> > > Before achieving the function of Virtio-Net, we also have to let virtio core
> > > support these features:
> > >
> > > 1. virtio core support premapped
> > > 2. virtio core support reset per-queue
> > > 3. introduce DMA APIs to virtio core
> > >
> > > Please review.
> > >
> > > Thanks.
> > >
> > > Xuan Zhuo (10):
> > >   virtio_ring: split: refactor virtqueue_add_split() for premapped
> > >   virtio_ring: packed: separate prepare code from
> > >     virtuque_add_indirect_packed()
> > >   virtio_ring: packed: refactor virtqueue_add_packed() for premapped
> > >   virtio_ring: split: introduce virtqueue_add_split_premapped()
> > >   virtio_ring: packed: introduce virtqueue_add_packed_premapped()
> > >   virtio_ring: introduce virtqueue_add_inbuf_premapped()
> > >   virtio_ring: add api virtio_dma_map() for advance dma
> > >   virtio_ring: introduce dma sync api for virtio
> > >   virtio_ring: correct the expression of the description of
> > >     virtqueue_resize()
> > >   virtio_ring: introduce virtqueue_reset()
> > >
> > >  drivers/virtio/virtio_ring.c | 792 ++++++++++++++++++++++++++++-------
> > >  include/linux/virtio.h       |  29 ++
> > >  2 files changed, 659 insertions(+), 162 deletions(-)
> > >
> > > --
> > > 2.32.0.3.g01195cf9f
> > >
> >
>

_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH vhost 00/10] virtio core prepares for AF_XDP
  2023-02-17  5:23     ` Jason Wang
@ 2023-02-17  9:02       ` Xuan Zhuo
  0 siblings, 0 replies; 46+ messages in thread
From: Xuan Zhuo @ 2023-02-17  9:02 UTC (permalink / raw)
  To: Jason Wang; +Cc: Michael S. Tsirkin, virtualization

On Fri, 17 Feb 2023 13:23:14 +0800, Jason Wang <jasowang@redhat.com> wrote:
> On Thu, Feb 16, 2023 at 7:50 PM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> >
> > On Thu, 16 Feb 2023 13:27:00 +0800, Jason Wang <jasowang@redhat.com> wrote:
> > > On Tue, Feb 14, 2023 at 3:27 PM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> > > >
> > > > XDP socket(AF_XDP) is an excellent bypass kernel network framework. The zero
> > > > copy feature of xsk (XDP socket) needs to be supported by the driver. The
> > > > performance of zero copy is very good.
> > > >
> > > > ENV: Qemu with vhost.
> > > >
> > > >                    vhost cpu | Guest APP CPU |Guest Softirq CPU | PPS
> > > > -----------------------------|---------------|------------------|------------
> > > > xmit by sockperf:     90%    |   100%        |                  |  318967
> > > > xmit by xsk:          100%   |   30%         |   33%            | 1192064
> > >
> > > What's the setup of this test?
> > >
> > > CPU model/frequency, packet size, zerocopy enabled or not.
> >
> > Intel(R) Xeon(R) Platinum 8163 CPU @ 2.50GHz
> >
> > zerocopy: enabled
> >
> > size: 64
> >
> >
> > >
> > > (I remember I can get better performance with my old laptop through
> > > pktgen (about 2Mpps))
> >
> > Let's compare sockperf just.
> >
> > The result of the test on Alibaba Cloud was 3.5M+PPS/60%cpu.
>
> Just to make sure I understand here, the above said:
>

sockperf: https://github.com/Mellanox/sockperf

It should be my problem, I didn't make it clear.

sockperf uses the sendto() syscall to send udp packets.
xsk sends udp via AF_XDP. I wrote an app with AF_XDP.

Thanks.


>  xmit by sockperf:     90%    |   100%        |                  |  318967
>
> It's 0.3 Mpps, what's the difference between those two?
>
> Thanks
>
> >
> > Thanks.
> >
> >
> > >
> > > Thanks
> > >
> > > > recv by sockperf:     100%   |   68%         |   100%           |  692288
> > > > recv by xsk:          100%   |   33%         |   43%            |  771670
> > > >
> > > > Before achieving the function of Virtio-Net, we also have to let virtio core
> > > > support these features:
> > > >
> > > > 1. virtio core support premapped
> > > > 2. virtio core support reset per-queue
> > > > 3. introduce DMA APIs to virtio core
> > > >
> > > > Please review.
> > > >
> > > > Thanks.
> > > >
> > > > Xuan Zhuo (10):
> > > >   virtio_ring: split: refactor virtqueue_add_split() for premapped
> > > >   virtio_ring: packed: separate prepare code from
> > > >     virtuque_add_indirect_packed()
> > > >   virtio_ring: packed: refactor virtqueue_add_packed() for premapped
> > > >   virtio_ring: split: introduce virtqueue_add_split_premapped()
> > > >   virtio_ring: packed: introduce virtqueue_add_packed_premapped()
> > > >   virtio_ring: introduce virtqueue_add_inbuf_premapped()
> > > >   virtio_ring: add api virtio_dma_map() for advance dma
> > > >   virtio_ring: introduce dma sync api for virtio
> > > >   virtio_ring: correct the expression of the description of
> > > >     virtqueue_resize()
> > > >   virtio_ring: introduce virtqueue_reset()
> > > >
> > > >  drivers/virtio/virtio_ring.c | 792 ++++++++++++++++++++++++++++-------
> > > >  include/linux/virtio.h       |  29 ++
> > > >  2 files changed, 659 insertions(+), 162 deletions(-)
> > > >
> > > > --
> > > > 2.32.0.3.g01195cf9f
> > > >
> > >
> >
>
_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH vhost 01/10] virtio_ring: split: refactor virtqueue_add_split() for premapped
  2023-02-14  7:26 ` [PATCH vhost 01/10] virtio_ring: split: refactor virtqueue_add_split() for premapped Xuan Zhuo
@ 2023-02-20  5:37   ` Jason Wang
  2023-02-20  6:57     ` Xuan Zhuo
  2023-02-20 12:12     ` Michael S. Tsirkin
  0 siblings, 2 replies; 46+ messages in thread
From: Jason Wang @ 2023-02-20  5:37 UTC (permalink / raw)
  To: Xuan Zhuo; +Cc: Michael S. Tsirkin, virtualization

On Tue, Feb 14, 2023 at 3:27 PM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
>
> DMA-related logic is separated from the virtqueue_add_split to prepare
> for subsequent support for premapped.

The patch seems to do more than what is described here.

To simplify reviewers, I'd suggest to split this patch into three:

1) virtqueue_add_split_prepare() (could we have a better name?)
2) virtqueue_map_sgs()
3) virtqueue_add_split_vring()

(Or only factor DMA parts out, I haven't gone through the rest of the patches)

Thanks


>
> Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
> ---
>  drivers/virtio/virtio_ring.c | 219 ++++++++++++++++++++++++-----------
>  1 file changed, 152 insertions(+), 67 deletions(-)
>
> diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
> index 41144b5246a8..560ee30d942c 100644
> --- a/drivers/virtio/virtio_ring.c
> +++ b/drivers/virtio/virtio_ring.c
> @@ -520,29 +520,83 @@ static inline unsigned int virtqueue_add_desc_split(struct virtqueue *vq,
>         return next;
>  }
>
> -static inline int virtqueue_add_split(struct virtqueue *_vq,
> -                                     struct scatterlist *sgs[],
> -                                     unsigned int total_sg,
> -                                     unsigned int out_sgs,
> -                                     unsigned int in_sgs,
> -                                     void *data,
> -                                     void *ctx,
> -                                     gfp_t gfp)
> +static int virtqueue_map_sgs(struct vring_virtqueue *vq,
> +                            struct scatterlist *sgs[],
> +                            unsigned int total_sg,
> +                            unsigned int out_sgs,
> +                            unsigned int in_sgs)
>  {
> -       struct vring_virtqueue *vq = to_vvq(_vq);
>         struct scatterlist *sg;
> -       struct vring_desc *desc;
> -       unsigned int i, n, avail, descs_used, prev, err_idx;
> -       int head;
> -       bool indirect;
> +       unsigned int n;
>
> -       START_USE(vq);
> +       for (n = 0; n < out_sgs; n++) {
> +               for (sg = sgs[n]; sg; sg = sg_next(sg)) {
> +                       dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_TO_DEVICE);
> +
> +                       if (vring_mapping_error(vq, addr))
> +                               return -ENOMEM;
> +
> +                       sg->dma_address = addr;
> +               }
> +       }
> +       for (; n < (out_sgs + in_sgs); n++) {
> +               for (sg = sgs[n]; sg; sg = sg_next(sg)) {
> +                       dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_FROM_DEVICE);
> +
> +                       if (vring_mapping_error(vq, addr))
> +                               return -ENOMEM;
> +
> +                       sg->dma_address = addr;
> +               }
> +       }
> +
> +       return 0;
> +}
> +
> +static void virtqueue_unmap_sgs(struct vring_virtqueue *vq,
> +                               struct scatterlist *sgs[],
> +                               unsigned int total_sg,
> +                               unsigned int out_sgs,
> +                               unsigned int in_sgs)
> +{
> +       struct scatterlist *sg;
> +       unsigned int n;
> +
> +       for (n = 0; n < out_sgs; n++) {
> +               for (sg = sgs[n]; sg; sg = sg_next(sg)) {
> +                       if (!sg->dma_address)
> +                               return;
> +
> +                       dma_unmap_single(vring_dma_dev(vq), sg->dma_address,
> +                                        sg->length, DMA_TO_DEVICE);
> +               }
> +       }
> +       for (; n < (out_sgs + in_sgs); n++) {
> +               for (sg = sgs[n]; sg; sg = sg_next(sg)) {
> +                       if (!sg->dma_address)
> +                               return;
> +
> +                       dma_unmap_single(vring_dma_dev(vq), sg->dma_address,
> +                                        sg->length, DMA_FROM_DEVICE);
> +               }
> +       }
> +}
> +
> +static inline int virtqueue_add_split_prepare(struct vring_virtqueue *vq,
> +                                             unsigned int total_sg,
> +                                             unsigned int out_sgs,
> +                                             void *data,
> +                                             void *ctx,
> +                                             gfp_t gfp,
> +                                             struct vring_desc **pdesc)
> +{
> +       struct vring_desc *desc;
> +       unsigned int descs_used;
>
>         BUG_ON(data == NULL);
>         BUG_ON(ctx && vq->indirect);
>
>         if (unlikely(vq->broken)) {
> -               END_USE(vq);
>                 return -EIO;
>         }
>
> @@ -550,27 +604,17 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,
>
>         BUG_ON(total_sg == 0);
>
> -       head = vq->free_head;
> -
>         if (virtqueue_use_indirect(vq, total_sg))
> -               desc = alloc_indirect_split(_vq, total_sg, gfp);
> +               desc = alloc_indirect_split(&vq->vq, total_sg, gfp);
>         else {
>                 desc = NULL;
>                 WARN_ON_ONCE(total_sg > vq->split.vring.num && !vq->indirect);
>         }
>
> -       if (desc) {
> -               /* Use a single buffer which doesn't continue */
> -               indirect = true;
> -               /* Set up rest to use this indirect table. */
> -               i = 0;
> +       if (desc)
>                 descs_used = 1;
> -       } else {
> -               indirect = false;
> -               desc = vq->split.vring.desc;
> -               i = head;
> +       else
>                 descs_used = total_sg;
> -       }
>
>         if (unlikely(vq->vq.num_free < descs_used)) {
>                 pr_debug("Can't add buf len %i - avail = %i\n",
> @@ -580,38 +624,64 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,
>                  * host should service the ring ASAP. */
>                 if (out_sgs)
>                         vq->notify(&vq->vq);
> -               if (indirect)
> -                       kfree(desc);
> -               END_USE(vq);
> +               kfree(desc);
>                 return -ENOSPC;
>         }
>
> +       *pdesc = desc;
> +
> +       return 0;
> +}
> +
> +static inline int virtqueue_add_split_vring(struct vring_virtqueue *vq,
> +                                           struct scatterlist *sgs[],
> +                                           unsigned int total_sg,
> +                                           unsigned int out_sgs,
> +                                           unsigned int in_sgs,
> +                                           struct vring_desc *desc)
> +{
> +       unsigned int n, i, avail, descs_used, prev;
> +       struct virtqueue *_vq = &vq->vq;
> +       struct scatterlist *sg;
> +       bool indirect;
> +       int head;
> +
> +       head = vq->free_head;
> +
> +       if (desc) {
> +               /* Use a single buffer which doesn't continue */
> +               indirect = true;
> +               /* Set up rest to use this indirect table. */
> +               i = 0;
> +               descs_used = 1;
> +       } else {
> +               indirect = false;
> +               desc = vq->split.vring.desc;
> +               i = head;
> +               descs_used = total_sg;
> +       }
> +
>         for (n = 0; n < out_sgs; n++) {
>                 for (sg = sgs[n]; sg; sg = sg_next(sg)) {
> -                       dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_TO_DEVICE);
> -                       if (vring_mapping_error(vq, addr))
> -                               goto unmap_release;
> -
>                         prev = i;
>                         /* Note that we trust indirect descriptor
>                          * table since it use stream DMA mapping.
>                          */
> -                       i = virtqueue_add_desc_split(_vq, desc, i, addr, sg->length,
> +                       i = virtqueue_add_desc_split(_vq, desc, i,
> +                                                    sg->dma_address,
> +                                                    sg->length,
>                                                      VRING_DESC_F_NEXT,
>                                                      indirect);
>                 }
>         }
>         for (; n < (out_sgs + in_sgs); n++) {
>                 for (sg = sgs[n]; sg; sg = sg_next(sg)) {
> -                       dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_FROM_DEVICE);
> -                       if (vring_mapping_error(vq, addr))
> -                               goto unmap_release;
> -
>                         prev = i;
>                         /* Note that we trust indirect descriptor
>                          * table since it use stream DMA mapping.
>                          */
> -                       i = virtqueue_add_desc_split(_vq, desc, i, addr,
> +                       i = virtqueue_add_desc_split(_vq, desc, i,
> +                                                    sg->dma_address,
>                                                      sg->length,
>                                                      VRING_DESC_F_NEXT |
>                                                      VRING_DESC_F_WRITE,
> @@ -630,7 +700,7 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,
>                         vq, desc, total_sg * sizeof(struct vring_desc),
>                         DMA_TO_DEVICE);
>                 if (vring_mapping_error(vq, addr))
> -                       goto unmap_release;
> +                       return -ENOMEM;
>
>                 virtqueue_add_desc_split(_vq, vq->split.vring.desc,
>                                          head, addr,
> @@ -648,13 +718,6 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,
>         else
>                 vq->free_head = i;
>
> -       /* Store token and indirect buffer state. */
> -       vq->split.desc_state[head].data = data;
> -       if (indirect)
> -               vq->split.desc_state[head].indir_desc = desc;
> -       else
> -               vq->split.desc_state[head].indir_desc = ctx;
> -
>         /* Put entry in available array (but don't update avail->idx until they
>          * do sync). */
>         avail = vq->split.avail_idx_shadow & (vq->split.vring.num - 1);
> @@ -677,30 +740,52 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,
>                 virtqueue_kick(_vq);
>
>         return 0;
> +}
>
> -unmap_release:
> -       err_idx = i;
> +static inline int virtqueue_add_split(struct virtqueue *_vq,
> +                                     struct scatterlist *sgs[],
> +                                     unsigned int total_sg,
> +                                     unsigned int out_sgs,
> +                                     unsigned int in_sgs,
> +                                     void *data,
> +                                     void *ctx,
> +                                     gfp_t gfp)
> +{
> +       struct vring_virtqueue *vq = to_vvq(_vq);
> +       struct vring_desc *desc;
> +       int head;
> +       int err;
>
> -       if (indirect)
> -               i = 0;
> -       else
> -               i = head;
> +       START_USE(vq);
>
> -       for (n = 0; n < total_sg; n++) {
> -               if (i == err_idx)
> -                       break;
> -               if (indirect) {
> -                       vring_unmap_one_split_indirect(vq, &desc[i]);
> -                       i = virtio16_to_cpu(_vq->vdev, desc[i].next);
> -               } else
> -                       i = vring_unmap_one_split(vq, i);
> -       }
> +       /* check vq state and try to alloc desc for indirect. */
> +       err = virtqueue_add_split_prepare(vq, total_sg, out_sgs, data, ctx, gfp, &desc);
> +       if (err)
> +               goto end;
>
> -       if (indirect)
> -               kfree(desc);
> +       err = virtqueue_map_sgs(vq, sgs, total_sg, out_sgs, in_sgs);
> +       if (err)
> +               goto err;
>
> +       head = vq->free_head;
> +       err = virtqueue_add_split_vring(vq, sgs, total_sg, out_sgs, in_sgs, desc);
> +       if (err)
> +               goto err;
> +
> +       /* Store token and indirect buffer state. */
> +       vq->split.desc_state[head].data = data;
> +       vq->split.desc_state[head].indir_desc = desc ? desc : ctx;
> +
> +       goto end;
> +
> +err:
> +       virtqueue_unmap_sgs(vq, sgs, total_sg, out_sgs, in_sgs);
> +
> +       kfree(desc);
> +
> +end:
>         END_USE(vq);
> -       return -ENOMEM;
> +       return err;
>  }
>
>  static bool virtqueue_kick_prepare_split(struct virtqueue *_vq)
> --
> 2.32.0.3.g01195cf9f
>

_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH vhost 02/10] virtio_ring: packed: separate prepare code from virtuque_add_indirect_packed()
  2023-02-14  7:26 ` [PATCH vhost 02/10] virtio_ring: packed: separate prepare code from virtuque_add_indirect_packed() Xuan Zhuo
@ 2023-02-20  5:37   ` Jason Wang
  2023-02-20  6:56     ` Xuan Zhuo
  0 siblings, 1 reply; 46+ messages in thread
From: Jason Wang @ 2023-02-20  5:37 UTC (permalink / raw)
  To: Xuan Zhuo; +Cc: Michael S. Tsirkin, virtualization

On Tue, Feb 14, 2023 at 3:27 PM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
>
> Separating the logic of allocating indirect desc and checking queue
> status to the upper layer function.
>
> The proposal of this is convenient to refactor virtqueue_add_packed()
> for premapped.
>

Ok, so this is another hint that we should do the same thing for split.

Thanks


> Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
> ---
>  drivers/virtio/virtio_ring.c | 29 ++++++++++++-----------------
>  1 file changed, 12 insertions(+), 17 deletions(-)
>
> diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
> index 560ee30d942c..42b1ff87518e 100644
> --- a/drivers/virtio/virtio_ring.c
> +++ b/drivers/virtio/virtio_ring.c
> @@ -1330,25 +1330,14 @@ static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq,
>                                          unsigned int out_sgs,
>                                          unsigned int in_sgs,
>                                          void *data,
> -                                        gfp_t gfp)
> +                                        struct vring_packed_desc *desc)
>  {
> -       struct vring_packed_desc *desc;
>         struct scatterlist *sg;
>         unsigned int i, n, err_idx;
>         u16 head, id;
>         dma_addr_t addr;
>
>         head = vq->packed.next_avail_idx;
> -       desc = alloc_indirect_packed(total_sg, gfp);
> -       if (!desc)
> -               return -ENOMEM;
> -
> -       if (unlikely(vq->vq.num_free < 1)) {
> -               pr_debug("Can't add buf len 1 - avail = 0\n");
> -               kfree(desc);
> -               END_USE(vq);
> -               return -ENOSPC;
> -       }
>
>         i = 0;
>         id = vq->free_head;
> @@ -1470,11 +1459,17 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq,
>         BUG_ON(total_sg == 0);
>
>         if (virtqueue_use_indirect(vq, total_sg)) {
> -               err = virtqueue_add_indirect_packed(vq, sgs, total_sg, out_sgs,
> -                                                   in_sgs, data, gfp);
> -               if (err != -ENOMEM) {
> -                       END_USE(vq);
> -                       return err;
> +               desc = alloc_indirect_packed(total_sg, gfp);
> +               if (desc) {
> +                       if (unlikely(vq->vq.num_free < 1)) {
> +                               pr_debug("Can't add buf len 1 - avail = 0\n");
> +                               kfree(desc);
> +                               END_USE(vq);
> +                               return -ENOSPC;
> +                       }
> +
> +                       return virtqueue_add_indirect_packed(vq, sgs, total_sg, out_sgs,
> +                                                            in_sgs, data, desc);
>                 }
>
>                 /* fall back on direct */
> --
> 2.32.0.3.g01195cf9f
>

_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH vhost 03/10] virtio_ring: packed: refactor virtqueue_add_packed() for premapped
  2023-02-14  7:26 ` [PATCH vhost 03/10] virtio_ring: packed: refactor virtqueue_add_packed() for premapped Xuan Zhuo
@ 2023-02-20  5:37   ` Jason Wang
  0 siblings, 0 replies; 46+ messages in thread
From: Jason Wang @ 2023-02-20  5:37 UTC (permalink / raw)
  To: Xuan Zhuo; +Cc: Michael S. Tsirkin, virtualization

On Tue, Feb 14, 2023 at 3:27 PM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
>
> DMA-related logic is separated from virtqueue_add_packed to prepare for
> the subsequent support for premapped.

As stated in patch 1, I think it's better to split.

>
> Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
> ---
>  drivers/virtio/virtio_ring.c | 150 ++++++++++++++++++-----------------
>  1 file changed, 78 insertions(+), 72 deletions(-)
>
> diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
> index 42b1ff87518e..47b6f9152f9f 100644
> --- a/drivers/virtio/virtio_ring.c
> +++ b/drivers/virtio/virtio_ring.c
> @@ -1329,7 +1329,6 @@ static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq,
>                                          unsigned int total_sg,
>                                          unsigned int out_sgs,
>                                          unsigned int in_sgs,
> -                                        void *data,
>                                          struct vring_packed_desc *desc)
>  {
>         struct scatterlist *sg;
> @@ -1345,14 +1344,9 @@ static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq,
>
>         for (n = 0; n < out_sgs + in_sgs; n++) {
>                 for (sg = sgs[n]; sg; sg = sg_next(sg)) {
> -                       addr = vring_map_one_sg(vq, sg, n < out_sgs ?
> -                                       DMA_TO_DEVICE : DMA_FROM_DEVICE);
> -                       if (vring_mapping_error(vq, addr))
> -                               goto unmap_release;
> -
>                         desc[i].flags = cpu_to_le16(n < out_sgs ?
>                                                 0 : VRING_DESC_F_WRITE);
> -                       desc[i].addr = cpu_to_le64(addr);
> +                       desc[i].addr = cpu_to_le64(sg->dma_address);
>                         desc[i].len = cpu_to_le32(sg->length);
>                         i++;
>                 }
> @@ -1363,7 +1357,7 @@ static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq,
>                         total_sg * sizeof(struct vring_packed_desc),
>                         DMA_TO_DEVICE);
>         if (vring_mapping_error(vq, addr))
> -               goto unmap_release;
> +               return -ENOMEM;
>
>         vq->packed.vring.desc[head].addr = cpu_to_le64(addr);
>         vq->packed.vring.desc[head].len = cpu_to_le32(total_sg *
> @@ -1404,53 +1398,30 @@ static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq,
>
>         /* Store token and indirect buffer state. */
>         vq->packed.desc_state[id].num = 1;
> -       vq->packed.desc_state[id].data = data;
>         vq->packed.desc_state[id].indir_desc = desc;
>         vq->packed.desc_state[id].last = id;
>
>         vq->num_added += 1;
>
>         pr_debug("Added buffer head %i to %p\n", head, vq);
> -       END_USE(vq);
>
>         return 0;
> -
> -unmap_release:
> -       err_idx = i;
> -
> -       for (i = 0; i < err_idx; i++)
> -               vring_unmap_desc_packed(vq, &desc[i]);
> -
> -       kfree(desc);
> -
> -       END_USE(vq);
> -       return -ENOMEM;
>  }
>
> -static inline int virtqueue_add_packed(struct virtqueue *_vq,
> -                                      struct scatterlist *sgs[],
> -                                      unsigned int total_sg,
> -                                      unsigned int out_sgs,
> -                                      unsigned int in_sgs,
> -                                      void *data,
> -                                      void *ctx,
> -                                      gfp_t gfp)
> +static inline int virtqueue_add_packed_prepare(struct vring_virtqueue *vq,
> +                                              unsigned int total_sg,
> +                                              void *data,
> +                                              void *ctx,
> +                                              struct vring_packed_desc **pdesc,
> +                                              gfp_t gfp)
>  {
> -       struct vring_virtqueue *vq = to_vvq(_vq);
>         struct vring_packed_desc *desc;
> -       struct scatterlist *sg;
> -       unsigned int i, n, c, descs_used, err_idx;
> -       __le16 head_flags, flags;
> -       u16 head, id, prev, curr, avail_used_flags;
> -       int err;
> -
> -       START_USE(vq);
> +       unsigned int descs_used;
>
>         BUG_ON(data == NULL);
>         BUG_ON(ctx && vq->indirect);
>
>         if (unlikely(vq->broken)) {
> -               END_USE(vq);
>                 return -EIO;
>         }
>
> @@ -1458,39 +1429,55 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq,
>
>         BUG_ON(total_sg == 0);
>
> +       desc = NULL;
> +
>         if (virtqueue_use_indirect(vq, total_sg)) {
>                 desc = alloc_indirect_packed(total_sg, gfp);
>                 if (desc) {
>                         if (unlikely(vq->vq.num_free < 1)) {
>                                 pr_debug("Can't add buf len 1 - avail = 0\n");
>                                 kfree(desc);
> -                               END_USE(vq);
>                                 return -ENOSPC;
>                         }
>
> -                       return virtqueue_add_indirect_packed(vq, sgs, total_sg, out_sgs,
> -                                                            in_sgs, data, desc);
> +                       return 0;
>                 }
>
>                 /* fall back on direct */
>         }
>
> -       head = vq->packed.next_avail_idx;
> -       avail_used_flags = vq->packed.avail_used_flags;
> -
>         WARN_ON_ONCE(total_sg > vq->packed.vring.num && !vq->indirect);
>
> -       desc = vq->packed.vring.desc;
> -       i = head;
>         descs_used = total_sg;
>
>         if (unlikely(vq->vq.num_free < descs_used)) {
>                 pr_debug("Can't add buf len %i - avail = %i\n",
>                          descs_used, vq->vq.num_free);
> -               END_USE(vq);
>                 return -ENOSPC;
>         }
>
> +       *pdesc = desc;
> +
> +       return 0;
> +}
> +
> +static void virtqueue_add_packed_vring(struct vring_virtqueue *vq,
> +                                      struct scatterlist *sgs[],
> +                                      unsigned int total_sg,
> +                                      unsigned int out_sgs,
> +                                      unsigned int in_sgs)
> +{
> +       struct vring_packed_desc *desc;
> +       struct scatterlist *sg;
> +       unsigned int i, n, c, descs_used;
> +       __le16 head_flags, flags;
> +       u16 head, id, prev, curr;
> +
> +       desc = vq->packed.vring.desc;
> +       head = vq->packed.next_avail_idx;
> +       i = head;
> +       descs_used = total_sg;
> +
>         id = vq->free_head;
>         BUG_ON(id == vq->packed.vring.num);
>
> @@ -1498,11 +1485,6 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq,
>         c = 0;
>         for (n = 0; n < out_sgs + in_sgs; n++) {
>                 for (sg = sgs[n]; sg; sg = sg_next(sg)) {
> -                       dma_addr_t addr = vring_map_one_sg(vq, sg, n < out_sgs ?
> -                                       DMA_TO_DEVICE : DMA_FROM_DEVICE);
> -                       if (vring_mapping_error(vq, addr))
> -                               goto unmap_release;
> -
>                         flags = cpu_to_le16(vq->packed.avail_used_flags |
>                                     (++c == total_sg ? 0 : VRING_DESC_F_NEXT) |
>                                     (n < out_sgs ? 0 : VRING_DESC_F_WRITE));
> @@ -1511,12 +1493,12 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq,
>                         else
>                                 desc[i].flags = flags;
>
> -                       desc[i].addr = cpu_to_le64(addr);
> +                       desc[i].addr = cpu_to_le64(sg->dma_address);
>                         desc[i].len = cpu_to_le32(sg->length);
>                         desc[i].id = cpu_to_le16(id);
>
>                         if (unlikely(vq->use_dma_api)) {
> -                               vq->packed.desc_extra[curr].addr = addr;
> +                               vq->packed.desc_extra[curr].addr = sg->dma_address;
>                                 vq->packed.desc_extra[curr].len = sg->length;
>                                 vq->packed.desc_extra[curr].flags =
>                                         le16_to_cpu(flags);
> @@ -1545,8 +1527,6 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq,
>
>         /* Store token. */
>         vq->packed.desc_state[id].num = descs_used;
> -       vq->packed.desc_state[id].data = data;
> -       vq->packed.desc_state[id].indir_desc = ctx;
>         vq->packed.desc_state[id].last = prev;
>
>         /*
> @@ -1559,29 +1539,55 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq,
>         vq->num_added += descs_used;
>
>         pr_debug("Added buffer head %i to %p\n", head, vq);
> -       END_USE(vq);
> +}
>
> -       return 0;
> +static inline int virtqueue_add_packed(struct virtqueue *_vq,
> +                                      struct scatterlist *sgs[],
> +                                      unsigned int total_sg,
> +                                      unsigned int out_sgs,
> +                                      unsigned int in_sgs,
> +                                      void *data,
> +                                      void *ctx,
> +                                      gfp_t gfp)
> +{
> +       struct vring_virtqueue *vq = to_vvq(_vq);
> +       struct vring_packed_desc *desc;
> +       u16 id;
> +       int err;
>
> -unmap_release:
> -       err_idx = i;
> -       i = head;
> -       curr = vq->free_head;
> +       START_USE(vq);
>
> -       vq->packed.avail_used_flags = avail_used_flags;
> +       /* check vq state and try to alloc desc for indirect. */
> +       err = virtqueue_add_packed_prepare(vq, total_sg, data, ctx, &desc, gfp);
> +       if (err)
> +               goto end;
>
> -       for (n = 0; n < total_sg; n++) {
> -               if (i == err_idx)
> -                       break;
> -               vring_unmap_extra_packed(vq, &vq->packed.desc_extra[curr]);
> -               curr = vq->packed.desc_extra[curr].next;
> -               i++;
> -               if (i >= vq->packed.vring.num)
> -                       i = 0;
> +       err = virtqueue_map_sgs(vq, sgs, total_sg, out_sgs, in_sgs);
> +       if (err)
> +               goto err;
> +
> +       id = vq->free_head;
> +
> +       if (desc) {
> +               err = virtqueue_add_indirect_packed(vq, sgs, total_sg, out_sgs, in_sgs, desc);
> +               if (err)
> +                       goto err;
> +       } else {
> +               virtqueue_add_packed_vring(vq, sgs, total_sg, out_sgs, in_sgs);
> +               vq->packed.desc_state[id].indir_desc = ctx;

I think it's better to be consistent here, e.g split hides those into
virtqueue_add_split_vring().

Thanks


>         }
>
> +       vq->packed.desc_state[id].data = data;
> +
> +       goto end;
> +
> +err:
> +       virtqueue_unmap_sgs(vq, sgs, total_sg, out_sgs, in_sgs);
> +       kfree(desc);
> +
> +end:
>         END_USE(vq);
> -       return -EIO;
> +       return err;
>  }
>
>  static bool virtqueue_kick_prepare_packed(struct virtqueue *_vq)
> --
> 2.32.0.3.g01195cf9f
>

_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH vhost 04/10] virtio_ring: split: introduce virtqueue_add_split_premapped()
  2023-02-14  7:26 ` [PATCH vhost 04/10] virtio_ring: split: introduce virtqueue_add_split_premapped() Xuan Zhuo
@ 2023-02-20  5:38   ` Jason Wang
  2023-02-20  6:43     ` Xuan Zhuo
  0 siblings, 1 reply; 46+ messages in thread
From: Jason Wang @ 2023-02-20  5:38 UTC (permalink / raw)
  To: Xuan Zhuo; +Cc: Michael S. Tsirkin, virtualization

On Tue, Feb 14, 2023 at 3:27 PM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
>
> virtqueue_add_split() only supports virtual addresses, dma is completed
> in virtqueue_add_split().
>
> In some scenarios (such as the AF_XDP scenario), the memory is allocated
> and DMA is completed in advance, so it is necessary for us to support
> passing the DMA address to virtio core.
>
> Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
> ---
>  drivers/virtio/virtio_ring.c | 100 +++++++++++++++++++++++++++++++++--
>  include/linux/virtio.h       |   5 ++
>  2 files changed, 100 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
> index 47b6f9152f9f..a31155abe101 100644
> --- a/drivers/virtio/virtio_ring.c
> +++ b/drivers/virtio/virtio_ring.c
> @@ -70,6 +70,7 @@
>  struct vring_desc_state_split {
>         void *data;                     /* Data for callback. */
>         struct vring_desc *indir_desc;  /* Indirect descriptor, if any. */
> +       bool premapped;

Better with a comment.

Not native speaker, but "dma_addr" might be better?

>  };
>
>  struct vring_desc_state_packed {
> @@ -440,7 +441,7 @@ static void vring_unmap_one_split_indirect(const struct vring_virtqueue *vq,
>  }
>
>  static unsigned int vring_unmap_one_split(const struct vring_virtqueue *vq,
> -                                         unsigned int i)
> +                                         unsigned int i, bool premapped)
>  {
>         struct vring_desc_extra *extra = vq->split.desc_extra;
>         u16 flags;
> @@ -457,6 +458,9 @@ static unsigned int vring_unmap_one_split(const struct vring_virtqueue *vq,
>                                  (flags & VRING_DESC_F_WRITE) ?
>                                  DMA_FROM_DEVICE : DMA_TO_DEVICE);
>         } else {
> +               if (premapped)
> +                       goto out;
> +
>                 dma_unmap_page(vring_dma_dev(vq),
>                                extra[i].addr,
>                                extra[i].len,
> @@ -788,6 +792,47 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,
>         return err;
>  }
>
> +static inline int virtqueue_add_split_premapped(struct virtqueue *_vq,
> +                                               struct scatterlist *sgs[],
> +                                               unsigned int total_sg,
> +                                               unsigned int out_sgs,
> +                                               unsigned int in_sgs,
> +                                               void *data,
> +                                               void *ctx,
> +                                               gfp_t gfp)
> +{
> +       struct vring_virtqueue *vq = to_vvq(_vq);
> +       struct vring_desc *desc;
> +       int head;
> +       int err;
> +
> +       START_USE(vq);
> +
> +       /* check vq state and try to alloc desc for indirect. */
> +       err = virtqueue_add_split_prepare(vq, total_sg, out_sgs, data, ctx, gfp, &desc);
> +       if (err)
> +               goto end;
> +
> +       head = vq->free_head;
> +       err = virtqueue_add_split_vring(vq, sgs, total_sg, out_sgs, in_sgs, desc);
> +       if (err)
> +               goto err;
> +
> +       /* Store token and indirect buffer state. */
> +       vq->split.desc_state[head].data = data;
> +       vq->split.desc_state[head].indir_desc = desc ? desc : ctx;
> +       vq->split.desc_state[head].premapped = true;

This function duplicates most of the logic of virtqueue_add_split()
let's unify it.

probably:

__virtqueue_add_split(..., bool premapped);
virtqueue_add_split()
{
    __virtqueue_add_split(..., false);
}

virtqueue_add_split_premapped()
{
   __virtqueue_add_split(..., true);
}

?

And so did for packed (patch 5).

Thanks



> +
> +       goto end;
> +
> +err:
> +       kfree(desc);
> +
> +end:
> +       END_USE(vq);
> +       return err;
> +}
> +
>  static bool virtqueue_kick_prepare_split(struct virtqueue *_vq)
>  {
>         struct vring_virtqueue *vq = to_vvq(_vq);
> @@ -824,20 +869,23 @@ static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head,
>  {
>         unsigned int i, j;
>         __virtio16 nextflag = cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_NEXT);
> +       bool premapped;
>
>         /* Clear data ptr. */
>         vq->split.desc_state[head].data = NULL;
>
> +       premapped = vq->split.desc_state[head].premapped;
> +
>         /* Put back on free list: unmap first-level descriptors and find end */
>         i = head;
>
>         while (vq->split.vring.desc[i].flags & nextflag) {
> -               vring_unmap_one_split(vq, i);
> +               vring_unmap_one_split(vq, i, premapped);
>                 i = vq->split.desc_extra[i].next;
>                 vq->vq.num_free++;
>         }
>
> -       vring_unmap_one_split(vq, i);
> +       vring_unmap_one_split(vq, i, premapped);
>         vq->split.desc_extra[i].next = vq->free_head;
>         vq->free_head = head;
>
> @@ -859,8 +907,10 @@ static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head,
>                                 VRING_DESC_F_INDIRECT));
>                 BUG_ON(len == 0 || len % sizeof(struct vring_desc));
>
> -               for (j = 0; j < len / sizeof(struct vring_desc); j++)
> -                       vring_unmap_one_split_indirect(vq, &indir_desc[j]);
> +               if (!premapped) {
> +                       for (j = 0; j < len / sizeof(struct vring_desc); j++)
> +                               vring_unmap_one_split_indirect(vq, &indir_desc[j]);
> +               }
>
>                 kfree(indir_desc);
>                 vq->split.desc_state[head].indir_desc = NULL;
> @@ -2204,6 +2254,21 @@ static inline int virtqueue_add(struct virtqueue *_vq,
>                                         out_sgs, in_sgs, data, ctx, gfp);
>  }
>
> +static inline int virtqueue_add_premapped(struct virtqueue *_vq,
> +                                         struct scatterlist *sgs[],
> +                                         unsigned int total_sg,
> +                                         unsigned int out_sgs,
> +                                         unsigned int in_sgs,
> +                                         void *data,
> +                                         void *ctx,
> +                                         gfp_t gfp)
> +{
> +       struct vring_virtqueue *vq = to_vvq(_vq);
> +
> +       return virtqueue_add_split_premapped(_vq, sgs, total_sg, out_sgs,
> +                                            in_sgs, data, ctx, gfp);
> +}
> +
>  /**
>   * virtqueue_add_sgs - expose buffers to other end
>   * @_vq: the struct virtqueue we're talking about.
> @@ -2261,6 +2326,31 @@ int virtqueue_add_outbuf(struct virtqueue *vq,
>  }
>  EXPORT_SYMBOL_GPL(virtqueue_add_outbuf);
>
> +/**
> + * virtqueue_add_outbuf_premapped - expose output buffers to other end
> + * @vq: the struct virtqueue we're talking about.
> + * @sg: scatterlist (must be well-formed and terminated!)
> + * @num: the number of entries in @sg readable by other side
> + * @data: the token identifying the buffer.
> + * @gfp: how to do memory allocations (if necessary).
> + *
> + * Caller must ensure we don't call this with other virtqueue operations
> + * at the same time (except where noted).
> + *
> + * It is required that all addrs have completed DMA operations. And use
> + * sg->dma_address, sg->length to pass addr and length.
> + *
> + * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO).
> + */
> +int virtqueue_add_outbuf_premapped(struct virtqueue *vq,
> +                                  struct scatterlist *sg, unsigned int num,
> +                                  void *data,
> +                                  gfp_t gfp)
> +{
> +       return virtqueue_add_premapped(vq, &sg, num, 1, 0, data, NULL, gfp);
> +}
> +EXPORT_SYMBOL_GPL(virtqueue_add_outbuf_premapped);
> +
>  /**
>   * virtqueue_add_inbuf - expose input buffers to other end
>   * @vq: the struct virtqueue we're talking about.
> diff --git a/include/linux/virtio.h b/include/linux/virtio.h
> index dcab9c7e8784..d8b472a7dcae 100644
> --- a/include/linux/virtio.h
> +++ b/include/linux/virtio.h
> @@ -43,6 +43,11 @@ int virtqueue_add_outbuf(struct virtqueue *vq,
>                          void *data,
>                          gfp_t gfp);
>
> +int virtqueue_add_outbuf_premapped(struct virtqueue *vq,
> +                                  struct scatterlist *sg, unsigned int num,
> +                                  void *data,
> +                                  gfp_t gfp);
> +
>  int virtqueue_add_inbuf(struct virtqueue *vq,
>                         struct scatterlist sg[], unsigned int num,
>                         void *data,
> --
> 2.32.0.3.g01195cf9f
>

_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH vhost 08/10] virtio_ring: introduce dma sync api for virtio
  2023-02-14  7:27 ` [PATCH vhost 08/10] virtio_ring: introduce dma sync api for virtio Xuan Zhuo
@ 2023-02-20  5:38   ` Jason Wang
  2023-02-20  7:04     ` Xuan Zhuo
  0 siblings, 1 reply; 46+ messages in thread
From: Jason Wang @ 2023-02-20  5:38 UTC (permalink / raw)
  To: Xuan Zhuo; +Cc: Michael S. Tsirkin, virtualization

On Tue, Feb 14, 2023 at 3:27 PM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
>
> These APIs have been introduced:
>
> * virtio_dma_need_sync
> * virtio_dma_sync_single_range_for_cpu
> * virtio_dma_sync_single_range_for_device

What's the advantages of exporting internal logic like
virtio_dma_need_sync() over hiding it in
virtio_dma_sync_single_range_for_cpu() and
virtio_dma_sync_single_range_for_device()?

Thanks


>
> These APIs can be used together with the premapped mechanism to sync the
> DMA address.
>
> Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
> ---
>  drivers/virtio/virtio_ring.c | 70 ++++++++++++++++++++++++++++++++++++
>  include/linux/virtio.h       |  8 +++++
>  2 files changed, 78 insertions(+)
>
> diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
> index 855338609c7f..84129b8c3e2a 100644
> --- a/drivers/virtio/virtio_ring.c
> +++ b/drivers/virtio/virtio_ring.c
> @@ -3264,4 +3264,74 @@ void virtio_dma_unmap(struct device *dev, dma_addr_t dma, unsigned int length,
>  }
>  EXPORT_SYMBOL_GPL(virtio_dma_unmap);
>
> +/**
> + * virtio_dma_need_sync - check a dma address needs sync
> + * @dev: virtio device
> + * @addr: DMA address
> + *
> + * This API is only for pre-mapped buffers, for non premapped buffers virtio
> + * core handles DMA API internally.
> + */
> +bool virtio_dma_need_sync(struct device *dev, dma_addr_t addr)
> +{
> +       struct virtio_device *vdev = dev_to_virtio(dev);
> +
> +       if (!vring_use_dma_api(vdev))
> +               return 0;
> +
> +       return dma_need_sync(vdev->dev.parent, addr);
> +}
> +EXPORT_SYMBOL_GPL(virtio_dma_need_sync);
> +
> +/**
> + * virtio_dma_sync_single_range_for_cpu - dma sync for cpu
> + * @dev: virtio device
> + * @addr: DMA address
> + * @offset: DMA address offset
> + * @size: mem size for sync
> + * @dir: DMA direction
> + *
> + * Before calling this function, use virtio_dma_need_sync() to confirm that the
> + * DMA address really needs to be synchronized
> + *
> + * This API is only for pre-mapped buffers, for non premapped buffers virtio
> + * core handles DMA API internally.
> + */
> +void virtio_dma_sync_single_range_for_cpu(struct device *dev, dma_addr_t addr,
> +                                         unsigned long offset, size_t size,
> +                                         enum dma_data_direction dir)
> +{
> +       struct virtio_device *vdev = dev_to_virtio(dev);
> +
> +       dma_sync_single_range_for_cpu(vdev->dev.parent, addr, offset,
> +                                     size, DMA_BIDIRECTIONAL);
> +}
> +EXPORT_SYMBOL_GPL(virtio_dma_sync_single_range_for_cpu);
> +
> +/**
> + * virtio_dma_sync_single_range_for_device - dma sync for device
> + * @dev: virtio device
> + * @addr: DMA address
> + * @offset: DMA address offset
> + * @size: mem size for sync
> + * @dir: DMA direction
> + *
> + * Before calling this function, use virtio_dma_need_sync() to confirm that the
> + * DMA address really needs to be synchronized
> + *
> + * This API is only for pre-mapped buffers, for non premapped buffers virtio
> + * core handles DMA API internally.
> + */
> +void virtio_dma_sync_single_range_for_device(struct device *dev,
> +                                            dma_addr_t addr,
> +                                            unsigned long offset, size_t size,
> +                                            enum dma_data_direction dir)
> +{
> +       struct virtio_device *vdev = dev_to_virtio(dev);
> +
> +       dma_sync_single_range_for_device(vdev->dev.parent, addr, offset,
> +                                        size, DMA_BIDIRECTIONAL);
> +}
> +EXPORT_SYMBOL_GPL(virtio_dma_sync_single_range_for_device);
> +
>  MODULE_LICENSE("GPL");
> diff --git a/include/linux/virtio.h b/include/linux/virtio.h
> index b5fa71476737..d0e707d744a0 100644
> --- a/include/linux/virtio.h
> +++ b/include/linux/virtio.h
> @@ -225,4 +225,12 @@ dma_addr_t virtio_dma_map(struct device *dev, void *addr, unsigned int length,
>  int virtio_dma_mapping_error(struct device *dev, dma_addr_t addr);
>  void virtio_dma_unmap(struct device *dev, dma_addr_t dma, unsigned int length,
>                       enum dma_data_direction dir);
> +bool virtio_dma_need_sync(struct device *dev, dma_addr_t addr);
> +void virtio_dma_sync_single_range_for_cpu(struct device *dev, dma_addr_t addr,
> +                                         unsigned long offset, size_t size,
> +                                         enum dma_data_direction dir);
> +void virtio_dma_sync_single_range_for_device(struct device *dev,
> +                                            dma_addr_t addr,
> +                                            unsigned long offset, size_t size,
> +                                            enum dma_data_direction dir);
>  #endif /* _LINUX_VIRTIO_H */
> --
> 2.32.0.3.g01195cf9f
>

_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH vhost 07/10] virtio_ring: add api virtio_dma_map() for advance dma
  2023-02-14  7:27 ` [PATCH vhost 07/10] virtio_ring: add api virtio_dma_map() for advance dma Xuan Zhuo
@ 2023-02-20  5:38   ` Jason Wang
  2023-02-20  6:59     ` Xuan Zhuo
  0 siblings, 1 reply; 46+ messages in thread
From: Jason Wang @ 2023-02-20  5:38 UTC (permalink / raw)
  To: Xuan Zhuo; +Cc: Michael S. Tsirkin, virtualization

On Tue, Feb 14, 2023 at 3:27 PM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
>
> Added virtio_dma_map() to map DMA addresses for virtual memory in
> advance. The purpose is to keep memory mapped across multiple add/get
> buf operations.

I wonder if, instead of exporting helpers like this, it might be simpler
to just export dma_dev so the upper layer can use the DMA API at will?

(Otherwise the DMA helpers need to grow/shrink as the DMA API evolves?)

>
> Added virtio_dma_unmap() for unmap DMA address.
>
> Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
> ---
>  drivers/virtio/virtio_ring.c | 92 ++++++++++++++++++++++++++++++++++++
>  include/linux/virtio.h       |  9 ++++
>  2 files changed, 101 insertions(+)
>
> diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
> index cd9364eb2345..855338609c7f 100644
> --- a/drivers/virtio/virtio_ring.c
> +++ b/drivers/virtio/virtio_ring.c
> @@ -3172,4 +3172,96 @@ const struct vring *virtqueue_get_vring(struct virtqueue *vq)
>  }
>  EXPORT_SYMBOL_GPL(virtqueue_get_vring);
>
> +/**
> + * virtio_dma_map_page - get the DMA addr of the memory for virtio device
> + * @dev: virtio device
> + * @page: the page of the memory to DMA
> + * @offset: the offset of the memory inside page
> + * @length: memory length
> + * @dir: DMA direction
> + *
> + * This API is only for pre-mapped buffers, for non premapped buffers virtio
> + * core handles DMA API internally.
> + *
> + * Returns the DMA addr. DMA_MAPPING_ERROR means error.
> + */
> +dma_addr_t virtio_dma_map_page(struct device *dev, struct page *page, size_t offset,
> +                              unsigned int length, enum dma_data_direction dir)
> +{

This (and the rest) needs to be done per virtqueue instead of per device
after b0e504e5505d184b0be248b7dcdbe50b79f03758 ("virtio_ring: per
virtqueue dma device").

> +       struct virtio_device *vdev = dev_to_virtio(dev);
> +
> +       if (!vring_use_dma_api(vdev))
> +               return page_to_phys(page) + offset;
> +
> +       return dma_map_page(vdev->dev.parent, page, offset, length, dir);
> +}

Need either inline or EXPORT_SYMBOL_GPL() here.

Thanks


> +
> +/**
> + * virtio_dma_map - get the DMA addr of the memory for virtio device
> + * @dev: virtio device
> + * @addr: the addr to DMA
> + * @length: memory length
> + * @dir: DMA direction
> + *
> + * This API is only for pre-mapped buffers, for non premapped buffers virtio
> + * core handles DMA API internally.
> + *
> + * Returns the DMA addr.
> + */
> +dma_addr_t virtio_dma_map(struct device *dev, void *addr, unsigned int length,
> +                         enum dma_data_direction dir)
> +{
> +       struct page *page;
> +       size_t offset;
> +
> +       page = virt_to_page(addr);
> +       offset = offset_in_page(addr);
> +
> +       return virtio_dma_map_page(dev, page, offset, length, dir);
> +}
> +EXPORT_SYMBOL_GPL(virtio_dma_map);
> +
> +/**
> + * virtio_dma_mapping_error - check dma address
> + * @dev: virtio device
> + * @addr: DMA address
> + *
> + * This API is only for pre-mapped buffers, for non premapped buffers virtio
> + * core handles DMA API internally.
> + *
> + * Returns 0 means dma valid. Other means invalid dma address.
> + */
> +int virtio_dma_mapping_error(struct device *dev, dma_addr_t addr)
> +{
> +       struct virtio_device *vdev = dev_to_virtio(dev);
> +
> +       if (!vring_use_dma_api(vdev))
> +               return 0;
> +
> +       return dma_mapping_error(vdev->dev.parent, addr);
> +}
> +EXPORT_SYMBOL_GPL(virtio_dma_mapping_error);
> +
> +/**
> + * virtio_dma_unmap - unmap DMA addr
> + * @dev: virtio device
> + * @dma: DMA address
> + * @length: memory length
> + * @dir: DMA direction
> + *
> + * This API is only for pre-mapped buffers, for non premapped buffers virtio
> + * core handles DMA API internally.
> + */
> +void virtio_dma_unmap(struct device *dev, dma_addr_t dma, unsigned int length,
> +                     enum dma_data_direction dir)
> +{
> +       struct virtio_device *vdev = dev_to_virtio(dev);
> +
> +       if (!vring_use_dma_api(vdev))
> +               return;
> +
> +       dma_unmap_page(vdev->dev.parent, dma, length, dir);
> +}
> +EXPORT_SYMBOL_GPL(virtio_dma_unmap);
> +
>  MODULE_LICENSE("GPL");
> diff --git a/include/linux/virtio.h b/include/linux/virtio.h
> index 3ebb346ebb7c..b5fa71476737 100644
> --- a/include/linux/virtio.h
> +++ b/include/linux/virtio.h
> @@ -9,6 +9,7 @@
>  #include <linux/device.h>
>  #include <linux/mod_devicetable.h>
>  #include <linux/gfp.h>
> +#include <linux/dma-mapping.h>
>
>  /**
>   * struct virtqueue - a queue to register buffers for sending or receiving.
> @@ -216,4 +217,12 @@ void unregister_virtio_driver(struct virtio_driver *drv);
>  #define module_virtio_driver(__virtio_driver) \
>         module_driver(__virtio_driver, register_virtio_driver, \
>                         unregister_virtio_driver)
> +
> +dma_addr_t virtio_dma_map_page(struct device *dev, struct page *page, size_t offset,
> +                              unsigned int length, enum dma_data_direction dir);
> +dma_addr_t virtio_dma_map(struct device *dev, void *addr, unsigned int length,
> +                         enum dma_data_direction dir);
> +int virtio_dma_mapping_error(struct device *dev, dma_addr_t addr);
> +void virtio_dma_unmap(struct device *dev, dma_addr_t dma, unsigned int length,
> +                     enum dma_data_direction dir);
>  #endif /* _LINUX_VIRTIO_H */
> --
> 2.32.0.3.g01195cf9f
>

_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH vhost 09/10] virtio_ring: correct the expression of the description of virtqueue_resize()
  2023-02-14  7:27 ` [PATCH vhost 09/10] virtio_ring: correct the expression of the description of virtqueue_resize() Xuan Zhuo
@ 2023-02-20  5:38   ` Jason Wang
  0 siblings, 0 replies; 46+ messages in thread
From: Jason Wang @ 2023-02-20  5:38 UTC (permalink / raw)
  To: Xuan Zhuo; +Cc: Michael S. Tsirkin, virtualization

On Tue, Feb 14, 2023 at 3:27 PM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
>
> Modify the "useless" to a more accurate "unused".
>
> Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>

Acked-by: Jason Wang <jasowang@redhat.com>

Thanks


> ---
>  drivers/virtio/virtio_ring.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
> index 84129b8c3e2a..2ba60a14f557 100644
> --- a/drivers/virtio/virtio_ring.c
> +++ b/drivers/virtio/virtio_ring.c
> @@ -2865,7 +2865,7 @@ EXPORT_SYMBOL_GPL(vring_create_virtqueue_dma);
>   * virtqueue_resize - resize the vring of vq
>   * @_vq: the struct virtqueue we're talking about.
>   * @num: new ring num
> - * @recycle: callback for recycle the useless buffer
> + * @recycle: callback to recycle unused buffers
>   *
>   * When it is really necessary to create a new vring, it will set the current vq
>   * into the reset state. Then call the passed callback to recycle the buffer
> --
> 2.32.0.3.g01195cf9f
>

_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH vhost 10/10] virtio_ring: introduce virtqueue_reset()
  2023-02-14  7:27 ` [PATCH vhost 10/10] virtio_ring: introduce virtqueue_reset() Xuan Zhuo
@ 2023-02-20  5:38   ` Jason Wang
  2023-02-20  7:03     ` Xuan Zhuo
  0 siblings, 1 reply; 46+ messages in thread
From: Jason Wang @ 2023-02-20  5:38 UTC (permalink / raw)
  To: Xuan Zhuo; +Cc: Michael S. Tsirkin, virtualization

On Tue, Feb 14, 2023 at 3:27 PM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
>
> Introduce virtqueue_reset() to release all buffer inside vq.
>
> Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
> ---
>  drivers/virtio/virtio_ring.c | 50 ++++++++++++++++++++++++++++++++++++
>  include/linux/virtio.h       |  2 ++
>  2 files changed, 52 insertions(+)
>
> diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
> index 2ba60a14f557..2750a365439a 100644
> --- a/drivers/virtio/virtio_ring.c
> +++ b/drivers/virtio/virtio_ring.c
> @@ -2930,6 +2930,56 @@ int virtqueue_resize(struct virtqueue *_vq, u32 num,
>  }
>  EXPORT_SYMBOL_GPL(virtqueue_resize);
>
> +/**
> + * virtqueue_reset - detach and recycle all unused buffers
> + * @_vq: the struct virtqueue we're talking about.
> + * @recycle: callback to recycle unused buffers
> + *
> + * Caller must ensure we don't call this with other virtqueue operations
> + * at the same time (except where noted).
> + *
> + * Returns zero or a negative error.
> + * 0: success.
> + * -EBUSY: Failed to sync with device, vq may not work properly
> + * -ENOENT: Transport or device not supported
> + * -EPERM: Operation not permitted
> + */
> +int virtqueue_reset(struct virtqueue *_vq,
> +                   void (*recycle)(struct virtqueue *vq, void *buf))
> +{
> +       struct vring_virtqueue *vq = to_vvq(_vq);
> +       struct virtio_device *vdev = vq->vq.vdev;
> +       void *buf;
> +       int err;
> +
> +       if (!vq->we_own_ring)
> +               return -EPERM;
> +
> +       if (!vdev->config->disable_vq_and_reset)
> +               return -ENOENT;
> +
> +       if (!vdev->config->enable_vq_after_reset)
> +               return -ENOENT;
> +
> +       err = vdev->config->disable_vq_and_reset(_vq);
> +       if (err)
> +               return err;
> +
> +       while ((buf = virtqueue_detach_unused_buf(_vq)) != NULL)
> +               recycle(_vq, buf);
> +
> +       if (vq->packed_ring)
> +               virtqueue_reinit_packed(vq);
> +       else
> +               virtqueue_reinit_split(vq);
> +
> +       if (vdev->config->enable_vq_after_reset(_vq))
> +               return -EBUSY;
> +
> +       return 0;
> +}

I don't get why not factor the similar logic from virtqueue_resize()?

Thanks


> +EXPORT_SYMBOL_GPL(virtqueue_reset);
> +
>  /* Only available for split ring */
>  struct virtqueue *vring_new_virtqueue(unsigned int index,
>                                       unsigned int num,
> diff --git a/include/linux/virtio.h b/include/linux/virtio.h
> index d0e707d744a0..cf4c157e4e75 100644
> --- a/include/linux/virtio.h
> +++ b/include/linux/virtio.h
> @@ -106,6 +106,8 @@ dma_addr_t virtqueue_get_used_addr(struct virtqueue *vq);
>
>  int virtqueue_resize(struct virtqueue *vq, u32 num,
>                      void (*recycle)(struct virtqueue *vq, void *buf));
> +int virtqueue_reset(struct virtqueue *vq,
> +                   void (*recycle)(struct virtqueue *vq, void *buf));
>
>  /**
>   * struct virtio_device - representation of a device using virtio
> --
> 2.32.0.3.g01195cf9f
>

_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH vhost 04/10] virtio_ring: split: introduce virtqueue_add_split_premapped()
  2023-02-20  5:38   ` Jason Wang
@ 2023-02-20  6:43     ` Xuan Zhuo
  2023-02-21  1:49       ` Jason Wang
  0 siblings, 1 reply; 46+ messages in thread
From: Xuan Zhuo @ 2023-02-20  6:43 UTC (permalink / raw)
  To: Jason Wang; +Cc: Michael S. Tsirkin, virtualization

On Mon, 20 Feb 2023 13:38:13 +0800, Jason Wang <jasowang@redhat.com> wrote:
> On Tue, Feb 14, 2023 at 3:27 PM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> >
> > virtqueue_add_split() only supports virtual addresses, dma is completed
> > in virtqueue_add_split().
> >
> > In some scenarios (such as the AF_XDP scenario), the memory is allocated
> > and DMA is completed in advance, so it is necessary for us to support
> > passing the DMA address to virtio core.
> >
> > Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
> > ---
> >  drivers/virtio/virtio_ring.c | 100 +++++++++++++++++++++++++++++++++--
> >  include/linux/virtio.h       |   5 ++
> >  2 files changed, 100 insertions(+), 5 deletions(-)
> >
> > diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
> > index 47b6f9152f9f..a31155abe101 100644
> > --- a/drivers/virtio/virtio_ring.c
> > +++ b/drivers/virtio/virtio_ring.c
> > @@ -70,6 +70,7 @@
> >  struct vring_desc_state_split {
> >         void *data;                     /* Data for callback. */
> >         struct vring_desc *indir_desc;  /* Indirect descriptor, if any. */
> > +       bool premapped;
>
> Better with a comment.
>
> Not native speaker, but "dma_addr" might be better?
>
> >  };
> >
> >  struct vring_desc_state_packed {
> > @@ -440,7 +441,7 @@ static void vring_unmap_one_split_indirect(const struct vring_virtqueue *vq,
> >  }
> >
> >  static unsigned int vring_unmap_one_split(const struct vring_virtqueue *vq,
> > -                                         unsigned int i)
> > +                                         unsigned int i, bool premapped)
> >  {
> >         struct vring_desc_extra *extra = vq->split.desc_extra;
> >         u16 flags;
> > @@ -457,6 +458,9 @@ static unsigned int vring_unmap_one_split(const struct vring_virtqueue *vq,
> >                                  (flags & VRING_DESC_F_WRITE) ?
> >                                  DMA_FROM_DEVICE : DMA_TO_DEVICE);
> >         } else {
> > +               if (premapped)
> > +                       goto out;
> > +
> >                 dma_unmap_page(vring_dma_dev(vq),
> >                                extra[i].addr,
> >                                extra[i].len,
> > @@ -788,6 +792,47 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,
> >         return err;
> >  }
> >
> > +static inline int virtqueue_add_split_premapped(struct virtqueue *_vq,
> > +                                               struct scatterlist *sgs[],
> > +                                               unsigned int total_sg,
> > +                                               unsigned int out_sgs,
> > +                                               unsigned int in_sgs,
> > +                                               void *data,
> > +                                               void *ctx,
> > +                                               gfp_t gfp)
> > +{
> > +       struct vring_virtqueue *vq = to_vvq(_vq);
> > +       struct vring_desc *desc;
> > +       int head;
> > +       int err;
> > +
> > +       START_USE(vq);
> > +
> > +       /* check vq state and try to alloc desc for indirect. */
> > +       err = virtqueue_add_split_prepare(vq, total_sg, out_sgs, data, ctx, gfp, &desc);
> > +       if (err)
> > +               goto end;
> > +
> > +       head = vq->free_head;
> > +       err = virtqueue_add_split_vring(vq, sgs, total_sg, out_sgs, in_sgs, desc);
> > +       if (err)
> > +               goto err;
> > +
> > +       /* Store token and indirect buffer state. */
> > +       vq->split.desc_state[head].data = data;
> > +       vq->split.desc_state[head].indir_desc = desc ? desc : ctx;
> > +       vq->split.desc_state[head].premapped = true;
>
> This function duplicates most of the logic of virtqueue_add_split()
> let's unify it.

I want to know whether __virtqueue_add_split here refers to the original
virtqueue_add_split or to my refactored virtqueue_add_split?

>
> probably:
>
> __virtqueue_add_split(..., bool premapped);
> virtqueue_add_split()
> {
>     __virtqueue_add_split(..., false);
> }
>
> virtqueue_add_split_premapped()
> {
>    __virtqueue_add_split(..., true);
> }

I am trying to reduce the number of premapped checks.

In fact, this is Michael's request, although I am not particularly sure that my
implementation has met his requirements.

https://lore.kernel.org/all/20230203041006-mutt-send-email-mst@kernel.org/

Thanks.


>
> ?
>
> And so did for packed (patch 5).
>
> Thanks
>
>
>
> > +
> > +       goto end;
> > +
> > +err:
> > +       kfree(desc);
> > +
> > +end:
> > +       END_USE(vq);
> > +       return err;
> > +}
> > +
> >  static bool virtqueue_kick_prepare_split(struct virtqueue *_vq)
> >  {
> >         struct vring_virtqueue *vq = to_vvq(_vq);
> > @@ -824,20 +869,23 @@ static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head,
> >  {
> >         unsigned int i, j;
> >         __virtio16 nextflag = cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_NEXT);
> > +       bool premapped;
> >
> >         /* Clear data ptr. */
> >         vq->split.desc_state[head].data = NULL;
> >
> > +       premapped = vq->split.desc_state[head].premapped;
> > +
> >         /* Put back on free list: unmap first-level descriptors and find end */
> >         i = head;
> >
> >         while (vq->split.vring.desc[i].flags & nextflag) {
> > -               vring_unmap_one_split(vq, i);
> > +               vring_unmap_one_split(vq, i, premapped);
> >                 i = vq->split.desc_extra[i].next;
> >                 vq->vq.num_free++;
> >         }
> >
> > -       vring_unmap_one_split(vq, i);
> > +       vring_unmap_one_split(vq, i, premapped);
> >         vq->split.desc_extra[i].next = vq->free_head;
> >         vq->free_head = head;
> >
> > @@ -859,8 +907,10 @@ static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head,
> >                                 VRING_DESC_F_INDIRECT));
> >                 BUG_ON(len == 0 || len % sizeof(struct vring_desc));
> >
> > -               for (j = 0; j < len / sizeof(struct vring_desc); j++)
> > -                       vring_unmap_one_split_indirect(vq, &indir_desc[j]);
> > +               if (!premapped) {
> > +                       for (j = 0; j < len / sizeof(struct vring_desc); j++)
> > +                               vring_unmap_one_split_indirect(vq, &indir_desc[j]);
> > +               }
> >
> >                 kfree(indir_desc);
> >                 vq->split.desc_state[head].indir_desc = NULL;
> > @@ -2204,6 +2254,21 @@ static inline int virtqueue_add(struct virtqueue *_vq,
> >                                         out_sgs, in_sgs, data, ctx, gfp);
> >  }
> >
> > +static inline int virtqueue_add_premapped(struct virtqueue *_vq,
> > +                                         struct scatterlist *sgs[],
> > +                                         unsigned int total_sg,
> > +                                         unsigned int out_sgs,
> > +                                         unsigned int in_sgs,
> > +                                         void *data,
> > +                                         void *ctx,
> > +                                         gfp_t gfp)
> > +{
> > +       struct vring_virtqueue *vq = to_vvq(_vq);
> > +
> > +       return virtqueue_add_split_premapped(_vq, sgs, total_sg, out_sgs,
> > +                                            in_sgs, data, ctx, gfp);
> > +}
> > +
> >  /**
> >   * virtqueue_add_sgs - expose buffers to other end
> >   * @_vq: the struct virtqueue we're talking about.
> > @@ -2261,6 +2326,31 @@ int virtqueue_add_outbuf(struct virtqueue *vq,
> >  }
> >  EXPORT_SYMBOL_GPL(virtqueue_add_outbuf);
> >
> > +/**
> > + * virtqueue_add_outbuf_premapped - expose output buffers to other end
> > + * @vq: the struct virtqueue we're talking about.
> > + * @sg: scatterlist (must be well-formed and terminated!)
> > + * @num: the number of entries in @sg readable by other side
> > + * @data: the token identifying the buffer.
> > + * @gfp: how to do memory allocations (if necessary).
> > + *
> > + * Caller must ensure we don't call this with other virtqueue operations
> > + * at the same time (except where noted).
> > + *
> > + * It is required that all addrs have completed DMA operations. And use
> > + * sg->dma_address, sg->length to pass addr and length.
> > + *
> > + * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO).
> > + */
> > +int virtqueue_add_outbuf_premapped(struct virtqueue *vq,
> > +                                  struct scatterlist *sg, unsigned int num,
> > +                                  void *data,
> > +                                  gfp_t gfp)
> > +{
> > +       return virtqueue_add_premapped(vq, &sg, num, 1, 0, data, NULL, gfp);
> > +}
> > +EXPORT_SYMBOL_GPL(virtqueue_add_outbuf_premapped);
> > +
> >  /**
> >   * virtqueue_add_inbuf - expose input buffers to other end
> >   * @vq: the struct virtqueue we're talking about.
> > diff --git a/include/linux/virtio.h b/include/linux/virtio.h
> > index dcab9c7e8784..d8b472a7dcae 100644
> > --- a/include/linux/virtio.h
> > +++ b/include/linux/virtio.h
> > @@ -43,6 +43,11 @@ int virtqueue_add_outbuf(struct virtqueue *vq,
> >                          void *data,
> >                          gfp_t gfp);
> >
> > +int virtqueue_add_outbuf_premapped(struct virtqueue *vq,
> > +                                  struct scatterlist *sg, unsigned int num,
> > +                                  void *data,
> > +                                  gfp_t gfp);
> > +
> >  int virtqueue_add_inbuf(struct virtqueue *vq,
> >                         struct scatterlist sg[], unsigned int num,
> >                         void *data,
> > --
> > 2.32.0.3.g01195cf9f
> >
>
_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH vhost 02/10] virtio_ring: packed: separate prepare code from virtuque_add_indirect_packed()
  2023-02-20  5:37   ` Jason Wang
@ 2023-02-20  6:56     ` Xuan Zhuo
  0 siblings, 0 replies; 46+ messages in thread
From: Xuan Zhuo @ 2023-02-20  6:56 UTC (permalink / raw)
  To: Jason Wang; +Cc: Michael S. Tsirkin, virtualization

On Mon, 20 Feb 2023 13:37:41 +0800, Jason Wang <jasowang@redhat.com> wrote:
> On Tue, Feb 14, 2023 at 3:27 PM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> >
> > Separating the logic of allocating indirect desc and checking queue
> > status to the upper layer function.
> >
> > The proposal of this is convenient to refactor virtqueue_add_packed()
> > for premapped.
> >
>
> Ok, so this is another hint that we should do the same thing for split.


Since split does not have an independent indirect function, the two cases
differ here. Therefore, there is no equivalent change needed for split.

Thanks.


>
> Thanks
>
>
> > Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
> > ---
> >  drivers/virtio/virtio_ring.c | 29 ++++++++++++-----------------
> >  1 file changed, 12 insertions(+), 17 deletions(-)
> >
> > diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
> > index 560ee30d942c..42b1ff87518e 100644
> > --- a/drivers/virtio/virtio_ring.c
> > +++ b/drivers/virtio/virtio_ring.c
> > @@ -1330,25 +1330,14 @@ static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq,
> >                                          unsigned int out_sgs,
> >                                          unsigned int in_sgs,
> >                                          void *data,
> > -                                        gfp_t gfp)
> > +                                        struct vring_packed_desc *desc)
> >  {
> > -       struct vring_packed_desc *desc;
> >         struct scatterlist *sg;
> >         unsigned int i, n, err_idx;
> >         u16 head, id;
> >         dma_addr_t addr;
> >
> >         head = vq->packed.next_avail_idx;
> > -       desc = alloc_indirect_packed(total_sg, gfp);
> > -       if (!desc)
> > -               return -ENOMEM;
> > -
> > -       if (unlikely(vq->vq.num_free < 1)) {
> > -               pr_debug("Can't add buf len 1 - avail = 0\n");
> > -               kfree(desc);
> > -               END_USE(vq);
> > -               return -ENOSPC;
> > -       }
> >
> >         i = 0;
> >         id = vq->free_head;
> > @@ -1470,11 +1459,17 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq,
> >         BUG_ON(total_sg == 0);
> >
> >         if (virtqueue_use_indirect(vq, total_sg)) {
> > -               err = virtqueue_add_indirect_packed(vq, sgs, total_sg, out_sgs,
> > -                                                   in_sgs, data, gfp);
> > -               if (err != -ENOMEM) {
> > -                       END_USE(vq);
> > -                       return err;
> > +               desc = alloc_indirect_packed(total_sg, gfp);
> > +               if (desc) {
> > +                       if (unlikely(vq->vq.num_free < 1)) {
> > +                               pr_debug("Can't add buf len 1 - avail = 0\n");
> > +                               kfree(desc);
> > +                               END_USE(vq);
> > +                               return -ENOSPC;
> > +                       }
> > +
> > +                       return virtqueue_add_indirect_packed(vq, sgs, total_sg, out_sgs,
> > +                                                            in_sgs, data, desc);
> >                 }
> >
> >                 /* fall back on direct */
> > --
> > 2.32.0.3.g01195cf9f
> >
>
_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH vhost 01/10] virtio_ring: split: refactor virtqueue_add_split() for premapped
  2023-02-20  5:37   ` Jason Wang
@ 2023-02-20  6:57     ` Xuan Zhuo
  2023-02-20 12:12     ` Michael S. Tsirkin
  1 sibling, 0 replies; 46+ messages in thread
From: Xuan Zhuo @ 2023-02-20  6:57 UTC (permalink / raw)
  To: Jason Wang; +Cc: Michael S. Tsirkin, virtualization

On Mon, 20 Feb 2023 13:37:37 +0800, Jason Wang <jasowang@redhat.com> wrote:
> On Tue, Feb 14, 2023 at 3:27 PM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> >
> > DMA-related logic is separated from the virtqueue_add_split to prepare
> > for subsequent support for premapped.
>
> The patch seems to do more than what is described here.
>
> To simplify reviewers, I'd suggest to split this patch into three:
>
> 1) virtqueue_add_split_prepare() (could we have a better name?)
> 2) virtqueue_map_sgs()
> 3) virtqueue_add_split_vring()
>
> (Or only factor DMA parts out, I haven't gone through the reset of the patches)

OK

Thanks.


>
> Thanks
>
>
> >
> > Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
> > ---
> >  drivers/virtio/virtio_ring.c | 219 ++++++++++++++++++++++++-----------
> >  1 file changed, 152 insertions(+), 67 deletions(-)
> >
> > diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
> > index 41144b5246a8..560ee30d942c 100644
> > --- a/drivers/virtio/virtio_ring.c
> > +++ b/drivers/virtio/virtio_ring.c
> > @@ -520,29 +520,83 @@ static inline unsigned int virtqueue_add_desc_split(struct virtqueue *vq,
> >         return next;
> >  }
> >
> > -static inline int virtqueue_add_split(struct virtqueue *_vq,
> > -                                     struct scatterlist *sgs[],
> > -                                     unsigned int total_sg,
> > -                                     unsigned int out_sgs,
> > -                                     unsigned int in_sgs,
> > -                                     void *data,
> > -                                     void *ctx,
> > -                                     gfp_t gfp)
> > +static int virtqueue_map_sgs(struct vring_virtqueue *vq,
> > +                            struct scatterlist *sgs[],
> > +                            unsigned int total_sg,
> > +                            unsigned int out_sgs,
> > +                            unsigned int in_sgs)
> >  {
> > -       struct vring_virtqueue *vq = to_vvq(_vq);
> >         struct scatterlist *sg;
> > -       struct vring_desc *desc;
> > -       unsigned int i, n, avail, descs_used, prev, err_idx;
> > -       int head;
> > -       bool indirect;
> > +       unsigned int n;
> >
> > -       START_USE(vq);
> > +       for (n = 0; n < out_sgs; n++) {
> > +               for (sg = sgs[n]; sg; sg = sg_next(sg)) {
> > +                       dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_TO_DEVICE);
> > +
> > +                       if (vring_mapping_error(vq, addr))
> > +                               return -ENOMEM;
> > +
> > +                       sg->dma_address = addr;
> > +               }
> > +       }
> > +       for (; n < (out_sgs + in_sgs); n++) {
> > +               for (sg = sgs[n]; sg; sg = sg_next(sg)) {
> > +                       dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_FROM_DEVICE);
> > +
> > +                       if (vring_mapping_error(vq, addr))
> > +                               return -ENOMEM;
> > +
> > +                       sg->dma_address = addr;
> > +               }
> > +       }
> > +
> > +       return 0;
> > +}
> > +
> > +static void virtqueue_unmap_sgs(struct vring_virtqueue *vq,
> > +                               struct scatterlist *sgs[],
> > +                               unsigned int total_sg,
> > +                               unsigned int out_sgs,
> > +                               unsigned int in_sgs)
> > +{
> > +       struct scatterlist *sg;
> > +       unsigned int n;
> > +
> > +       for (n = 0; n < out_sgs; n++) {
> > +               for (sg = sgs[n]; sg; sg = sg_next(sg)) {
> > +                       if (!sg->dma_address)
> > +                               return;
> > +
> > +                       dma_unmap_single(vring_dma_dev(vq), sg->dma_address,
> > +                                        sg->length, DMA_TO_DEVICE);
> > +               }
> > +       }
> > +       for (; n < (out_sgs + in_sgs); n++) {
> > +               for (sg = sgs[n]; sg; sg = sg_next(sg)) {
> > +                       if (!sg->dma_address)
> > +                               return;
> > +
> > +                       dma_unmap_single(vring_dma_dev(vq), sg->dma_address,
> > +                                        sg->length, DMA_FROM_DEVICE);
> > +               }
> > +       }
> > +}
> > +
> > +static inline int virtqueue_add_split_prepare(struct vring_virtqueue *vq,
> > +                                             unsigned int total_sg,
> > +                                             unsigned int out_sgs,
> > +                                             void *data,
> > +                                             void *ctx,
> > +                                             gfp_t gfp,
> > +                                             struct vring_desc **pdesc)
> > +{
> > +       struct vring_desc *desc;
> > +       unsigned int descs_used;
> >
> >         BUG_ON(data == NULL);
> >         BUG_ON(ctx && vq->indirect);
> >
> >         if (unlikely(vq->broken)) {
> > -               END_USE(vq);
> >                 return -EIO;
> >         }
> >
> > @@ -550,27 +604,17 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,
> >
> >         BUG_ON(total_sg == 0);
> >
> > -       head = vq->free_head;
> > -
> >         if (virtqueue_use_indirect(vq, total_sg))
> > -               desc = alloc_indirect_split(_vq, total_sg, gfp);
> > +               desc = alloc_indirect_split(&vq->vq, total_sg, gfp);
> >         else {
> >                 desc = NULL;
> >                 WARN_ON_ONCE(total_sg > vq->split.vring.num && !vq->indirect);
> >         }
> >
> > -       if (desc) {
> > -               /* Use a single buffer which doesn't continue */
> > -               indirect = true;
> > -               /* Set up rest to use this indirect table. */
> > -               i = 0;
> > +       if (desc)
> >                 descs_used = 1;
> > -       } else {
> > -               indirect = false;
> > -               desc = vq->split.vring.desc;
> > -               i = head;
> > +       else
> >                 descs_used = total_sg;
> > -       }
> >
> >         if (unlikely(vq->vq.num_free < descs_used)) {
> >                 pr_debug("Can't add buf len %i - avail = %i\n",
> > @@ -580,38 +624,64 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,
> >                  * host should service the ring ASAP. */
> >                 if (out_sgs)
> >                         vq->notify(&vq->vq);
> > -               if (indirect)
> > -                       kfree(desc);
> > -               END_USE(vq);
> > +               kfree(desc);
> >                 return -ENOSPC;
> >         }
> >
> > +       *pdesc = desc;
> > +
> > +       return 0;
> > +}
> > +
> > +static inline int virtqueue_add_split_vring(struct vring_virtqueue *vq,
> > +                                           struct scatterlist *sgs[],
> > +                                           unsigned int total_sg,
> > +                                           unsigned int out_sgs,
> > +                                           unsigned int in_sgs,
> > +                                           struct vring_desc *desc)
> > +{
> > +       unsigned int n, i, avail, descs_used, prev;
> > +       struct virtqueue *_vq = &vq->vq;
> > +       struct scatterlist *sg;
> > +       bool indirect;
> > +       int head;
> > +
> > +       head = vq->free_head;
> > +
> > +       if (desc) {
> > +               /* Use a single buffer which doesn't continue */
> > +               indirect = true;
> > +               /* Set up rest to use this indirect table. */
> > +               i = 0;
> > +               descs_used = 1;
> > +       } else {
> > +               indirect = false;
> > +               desc = vq->split.vring.desc;
> > +               i = head;
> > +               descs_used = total_sg;
> > +       }
> > +
> >         for (n = 0; n < out_sgs; n++) {
> >                 for (sg = sgs[n]; sg; sg = sg_next(sg)) {
> > -                       dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_TO_DEVICE);
> > -                       if (vring_mapping_error(vq, addr))
> > -                               goto unmap_release;
> > -
> >                         prev = i;
> >                         /* Note that we trust indirect descriptor
> >                          * table since it use stream DMA mapping.
> >                          */
> > -                       i = virtqueue_add_desc_split(_vq, desc, i, addr, sg->length,
> > +                       i = virtqueue_add_desc_split(_vq, desc, i,
> > +                                                    sg->dma_address,
> > +                                                    sg->length,
> >                                                      VRING_DESC_F_NEXT,
> >                                                      indirect);
> >                 }
> >         }
> >         for (; n < (out_sgs + in_sgs); n++) {
> >                 for (sg = sgs[n]; sg; sg = sg_next(sg)) {
> > -                       dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_FROM_DEVICE);
> > -                       if (vring_mapping_error(vq, addr))
> > -                               goto unmap_release;
> > -
> >                         prev = i;
> >                         /* Note that we trust indirect descriptor
> >                          * table since it use stream DMA mapping.
> >                          */
> > -                       i = virtqueue_add_desc_split(_vq, desc, i, addr,
> > +                       i = virtqueue_add_desc_split(_vq, desc, i,
> > +                                                    sg->dma_address,
> >                                                      sg->length,
> >                                                      VRING_DESC_F_NEXT |
> >                                                      VRING_DESC_F_WRITE,
> > @@ -630,7 +700,7 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,
> >                         vq, desc, total_sg * sizeof(struct vring_desc),
> >                         DMA_TO_DEVICE);
> >                 if (vring_mapping_error(vq, addr))
> > -                       goto unmap_release;
> > +                       return -ENOMEM;
> >
> >                 virtqueue_add_desc_split(_vq, vq->split.vring.desc,
> >                                          head, addr,
> > @@ -648,13 +718,6 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,
> >         else
> >                 vq->free_head = i;
> >
> > -       /* Store token and indirect buffer state. */
> > -       vq->split.desc_state[head].data = data;
> > -       if (indirect)
> > -               vq->split.desc_state[head].indir_desc = desc;
> > -       else
> > -               vq->split.desc_state[head].indir_desc = ctx;
> > -
> >         /* Put entry in available array (but don't update avail->idx until they
> >          * do sync). */
> >         avail = vq->split.avail_idx_shadow & (vq->split.vring.num - 1);
> > @@ -677,30 +740,52 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,
> >                 virtqueue_kick(_vq);
> >
> >         return 0;
> > +}
> >
> > -unmap_release:
> > -       err_idx = i;
> > +static inline int virtqueue_add_split(struct virtqueue *_vq,
> > +                                     struct scatterlist *sgs[],
> > +                                     unsigned int total_sg,
> > +                                     unsigned int out_sgs,
> > +                                     unsigned int in_sgs,
> > +                                     void *data,
> > +                                     void *ctx,
> > +                                     gfp_t gfp)
> > +{
> > +       struct vring_virtqueue *vq = to_vvq(_vq);
> > +       struct vring_desc *desc;
> > +       int head;
> > +       int err;
> >
> > -       if (indirect)
> > -               i = 0;
> > -       else
> > -               i = head;
> > +       START_USE(vq);
> >
> > -       for (n = 0; n < total_sg; n++) {
> > -               if (i == err_idx)
> > -                       break;
> > -               if (indirect) {
> > -                       vring_unmap_one_split_indirect(vq, &desc[i]);
> > -                       i = virtio16_to_cpu(_vq->vdev, desc[i].next);
> > -               } else
> > -                       i = vring_unmap_one_split(vq, i);
> > -       }
> > +       /* check vq state and try to alloc desc for indirect. */
> > +       err = virtqueue_add_split_prepare(vq, total_sg, out_sgs, data, ctx, gfp, &desc);
> > +       if (err)
> > +               goto end;
> >
> > -       if (indirect)
> > -               kfree(desc);
> > +       err = virtqueue_map_sgs(vq, sgs, total_sg, out_sgs, in_sgs);
> > +       if (err)
> > +               goto err;
> >
> > +       head = vq->free_head;
> > +       err = virtqueue_add_split_vring(vq, sgs, total_sg, out_sgs, in_sgs, desc);
> > +       if (err)
> > +               goto err;
> > +
> > +       /* Store token and indirect buffer state. */
> > +       vq->split.desc_state[head].data = data;
> > +       vq->split.desc_state[head].indir_desc = desc ? desc : ctx;
> > +
> > +       goto end;
> > +
> > +err:
> > +       virtqueue_unmap_sgs(vq, sgs, total_sg, out_sgs, in_sgs);
> > +
> > +       kfree(desc);
> > +
> > +end:
> >         END_USE(vq);
> > -       return -ENOMEM;
> > +       return err;
> >  }
> >
> >  static bool virtqueue_kick_prepare_split(struct virtqueue *_vq)
> > --
> > 2.32.0.3.g01195cf9f
> >
>
_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH vhost 07/10] virtio_ring: add api virtio_dma_map() for advance dma
  2023-02-20  5:38   ` Jason Wang
@ 2023-02-20  6:59     ` Xuan Zhuo
  2023-02-21  1:51       ` Jason Wang
  0 siblings, 1 reply; 46+ messages in thread
From: Xuan Zhuo @ 2023-02-20  6:59 UTC (permalink / raw)
  To: Jason Wang; +Cc: Michael S. Tsirkin, virtualization

On Mon, 20 Feb 2023 13:38:24 +0800, Jason Wang <jasowang@redhat.com> wrote:
> On Tue, Feb 14, 2023 at 3:27 PM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> >
> > Added virtio_dma_map() to map DMA addresses for virtual memory in
> > advance. The purpose is to keep memory mapped across multiple add/get
> > buf operations.
>
> I wonder if instead of exporting helpers like this, it might be simple
> to just export dma_dev then the upper layer can use DMA API at will?


The reason for not doing this is that virtio does not just use the DMA
device to map; it also checks whether the DMA API is used at all.


>
> (Otherwise the DMA helpers need to grow/shrink as the DMA API evolves?)
>
> >
> > Added virtio_dma_unmap() for unmap DMA address.
> >
> > Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
> > ---
> >  drivers/virtio/virtio_ring.c | 92 ++++++++++++++++++++++++++++++++++++
> >  include/linux/virtio.h       |  9 ++++
> >  2 files changed, 101 insertions(+)
> >
> > diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
> > index cd9364eb2345..855338609c7f 100644
> > --- a/drivers/virtio/virtio_ring.c
> > +++ b/drivers/virtio/virtio_ring.c
> > @@ -3172,4 +3172,96 @@ const struct vring *virtqueue_get_vring(struct virtqueue *vq)
> >  }
> >  EXPORT_SYMBOL_GPL(virtqueue_get_vring);
> >
> > +/**
> > + * virtio_dma_map_page - get the DMA addr of the memory for virtio device
> > + * @dev: virtio device
> > + * @page: the page of the memory to DMA
> > + * @offset: the offset of the memory inside page
> > + * @length: memory length
> > + * @dir: DMA direction
> > + *
> > + * This API is only for pre-mapped buffers, for non premapped buffers virtio
> > + * core handles DMA API internally.
> > + *
> > + * Returns the DMA addr. DMA_MAPPING_ERROR means error.
> > + */
> > +dma_addr_t virtio_dma_map_page(struct device *dev, struct page *page, size_t offset,
> > +                              unsigned int length, enum dma_data_direction dir)
> > +{
>
> This (and the reset) needs to be done per virtqueue instead per device
> after b0e504e5505d184b0be248b7dcdbe50b79f03758 ("virtio_ring: per
> virtqueue dma device").


YES.


>
> > +       struct virtio_device *vdev = dev_to_virtio(dev);
> > +
> > +       if (!vring_use_dma_api(vdev))
> > +               return page_to_phys(page) + offset;
> > +
> > +       return dma_map_page(vdev->dev.parent, page, offset, length, dir);
> > +}
>
> Need either inline or EXPORT_SYMBOL_GPL() here.

Because this interface is not used yet, I did not export it.

Thanks.


>
> Thanks
>
>
> > +
> > +/**
> > + * virtio_dma_map - get the DMA addr of the memory for virtio device
> > + * @dev: virtio device
> > + * @addr: the addr to DMA
> > + * @length: memory length
> > + * @dir: DMA direction
> > + *
> > + * This API is only for pre-mapped buffers, for non premapped buffers virtio
> > + * core handles DMA API internally.
> > + *
> > + * Returns the DMA addr.
> > + */
> > +dma_addr_t virtio_dma_map(struct device *dev, void *addr, unsigned int length,
> > +                         enum dma_data_direction dir)
> > +{
> > +       struct page *page;
> > +       size_t offset;
> > +
> > +       page = virt_to_page(addr);
> > +       offset = offset_in_page(addr);
> > +
> > +       return virtio_dma_map_page(dev, page, offset, length, dir);
> > +}
> > +EXPORT_SYMBOL_GPL(virtio_dma_map);
> > +
> > +/**
> > + * virtio_dma_mapping_error - check dma address
> > + * @dev: virtio device
> > + * @addr: DMA address
> > + *
> > + * This API is only for pre-mapped buffers, for non premapped buffers virtio
> > + * core handles DMA API internally.
> > + *
> > + * Returns 0 means dma valid. Other means invalid dma address.
> > + */
> > +int virtio_dma_mapping_error(struct device *dev, dma_addr_t addr)
> > +{
> > +       struct virtio_device *vdev = dev_to_virtio(dev);
> > +
> > +       if (!vring_use_dma_api(vdev))
> > +               return 0;
> > +
> > +       return dma_mapping_error(vdev->dev.parent, addr);
> > +}
> > +EXPORT_SYMBOL_GPL(virtio_dma_mapping_error);
> > +
> > +/**
> > + * virtio_dma_unmap - unmap DMA addr
> > + * @dev: virtio device
> > + * @dma: DMA address
> > + * @length: memory length
> > + * @dir: DMA direction
> > + *
> > + * This API is only for pre-mapped buffers, for non premapped buffers virtio
> > + * core handles DMA API internally.
> > + */
> > +void virtio_dma_unmap(struct device *dev, dma_addr_t dma, unsigned int length,
> > +                     enum dma_data_direction dir)
> > +{
> > +       struct virtio_device *vdev = dev_to_virtio(dev);
> > +
> > +       if (!vring_use_dma_api(vdev))
> > +               return;
> > +
> > +       dma_unmap_page(vdev->dev.parent, dma, length, dir);
> > +}
> > +EXPORT_SYMBOL_GPL(virtio_dma_unmap);
> > +
> >  MODULE_LICENSE("GPL");
> > diff --git a/include/linux/virtio.h b/include/linux/virtio.h
> > index 3ebb346ebb7c..b5fa71476737 100644
> > --- a/include/linux/virtio.h
> > +++ b/include/linux/virtio.h
> > @@ -9,6 +9,7 @@
> >  #include <linux/device.h>
> >  #include <linux/mod_devicetable.h>
> >  #include <linux/gfp.h>
> > +#include <linux/dma-mapping.h>
> >
> >  /**
> >   * struct virtqueue - a queue to register buffers for sending or receiving.
> > @@ -216,4 +217,12 @@ void unregister_virtio_driver(struct virtio_driver *drv);
> >  #define module_virtio_driver(__virtio_driver) \
> >         module_driver(__virtio_driver, register_virtio_driver, \
> >                         unregister_virtio_driver)
> > +
> > +dma_addr_t virtio_dma_map_page(struct device *dev, struct page *page, size_t offset,
> > +                              unsigned int length, enum dma_data_direction dir);
> > +dma_addr_t virtio_dma_map(struct device *dev, void *addr, unsigned int length,
> > +                         enum dma_data_direction dir);
> > +int virtio_dma_mapping_error(struct device *dev, dma_addr_t addr);
> > +void virtio_dma_unmap(struct device *dev, dma_addr_t dma, unsigned int length,
> > +                     enum dma_data_direction dir);
> >  #endif /* _LINUX_VIRTIO_H */
> > --
> > 2.32.0.3.g01195cf9f
> >
>
_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH vhost 10/10] virtio_ring: introduce virtqueue_reset()
  2023-02-20  5:38   ` Jason Wang
@ 2023-02-20  7:03     ` Xuan Zhuo
  2023-02-21  1:51       ` Jason Wang
  0 siblings, 1 reply; 46+ messages in thread
From: Xuan Zhuo @ 2023-02-20  7:03 UTC (permalink / raw)
  To: Jason Wang; +Cc: Michael S. Tsirkin, virtualization

On Mon, 20 Feb 2023 13:38:30 +0800, Jason Wang <jasowang@redhat.com> wrote:
> On Tue, Feb 14, 2023 at 3:27 PM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> >
> > Introduce virtqueue_reset() to release all buffer inside vq.
> >
> > Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
> > ---
> >  drivers/virtio/virtio_ring.c | 50 ++++++++++++++++++++++++++++++++++++
> >  include/linux/virtio.h       |  2 ++
> >  2 files changed, 52 insertions(+)
> >
> > diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
> > index 2ba60a14f557..2750a365439a 100644
> > --- a/drivers/virtio/virtio_ring.c
> > +++ b/drivers/virtio/virtio_ring.c
> > @@ -2930,6 +2930,56 @@ int virtqueue_resize(struct virtqueue *_vq, u32 num,
> >  }
> >  EXPORT_SYMBOL_GPL(virtqueue_resize);
> >
> > +/**
> > + * virtqueue_reset - detach and recycle all unused buffers
> > + * @_vq: the struct virtqueue we're talking about.
> > + * @recycle: callback to recycle unused buffers
> > + *
> > + * Caller must ensure we don't call this with other virtqueue operations
> > + * at the same time (except where noted).
> > + *
> > + * Returns zero or a negative error.
> > + * 0: success.
> > + * -EBUSY: Failed to sync with device, vq may not work properly
> > + * -ENOENT: Transport or device not supported
> > + * -EPERM: Operation not permitted
> > + */
> > +int virtqueue_reset(struct virtqueue *_vq,
> > +                   void (*recycle)(struct virtqueue *vq, void *buf))
> > +{
> > +       struct vring_virtqueue *vq = to_vvq(_vq);
> > +       struct virtio_device *vdev = vq->vq.vdev;
> > +       void *buf;
> > +       int err;
> > +
> > +       if (!vq->we_own_ring)
> > +               return -EPERM;
> > +
> > +       if (!vdev->config->disable_vq_and_reset)
> > +               return -ENOENT;
> > +
> > +       if (!vdev->config->enable_vq_after_reset)
> > +               return -ENOENT;
> > +
> > +       err = vdev->config->disable_vq_and_reset(_vq);
> > +       if (err)
> > +               return err;
> > +
> > +       while ((buf = virtqueue_detach_unused_buf(_vq)) != NULL)
> > +               recycle(_vq, buf);
> > +
> > +       if (vq->packed_ring)
> > +               virtqueue_reinit_packed(vq);
> > +       else
> > +               virtqueue_reinit_split(vq);
> > +
> > +       if (vdev->config->enable_vq_after_reset(_vq))
> > +               return -EBUSY;
> > +
> > +       return 0;
> > +}
>
> I don't get why not factor the similar logic from virtqueue_resize()?


I can do this, if you prefer this.

Thanks.



>
> Thanks
>
>
> > +EXPORT_SYMBOL_GPL(virtqueue_reset);
> > +
> >  /* Only available for split ring */
> >  struct virtqueue *vring_new_virtqueue(unsigned int index,
> >                                       unsigned int num,
> > diff --git a/include/linux/virtio.h b/include/linux/virtio.h
> > index d0e707d744a0..cf4c157e4e75 100644
> > --- a/include/linux/virtio.h
> > +++ b/include/linux/virtio.h
> > @@ -106,6 +106,8 @@ dma_addr_t virtqueue_get_used_addr(struct virtqueue *vq);
> >
> >  int virtqueue_resize(struct virtqueue *vq, u32 num,
> >                      void (*recycle)(struct virtqueue *vq, void *buf));
> > +int virtqueue_reset(struct virtqueue *vq,
> > +                   void (*recycle)(struct virtqueue *vq, void *buf));
> >
> >  /**
> >   * struct virtio_device - representation of a device using virtio
> > --
> > 2.32.0.3.g01195cf9f
> >
>
_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH vhost 08/10] virtio_ring: introduce dma sync api for virtio
  2023-02-20  5:38   ` Jason Wang
@ 2023-02-20  7:04     ` Xuan Zhuo
  2023-02-21  1:52       ` Jason Wang
  0 siblings, 1 reply; 46+ messages in thread
From: Xuan Zhuo @ 2023-02-20  7:04 UTC (permalink / raw)
  To: Jason Wang; +Cc: Michael S. Tsirkin, virtualization

On Mon, 20 Feb 2023 13:38:20 +0800, Jason Wang <jasowang@redhat.com> wrote:
> On Tue, Feb 14, 2023 at 3:27 PM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> >
> > These APIs have been introduced:
> >
> > * virtio_dma_need_sync
> > * virtio_dma_sync_single_range_for_cpu
> > * virtio_dma_sync_single_range_for_device
>
> What's the advantages of exporting internal logic like
> virtio_dma_need_sync() over hiding it in
> virtio_dma_sync_single_range_for_cpu() and
> virtio_dma_sync_single_range_for_device()?

Sorry, I didn't understand it.

Thanks.

>
> Thanks
>
>
> >
> > These APIs can be used together with the premapped mechanism to sync the
> > DMA address.
> >
> > Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
> > ---
> >  drivers/virtio/virtio_ring.c | 70 ++++++++++++++++++++++++++++++++++++
> >  include/linux/virtio.h       |  8 +++++
> >  2 files changed, 78 insertions(+)
> >
> > diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
> > index 855338609c7f..84129b8c3e2a 100644
> > --- a/drivers/virtio/virtio_ring.c
> > +++ b/drivers/virtio/virtio_ring.c
> > @@ -3264,4 +3264,74 @@ void virtio_dma_unmap(struct device *dev, dma_addr_t dma, unsigned int length,
> >  }
> >  EXPORT_SYMBOL_GPL(virtio_dma_unmap);
> >
> > +/**
> > + * virtio_dma_need_sync - check a dma address needs sync
> > + * @dev: virtio device
> > + * @addr: DMA address
> > + *
> > + * This API is only for pre-mapped buffers, for non premapped buffers virtio
> > + * core handles DMA API internally.
> > + */
> > +bool virtio_dma_need_sync(struct device *dev, dma_addr_t addr)
> > +{
> > +       struct virtio_device *vdev = dev_to_virtio(dev);
> > +
> > +       if (!vring_use_dma_api(vdev))
> > +               return 0;
> > +
> > +       return dma_need_sync(vdev->dev.parent, addr);
> > +}
> > +EXPORT_SYMBOL_GPL(virtio_dma_need_sync);
> > +
> > +/**
> > + * virtio_dma_sync_single_range_for_cpu - dma sync for cpu
> > + * @dev: virtio device
> > + * @addr: DMA address
> > + * @offset: DMA address offset
> > + * @size: mem size for sync
> > + * @dir: DMA direction
> > + *
> > + * Before calling this function, use virtio_dma_need_sync() to confirm that the
> > + * DMA address really needs to be synchronized
> > + *
> > + * This API is only for pre-mapped buffers, for non premapped buffers virtio
> > + * core handles DMA API internally.
> > + */
> > +void virtio_dma_sync_single_range_for_cpu(struct device *dev, dma_addr_t addr,
> > +                                         unsigned long offset, size_t size,
> > +                                         enum dma_data_direction dir)
> > +{
> > +       struct virtio_device *vdev = dev_to_virtio(dev);
> > +
> > +       dma_sync_single_range_for_cpu(vdev->dev.parent, addr, offset,
> > +                                     size, DMA_BIDIRECTIONAL);
> > +}
> > +EXPORT_SYMBOL_GPL(virtio_dma_sync_single_range_for_cpu);
> > +
> > +/**
> > + * virtio_dma_sync_single_range_for_device - dma sync for device
> > + * @dev: virtio device
> > + * @addr: DMA address
> > + * @offset: DMA address offset
> > + * @size: mem size for sync
> > + * @dir: DMA direction
> > + *
> > + * Before calling this function, use virtio_dma_need_sync() to confirm that the
> > + * DMA address really needs to be synchronized
> > + *
> > + * This API is only for pre-mapped buffers, for non premapped buffers virtio
> > + * core handles DMA API internally.
> > + */
> > +void virtio_dma_sync_single_range_for_device(struct device *dev,
> > +                                            dma_addr_t addr,
> > +                                            unsigned long offset, size_t size,
> > +                                            enum dma_data_direction dir)
> > +{
> > +       struct virtio_device *vdev = dev_to_virtio(dev);
> > +
> > +       dma_sync_single_range_for_device(vdev->dev.parent, addr, offset,
> > +                                        size, DMA_BIDIRECTIONAL);
> > +}
> > +EXPORT_SYMBOL_GPL(virtio_dma_sync_single_range_for_device);
> > +
> >  MODULE_LICENSE("GPL");
> > diff --git a/include/linux/virtio.h b/include/linux/virtio.h
> > index b5fa71476737..d0e707d744a0 100644
> > --- a/include/linux/virtio.h
> > +++ b/include/linux/virtio.h
> > @@ -225,4 +225,12 @@ dma_addr_t virtio_dma_map(struct device *dev, void *addr, unsigned int length,
> >  int virtio_dma_mapping_error(struct device *dev, dma_addr_t addr);
> >  void virtio_dma_unmap(struct device *dev, dma_addr_t dma, unsigned int length,
> >                       enum dma_data_direction dir);
> > +bool virtio_dma_need_sync(struct device *dev, dma_addr_t addr);
> > +void virtio_dma_sync_single_range_for_cpu(struct device *dev, dma_addr_t addr,
> > +                                         unsigned long offset, size_t size,
> > +                                         enum dma_data_direction dir);
> > +void virtio_dma_sync_single_range_for_device(struct device *dev,
> > +                                            dma_addr_t addr,
> > +                                            unsigned long offset, size_t size,
> > +                                            enum dma_data_direction dir);
> >  #endif /* _LINUX_VIRTIO_H */
> > --
> > 2.32.0.3.g01195cf9f
> >
>
_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH vhost 01/10] virtio_ring: split: refactor virtqueue_add_split() for premapped
  2023-02-20  5:37   ` Jason Wang
  2023-02-20  6:57     ` Xuan Zhuo
@ 2023-02-20 12:12     ` Michael S. Tsirkin
  1 sibling, 0 replies; 46+ messages in thread
From: Michael S. Tsirkin @ 2023-02-20 12:12 UTC (permalink / raw)
  To: Jason Wang; +Cc: virtualization

On Mon, Feb 20, 2023 at 01:37:37PM +0800, Jason Wang wrote:
> On Tue, Feb 14, 2023 at 3:27 PM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> >
> > DMA-related logic is separated from the virtqueue_add_split to prepare
> > for subsequent support for premapped.
> 
> The patch seems to do more than what is described here.
> 
> To simplify reviewers, I'd suggest to split this patch into three:
> 
> 1) virtqueue_add_split_prepare() (could we have a better name?)
> 2) virtqueue_map_sgs()
> 3) virtqueue_add_split_vring()
> 
> (Or only factor DMA parts out, I haven't gone through the rest of the patches)
> 
> Thanks
> 

It's pretty small, even split is not mandatory imho.
But definitely please do document what is done fully.



> >
> > Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
> > ---
> >  drivers/virtio/virtio_ring.c | 219 ++++++++++++++++++++++++-----------
> >  1 file changed, 152 insertions(+), 67 deletions(-)
> >
> > diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
> > index 41144b5246a8..560ee30d942c 100644
> > --- a/drivers/virtio/virtio_ring.c
> > +++ b/drivers/virtio/virtio_ring.c
> > @@ -520,29 +520,83 @@ static inline unsigned int virtqueue_add_desc_split(struct virtqueue *vq,
> >         return next;
> >  }
> >
> > -static inline int virtqueue_add_split(struct virtqueue *_vq,
> > -                                     struct scatterlist *sgs[],
> > -                                     unsigned int total_sg,
> > -                                     unsigned int out_sgs,
> > -                                     unsigned int in_sgs,
> > -                                     void *data,
> > -                                     void *ctx,
> > -                                     gfp_t gfp)
> > +static int virtqueue_map_sgs(struct vring_virtqueue *vq,
> > +                            struct scatterlist *sgs[],
> > +                            unsigned int total_sg,
> > +                            unsigned int out_sgs,
> > +                            unsigned int in_sgs)
> >  {
> > -       struct vring_virtqueue *vq = to_vvq(_vq);
> >         struct scatterlist *sg;
> > -       struct vring_desc *desc;
> > -       unsigned int i, n, avail, descs_used, prev, err_idx;
> > -       int head;
> > -       bool indirect;
> > +       unsigned int n;
> >
> > -       START_USE(vq);
> > +       for (n = 0; n < out_sgs; n++) {
> > +               for (sg = sgs[n]; sg; sg = sg_next(sg)) {
> > +                       dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_TO_DEVICE);
> > +
> > +                       if (vring_mapping_error(vq, addr))
> > +                               return -ENOMEM;
> > +
> > +                       sg->dma_address = addr;
> > +               }
> > +       }
> > +       for (; n < (out_sgs + in_sgs); n++) {
> > +               for (sg = sgs[n]; sg; sg = sg_next(sg)) {
> > +                       dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_FROM_DEVICE);
> > +
> > +                       if (vring_mapping_error(vq, addr))
> > +                               return -ENOMEM;
> > +
> > +                       sg->dma_address = addr;
> > +               }
> > +       }
> > +
> > +       return 0;
> > +}
> > +
> > +static void virtqueue_unmap_sgs(struct vring_virtqueue *vq,
> > +                               struct scatterlist *sgs[],
> > +                               unsigned int total_sg,
> > +                               unsigned int out_sgs,
> > +                               unsigned int in_sgs)
> > +{
> > +       struct scatterlist *sg;
> > +       unsigned int n;
> > +
> > +       for (n = 0; n < out_sgs; n++) {
> > +               for (sg = sgs[n]; sg; sg = sg_next(sg)) {
> > +                       if (!sg->dma_address)
> > +                               return;
> > +
> > +                       dma_unmap_single(vring_dma_dev(vq), sg->dma_address,
> > +                                        sg->length, DMA_TO_DEVICE);
> > +               }
> > +       }
> > +       for (; n < (out_sgs + in_sgs); n++) {
> > +               for (sg = sgs[n]; sg; sg = sg_next(sg)) {
> > +                       if (!sg->dma_address)
> > +                               return;
> > +
> > +                       dma_unmap_single(vring_dma_dev(vq), sg->dma_address,
> > +                                        sg->length, DMA_FROM_DEVICE);
> > +               }
> > +       }
> > +}
> > +
> > +static inline int virtqueue_add_split_prepare(struct vring_virtqueue *vq,
> > +                                             unsigned int total_sg,
> > +                                             unsigned int out_sgs,
> > +                                             void *data,
> > +                                             void *ctx,
> > +                                             gfp_t gfp,
> > +                                             struct vring_desc **pdesc)
> > +{
> > +       struct vring_desc *desc;
> > +       unsigned int descs_used;
> >
> >         BUG_ON(data == NULL);
> >         BUG_ON(ctx && vq->indirect);
> >
> >         if (unlikely(vq->broken)) {
> > -               END_USE(vq);
> >                 return -EIO;
> >         }
> >
> > @@ -550,27 +604,17 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,
> >
> >         BUG_ON(total_sg == 0);
> >
> > -       head = vq->free_head;
> > -
> >         if (virtqueue_use_indirect(vq, total_sg))
> > -               desc = alloc_indirect_split(_vq, total_sg, gfp);
> > +               desc = alloc_indirect_split(&vq->vq, total_sg, gfp);
> >         else {
> >                 desc = NULL;
> >                 WARN_ON_ONCE(total_sg > vq->split.vring.num && !vq->indirect);
> >         }
> >
> > -       if (desc) {
> > -               /* Use a single buffer which doesn't continue */
> > -               indirect = true;
> > -               /* Set up rest to use this indirect table. */
> > -               i = 0;
> > +       if (desc)
> >                 descs_used = 1;
> > -       } else {
> > -               indirect = false;
> > -               desc = vq->split.vring.desc;
> > -               i = head;
> > +       else
> >                 descs_used = total_sg;
> > -       }
> >
> >         if (unlikely(vq->vq.num_free < descs_used)) {
> >                 pr_debug("Can't add buf len %i - avail = %i\n",
> > @@ -580,38 +624,64 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,
> >                  * host should service the ring ASAP. */
> >                 if (out_sgs)
> >                         vq->notify(&vq->vq);
> > -               if (indirect)
> > -                       kfree(desc);
> > -               END_USE(vq);
> > +               kfree(desc);
> >                 return -ENOSPC;
> >         }
> >
> > +       *pdesc = desc;
> > +
> > +       return 0;
> > +}
> > +
> > +static inline int virtqueue_add_split_vring(struct vring_virtqueue *vq,
> > +                                           struct scatterlist *sgs[],
> > +                                           unsigned int total_sg,
> > +                                           unsigned int out_sgs,
> > +                                           unsigned int in_sgs,
> > +                                           struct vring_desc *desc)
> > +{
> > +       unsigned int n, i, avail, descs_used, prev;
> > +       struct virtqueue *_vq = &vq->vq;
> > +       struct scatterlist *sg;
> > +       bool indirect;
> > +       int head;
> > +
> > +       head = vq->free_head;
> > +
> > +       if (desc) {
> > +               /* Use a single buffer which doesn't continue */
> > +               indirect = true;
> > +               /* Set up rest to use this indirect table. */
> > +               i = 0;
> > +               descs_used = 1;
> > +       } else {
> > +               indirect = false;
> > +               desc = vq->split.vring.desc;
> > +               i = head;
> > +               descs_used = total_sg;
> > +       }
> > +
> >         for (n = 0; n < out_sgs; n++) {
> >                 for (sg = sgs[n]; sg; sg = sg_next(sg)) {
> > -                       dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_TO_DEVICE);
> > -                       if (vring_mapping_error(vq, addr))
> > -                               goto unmap_release;
> > -
> >                         prev = i;
> >                         /* Note that we trust indirect descriptor
> >                          * table since it use stream DMA mapping.
> >                          */
> > -                       i = virtqueue_add_desc_split(_vq, desc, i, addr, sg->length,
> > +                       i = virtqueue_add_desc_split(_vq, desc, i,
> > +                                                    sg->dma_address,
> > +                                                    sg->length,
> >                                                      VRING_DESC_F_NEXT,
> >                                                      indirect);
> >                 }
> >         }
> >         for (; n < (out_sgs + in_sgs); n++) {
> >                 for (sg = sgs[n]; sg; sg = sg_next(sg)) {
> > -                       dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_FROM_DEVICE);
> > -                       if (vring_mapping_error(vq, addr))
> > -                               goto unmap_release;
> > -
> >                         prev = i;
> >                         /* Note that we trust indirect descriptor
> >                          * table since it use stream DMA mapping.
> >                          */
> > -                       i = virtqueue_add_desc_split(_vq, desc, i, addr,
> > +                       i = virtqueue_add_desc_split(_vq, desc, i,
> > +                                                    sg->dma_address,
> >                                                      sg->length,
> >                                                      VRING_DESC_F_NEXT |
> >                                                      VRING_DESC_F_WRITE,
> > @@ -630,7 +700,7 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,
> >                         vq, desc, total_sg * sizeof(struct vring_desc),
> >                         DMA_TO_DEVICE);
> >                 if (vring_mapping_error(vq, addr))
> > -                       goto unmap_release;
> > +                       return -ENOMEM;
> >
> >                 virtqueue_add_desc_split(_vq, vq->split.vring.desc,
> >                                          head, addr,
> > @@ -648,13 +718,6 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,
> >         else
> >                 vq->free_head = i;
> >
> > -       /* Store token and indirect buffer state. */
> > -       vq->split.desc_state[head].data = data;
> > -       if (indirect)
> > -               vq->split.desc_state[head].indir_desc = desc;
> > -       else
> > -               vq->split.desc_state[head].indir_desc = ctx;
> > -
> >         /* Put entry in available array (but don't update avail->idx until they
> >          * do sync). */
> >         avail = vq->split.avail_idx_shadow & (vq->split.vring.num - 1);
> > @@ -677,30 +740,52 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,
> >                 virtqueue_kick(_vq);
> >
> >         return 0;
> > +}
> >
> > -unmap_release:
> > -       err_idx = i;
> > +static inline int virtqueue_add_split(struct virtqueue *_vq,
> > +                                     struct scatterlist *sgs[],
> > +                                     unsigned int total_sg,
> > +                                     unsigned int out_sgs,
> > +                                     unsigned int in_sgs,
> > +                                     void *data,
> > +                                     void *ctx,
> > +                                     gfp_t gfp)
> > +{
> > +       struct vring_virtqueue *vq = to_vvq(_vq);
> > +       struct vring_desc *desc;
> > +       int head;
> > +       int err;
> >
> > -       if (indirect)
> > -               i = 0;
> > -       else
> > -               i = head;
> > +       START_USE(vq);
> >
> > -       for (n = 0; n < total_sg; n++) {
> > -               if (i == err_idx)
> > -                       break;
> > -               if (indirect) {
> > -                       vring_unmap_one_split_indirect(vq, &desc[i]);
> > -                       i = virtio16_to_cpu(_vq->vdev, desc[i].next);
> > -               } else
> > -                       i = vring_unmap_one_split(vq, i);
> > -       }
> > +       /* check vq state and try to alloc desc for indirect. */
> > +       err = virtqueue_add_split_prepare(vq, total_sg, out_sgs, data, ctx, gfp, &desc);
> > +       if (err)
> > +               goto end;
> >
> > -       if (indirect)
> > -               kfree(desc);
> > +       err = virtqueue_map_sgs(vq, sgs, total_sg, out_sgs, in_sgs);
> > +       if (err)
> > +               goto err;
> >
> > +       head = vq->free_head;
> > +       err = virtqueue_add_split_vring(vq, sgs, total_sg, out_sgs, in_sgs, desc);
> > +       if (err)
> > +               goto err;
> > +
> > +       /* Store token and indirect buffer state. */
> > +       vq->split.desc_state[head].data = data;
> > +       vq->split.desc_state[head].indir_desc = desc ? desc : ctx;
> > +
> > +       goto end;
> > +
> > +err:
> > +       virtqueue_unmap_sgs(vq, sgs, total_sg, out_sgs, in_sgs);
> > +
> > +       kfree(desc);
> > +
> > +end:
> >         END_USE(vq);
> > -       return -ENOMEM;
> > +       return err;
> >  }
> >
> >  static bool virtqueue_kick_prepare_split(struct virtqueue *_vq)
> > --
> > 2.32.0.3.g01195cf9f
> >

_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH vhost 04/10] virtio_ring: split: introduce virtqueue_add_split_premapped()
  2023-02-20  6:43     ` Xuan Zhuo
@ 2023-02-21  1:49       ` Jason Wang
  0 siblings, 0 replies; 46+ messages in thread
From: Jason Wang @ 2023-02-21  1:49 UTC (permalink / raw)
  To: Xuan Zhuo; +Cc: Michael S. Tsirkin, virtualization

On Mon, Feb 20, 2023 at 2:56 PM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
>
> On Mon, 20 Feb 2023 13:38:13 +0800, Jason Wang <jasowang@redhat.com> wrote:
> > On Tue, Feb 14, 2023 at 3:27 PM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> > >
> > > virtqueue_add_split() only supports virtual addresses, dma is completed
> > > in virtqueue_add_split().
> > >
> > > In some scenarios (such as the AF_XDP scenario), the memory is allocated
> > > and DMA is completed in advance, so it is necessary for us to support
> > > passing the DMA address to virtio core.
> > >
> > > Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
> > > ---
> > >  drivers/virtio/virtio_ring.c | 100 +++++++++++++++++++++++++++++++++--
> > >  include/linux/virtio.h       |   5 ++
> > >  2 files changed, 100 insertions(+), 5 deletions(-)
> > >
> > > diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
> > > index 47b6f9152f9f..a31155abe101 100644
> > > --- a/drivers/virtio/virtio_ring.c
> > > +++ b/drivers/virtio/virtio_ring.c
> > > @@ -70,6 +70,7 @@
> > >  struct vring_desc_state_split {
> > >         void *data;                     /* Data for callback. */
> > >         struct vring_desc *indir_desc;  /* Indirect descriptor, if any. */
> > > +       bool premapped;
> >
> > Better with a comment.
> >
> > Not native speaker, but "dma_addr" might be better?
> >
> > >  };
> > >
> > >  struct vring_desc_state_packed {
> > > @@ -440,7 +441,7 @@ static void vring_unmap_one_split_indirect(const struct vring_virtqueue *vq,
> > >  }
> > >
> > >  static unsigned int vring_unmap_one_split(const struct vring_virtqueue *vq,
> > > -                                         unsigned int i)
> > > +                                         unsigned int i, bool premapped)
> > >  {
> > >         struct vring_desc_extra *extra = vq->split.desc_extra;
> > >         u16 flags;
> > > @@ -457,6 +458,9 @@ static unsigned int vring_unmap_one_split(const struct vring_virtqueue *vq,
> > >                                  (flags & VRING_DESC_F_WRITE) ?
> > >                                  DMA_FROM_DEVICE : DMA_TO_DEVICE);
> > >         } else {
> > > +               if (premapped)
> > > +                       goto out;
> > > +
> > >                 dma_unmap_page(vring_dma_dev(vq),
> > >                                extra[i].addr,
> > >                                extra[i].len,
> > > @@ -788,6 +792,47 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,
> > >         return err;
> > >  }
> > >
> > > +static inline int virtqueue_add_split_premapped(struct virtqueue *_vq,
> > > +                                               struct scatterlist *sgs[],
> > > +                                               unsigned int total_sg,
> > > +                                               unsigned int out_sgs,
> > > +                                               unsigned int in_sgs,
> > > +                                               void *data,
> > > +                                               void *ctx,
> > > +                                               gfp_t gfp)
> > > +{
> > > +       struct vring_virtqueue *vq = to_vvq(_vq);
> > > +       struct vring_desc *desc;
> > > +       int head;
> > > +       int err;
> > > +
> > > +       START_USE(vq);
> > > +
> > > +       /* check vq state and try to alloc desc for indirect. */
> > > +       err = virtqueue_add_split_prepare(vq, total_sg, out_sgs, data, ctx, gfp, &desc);
> > > +       if (err)
> > > +               goto end;
> > > +
> > > +       head = vq->free_head;
> > > +       err = virtqueue_add_split_vring(vq, sgs, total_sg, out_sgs, in_sgs, desc);
> > > +       if (err)
> > > +               goto err;
> > > +
> > > +       /* Store token and indirect buffer state. */
> > > +       vq->split.desc_state[head].data = data;
> > > +       vq->split.desc_state[head].indir_desc = desc ? desc : ctx;
> > > +       vq->split.desc_state[head].premapped = true;
> >
> > This function duplicates most of the logic of virtqueue_add_split()
> > let's unify it.
>
> I want to know whether __virtqueue_add_split is the original
> virtqueue_add_split or my refactored virtqueue_add_split?

It's basically just the virtqueue_add_split_premapped() but with a
boolean to say if it is premapped.

>
> >
> > probably:
> >
> > __virtqueue_add_split(..., bool premapped);
> > virtqueue_add_split()
> > {
> >     __virtqueue_add_split(..., false);
> > }
> >
> > virtqueue_add_split_premapped()
> > {
> >    __virtqueue_add_split(..., true);
> > }
>
> I am trying to reduce the inspection of premapped.
>
> In fact, this is Michael's request, although I am not particularly sure that my
> implementation has met his requirements.
>
> https://lore.kernel.org/all/20230203041006-mutt-send-email-mst@kernel.org/

I think there should be no conflict,  the use of premapped was limited
to the above two functions?

Thanks

>
> Thanks.
>
>
> >
> > ?
> >
> > And so did for packed (patch 5).
> >
> > Thanks
> >
> >
> >
> > > +
> > > +       goto end;
> > > +
> > > +err:
> > > +       kfree(desc);
> > > +
> > > +end:
> > > +       END_USE(vq);
> > > +       return err;
> > > +}
> > > +
> > >  static bool virtqueue_kick_prepare_split(struct virtqueue *_vq)
> > >  {
> > >         struct vring_virtqueue *vq = to_vvq(_vq);
> > > @@ -824,20 +869,23 @@ static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head,
> > >  {
> > >         unsigned int i, j;
> > >         __virtio16 nextflag = cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_NEXT);
> > > +       bool premapped;
> > >
> > >         /* Clear data ptr. */
> > >         vq->split.desc_state[head].data = NULL;
> > >
> > > +       premapped = vq->split.desc_state[head].premapped;
> > > +
> > >         /* Put back on free list: unmap first-level descriptors and find end */
> > >         i = head;
> > >
> > >         while (vq->split.vring.desc[i].flags & nextflag) {
> > > -               vring_unmap_one_split(vq, i);
> > > +               vring_unmap_one_split(vq, i, premapped);
> > >                 i = vq->split.desc_extra[i].next;
> > >                 vq->vq.num_free++;
> > >         }
> > >
> > > -       vring_unmap_one_split(vq, i);
> > > +       vring_unmap_one_split(vq, i, premapped);
> > >         vq->split.desc_extra[i].next = vq->free_head;
> > >         vq->free_head = head;
> > >
> > > @@ -859,8 +907,10 @@ static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head,
> > >                                 VRING_DESC_F_INDIRECT));
> > >                 BUG_ON(len == 0 || len % sizeof(struct vring_desc));
> > >
> > > -               for (j = 0; j < len / sizeof(struct vring_desc); j++)
> > > -                       vring_unmap_one_split_indirect(vq, &indir_desc[j]);
> > > +               if (!premapped) {
> > > +                       for (j = 0; j < len / sizeof(struct vring_desc); j++)
> > > +                               vring_unmap_one_split_indirect(vq, &indir_desc[j]);
> > > +               }
> > >
> > >                 kfree(indir_desc);
> > >                 vq->split.desc_state[head].indir_desc = NULL;
> > > @@ -2204,6 +2254,21 @@ static inline int virtqueue_add(struct virtqueue *_vq,
> > >                                         out_sgs, in_sgs, data, ctx, gfp);
> > >  }
> > >
> > > +static inline int virtqueue_add_premapped(struct virtqueue *_vq,
> > > +                                         struct scatterlist *sgs[],
> > > +                                         unsigned int total_sg,
> > > +                                         unsigned int out_sgs,
> > > +                                         unsigned int in_sgs,
> > > +                                         void *data,
> > > +                                         void *ctx,
> > > +                                         gfp_t gfp)
> > > +{
> > > +       struct vring_virtqueue *vq = to_vvq(_vq);
> > > +
> > > +       return virtqueue_add_split_premapped(_vq, sgs, total_sg, out_sgs,
> > > +                                            in_sgs, data, ctx, gfp);
> > > +}
> > > +
> > >  /**
> > >   * virtqueue_add_sgs - expose buffers to other end
> > >   * @_vq: the struct virtqueue we're talking about.
> > > @@ -2261,6 +2326,31 @@ int virtqueue_add_outbuf(struct virtqueue *vq,
> > >  }
> > >  EXPORT_SYMBOL_GPL(virtqueue_add_outbuf);
> > >
> > > +/**
> > > + * virtqueue_add_outbuf_premapped - expose output buffers to other end
> > > + * @vq: the struct virtqueue we're talking about.
> > > + * @sg: scatterlist (must be well-formed and terminated!)
> > > + * @num: the number of entries in @sg readable by other side
> > > + * @data: the token identifying the buffer.
> > > + * @gfp: how to do memory allocations (if necessary).
> > > + *
> > > + * Caller must ensure we don't call this with other virtqueue operations
> > > + * at the same time (except where noted).
> > > + *
> > > + * It is required that all addrs have completed DMA operations. And use
> > > + * sg->dma_address, sg->length to pass addr and length.
> > > + *
> > > + * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO).
> > > + */
> > > +int virtqueue_add_outbuf_premapped(struct virtqueue *vq,
> > > +                                  struct scatterlist *sg, unsigned int num,
> > > +                                  void *data,
> > > +                                  gfp_t gfp)
> > > +{
> > > +       return virtqueue_add_premapped(vq, &sg, num, 1, 0, data, NULL, gfp);
> > > +}
> > > +EXPORT_SYMBOL_GPL(virtqueue_add_outbuf_premapped);
> > > +
> > >  /**
> > >   * virtqueue_add_inbuf - expose input buffers to other end
> > >   * @vq: the struct virtqueue we're talking about.
> > > diff --git a/include/linux/virtio.h b/include/linux/virtio.h
> > > index dcab9c7e8784..d8b472a7dcae 100644
> > > --- a/include/linux/virtio.h
> > > +++ b/include/linux/virtio.h
> > > @@ -43,6 +43,11 @@ int virtqueue_add_outbuf(struct virtqueue *vq,
> > >                          void *data,
> > >                          gfp_t gfp);
> > >
> > > +int virtqueue_add_outbuf_premapped(struct virtqueue *vq,
> > > +                                  struct scatterlist *sg, unsigned int num,
> > > +                                  void *data,
> > > +                                  gfp_t gfp);
> > > +
> > >  int virtqueue_add_inbuf(struct virtqueue *vq,
> > >                         struct scatterlist sg[], unsigned int num,
> > >                         void *data,
> > > --
> > > 2.32.0.3.g01195cf9f
> > >
> >
>

_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH vhost 07/10] virtio_ring: add api virtio_dma_map() for advance dma
  2023-02-20  6:59     ` Xuan Zhuo
@ 2023-02-21  1:51       ` Jason Wang
  2023-02-28 11:15         ` Xuan Zhuo
  2023-03-01 11:47         ` Xuan Zhuo
  0 siblings, 2 replies; 46+ messages in thread
From: Jason Wang @ 2023-02-21  1:51 UTC (permalink / raw)
  To: Xuan Zhuo; +Cc: Michael S. Tsirkin, virtualization

On Mon, Feb 20, 2023 at 3:02 PM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
>
> On Mon, 20 Feb 2023 13:38:24 +0800, Jason Wang <jasowang@redhat.com> wrote:
> > On Tue, Feb 14, 2023 at 3:27 PM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> > >
> > > Added virtio_dma_map() to map DMA addresses for virtual memory in
> > > advance. The purpose is to keep memory mapped across multiple add/get
> > > buf operations.
> >
> > I wonder if instead of exporting helpers like this, it might be simple
> > to just export dma_dev then the upper layer can use DMA API at will?
>
>
> The reason for not doing this is that Virtio does not just use DMA_DEV to map,
> but also checks whether DMA is used.

We should let the DMA API decide by exporting a correct dma_dev. E.g
when ACCESS_PLATFORM is not negotiated, advertising a DMA dev without
dma_ops.

Thanks

>
>
> >
> > (Otherwise the DMA helpers need to grow/shrink as the DMA API evolves?)
> >
> > >
> > > Added virtio_dma_unmap() for unmap DMA address.
> > >
> > > Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
> > > ---
> > >  drivers/virtio/virtio_ring.c | 92 ++++++++++++++++++++++++++++++++++++
> > >  include/linux/virtio.h       |  9 ++++
> > >  2 files changed, 101 insertions(+)
> > >
> > > diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
> > > index cd9364eb2345..855338609c7f 100644
> > > --- a/drivers/virtio/virtio_ring.c
> > > +++ b/drivers/virtio/virtio_ring.c
> > > @@ -3172,4 +3172,96 @@ const struct vring *virtqueue_get_vring(struct virtqueue *vq)
> > >  }
> > >  EXPORT_SYMBOL_GPL(virtqueue_get_vring);
> > >
> > > +/**
> > > + * virtio_dma_map_page - get the DMA addr of the memory for virtio device
> > > + * @dev: virtio device
> > > + * @page: the page of the memory to DMA
> > > + * @offset: the offset of the memory inside page
> > > + * @length: memory length
> > > + * @dir: DMA direction
> > > + *
> > > + * This API is only for pre-mapped buffers, for non premapped buffers virtio
> > > + * core handles DMA API internally.
> > > + *
> > > + * Returns the DMA addr. DMA_MAPPING_ERROR means error.
> > > + */
> > > +dma_addr_t virtio_dma_map_page(struct device *dev, struct page *page, size_t offset,
> > > +                              unsigned int length, enum dma_data_direction dir)
> > > +{
> >
> > This (and the reset) needs to be done per virtqueue instead per device
> > after b0e504e5505d184b0be248b7dcdbe50b79f03758 ("virtio_ring: per
> > virtqueue dma device").
>
>
> YES.
>
>
> >
> > > +       struct virtio_device *vdev = dev_to_virtio(dev);
> > > +
> > > +       if (!vring_use_dma_api(vdev))
> > > +               return page_to_phys(page) + offset;
> > > +
> > > +       return dma_map_page(vdev->dev.parent, page, offset, length, dir);
> > > +}
> >
> > Need either inline or EXPORT_SYMBOL_GPL() here.
>
> Because I did not use this interface, I did not export it.
>
> Thanks.
>
>
> >
> > Thanks
> >
> >
> > > +
> > > +/**
> > > + * virtio_dma_map - get the DMA addr of the memory for virtio device
> > > + * @dev: virtio device
> > > + * @addr: the addr to DMA
> > > + * @length: memory length
> > > + * @dir: DMA direction
> > > + *
> > > + * This API is only for pre-mapped buffers, for non premapped buffers virtio
> > > + * core handles DMA API internally.
> > > + *
> > > + * Returns the DMA addr.
> > > + */
> > > +dma_addr_t virtio_dma_map(struct device *dev, void *addr, unsigned int length,
> > > +                         enum dma_data_direction dir)
> > > +{
> > > +       struct page *page;
> > > +       size_t offset;
> > > +
> > > +       page = virt_to_page(addr);
> > > +       offset = offset_in_page(addr);
> > > +
> > > +       return virtio_dma_map_page(dev, page, offset, length, dir);
> > > +}
> > > +EXPORT_SYMBOL_GPL(virtio_dma_map);
> > > +
> > > +/**
> > > + * virtio_dma_mapping_error - check dma address
> > > + * @dev: virtio device
> > > + * @addr: DMA address
> > > + *
> > > + * This API is only for pre-mapped buffers, for non premapped buffers virtio
> > > + * core handles DMA API internally.
> > > + *
> > > + * Returns 0 means dma valid. Other means invalid dma address.
> > > + */
> > > +int virtio_dma_mapping_error(struct device *dev, dma_addr_t addr)
> > > +{
> > > +       struct virtio_device *vdev = dev_to_virtio(dev);
> > > +
> > > +       if (!vring_use_dma_api(vdev))
> > > +               return 0;
> > > +
> > > +       return dma_mapping_error(vdev->dev.parent, addr);
> > > +}
> > > +EXPORT_SYMBOL_GPL(virtio_dma_mapping_error);
> > > +
> > > +/**
> > > + * virtio_dma_unmap - unmap DMA addr
> > > + * @dev: virtio device
> > > + * @dma: DMA address
> > > + * @length: memory length
> > > + * @dir: DMA direction
> > > + *
> > > + * This API is only for pre-mapped buffers, for non premapped buffers virtio
> > > + * core handles DMA API internally.
> > > + */
> > > +void virtio_dma_unmap(struct device *dev, dma_addr_t dma, unsigned int length,
> > > +                     enum dma_data_direction dir)
> > > +{
> > > +       struct virtio_device *vdev = dev_to_virtio(dev);
> > > +
> > > +       if (!vring_use_dma_api(vdev))
> > > +               return;
> > > +
> > > +       dma_unmap_page(vdev->dev.parent, dma, length, dir);
> > > +}
> > > +EXPORT_SYMBOL_GPL(virtio_dma_unmap);
> > > +
> > >  MODULE_LICENSE("GPL");
> > > diff --git a/include/linux/virtio.h b/include/linux/virtio.h
> > > index 3ebb346ebb7c..b5fa71476737 100644
> > > --- a/include/linux/virtio.h
> > > +++ b/include/linux/virtio.h
> > > @@ -9,6 +9,7 @@
> > >  #include <linux/device.h>
> > >  #include <linux/mod_devicetable.h>
> > >  #include <linux/gfp.h>
> > > +#include <linux/dma-mapping.h>
> > >
> > >  /**
> > >   * struct virtqueue - a queue to register buffers for sending or receiving.
> > > @@ -216,4 +217,12 @@ void unregister_virtio_driver(struct virtio_driver *drv);
> > >  #define module_virtio_driver(__virtio_driver) \
> > >         module_driver(__virtio_driver, register_virtio_driver, \
> > >                         unregister_virtio_driver)
> > > +
> > > +dma_addr_t virtio_dma_map_page(struct device *dev, struct page *page, size_t offset,
> > > +                              unsigned int length, enum dma_data_direction dir);
> > > +dma_addr_t virtio_dma_map(struct device *dev, void *addr, unsigned int length,
> > > +                         enum dma_data_direction dir);
> > > +int virtio_dma_mapping_error(struct device *dev, dma_addr_t addr);
> > > +void virtio_dma_unmap(struct device *dev, dma_addr_t dma, unsigned int length,
> > > +                     enum dma_data_direction dir);
> > >  #endif /* _LINUX_VIRTIO_H */
> > > --
> > > 2.32.0.3.g01195cf9f
> > >
> >
>

_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH vhost 10/10] virtio_ring: introduce virtqueue_reset()
  2023-02-20  7:03     ` Xuan Zhuo
@ 2023-02-21  1:51       ` Jason Wang
  0 siblings, 0 replies; 46+ messages in thread
From: Jason Wang @ 2023-02-21  1:51 UTC (permalink / raw)
  To: Xuan Zhuo; +Cc: Michael S. Tsirkin, virtualization

On Mon, Feb 20, 2023 at 3:04 PM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
>
> On Mon, 20 Feb 2023 13:38:30 +0800, Jason Wang <jasowang@redhat.com> wrote:
> > On Tue, Feb 14, 2023 at 3:27 PM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> > >
> > > Introduce virtqueue_reset() to release all buffer inside vq.
> > >
> > > Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
> > > ---
> > >  drivers/virtio/virtio_ring.c | 50 ++++++++++++++++++++++++++++++++++++
> > >  include/linux/virtio.h       |  2 ++
> > >  2 files changed, 52 insertions(+)
> > >
> > > diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
> > > index 2ba60a14f557..2750a365439a 100644
> > > --- a/drivers/virtio/virtio_ring.c
> > > +++ b/drivers/virtio/virtio_ring.c
> > > @@ -2930,6 +2930,56 @@ int virtqueue_resize(struct virtqueue *_vq, u32 num,
> > >  }
> > >  EXPORT_SYMBOL_GPL(virtqueue_resize);
> > >
> > > +/**
> > > + * virtqueue_reset - detach and recycle all unused buffers
> > > + * @_vq: the struct virtqueue we're talking about.
> > > + * @recycle: callback to recycle unused buffers
> > > + *
> > > + * Caller must ensure we don't call this with other virtqueue operations
> > > + * at the same time (except where noted).
> > > + *
> > > + * Returns zero or a negative error.
> > > + * 0: success.
> > > + * -EBUSY: Failed to sync with device, vq may not work properly
> > > + * -ENOENT: Transport or device not supported
> > > + * -EPERM: Operation not permitted
> > > + */
> > > +int virtqueue_reset(struct virtqueue *_vq,
> > > +                   void (*recycle)(struct virtqueue *vq, void *buf))
> > > +{
> > > +       struct vring_virtqueue *vq = to_vvq(_vq);
> > > +       struct virtio_device *vdev = vq->vq.vdev;
> > > +       void *buf;
> > > +       int err;
> > > +
> > > +       if (!vq->we_own_ring)
> > > +               return -EPERM;
> > > +
> > > +       if (!vdev->config->disable_vq_and_reset)
> > > +               return -ENOENT;
> > > +
> > > +       if (!vdev->config->enable_vq_after_reset)
> > > +               return -ENOENT;
> > > +
> > > +       err = vdev->config->disable_vq_and_reset(_vq);
> > > +       if (err)
> > > +               return err;
> > > +
> > > +       while ((buf = virtqueue_detach_unused_buf(_vq)) != NULL)
> > > +               recycle(_vq, buf);
> > > +
> > > +       if (vq->packed_ring)
> > > +               virtqueue_reinit_packed(vq);
> > > +       else
> > > +               virtqueue_reinit_split(vq);
> > > +
> > > +       if (vdev->config->enable_vq_after_reset(_vq))
> > > +               return -EBUSY;
> > > +
> > > +       return 0;
> > > +}
> >
> > I don't get why not factor the similar logic from virtqueue_resize()?
>
>
> I can do this, if you prefer this.
>
> Thanks.

Please do that, reset is a step of resize if I understand correctly.

Thanks

>
>
>
> >
> > Thanks
> >
> >
> > > +EXPORT_SYMBOL_GPL(virtqueue_reset);
> > > +
> > >  /* Only available for split ring */
> > >  struct virtqueue *vring_new_virtqueue(unsigned int index,
> > >                                       unsigned int num,
> > > diff --git a/include/linux/virtio.h b/include/linux/virtio.h
> > > index d0e707d744a0..cf4c157e4e75 100644
> > > --- a/include/linux/virtio.h
> > > +++ b/include/linux/virtio.h
> > > @@ -106,6 +106,8 @@ dma_addr_t virtqueue_get_used_addr(struct virtqueue *vq);
> > >
> > >  int virtqueue_resize(struct virtqueue *vq, u32 num,
> > >                      void (*recycle)(struct virtqueue *vq, void *buf));
> > > +int virtqueue_reset(struct virtqueue *vq,
> > > +                   void (*recycle)(struct virtqueue *vq, void *buf));
> > >
> > >  /**
> > >   * struct virtio_device - representation of a device using virtio
> > > --
> > > 2.32.0.3.g01195cf9f
> > >
> >
>

_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH vhost 08/10] virtio_ring: introduce dma sync api for virtio
  2023-02-20  7:04     ` Xuan Zhuo
@ 2023-02-21  1:52       ` Jason Wang
  0 siblings, 0 replies; 46+ messages in thread
From: Jason Wang @ 2023-02-21  1:52 UTC (permalink / raw)
  To: Xuan Zhuo; +Cc: Michael S. Tsirkin, virtualization

On Mon, Feb 20, 2023 at 3:05 PM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
>
> On Mon, 20 Feb 2023 13:38:20 +0800, Jason Wang <jasowang@redhat.com> wrote:
> > On Tue, Feb 14, 2023 at 3:27 PM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> > >
> > > These API has been introduced:
> > >
> > > * virtio_dma_need_sync
> > > * virtio_dma_sync_single_range_for_cpu
> > > * virtio_dma_sync_single_range_for_device
> >
> > What's the advantages of exporting internal logic like
> > virtio_dma_need_sync() over hiding it in
> > virtio_dma_sync_single_range_for_cpu() and
> > virtio_dma_sync_single_range_for_device()?
>
> Sorry, I didn't understand it.

I meant:

virtio_dma_sync_single_range_for_cpu()
{
    if (!virtio_dma_need_sync())
        return;
    ......
}

Thanks

>
> Thanks.
>
> >
> > Thanks
> >
> >
> > >
> > > These APIs can be used together with the premapped mechanism to sync the
> > > DMA address.
> > >
> > > Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
> > > ---
> > >  drivers/virtio/virtio_ring.c | 70 ++++++++++++++++++++++++++++++++++++
> > >  include/linux/virtio.h       |  8 +++++
> > >  2 files changed, 78 insertions(+)
> > >
> > > diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
> > > index 855338609c7f..84129b8c3e2a 100644
> > > --- a/drivers/virtio/virtio_ring.c
> > > +++ b/drivers/virtio/virtio_ring.c
> > > @@ -3264,4 +3264,74 @@ void virtio_dma_unmap(struct device *dev, dma_addr_t dma, unsigned int length,
> > >  }
> > >  EXPORT_SYMBOL_GPL(virtio_dma_unmap);
> > >
> > > +/**
> > > + * virtio_dma_need_sync - check a dma address needs sync
> > > + * @dev: virtio device
> > > + * @addr: DMA address
> > > + *
> > > + * This API is only for pre-mapped buffers, for non premapped buffers virtio
> > > + * core handles DMA API internally.
> > > + */
> > > +bool virtio_dma_need_sync(struct device *dev, dma_addr_t addr)
> > > +{
> > > +       struct virtio_device *vdev = dev_to_virtio(dev);
> > > +
> > > +       if (!vring_use_dma_api(vdev))
> > > +               return 0;
> > > +
> > > +       return dma_need_sync(vdev->dev.parent, addr);
> > > +}
> > > +EXPORT_SYMBOL_GPL(virtio_dma_need_sync);
> > > +
> > > +/**
> > > + * virtio_dma_sync_single_range_for_cpu - dma sync for cpu
> > > + * @dev: virtio device
> > > + * @addr: DMA address
> > > + * @offset: DMA address offset
> > > + * @size: mem size for sync
> > > + * @dir: DMA direction
> > > + *
> > > + * Before calling this function, use virtio_dma_need_sync() to confirm that the
> > > + * DMA address really needs to be synchronized
> > > + *
> > > + * This API is only for pre-mapped buffers, for non premapped buffers virtio
> > > + * core handles DMA API internally.
> > > + */
> > > +void virtio_dma_sync_single_range_for_cpu(struct device *dev, dma_addr_t addr,
> > > +                                         unsigned long offset, size_t size,
> > > +                                         enum dma_data_direction dir)
> > > +{
> > > +       struct virtio_device *vdev = dev_to_virtio(dev);
> > > +
> > > +       dma_sync_single_range_for_cpu(vdev->dev.parent, addr, offset,
> > > +                                     size, DMA_BIDIRECTIONAL);
> > > +}
> > > +EXPORT_SYMBOL_GPL(virtio_dma_sync_single_range_for_cpu);
> > > +
> > > +/**
> > > + * virtio_dma_sync_single_range_for_device - dma sync for device
> > > + * @dev: virtio device
> > > + * @addr: DMA address
> > > + * @offset: DMA address offset
> > > + * @size: mem size for sync
> > > + * @dir: DMA direction
> > > + *
> > > + * Before calling this function, use virtio_dma_need_sync() to confirm that the
> > > + * DMA address really needs to be synchronized
> > > + *
> > > + * This API is only for pre-mapped buffers, for non premapped buffers virtio
> > > + * core handles DMA API internally.
> > > + */
> > > +void virtio_dma_sync_single_range_for_device(struct device *dev,
> > > +                                            dma_addr_t addr,
> > > +                                            unsigned long offset, size_t size,
> > > +                                            enum dma_data_direction dir)
> > > +{
> > > +       struct virtio_device *vdev = dev_to_virtio(dev);
> > > +
> > > +       dma_sync_single_range_for_device(vdev->dev.parent, addr, offset,
> > > +                                        size, DMA_BIDIRECTIONAL);
> > > +}
> > > +EXPORT_SYMBOL_GPL(virtio_dma_sync_single_range_for_device);
> > > +
> > >  MODULE_LICENSE("GPL");
> > > diff --git a/include/linux/virtio.h b/include/linux/virtio.h
> > > index b5fa71476737..d0e707d744a0 100644
> > > --- a/include/linux/virtio.h
> > > +++ b/include/linux/virtio.h
> > > @@ -225,4 +225,12 @@ dma_addr_t virtio_dma_map(struct device *dev, void *addr, unsigned int length,
> > >  int virtio_dma_mapping_error(struct device *dev, dma_addr_t addr);
> > >  void virtio_dma_unmap(struct device *dev, dma_addr_t dma, unsigned int length,
> > >                       enum dma_data_direction dir);
> > > +bool virtio_dma_need_sync(struct device *dev, dma_addr_t addr);
> > > +void virtio_dma_sync_single_range_for_cpu(struct device *dev, dma_addr_t addr,
> > > +                                         unsigned long offset, size_t size,
> > > +                                         enum dma_data_direction dir);
> > > +void virtio_dma_sync_single_range_for_device(struct device *dev,
> > > +                                            dma_addr_t addr,
> > > +                                            unsigned long offset, size_t size,
> > > +                                            enum dma_data_direction dir);
> > >  #endif /* _LINUX_VIRTIO_H */
> > > --
> > > 2.32.0.3.g01195cf9f
> > >
> >
>

_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH vhost 07/10] virtio_ring: add api virtio_dma_map() for advance dma
  2023-02-21  1:51       ` Jason Wang
@ 2023-02-28 11:15         ` Xuan Zhuo
  2023-03-02  2:04           ` Xuan Zhuo
  2023-03-01 11:47         ` Xuan Zhuo
  1 sibling, 1 reply; 46+ messages in thread
From: Xuan Zhuo @ 2023-02-28 11:15 UTC (permalink / raw)
  To: Jason Wang; +Cc: Michael S. Tsirkin, virtualization

On Tue, 21 Feb 2023 09:51:07 +0800, Jason Wang <jasowang@redhat.com> wrote:
> On Mon, Feb 20, 2023 at 3:02 PM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> >
> > On Mon, 20 Feb 2023 13:38:24 +0800, Jason Wang <jasowang@redhat.com> wrote:
> > > On Tue, Feb 14, 2023 at 3:27 PM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> > > >
> > > > Added virtio_dma_map() to map DMA addresses for virtual memory in
> > > > advance. The purpose is to keep memory mapped across multiple add/get
> > > > buf operations.
> > >
> > > I wonder if instead of exporting helpers like this, it might be simple
> > > to just export dma_dev then the upper layer can use DMA API at will?
> >
> >
> > The reason for not doing this is that Virtio does not just use DMA_DEV to map,
> > but also checks whether DMA is used.
>
> We should let the DMA API decide by exporting a correct dma_dev. E.g
> when ACCESS_PLATFORM is not negotiated, advertising a DMA dev without
> dma_ops.


Do you mean we provide this API?

virtio_get_dma_dev()

If it returns NULL, the caller will use the physical memory address directly. If
this func returns a dma_dev, the caller should use the DMA API.

Thanks.


>
> Thanks
>
> >
> >
> > >
> > > (Otherwise the DMA helpers need to grow/shrink as the DMA API evolves?)
> > >
> > > >
> > > > Added virtio_dma_unmap() for unmap DMA address.
> > > >
> > > > Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
> > > > ---
> > > >  drivers/virtio/virtio_ring.c | 92 ++++++++++++++++++++++++++++++++++++
> > > >  include/linux/virtio.h       |  9 ++++
> > > >  2 files changed, 101 insertions(+)
> > > >
> > > > diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
> > > > index cd9364eb2345..855338609c7f 100644
> > > > --- a/drivers/virtio/virtio_ring.c
> > > > +++ b/drivers/virtio/virtio_ring.c
> > > > @@ -3172,4 +3172,96 @@ const struct vring *virtqueue_get_vring(struct virtqueue *vq)
> > > >  }
> > > >  EXPORT_SYMBOL_GPL(virtqueue_get_vring);
> > > >
> > > > +/**
> > > > + * virtio_dma_map_page - get the DMA addr of the memory for virtio device
> > > > + * @dev: virtio device
> > > > + * @page: the page of the memory to DMA
> > > > + * @offset: the offset of the memory inside page
> > > > + * @length: memory length
> > > > + * @dir: DMA direction
> > > > + *
> > > > + * This API is only for pre-mapped buffers, for non premapped buffers virtio
> > > > + * core handles DMA API internally.
> > > > + *
> > > > + * Returns the DMA addr. DMA_MAPPING_ERROR means error.
> > > > + */
> > > > +dma_addr_t virtio_dma_map_page(struct device *dev, struct page *page, size_t offset,
> > > > +                              unsigned int length, enum dma_data_direction dir)
> > > > +{
> > >
> > > This (and the reset) needs to be done per virtqueue instead per device
> > > after b0e504e5505d184b0be248b7dcdbe50b79f03758 ("virtio_ring: per
> > > virtqueue dma device").
> >
> >
> > YES.
> >
> >
> > >
> > > > +       struct virtio_device *vdev = dev_to_virtio(dev);
> > > > +
> > > > +       if (!vring_use_dma_api(vdev))
> > > > +               return page_to_phys(page) + offset;
> > > > +
> > > > +       return dma_map_page(vdev->dev.parent, page, offset, length, dir);
> > > > +}
> > >
> > > Need either inline or EXPORT_SYMBOL_GPL() here.
> >
> > Because I did not use this interface, I did not  export it.
> >
> > Thanks.
> >
> >
> > >
> > > Thanks
> > >
> > >
> > > > +
> > > > +/**
> > > > + * virtio_dma_map - get the DMA addr of the memory for virtio device
> > > > + * @dev: virtio device
> > > > + * @addr: the addr to DMA
> > > > + * @length: memory length
> > > > + * @dir: DMA direction
> > > > + *
> > > > + * This API is only for pre-mapped buffers, for non premapped buffers virtio
> > > > + * core handles DMA API internally.
> > > > + *
> > > > + * Returns the DMA addr.
> > > > + */
> > > > +dma_addr_t virtio_dma_map(struct device *dev, void *addr, unsigned int length,
> > > > +                         enum dma_data_direction dir)
> > > > +{
> > > > +       struct page *page;
> > > > +       size_t offset;
> > > > +
> > > > +       page = virt_to_page(addr);
> > > > +       offset = offset_in_page(addr);
> > > > +
> > > > +       return virtio_dma_map_page(dev, page, offset, length, dir);
> > > > +}
> > > > +EXPORT_SYMBOL_GPL(virtio_dma_map);
> > > > +
> > > > +/**
> > > > + * virtio_dma_mapping_error - check dma address
> > > > + * @dev: virtio device
> > > > + * @addr: DMA address
> > > > + *
> > > > + * This API is only for pre-mapped buffers, for non premapped buffers virtio
> > > > + * core handles DMA API internally.
> > > > + *
> > > > + * Returns 0 means dma valid. Other means invalid dma address.
> > > > + */
> > > > +int virtio_dma_mapping_error(struct device *dev, dma_addr_t addr)
> > > > +{
> > > > +       struct virtio_device *vdev = dev_to_virtio(dev);
> > > > +
> > > > +       if (!vring_use_dma_api(vdev))
> > > > +               return 0;
> > > > +
> > > > +       return dma_mapping_error(vdev->dev.parent, addr);
> > > > +}
> > > > +EXPORT_SYMBOL_GPL(virtio_dma_mapping_error);
> > > > +
> > > > +/**
> > > > + * virtio_dma_unmap - unmap DMA addr
> > > > + * @dev: virtio device
> > > > + * @dma: DMA address
> > > > + * @length: memory length
> > > > + * @dir: DMA direction
> > > > + *
> > > > + * This API is only for pre-mapped buffers, for non premapped buffers virtio
> > > > + * core handles DMA API internally.
> > > > + */
> > > > +void virtio_dma_unmap(struct device *dev, dma_addr_t dma, unsigned int length,
> > > > +                     enum dma_data_direction dir)
> > > > +{
> > > > +       struct virtio_device *vdev = dev_to_virtio(dev);
> > > > +
> > > > +       if (!vring_use_dma_api(vdev))
> > > > +               return;
> > > > +
> > > > +       dma_unmap_page(vdev->dev.parent, dma, length, dir);
> > > > +}
> > > > +EXPORT_SYMBOL_GPL(virtio_dma_unmap);
> > > > +
> > > >  MODULE_LICENSE("GPL");
> > > > diff --git a/include/linux/virtio.h b/include/linux/virtio.h
> > > > index 3ebb346ebb7c..b5fa71476737 100644
> > > > --- a/include/linux/virtio.h
> > > > +++ b/include/linux/virtio.h
> > > > @@ -9,6 +9,7 @@
> > > >  #include <linux/device.h>
> > > >  #include <linux/mod_devicetable.h>
> > > >  #include <linux/gfp.h>
> > > > +#include <linux/dma-mapping.h>
> > > >
> > > >  /**
> > > >   * struct virtqueue - a queue to register buffers for sending or receiving.
> > > > @@ -216,4 +217,12 @@ void unregister_virtio_driver(struct virtio_driver *drv);
> > > >  #define module_virtio_driver(__virtio_driver) \
> > > >         module_driver(__virtio_driver, register_virtio_driver, \
> > > >                         unregister_virtio_driver)
> > > > +
> > > > +dma_addr_t virtio_dma_map_page(struct device *dev, struct page *page, size_t offset,
> > > > +                              unsigned int length, enum dma_data_direction dir);
> > > > +dma_addr_t virtio_dma_map(struct device *dev, void *addr, unsigned int length,
> > > > +                         enum dma_data_direction dir);
> > > > +int virtio_dma_mapping_error(struct device *dev, dma_addr_t addr);
> > > > +void virtio_dma_unmap(struct device *dev, dma_addr_t dma, unsigned int length,
> > > > +                     enum dma_data_direction dir);
> > > >  #endif /* _LINUX_VIRTIO_H */
> > > > --
> > > > 2.32.0.3.g01195cf9f
> > > >
> > >
> >
>
_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH vhost 07/10] virtio_ring: add api virtio_dma_map() for advance dma
  2023-02-21  1:51       ` Jason Wang
  2023-02-28 11:15         ` Xuan Zhuo
@ 2023-03-01 11:47         ` Xuan Zhuo
  1 sibling, 0 replies; 46+ messages in thread
From: Xuan Zhuo @ 2023-03-01 11:47 UTC (permalink / raw)
  To: Jason Wang; +Cc: Michael S. Tsirkin, virtualization

On Tue, 21 Feb 2023 09:51:07 +0800, Jason Wang <jasowang@redhat.com> wrote:
> On Mon, Feb 20, 2023 at 3:02 PM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> >
> > On Mon, 20 Feb 2023 13:38:24 +0800, Jason Wang <jasowang@redhat.com> wrote:
> > > On Tue, Feb 14, 2023 at 3:27 PM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> > > >
> > > > Added virtio_dma_map() to map DMA addresses for virtual memory in
> > > > advance. The purpose is to keep memory mapped across multiple add/get
> > > > buf operations.
> > >
> > > I wonder if instead of exporting helpers like this, it might be simple
> > > to just export dma_dev then the upper layer can use DMA API at will?
> >
> >
> > The reason for not doing this is that Virtio does not just use DMA_DEV to map,
> > but also checks whether DMA is used.
>
> We should let the DMA API decide by exporting a correct dma_dev. E.g
> when ACCESS_PLATFORM is not negotiated, advertising a DMA dev without
> dma_ops.


In XSK, we need to pass a device to XSK.

If vdev->dev is passed, we can't get the dma_dev in XSK, because each VQ has its
own dma_dev. So we should pass the dma_dev to XSK. But how do we determine
whether to use DMA_OPS based on the dma_dev?

In my current API design, the caller should determine whether it is a Virtio
device. If we also need the caller to determine whether to use DMA_OPS, that is
too unfriendly for the caller.

Thanks.


>
> Thanks
>
> >
> >
> > >
> > > (Otherwise the DMA helpers need to grow/shrink as the DMA API evolves?)
> > >
> > > >
> > > > Added virtio_dma_unmap() for unmap DMA address.
> > > >
> > > > Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
> > > > ---
> > > >  drivers/virtio/virtio_ring.c | 92 ++++++++++++++++++++++++++++++++++++
> > > >  include/linux/virtio.h       |  9 ++++
> > > >  2 files changed, 101 insertions(+)
> > > >
> > > > diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
> > > > index cd9364eb2345..855338609c7f 100644
> > > > --- a/drivers/virtio/virtio_ring.c
> > > > +++ b/drivers/virtio/virtio_ring.c
> > > > @@ -3172,4 +3172,96 @@ const struct vring *virtqueue_get_vring(struct virtqueue *vq)
> > > >  }
> > > >  EXPORT_SYMBOL_GPL(virtqueue_get_vring);
> > > >
> > > > +/**
> > > > + * virtio_dma_map_page - get the DMA addr of the memory for virtio device
> > > > + * @dev: virtio device
> > > > + * @page: the page of the memory to DMA
> > > > + * @offset: the offset of the memory inside page
> > > > + * @length: memory length
> > > > + * @dir: DMA direction
> > > > + *
> > > > + * This API is only for pre-mapped buffers, for non premapped buffers virtio
> > > > + * core handles DMA API internally.
> > > > + *
> > > > + * Returns the DMA addr. DMA_MAPPING_ERROR means error.
> > > > + */
> > > > +dma_addr_t virtio_dma_map_page(struct device *dev, struct page *page, size_t offset,
> > > > +                              unsigned int length, enum dma_data_direction dir)
> > > > +{
> > >
> > > This (and the reset) needs to be done per virtqueue instead per device
> > > after b0e504e5505d184b0be248b7dcdbe50b79f03758 ("virtio_ring: per
> > > virtqueue dma device").
> >
> >
> > YES.
> >
> >
> > >
> > > > +       struct virtio_device *vdev = dev_to_virtio(dev);
> > > > +
> > > > +       if (!vring_use_dma_api(vdev))
> > > > +               return page_to_phys(page) + offset;
> > > > +
> > > > +       return dma_map_page(vdev->dev.parent, page, offset, length, dir);
> > > > +}
> > >
> > > Need either inline or EXPORT_SYMBOL_GPL() here.
> >
> > Because I did not use this interface, I did not  export it.
> >
> > Thanks.
> >
> >
> > >
> > > Thanks
> > >
> > >
> > > > +
> > > > +/**
> > > > + * virtio_dma_map - get the DMA addr of the memory for virtio device
> > > > + * @dev: virtio device
> > > > + * @addr: the addr to DMA
> > > > + * @length: memory length
> > > > + * @dir: DMA direction
> > > > + *
> > > > + * This API is only for pre-mapped buffers, for non premapped buffers virtio
> > > > + * core handles DMA API internally.
> > > > + *
> > > > + * Returns the DMA addr.
> > > > + */
> > > > +dma_addr_t virtio_dma_map(struct device *dev, void *addr, unsigned int length,
> > > > +                         enum dma_data_direction dir)
> > > > +{
> > > > +       struct page *page;
> > > > +       size_t offset;
> > > > +
> > > > +       page = virt_to_page(addr);
> > > > +       offset = offset_in_page(addr);
> > > > +
> > > > +       return virtio_dma_map_page(dev, page, offset, length, dir);
> > > > +}
> > > > +EXPORT_SYMBOL_GPL(virtio_dma_map);
> > > > +
> > > > +/**
> > > > + * virtio_dma_mapping_error - check dma address
> > > > + * @dev: virtio device
> > > > + * @addr: DMA address
> > > > + *
> > > > + * This API is only for pre-mapped buffers, for non premapped buffers virtio
> > > > + * core handles DMA API internally.
> > > > + *
> > > > + * Returns 0 means dma valid. Other means invalid dma address.
> > > > + */
> > > > +int virtio_dma_mapping_error(struct device *dev, dma_addr_t addr)
> > > > +{
> > > > +       struct virtio_device *vdev = dev_to_virtio(dev);
> > > > +
> > > > +       if (!vring_use_dma_api(vdev))
> > > > +               return 0;
> > > > +
> > > > +       return dma_mapping_error(vdev->dev.parent, addr);
> > > > +}
> > > > +EXPORT_SYMBOL_GPL(virtio_dma_mapping_error);
> > > > +
> > > > +/**
> > > > + * virtio_dma_unmap - unmap DMA addr
> > > > + * @dev: virtio device
> > > > + * @dma: DMA address
> > > > + * @length: memory length
> > > > + * @dir: DMA direction
> > > > + *
> > > > + * This API is only for pre-mapped buffers, for non premapped buffers virtio
> > > > + * core handles DMA API internally.
> > > > + */
> > > > +void virtio_dma_unmap(struct device *dev, dma_addr_t dma, unsigned int length,
> > > > +                     enum dma_data_direction dir)
> > > > +{
> > > > +       struct virtio_device *vdev = dev_to_virtio(dev);
> > > > +
> > > > +       if (!vring_use_dma_api(vdev))
> > > > +               return;
> > > > +
> > > > +       dma_unmap_page(vdev->dev.parent, dma, length, dir);
> > > > +}
> > > > +EXPORT_SYMBOL_GPL(virtio_dma_unmap);
> > > > +
> > > >  MODULE_LICENSE("GPL");
> > > > diff --git a/include/linux/virtio.h b/include/linux/virtio.h
> > > > index 3ebb346ebb7c..b5fa71476737 100644
> > > > --- a/include/linux/virtio.h
> > > > +++ b/include/linux/virtio.h
> > > > @@ -9,6 +9,7 @@
> > > >  #include <linux/device.h>
> > > >  #include <linux/mod_devicetable.h>
> > > >  #include <linux/gfp.h>
> > > > +#include <linux/dma-mapping.h>
> > > >
> > > >  /**
> > > >   * struct virtqueue - a queue to register buffers for sending or receiving.
> > > > @@ -216,4 +217,12 @@ void unregister_virtio_driver(struct virtio_driver *drv);
> > > >  #define module_virtio_driver(__virtio_driver) \
> > > >         module_driver(__virtio_driver, register_virtio_driver, \
> > > >                         unregister_virtio_driver)
> > > > +
> > > > +dma_addr_t virtio_dma_map_page(struct device *dev, struct page *page, size_t offset,
> > > > +                              unsigned int length, enum dma_data_direction dir);
> > > > +dma_addr_t virtio_dma_map(struct device *dev, void *addr, unsigned int length,
> > > > +                         enum dma_data_direction dir);
> > > > +int virtio_dma_mapping_error(struct device *dev, dma_addr_t addr);
> > > > +void virtio_dma_unmap(struct device *dev, dma_addr_t dma, unsigned int length,
> > > > +                     enum dma_data_direction dir);
> > > >  #endif /* _LINUX_VIRTIO_H */
> > > > --
> > > > 2.32.0.3.g01195cf9f
> > > >
> > >
> >
>
_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH vhost 07/10] virtio_ring: add api virtio_dma_map() for advance dma
  2023-02-28 11:15         ` Xuan Zhuo
@ 2023-03-02  2:04           ` Xuan Zhuo
  2023-03-02  3:05             ` Jason Wang
  0 siblings, 1 reply; 46+ messages in thread
From: Xuan Zhuo @ 2023-03-02  2:04 UTC (permalink / raw)
  To: Jason Wang
  Cc: maciej.fijalkowski, Michael S. Tsirkin, virtualization, bjorn,
	jonathan.lemon, magnus.karlsson

On Tue, 28 Feb 2023 19:15:23 +0800, Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> On Tue, 21 Feb 2023 09:51:07 +0800, Jason Wang <jasowang@redhat.com> wrote:
> > On Mon, Feb 20, 2023 at 3:02 PM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> > >
> > > On Mon, 20 Feb 2023 13:38:24 +0800, Jason Wang <jasowang@redhat.com> wrote:
> > > > On Tue, Feb 14, 2023 at 3:27 PM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> > > > >
> > > > > Added virtio_dma_map() to map DMA addresses for virtual memory in
> > > > > advance. The purpose is to keep memory mapped across multiple add/get
> > > > > buf operations.
> > > >
> > > > I wonder if instead of exporting helpers like this, it might be simple
> > > > to just export dma_dev then the upper layer can use DMA API at will?
> > >
> > >
> > > The reason for not doing this, Virtio is not just using DMA_DEV to mapp, but
> > > also check whether DMA is used.
> >
> > We should let the DMA API decide by exporting a correct dma_dev. E.g
> > when ACCESS_PLATFORM is not negotiated, advertising a DMA dev without
> > dma_ops.
>
>
> Do you mean we provide this API?
>
> virtio_get_dma_dev()
>
> If it returns NULL, the caller will use the physical memory address directly. If
> this func return a dma_dev, the caller should use DMA API.


cc the XDP_SOCKET's maintainers.

First of all, Jason does not want Virtio to encapsulate the DMA API. It is
best to pass the DMA device to XSK, so that XSK uses the DMA API directly for
its DMA map operations. I agree with this idea.

However, there are several problems under Virtio:
1. In some virtualization scenarios, we do not have to perform DMA operations
   at all; we can just use the physical address directly.
2. The latest virtio core supports one DMA device per rx/tx queue. In
   contrast, a physical network card generally has only one device, which all
   queues use for DMA operations.

So I have considered this problem again, and propose that the virtio core
provide only one API:

* virtio_get_dma_dev(queue)

If the return value is NULL, it means that no DMA operation is needed. If it
is not NULL, the caller should use the DMA API for DMA operations.

The corresponding change to XSK is as follows: we may pass NULL as the dev to
xp_dma_map(). If dev is NULL, there is no need to perform DMA and sync
operations. Otherwise, XSK performs DMA operations as it does for other devices.

And if the dma_dev of rx and tx is different, then we can only disable
XDP_SOCKET.

Looking forward to everyone's reply.

Thanks.

>
> Thanks.
>
>
> >
> > Thanks
> >
> > >
> > >
> > > >
> > > > (Otherwise the DMA helpers need to grow/shrink as the DMA API evolves?)
> > > >
> > > > >
> > > > > Added virtio_dma_unmap() for unmap DMA address.
> > > > >
> > > > > Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
> > > > > ---
> > > > >  drivers/virtio/virtio_ring.c | 92 ++++++++++++++++++++++++++++++++++++
> > > > >  include/linux/virtio.h       |  9 ++++
> > > > >  2 files changed, 101 insertions(+)
> > > > >
> > > > > diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
> > > > > index cd9364eb2345..855338609c7f 100644
> > > > > --- a/drivers/virtio/virtio_ring.c
> > > > > +++ b/drivers/virtio/virtio_ring.c
> > > > > @@ -3172,4 +3172,96 @@ const struct vring *virtqueue_get_vring(struct virtqueue *vq)
> > > > >  }
> > > > >  EXPORT_SYMBOL_GPL(virtqueue_get_vring);
> > > > >
> > > > > +/**
> > > > > + * virtio_dma_map_page - get the DMA addr of the memory for virtio device
> > > > > + * @dev: virtio device
> > > > > + * @page: the page of the memory to DMA
> > > > > + * @offset: the offset of the memory inside page
> > > > > + * @length: memory length
> > > > > + * @dir: DMA direction
> > > > > + *
> > > > > + * This API is only for pre-mapped buffers, for non premapped buffers virtio
> > > > > + * core handles DMA API internally.
> > > > > + *
> > > > > + * Returns the DMA addr. DMA_MAPPING_ERROR means error.
> > > > > + */
> > > > > +dma_addr_t virtio_dma_map_page(struct device *dev, struct page *page, size_t offset,
> > > > > +                              unsigned int length, enum dma_data_direction dir)
> > > > > +{
> > > >
> > > > This (and the reset) needs to be done per virtqueue instead per device
> > > > after b0e504e5505d184b0be248b7dcdbe50b79f03758 ("virtio_ring: per
> > > > virtqueue dma device").
> > >
> > >
> > > YES.
> > >
> > >
> > > >
> > > > > +       struct virtio_device *vdev = dev_to_virtio(dev);
> > > > > +
> > > > > +       if (!vring_use_dma_api(vdev))
> > > > > +               return page_to_phys(page) + offset;
> > > > > +
> > > > > +       return dma_map_page(vdev->dev.parent, page, offset, length, dir);
> > > > > +}
> > > >
> > > > Need either inline or EXPORT_SYMBOL_GPL() here.
> > >
> > > Because I did not use this interface, I did not  export it.
> > >
> > > Thanks.
> > >
> > >
> > > >
> > > > Thanks
> > > >
> > > >
> > > > > +
> > > > > +/**
> > > > > + * virtio_dma_map - get the DMA addr of the memory for virtio device
> > > > > + * @dev: virtio device
> > > > > + * @addr: the addr to DMA
> > > > > + * @length: memory length
> > > > > + * @dir: DMA direction
> > > > > + *
> > > > > + * This API is only for pre-mapped buffers, for non premapped buffers virtio
> > > > > + * core handles DMA API internally.
> > > > > + *
> > > > > + * Returns the DMA addr.
> > > > > + */
> > > > > +dma_addr_t virtio_dma_map(struct device *dev, void *addr, unsigned int length,
> > > > > +                         enum dma_data_direction dir)
> > > > > +{
> > > > > +       struct page *page;
> > > > > +       size_t offset;
> > > > > +
> > > > > +       page = virt_to_page(addr);
> > > > > +       offset = offset_in_page(addr);
> > > > > +
> > > > > +       return virtio_dma_map_page(dev, page, offset, length, dir);
> > > > > +}
> > > > > +EXPORT_SYMBOL_GPL(virtio_dma_map);
> > > > > +
> > > > > +/**
> > > > > + * virtio_dma_mapping_error - check dma address
> > > > > + * @dev: virtio device
> > > > > + * @addr: DMA address
> > > > > + *
> > > > > + * This API is only for pre-mapped buffers, for non premapped buffers virtio
> > > > > + * core handles DMA API internally.
> > > > > + *
> > > > > + * Returns 0 means dma valid. Other means invalid dma address.
> > > > > + */
> > > > > +int virtio_dma_mapping_error(struct device *dev, dma_addr_t addr)
> > > > > +{
> > > > > +       struct virtio_device *vdev = dev_to_virtio(dev);
> > > > > +
> > > > > +       if (!vring_use_dma_api(vdev))
> > > > > +               return 0;
> > > > > +
> > > > > +       return dma_mapping_error(vdev->dev.parent, addr);
> > > > > +}
> > > > > +EXPORT_SYMBOL_GPL(virtio_dma_mapping_error);
> > > > > +
> > > > > +/**
> > > > > + * virtio_dma_unmap - unmap DMA addr
> > > > > + * @dev: virtio device
> > > > > + * @dma: DMA address
> > > > > + * @length: memory length
> > > > > + * @dir: DMA direction
> > > > > + *
> > > > > + * This API is only for pre-mapped buffers, for non premapped buffers virtio
> > > > > + * core handles DMA API internally.
> > > > > + */
> > > > > +void virtio_dma_unmap(struct device *dev, dma_addr_t dma, unsigned int length,
> > > > > +                     enum dma_data_direction dir)
> > > > > +{
> > > > > +       struct virtio_device *vdev = dev_to_virtio(dev);
> > > > > +
> > > > > +       if (!vring_use_dma_api(vdev))
> > > > > +               return;
> > > > > +
> > > > > +       dma_unmap_page(vdev->dev.parent, dma, length, dir);
> > > > > +}
> > > > > +EXPORT_SYMBOL_GPL(virtio_dma_unmap);
> > > > > +
> > > > >  MODULE_LICENSE("GPL");
> > > > > diff --git a/include/linux/virtio.h b/include/linux/virtio.h
> > > > > index 3ebb346ebb7c..b5fa71476737 100644
> > > > > --- a/include/linux/virtio.h
> > > > > +++ b/include/linux/virtio.h
> > > > > @@ -9,6 +9,7 @@
> > > > >  #include <linux/device.h>
> > > > >  #include <linux/mod_devicetable.h>
> > > > >  #include <linux/gfp.h>
> > > > > +#include <linux/dma-mapping.h>
> > > > >
> > > > >  /**
> > > > >   * struct virtqueue - a queue to register buffers for sending or receiving.
> > > > > @@ -216,4 +217,12 @@ void unregister_virtio_driver(struct virtio_driver *drv);
> > > > >  #define module_virtio_driver(__virtio_driver) \
> > > > >         module_driver(__virtio_driver, register_virtio_driver, \
> > > > >                         unregister_virtio_driver)
> > > > > +
> > > > > +dma_addr_t virtio_dma_map_page(struct device *dev, struct page *page, size_t offset,
> > > > > +                              unsigned int length, enum dma_data_direction dir);
> > > > > +dma_addr_t virtio_dma_map(struct device *dev, void *addr, unsigned int length,
> > > > > +                         enum dma_data_direction dir);
> > > > > +int virtio_dma_mapping_error(struct device *dev, dma_addr_t addr);
> > > > > +void virtio_dma_unmap(struct device *dev, dma_addr_t dma, unsigned int length,
> > > > > +                     enum dma_data_direction dir);
> > > > >  #endif /* _LINUX_VIRTIO_H */
> > > > > --
> > > > > 2.32.0.3.g01195cf9f
> > > > >
> > > >
> > >
> >
> _______________________________________________
> Virtualization mailing list
> Virtualization@lists.linux-foundation.org
> https://lists.linuxfoundation.org/mailman/listinfo/virtualization
_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH vhost 07/10] virtio_ring: add api virtio_dma_map() for advance dma
  2023-03-02  2:04           ` Xuan Zhuo
@ 2023-03-02  3:05             ` Jason Wang
  2023-03-02  3:21               ` Xuan Zhuo
  0 siblings, 1 reply; 46+ messages in thread
From: Jason Wang @ 2023-03-02  3:05 UTC (permalink / raw)
  To: Xuan Zhuo
  Cc: maciej.fijalkowski, Michael S. Tsirkin, virtualization, bjorn,
	jonathan.lemon, magnus.karlsson

On Thu, Mar 2, 2023 at 10:24 AM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
>
> On Tue, 28 Feb 2023 19:15:23 +0800, Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> > On Tue, 21 Feb 2023 09:51:07 +0800, Jason Wang <jasowang@redhat.com> wrote:
> > > On Mon, Feb 20, 2023 at 3:02 PM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> > > >
> > > > On Mon, 20 Feb 2023 13:38:24 +0800, Jason Wang <jasowang@redhat.com> wrote:
> > > > > On Tue, Feb 14, 2023 at 3:27 PM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> > > > > >
> > > > > > Added virtio_dma_map() to map DMA addresses for virtual memory in
> > > > > > advance. The purpose is to keep memory mapped across multiple add/get
> > > > > > buf operations.
> > > > >
> > > > > I wonder if instead of exporting helpers like this, it might be simple
> > > > > to just export dma_dev then the upper layer can use DMA API at will?
> > > >
> > > >
> > > > The reason for not doing this, Virtio is not just using DMA_DEV to mapp, but
> > > > also check whether DMA is used.
> > >
> > > We should let the DMA API decide by exporting a correct dma_dev. E.g
> > > when ACCESS_PLATFORM is not negotiated, advertising a DMA dev without
> > > dma_ops.
> >
> >
> > Do you mean we provide this API?
> >
> > virtio_get_dma_dev()
> >
> > If it returns NULL, the caller will use the physical memory address directly. If
> > this func return a dma_dev, the caller should use DMA API.
>
>
> cc the XDP_SOCKET's maintainers.
>
> First of all, Jason does not want to encapsulate the API of DMA by Virtio. It is
> best to pass DMA Device to XSK, XSK uses DMA API for DMA MAP operation directly.
> I agree with this idea.
>
> However, there are several problems under Virtio:
> 1. In some virtualization scenarios, we do not have to perform DMA operations,
>    just use the physical address directly.

This is not a problem: we can simply return the virtio device itself
as the DMA device in that case. Since no DMA ops are attached, the DMA
API will fall back to using the physical address.

> 2. The latest Virtio Core supports each rx/tx queue with one DMA device.
>    Generally, the physical network card has only one device. All queues use
>    it for DMA operation.

I'm not sure this is a big deal, we just need to use the per virtqueue
dma device to use DMA API.

>
> So I consider this problem again, Virtio Core provides only one API.
>
> * virtio_get_dma_dev(queue)
>
> If the return value is NULL, it means that there is no DMA operation. If it is
> not NULL, use DMA API for DMA operation.
>
> The modification of XSK is like this. We may pass NULL as dev to xp_dma_map().
> If dev is NULL, then there is no need to perform DMA and Sync operations.
> Otherwise, it will perform DMA operations like other devices.

As discussed above, it might be easier:

    if (!virtio_has_feature(VIRTIO_F_ACCESS_PLATFORM))
        return virtio_device;
    else
        return vring_dma_dev();

>
> And if the dma_dev of rx and tx is different, then we can only disable
> XDP_SOCKET.

We can start with this.

Thanks

>
> Looking forward to everyone's reply.
>
> Thanks.
>
> >
> > Thanks.
> >
> >
> > >
> > > Thanks
> > >
> > > >
> > > >
> > > > >
> > > > > (Otherwise the DMA helpers need to grow/shrink as the DMA API evolves?)
> > > > >
> > > > > >
> > > > > > Added virtio_dma_unmap() for unmap DMA address.
> > > > > >
> > > > > > Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
> > > > > > ---
> > > > > >  drivers/virtio/virtio_ring.c | 92 ++++++++++++++++++++++++++++++++++++
> > > > > >  include/linux/virtio.h       |  9 ++++
> > > > > >  2 files changed, 101 insertions(+)
> > > > > >
> > > > > > diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
> > > > > > index cd9364eb2345..855338609c7f 100644
> > > > > > --- a/drivers/virtio/virtio_ring.c
> > > > > > +++ b/drivers/virtio/virtio_ring.c
> > > > > > @@ -3172,4 +3172,96 @@ const struct vring *virtqueue_get_vring(struct virtqueue *vq)
> > > > > >  }
> > > > > >  EXPORT_SYMBOL_GPL(virtqueue_get_vring);
> > > > > >
> > > > > > +/**
> > > > > > + * virtio_dma_map_page - get the DMA addr of the memory for virtio device
> > > > > > + * @dev: virtio device
> > > > > > + * @page: the page of the memory to DMA
> > > > > > + * @offset: the offset of the memory inside page
> > > > > > + * @length: memory length
> > > > > > + * @dir: DMA direction
> > > > > > + *
> > > > > > + * This API is only for pre-mapped buffers, for non premapped buffers virtio
> > > > > > + * core handles DMA API internally.
> > > > > > + *
> > > > > > + * Returns the DMA addr. DMA_MAPPING_ERROR means error.
> > > > > > + */
> > > > > > +dma_addr_t virtio_dma_map_page(struct device *dev, struct page *page, size_t offset,
> > > > > > +                              unsigned int length, enum dma_data_direction dir)
> > > > > > +{
> > > > >
> > > > > This (and the reset) needs to be done per virtqueue instead per device
> > > > > after b0e504e5505d184b0be248b7dcdbe50b79f03758 ("virtio_ring: per
> > > > > virtqueue dma device").
> > > >
> > > >
> > > > YES.
> > > >
> > > >
> > > > >
> > > > > > +       struct virtio_device *vdev = dev_to_virtio(dev);
> > > > > > +
> > > > > > +       if (!vring_use_dma_api(vdev))
> > > > > > +               return page_to_phys(page) + offset;
> > > > > > +
> > > > > > +       return dma_map_page(vdev->dev.parent, page, offset, length, dir);
> > > > > > +}
> > > > >
> > > > > Need either inline or EXPORT_SYMBOL_GPL() here.
> > > >
> > > > Because I did not use this interface, I did not  export it.
> > > >
> > > > Thanks.
> > > >
> > > >
> > > > >
> > > > > Thanks
> > > > >
> > > > >
> > > > > > +
> > > > > > +/**
> > > > > > + * virtio_dma_map - get the DMA addr of the memory for virtio device
> > > > > > + * @dev: virtio device
> > > > > > + * @addr: the addr to DMA
> > > > > > + * @length: memory length
> > > > > > + * @dir: DMA direction
> > > > > > + *
> > > > > > + * This API is only for pre-mapped buffers, for non premapped buffers virtio
> > > > > > + * core handles DMA API internally.
> > > > > > + *
> > > > > > + * Returns the DMA addr.
> > > > > > + */
> > > > > > +dma_addr_t virtio_dma_map(struct device *dev, void *addr, unsigned int length,
> > > > > > +                         enum dma_data_direction dir)
> > > > > > +{
> > > > > > +       struct page *page;
> > > > > > +       size_t offset;
> > > > > > +
> > > > > > +       page = virt_to_page(addr);
> > > > > > +       offset = offset_in_page(addr);
> > > > > > +
> > > > > > +       return virtio_dma_map_page(dev, page, offset, length, dir);
> > > > > > +}
> > > > > > +EXPORT_SYMBOL_GPL(virtio_dma_map);
> > > > > > +
> > > > > > +/**
> > > > > > + * virtio_dma_mapping_error - check dma address
> > > > > > + * @dev: virtio device
> > > > > > + * @addr: DMA address
> > > > > > + *
> > > > > > + * This API is only for pre-mapped buffers, for non premapped buffers virtio
> > > > > > + * core handles DMA API internally.
> > > > > > + *
> > > > > > + * Returns 0 means dma valid. Other means invalid dma address.
> > > > > > + */
> > > > > > +int virtio_dma_mapping_error(struct device *dev, dma_addr_t addr)
> > > > > > +{
> > > > > > +       struct virtio_device *vdev = dev_to_virtio(dev);
> > > > > > +
> > > > > > +       if (!vring_use_dma_api(vdev))
> > > > > > +               return 0;
> > > > > > +
> > > > > > +       return dma_mapping_error(vdev->dev.parent, addr);
> > > > > > +}
> > > > > > +EXPORT_SYMBOL_GPL(virtio_dma_mapping_error);
> > > > > > +
> > > > > > +/**
> > > > > > + * virtio_dma_unmap - unmap DMA addr
> > > > > > + * @dev: virtio device
> > > > > > + * @dma: DMA address
> > > > > > + * @length: memory length
> > > > > > + * @dir: DMA direction
> > > > > > + *
> > > > > > + * This API is only for pre-mapped buffers, for non premapped buffers virtio
> > > > > > + * core handles DMA API internally.
> > > > > > + */
> > > > > > +void virtio_dma_unmap(struct device *dev, dma_addr_t dma, unsigned int length,
> > > > > > +                     enum dma_data_direction dir)
> > > > > > +{
> > > > > > +       struct virtio_device *vdev = dev_to_virtio(dev);
> > > > > > +
> > > > > > +       if (!vring_use_dma_api(vdev))
> > > > > > +               return;
> > > > > > +
> > > > > > +       dma_unmap_page(vdev->dev.parent, dma, length, dir);
> > > > > > +}
> > > > > > +EXPORT_SYMBOL_GPL(virtio_dma_unmap);
> > > > > > +
> > > > > >  MODULE_LICENSE("GPL");
> > > > > > diff --git a/include/linux/virtio.h b/include/linux/virtio.h
> > > > > > index 3ebb346ebb7c..b5fa71476737 100644
> > > > > > --- a/include/linux/virtio.h
> > > > > > +++ b/include/linux/virtio.h
> > > > > > @@ -9,6 +9,7 @@
> > > > > >  #include <linux/device.h>
> > > > > >  #include <linux/mod_devicetable.h>
> > > > > >  #include <linux/gfp.h>
> > > > > > +#include <linux/dma-mapping.h>
> > > > > >
> > > > > >  /**
> > > > > >   * struct virtqueue - a queue to register buffers for sending or receiving.
> > > > > > @@ -216,4 +217,12 @@ void unregister_virtio_driver(struct virtio_driver *drv);
> > > > > >  #define module_virtio_driver(__virtio_driver) \
> > > > > >         module_driver(__virtio_driver, register_virtio_driver, \
> > > > > >                         unregister_virtio_driver)
> > > > > > +
> > > > > > +dma_addr_t virtio_dma_map_page(struct device *dev, struct page *page, size_t offset,
> > > > > > +                              unsigned int length, enum dma_data_direction dir);
> > > > > > +dma_addr_t virtio_dma_map(struct device *dev, void *addr, unsigned int length,
> > > > > > +                         enum dma_data_direction dir);
> > > > > > +int virtio_dma_mapping_error(struct device *dev, dma_addr_t addr);
> > > > > > +void virtio_dma_unmap(struct device *dev, dma_addr_t dma, unsigned int length,
> > > > > > +                     enum dma_data_direction dir);
> > > > > >  #endif /* _LINUX_VIRTIO_H */
> > > > > > --
> > > > > > 2.32.0.3.g01195cf9f
> > > > > >
> > > > >
> > > >
> > >
> > _______________________________________________
> > Virtualization mailing list
> > Virtualization@lists.linux-foundation.org
> > https://lists.linuxfoundation.org/mailman/listinfo/virtualization
>

_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH vhost 07/10] virtio_ring: add api virtio_dma_map() for advance dma
  2023-03-02  3:05             ` Jason Wang
@ 2023-03-02  3:21               ` Xuan Zhuo
  2023-03-02  3:26                 ` Jason Wang
  0 siblings, 1 reply; 46+ messages in thread
From: Xuan Zhuo @ 2023-03-02  3:21 UTC (permalink / raw)
  To: Jason Wang
  Cc: maciej.fijalkowski, Michael S. Tsirkin, virtualization, bjorn,
	jonathan.lemon, magnus.karlsson

On Thu, 2 Mar 2023 11:05:26 +0800, Jason Wang <jasowang@redhat.com> wrote:
> On Thu, Mar 2, 2023 at 10:24 AM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> >
> > On Tue, 28 Feb 2023 19:15:23 +0800, Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> > > On Tue, 21 Feb 2023 09:51:07 +0800, Jason Wang <jasowang@redhat.com> wrote:
> > > > On Mon, Feb 20, 2023 at 3:02 PM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> > > > >
> > > > > On Mon, 20 Feb 2023 13:38:24 +0800, Jason Wang <jasowang@redhat.com> wrote:
> > > > > > On Tue, Feb 14, 2023 at 3:27 PM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> > > > > > >
> > > > > > > Added virtio_dma_map() to map DMA addresses for virtual memory in
> > > > > > > advance. The purpose is to keep memory mapped across multiple add/get
> > > > > > > buf operations.
> > > > > >
> > > > > > I wonder if instead of exporting helpers like this, it might be simple
> > > > > > to just export dma_dev then the upper layer can use DMA API at will?
> > > > >
> > > > >
> > > > > The reason for not doing this, Virtio is not just using DMA_DEV to mapp, but
> > > > > also check whether DMA is used.
> > > >
> > > > We should let the DMA API decide by exporting a correct dma_dev. E.g
> > > > when ACCESS_PLATFORM is not negotiated, advertising a DMA dev without
> > > > dma_ops.
> > >
> > >
> > > Do you mean we provide this API?
> > >
> > > virtio_get_dma_dev()
> > >
> > > If it returns NULL, the caller will use the physical memory address directly. If
> > > this func return a dma_dev, the caller should use DMA API.
> >
> >
> > cc the XDP_SOCKET's maintainers.
> >
> > First of all, Jason does not want to encapsulate the API of DMA by Virtio. It is
> > best to pass DMA Device to XSK, XSK uses DMA API for DMA MAP operation directly.
> > I agree with this idea.
> >
> > However, there are several problems under Virtio:
> > 1. In some virtualization scenarios, we do not have to perform DMA operations,
> >    just use the physical address directly.
>
> This is not a problem, we can simply return the virtio device itself
> as the DMA device in this case. Since there's no DMA ops attached, DMA
> API will use physical address in this case.

Is that how it works? If so, why do we have to deal with it in the virtio ring
code? Let me look into it.


>
> > 2. The latest Virtio Core supports each rx/tx queue with one DMA device.
> >    Generally, the physical network card has only one device. All queues use
> >    it for DMA operation.
>
> I'm not sure this is a big deal, we just need to use the per virtqueue
> dma device to use DMA API.

Yes.


>
> >
> > So I consider this problem again, Virtio Core provides only one API.
> >
> > * virtio_get_dma_dev(queue)
> >
> > If the return value is NULL, it means that there is no DMA operation. If it is
> > not NULL, use DMA API for DMA operation.
> >
> > The modification of XSK is like this. We may pass NULL as dev to xp_dma_map().
> > If dev is NULL, then there is no need to perform DMA and Sync operations.
> > Otherwise, it will perform DMA operations like other devices.
>
> As discussed above, it might be easier:
>
>     if (!virtio_has_feature(VIRTIO_F_ACCESS_PLATFORM))
>         return virtio_device;
>     else
>         return vring_dma_dev();

Yes, following Jason's suggestion, XSK does not need any modifications.

Thanks.

>
> >
> > And if the dma_dev of rx and tx is different, then we can only disable
> > XDP_SOCKET.
>
> We can start with this.
>
> Thanks
>
> >
> > Looking forward to everyone's reply.
> >
> > Thanks.
> >
> > >
> > > Thanks.
> > >
> > >
> > > >
> > > > Thanks
> > > >
> > > > >
> > > > >
> > > > > >
> > > > > > (Otherwise the DMA helpers need to grow/shrink as the DMA API evolves?)
> > > > > >
> > > > > > >
> > > > > > > Added virtio_dma_unmap() for unmap DMA address.
> > > > > > >
> > > > > > > Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
> > > > > > > ---
> > > > > > >  drivers/virtio/virtio_ring.c | 92 ++++++++++++++++++++++++++++++++++++
> > > > > > >  include/linux/virtio.h       |  9 ++++
> > > > > > >  2 files changed, 101 insertions(+)
> > > > > > >
> > > > > > > diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
> > > > > > > index cd9364eb2345..855338609c7f 100644
> > > > > > > --- a/drivers/virtio/virtio_ring.c
> > > > > > > +++ b/drivers/virtio/virtio_ring.c
> > > > > > > @@ -3172,4 +3172,96 @@ const struct vring *virtqueue_get_vring(struct virtqueue *vq)
> > > > > > >  }
> > > > > > >  EXPORT_SYMBOL_GPL(virtqueue_get_vring);
> > > > > > >
> > > > > > > +/**
> > > > > > > + * virtio_dma_map_page - get the DMA addr of the memory for virtio device
> > > > > > > + * @dev: virtio device
> > > > > > > + * @page: the page of the memory to DMA
> > > > > > > + * @offset: the offset of the memory inside page
> > > > > > > + * @length: memory length
> > > > > > > + * @dir: DMA direction
> > > > > > > + *
> > > > > > > + * This API is only for pre-mapped buffers, for non premapped buffers virtio
> > > > > > > + * core handles DMA API internally.
> > > > > > > + *
> > > > > > > + * Returns the DMA addr. DMA_MAPPING_ERROR means error.
> > > > > > > + */
> > > > > > > +dma_addr_t virtio_dma_map_page(struct device *dev, struct page *page, size_t offset,
> > > > > > > +                              unsigned int length, enum dma_data_direction dir)
> > > > > > > +{
> > > > > >
> > > > > > This (and the reset) needs to be done per virtqueue instead per device
> > > > > > after b0e504e5505d184b0be248b7dcdbe50b79f03758 ("virtio_ring: per
> > > > > > virtqueue dma device").
> > > > >
> > > > >
> > > > > YES.
> > > > >
> > > > >
> > > > > >
> > > > > > > +       struct virtio_device *vdev = dev_to_virtio(dev);
> > > > > > > +
> > > > > > > +       if (!vring_use_dma_api(vdev))
> > > > > > > +               return page_to_phys(page) + offset;
> > > > > > > +
> > > > > > > +       return dma_map_page(vdev->dev.parent, page, offset, length, dir);
> > > > > > > +}
> > > > > >
> > > > > > Need either inline or EXPORT_SYMBOL_GPL() here.
> > > > >
> > > > > Because I did not use this interface, I did not  export it.
> > > > >
> > > > > Thanks.
> > > > >
> > > > >
> > > > > >
> > > > > > Thanks
> > > > > >
> > > > > >
> > > > > > > +
> > > > > > > +/**
> > > > > > > + * virtio_dma_map - get the DMA addr of the memory for virtio device
> > > > > > > + * @dev: virtio device
> > > > > > > + * @addr: the addr to DMA
> > > > > > > + * @length: memory length
> > > > > > > + * @dir: DMA direction
> > > > > > > + *
> > > > > > > + * This API is only for pre-mapped buffers, for non premapped buffers virtio
> > > > > > > + * core handles DMA API internally.
> > > > > > > + *
> > > > > > > + * Returns the DMA addr.
> > > > > > > + */
> > > > > > > +dma_addr_t virtio_dma_map(struct device *dev, void *addr, unsigned int length,
> > > > > > > +                         enum dma_data_direction dir)
> > > > > > > +{
> > > > > > > +       struct page *page;
> > > > > > > +       size_t offset;
> > > > > > > +
> > > > > > > +       page = virt_to_page(addr);
> > > > > > > +       offset = offset_in_page(addr);
> > > > > > > +
> > > > > > > +       return virtio_dma_map_page(dev, page, offset, length, dir);
> > > > > > > +}
> > > > > > > +EXPORT_SYMBOL_GPL(virtio_dma_map);
> > > > > > > +
> > > > > > > +/**
> > > > > > > + * virtio_dma_mapping_error - check dma address
> > > > > > > + * @dev: virtio device
> > > > > > > + * @addr: DMA address
> > > > > > > + *
> > > > > > > + * This API is only for pre-mapped buffers, for non premapped buffers virtio
> > > > > > > + * core handles DMA API internally.
> > > > > > > + *
> > > > > > > + * Returns 0 means dma valid. Other means invalid dma address.
> > > > > > > + */
> > > > > > > +int virtio_dma_mapping_error(struct device *dev, dma_addr_t addr)
> > > > > > > +{
> > > > > > > +       struct virtio_device *vdev = dev_to_virtio(dev);
> > > > > > > +
> > > > > > > +       if (!vring_use_dma_api(vdev))
> > > > > > > +               return 0;
> > > > > > > +
> > > > > > > +       return dma_mapping_error(vdev->dev.parent, addr);
> > > > > > > +}
> > > > > > > +EXPORT_SYMBOL_GPL(virtio_dma_mapping_error);
> > > > > > > +
> > > > > > > +/**
> > > > > > > + * virtio_dma_unmap - unmap DMA addr
> > > > > > > + * @dev: virtio device
> > > > > > > + * @dma: DMA address
> > > > > > > + * @length: memory length
> > > > > > > + * @dir: DMA direction
> > > > > > > + *
> > > > > > > + * This API is only for pre-mapped buffers, for non premapped buffers virtio
> > > > > > > + * core handles DMA API internally.
> > > > > > > + */
> > > > > > > +void virtio_dma_unmap(struct device *dev, dma_addr_t dma, unsigned int length,
> > > > > > > +                     enum dma_data_direction dir)
> > > > > > > +{
> > > > > > > +       struct virtio_device *vdev = dev_to_virtio(dev);
> > > > > > > +
> > > > > > > +       if (!vring_use_dma_api(vdev))
> > > > > > > +               return;
> > > > > > > +
> > > > > > > +       dma_unmap_page(vdev->dev.parent, dma, length, dir);
> > > > > > > +}
> > > > > > > +EXPORT_SYMBOL_GPL(virtio_dma_unmap);
> > > > > > > +
> > > > > > >  MODULE_LICENSE("GPL");
> > > > > > > diff --git a/include/linux/virtio.h b/include/linux/virtio.h
> > > > > > > index 3ebb346ebb7c..b5fa71476737 100644
> > > > > > > --- a/include/linux/virtio.h
> > > > > > > +++ b/include/linux/virtio.h
> > > > > > > @@ -9,6 +9,7 @@
> > > > > > >  #include <linux/device.h>
> > > > > > >  #include <linux/mod_devicetable.h>
> > > > > > >  #include <linux/gfp.h>
> > > > > > > +#include <linux/dma-mapping.h>
> > > > > > >
> > > > > > >  /**
> > > > > > >   * struct virtqueue - a queue to register buffers for sending or receiving.
> > > > > > > @@ -216,4 +217,12 @@ void unregister_virtio_driver(struct virtio_driver *drv);
> > > > > > >  #define module_virtio_driver(__virtio_driver) \
> > > > > > >         module_driver(__virtio_driver, register_virtio_driver, \
> > > > > > >                         unregister_virtio_driver)
> > > > > > > +
> > > > > > > +dma_addr_t virtio_dma_map_page(struct device *dev, struct page *page, size_t offset,
> > > > > > > +                              unsigned int length, enum dma_data_direction dir);
> > > > > > > +dma_addr_t virtio_dma_map(struct device *dev, void *addr, unsigned int length,
> > > > > > > +                         enum dma_data_direction dir);
> > > > > > > +int virtio_dma_mapping_error(struct device *dev, dma_addr_t addr);
> > > > > > > +void virtio_dma_unmap(struct device *dev, dma_addr_t dma, unsigned int length,
> > > > > > > +                     enum dma_data_direction dir);
> > > > > > >  #endif /* _LINUX_VIRTIO_H */
> > > > > > > --
> > > > > > > 2.32.0.3.g01195cf9f
> > > > > > >
> > > > > >
> > > > >
> > > >
> > > _______________________________________________
> > > Virtualization mailing list
> > > Virtualization@lists.linux-foundation.org
> > > https://lists.linuxfoundation.org/mailman/listinfo/virtualization
> >
>
_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH vhost 07/10] virtio_ring: add api virtio_dma_map() for advance dma
  2023-03-02  3:21               ` Xuan Zhuo
@ 2023-03-02  3:26                 ` Jason Wang
  2023-03-02  6:09                   ` Michael S. Tsirkin
  0 siblings, 1 reply; 46+ messages in thread
From: Jason Wang @ 2023-03-02  3:26 UTC (permalink / raw)
  To: Xuan Zhuo
  Cc: maciej.fijalkowski, Michael S. Tsirkin, virtualization, bjorn,
	jonathan.lemon, magnus.karlsson

On Thu, Mar 2, 2023 at 11:24 AM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
>
> On Thu, 2 Mar 2023 11:05:26 +0800, Jason Wang <jasowang@redhat.com> wrote:
> > On Thu, Mar 2, 2023 at 10:24 AM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> > >
> > > On Tue, 28 Feb 2023 19:15:23 +0800, Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> > > > On Tue, 21 Feb 2023 09:51:07 +0800, Jason Wang <jasowang@redhat.com> wrote:
> > > > > On Mon, Feb 20, 2023 at 3:02 PM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> > > > > >
> > > > > > On Mon, 20 Feb 2023 13:38:24 +0800, Jason Wang <jasowang@redhat.com> wrote:
> > > > > > > On Tue, Feb 14, 2023 at 3:27 PM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> > > > > > > >
> > > > > > > > Added virtio_dma_map() to map DMA addresses for virtual memory in
> > > > > > > > advance. The purpose is to keep memory mapped across multiple add/get
> > > > > > > > buf operations.
> > > > > > >
> > > > > > > I wonder if instead of exporting helpers like this, it might be simple
> > > > > > > to just export dma_dev then the upper layer can use DMA API at will?
> > > > > >
> > > > > >
> > > > > > The reason for not doing this: Virtio is not just using DMA_DEV to map, but
> > > > > > also checks whether DMA is used.
> > > > >
> > > > > We should let the DMA API decide by exporting a correct dma_dev. E.g
> > > > > when ACCESS_PLATFORM is not negotiated, advertising a DMA dev without
> > > > > dma_ops.
> > > >
> > > >
> > > > Do you mean we provide this API?
> > > >
> > > > virtio_get_dma_dev()
> > > >
> > > > If it returns NULL, the caller will use the physical memory address directly. If
> > > > this function returns a dma_dev, the caller should use the DMA API.
> > >
> > >
> > > cc the XDP_SOCKET's maintainers.
> > >
> > > First of all, Jason does not want to encapsulate the API of DMA by Virtio. It is
> > > best to pass DMA Device to XSK, XSK uses DMA API for DMA MAP operation directly.
> > > I agree with this idea.
> > >
> > > However, there are several problems under Virtio:
> > > 1. In some virtualization scenarios, we do not have to perform DMA operations,
> > >    just use the physical address directly.
> >
> > This is not a problem, we can simply return the virtio device itself
> > as the DMA device in this case. Since there's no DMA ops attached, DMA
> > API will use physical address in this case.
>
> Is this like this? So why do we have to deal with it in Virtio Ring? Let me
> learn it.

There has been a long debate and I can't recall too many details. (You can
search the archives.) Michael may share more thoughts here.

One concern is the overhead of the DMA API that needs to be benchmarked.

Thanks

>
>
> >
> > > 2. The latest Virtio Core supports each rx/tx queue with one DMA device.
> > >    Generally, the physical network card has only one device. All queues use
> > >    it for DMA operation.
> >
> > I'm not sure this is a big deal, we just need to use the per virtqueue
> > dma device to use DMA API.
>
> Yes.
>
>
> >
> > >
> > > So I consider this problem again, Virtio Core provides only one API.
> > >
> > > * virtio_get_dma_dev(queue)
> > >
> > > If the return value is NULL, it means that there is no DMA operation. If it is
> > > not NULL, use DMA API for DMA operation.
> > >
> > > The modification of XSK is like this. We may pass NULL as dev to xp_dma_map().
> > > If dev is NULL, then there is no need to perform DMA and Sync operations.
> > > Otherwise, it will perform DMA operations like other devices.
> >
> > As discussed above, it might be easier:
> >
> >     if (!virtio_has_feature(VIRTIO_F_ACCESS_PLATFORM))
> >         return virtio_device;
> >     else
> >         return vring_dma_dev();
>
> Yes, according to Jason's opinion, then XSK not need to do any modifications.
>
> Thanks.
>
> >
> > >
> > > And if the dma_dev of rx and tx is different, then we can only disable
> > > XDP_SOCKET.
> >
> > We can start with this.
> >
> > Thanks
> >
> > >
> > > Looking forward to everyone's reply.
> > >
> > > Thanks.
> > >
> > > >
> > > > Thanks.
> > > >
> > > >
> > > > >
> > > > > Thanks
> > > > >
> > > > > >
> > > > > >
> > > > > > >
> > > > > > > (Otherwise the DMA helpers need to grow/shrink as the DMA API evolves?)
> > > > > > >
> > > > > > > >
> > > > > > > > Added virtio_dma_unmap() for unmap DMA address.
> > > > > > > >
> > > > > > > > Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
> > > > > > > > ---
> > > > > > > >  drivers/virtio/virtio_ring.c | 92 ++++++++++++++++++++++++++++++++++++
> > > > > > > >  include/linux/virtio.h       |  9 ++++
> > > > > > > >  2 files changed, 101 insertions(+)
> > > > > > > >
> > > > > > > > diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
> > > > > > > > index cd9364eb2345..855338609c7f 100644
> > > > > > > > --- a/drivers/virtio/virtio_ring.c
> > > > > > > > +++ b/drivers/virtio/virtio_ring.c
> > > > > > > > @@ -3172,4 +3172,96 @@ const struct vring *virtqueue_get_vring(struct virtqueue *vq)
> > > > > > > >  }
> > > > > > > >  EXPORT_SYMBOL_GPL(virtqueue_get_vring);
> > > > > > > >
> > > > > > > > +/**
> > > > > > > > + * virtio_dma_map_page - get the DMA addr of the memory for virtio device
> > > > > > > > + * @dev: virtio device
> > > > > > > > + * @page: the page of the memory to DMA
> > > > > > > > + * @offset: the offset of the memory inside page
> > > > > > > > + * @length: memory length
> > > > > > > > + * @dir: DMA direction
> > > > > > > > + *
> > > > > > > > + * This API is only for pre-mapped buffers, for non premapped buffers virtio
> > > > > > > > + * core handles DMA API internally.
> > > > > > > > + *
> > > > > > > > + * Returns the DMA addr. DMA_MAPPING_ERROR means error.
> > > > > > > > + */
> > > > > > > > +dma_addr_t virtio_dma_map_page(struct device *dev, struct page *page, size_t offset,
> > > > > > > > +                              unsigned int length, enum dma_data_direction dir)
> > > > > > > > +{
> > > > > > >
> > > > > > > This (and the reset) needs to be done per virtqueue instead per device
> > > > > > > after b0e504e5505d184b0be248b7dcdbe50b79f03758 ("virtio_ring: per
> > > > > > > virtqueue dma device").
> > > > > >
> > > > > >
> > > > > > YES.
> > > > > >
> > > > > >
> > > > > > >
> > > > > > > > +       struct virtio_device *vdev = dev_to_virtio(dev);
> > > > > > > > +
> > > > > > > > +       if (!vring_use_dma_api(vdev))
> > > > > > > > +               return page_to_phys(page) + offset;
> > > > > > > > +
> > > > > > > > +       return dma_map_page(vdev->dev.parent, page, offset, length, dir);
> > > > > > > > +}
> > > > > > >
> > > > > > > Need either inline or EXPORT_SYMBOL_GPL() here.
> > > > > >
> > > > > > Because I did not use this interface, I did not export it.
> > > > > >
> > > > > > Thanks.
> > > > > >
> > > > > >
> > > > > > >
> > > > > > > Thanks
> > > > > > >
> > > > > > >
> > > > > > > > +
> > > > > > > > +/**
> > > > > > > > + * virtio_dma_map - get the DMA addr of the memory for virtio device
> > > > > > > > + * @dev: virtio device
> > > > > > > > + * @addr: the addr to DMA
> > > > > > > > + * @length: memory length
> > > > > > > > + * @dir: DMA direction
> > > > > > > > + *
> > > > > > > > + * This API is only for pre-mapped buffers, for non premapped buffers virtio
> > > > > > > > + * core handles DMA API internally.
> > > > > > > > + *
> > > > > > > > + * Returns the DMA addr.
> > > > > > > > + */
> > > > > > > > +dma_addr_t virtio_dma_map(struct device *dev, void *addr, unsigned int length,
> > > > > > > > +                         enum dma_data_direction dir)
> > > > > > > > +{
> > > > > > > > +       struct page *page;
> > > > > > > > +       size_t offset;
> > > > > > > > +
> > > > > > > > +       page = virt_to_page(addr);
> > > > > > > > +       offset = offset_in_page(addr);
> > > > > > > > +
> > > > > > > > +       return virtio_dma_map_page(dev, page, offset, length, dir);
> > > > > > > > +}
> > > > > > > > +EXPORT_SYMBOL_GPL(virtio_dma_map);
> > > > > > > > +
> > > > > > > > +/**
> > > > > > > > + * virtio_dma_mapping_error - check dma address
> > > > > > > > + * @dev: virtio device
> > > > > > > > + * @addr: DMA address
> > > > > > > > + *
> > > > > > > > + * This API is only for pre-mapped buffers, for non premapped buffers virtio
> > > > > > > > + * core handles DMA API internally.
> > > > > > > > + *
> > > > > > > > + * Returns 0 means dma valid. Other means invalid dma address.
> > > > > > > > + */
> > > > > > > > +int virtio_dma_mapping_error(struct device *dev, dma_addr_t addr)
> > > > > > > > +{
> > > > > > > > +       struct virtio_device *vdev = dev_to_virtio(dev);
> > > > > > > > +
> > > > > > > > +       if (!vring_use_dma_api(vdev))
> > > > > > > > +               return 0;
> > > > > > > > +
> > > > > > > > +       return dma_mapping_error(vdev->dev.parent, addr);
> > > > > > > > +}
> > > > > > > > +EXPORT_SYMBOL_GPL(virtio_dma_mapping_error);
> > > > > > > > +
> > > > > > > > +/**
> > > > > > > > + * virtio_dma_unmap - unmap DMA addr
> > > > > > > > + * @dev: virtio device
> > > > > > > > + * @dma: DMA address
> > > > > > > > + * @length: memory length
> > > > > > > > + * @dir: DMA direction
> > > > > > > > + *
> > > > > > > > + * This API is only for pre-mapped buffers, for non premapped buffers virtio
> > > > > > > > + * core handles DMA API internally.
> > > > > > > > + */
> > > > > > > > +void virtio_dma_unmap(struct device *dev, dma_addr_t dma, unsigned int length,
> > > > > > > > +                     enum dma_data_direction dir)
> > > > > > > > +{
> > > > > > > > +       struct virtio_device *vdev = dev_to_virtio(dev);
> > > > > > > > +
> > > > > > > > +       if (!vring_use_dma_api(vdev))
> > > > > > > > +               return;
> > > > > > > > +
> > > > > > > > +       dma_unmap_page(vdev->dev.parent, dma, length, dir);
> > > > > > > > +}
> > > > > > > > +EXPORT_SYMBOL_GPL(virtio_dma_unmap);
> > > > > > > > +
> > > > > > > >  MODULE_LICENSE("GPL");
> > > > > > > > diff --git a/include/linux/virtio.h b/include/linux/virtio.h
> > > > > > > > index 3ebb346ebb7c..b5fa71476737 100644
> > > > > > > > --- a/include/linux/virtio.h
> > > > > > > > +++ b/include/linux/virtio.h
> > > > > > > > @@ -9,6 +9,7 @@
> > > > > > > >  #include <linux/device.h>
> > > > > > > >  #include <linux/mod_devicetable.h>
> > > > > > > >  #include <linux/gfp.h>
> > > > > > > > +#include <linux/dma-mapping.h>
> > > > > > > >
> > > > > > > >  /**
> > > > > > > >   * struct virtqueue - a queue to register buffers for sending or receiving.
> > > > > > > > @@ -216,4 +217,12 @@ void unregister_virtio_driver(struct virtio_driver *drv);
> > > > > > > >  #define module_virtio_driver(__virtio_driver) \
> > > > > > > >         module_driver(__virtio_driver, register_virtio_driver, \
> > > > > > > >                         unregister_virtio_driver)
> > > > > > > > +
> > > > > > > > +dma_addr_t virtio_dma_map_page(struct device *dev, struct page *page, size_t offset,
> > > > > > > > +                              unsigned int length, enum dma_data_direction dir);
> > > > > > > > +dma_addr_t virtio_dma_map(struct device *dev, void *addr, unsigned int length,
> > > > > > > > +                         enum dma_data_direction dir);
> > > > > > > > +int virtio_dma_mapping_error(struct device *dev, dma_addr_t addr);
> > > > > > > > +void virtio_dma_unmap(struct device *dev, dma_addr_t dma, unsigned int length,
> > > > > > > > +                     enum dma_data_direction dir);
> > > > > > > >  #endif /* _LINUX_VIRTIO_H */
> > > > > > > > --
> > > > > > > > 2.32.0.3.g01195cf9f
> > > > > > > >
> > > > > > >
> > > > > >
> > > > >
> > > > _______________________________________________
> > > > Virtualization mailing list
> > > > Virtualization@lists.linux-foundation.org
> > > > https://lists.linuxfoundation.org/mailman/listinfo/virtualization
> > >
> >
>

_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH vhost 07/10] virtio_ring: add api virtio_dma_map() for advance dma
  2023-03-02  3:26                 ` Jason Wang
@ 2023-03-02  6:09                   ` Michael S. Tsirkin
  2023-03-02  6:43                     ` Xuan Zhuo
  2023-03-02  6:56                     ` Jason Wang
  0 siblings, 2 replies; 46+ messages in thread
From: Michael S. Tsirkin @ 2023-03-02  6:09 UTC (permalink / raw)
  To: Jason Wang
  Cc: maciej.fijalkowski, virtualization, bjorn, jonathan.lemon,
	magnus.karlsson

On Thu, Mar 02, 2023 at 11:26:53AM +0800, Jason Wang wrote:
> On Thu, Mar 2, 2023 at 11:24 AM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> >
> > On Thu, 2 Mar 2023 11:05:26 +0800, Jason Wang <jasowang@redhat.com> wrote:
> > > On Thu, Mar 2, 2023 at 10:24 AM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> > > >
> > > > On Tue, 28 Feb 2023 19:15:23 +0800, Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> > > > > On Tue, 21 Feb 2023 09:51:07 +0800, Jason Wang <jasowang@redhat.com> wrote:
> > > > > > On Mon, Feb 20, 2023 at 3:02 PM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> > > > > > >
> > > > > > > On Mon, 20 Feb 2023 13:38:24 +0800, Jason Wang <jasowang@redhat.com> wrote:
> > > > > > > > On Tue, Feb 14, 2023 at 3:27 PM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> > > > > > > > >
> > > > > > > > > Added virtio_dma_map() to map DMA addresses for virtual memory in
> > > > > > > > > advance. The purpose is to keep memory mapped across multiple add/get
> > > > > > > > > buf operations.
> > > > > > > >
> > > > > > > > I wonder if instead of exporting helpers like this, it might be simple
> > > > > > > > to just export dma_dev then the upper layer can use DMA API at will?
> > > > > > >
> > > > > > >
> > > > > > > The reason for not doing this: Virtio is not just using DMA_DEV to map, but
> > > > > > > also checks whether DMA is used.
> > > > > >
> > > > > > We should let the DMA API decide by exporting a correct dma_dev. E.g
> > > > > > when ACCESS_PLATFORM is not negotiated, advertising a DMA dev without
> > > > > > dma_ops.
> > > > >
> > > > >
> > > > > Do you mean we provide this API?
> > > > >
> > > > > virtio_get_dma_dev()
> > > > >
> > > > > If it returns NULL, the caller will use the physical memory address directly. If
> > > > > this function returns a dma_dev, the caller should use the DMA API.
> > > >
> > > >
> > > > cc the XDP_SOCKET's maintainers.
> > > >
> > > > First of all, Jason does not want to encapsulate the API of DMA by Virtio. It is
> > > > best to pass DMA Device to XSK, XSK uses DMA API for DMA MAP operation directly.
> > > > I agree with this idea.
> > > >
> > > > However, there are several problems under Virtio:
> > > > 1. In some virtualization scenarios, we do not have to perform DMA operations,
> > > >    just use the physical address directly.
> > >
> > > This is not a problem, we can simply return the virtio device itself
> > > as the DMA device in this case. Since there's no DMA ops attached, DMA
> > > API will use physical address in this case.
> >
> > Is this like this? So why do we have to deal with it in Virtio Ring? Let me
> > learn it.
> 
> There has been a long debate and I can't recall too many details. (You can
> search the archives.) Michael may share more thoughts here.
> 
> One concern is the overhead of the DMA API that needs to be benchmarked.
> 
> Thanks

Concern with what? This patch does not change devices, they are using
the existing API. Xuan Zhuo already showed a benchmark result for AF_XDP.


> >
> >
> > >
> > > > 2. The latest Virtio Core supports each rx/tx queue with one DMA device.
> > > >    Generally, the physical network card has only one device. All queues use
> > > >    it for DMA operation.
> > >
> > > I'm not sure this is a big deal, we just need to use the per virtqueue
> > > dma device to use DMA API.
> >
> > Yes.
> >
> >
> > >
> > > >
> > > > So I consider this problem again, Virtio Core provides only one API.
> > > >
> > > > * virtio_get_dma_dev(queue)
> > > >
> > > > If the return value is NULL, it means that there is no DMA operation. If it is
> > > > not NULL, use DMA API for DMA operation.
> > > >
> > > > The modification of XSK is like this. We may pass NULL as dev to xp_dma_map().
> > > > If dev is NULL, then there is no need to perform DMA and Sync operations.
> > > > Otherwise, it will perform DMA operations like other devices.
> > >
> > > As discussed above, it might be easier:
> > >
> > >     if (!virtio_has_feature(VIRTIO_F_ACCESS_PLATFORM))
> > >         return virtio_device;
> > >     else
> > >         return vring_dma_dev();
> >
> > Yes, according to Jason's opinion, then XSK not need to do any modifications.
> >
> > Thanks.

Yes AF_XDP does not need the per VQ device hack.
We should probably rethink it.

But as far as implementation goes, poking at VIRTIO_F_ACCESS_PLATFORM
is wrong. Please use virtio_has_dma_quirk.



> > >
> > > >
> > > > And if the dma_dev of rx and tx is different, then we can only disable
> > > > XDP_SOCKET.
> > >
> > > We can start with this.
> > >
> > > Thanks
> > >
> > > >
> > > > Looking forward to everyone's reply.
> > > >
> > > > Thanks.
> > > >
> > > > >
> > > > > Thanks.
> > > > >
> > > > >
> > > > > >
> > > > > > Thanks
> > > > > >
> > > > > > >
> > > > > > >
> > > > > > > >
> > > > > > > > (Otherwise the DMA helpers need to grow/shrink as the DMA API evolves?)
> > > > > > > >
> > > > > > > > >
> > > > > > > > > Added virtio_dma_unmap() for unmap DMA address.
> > > > > > > > >
> > > > > > > > > Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
> > > > > > > > > ---
> > > > > > > > >  drivers/virtio/virtio_ring.c | 92 ++++++++++++++++++++++++++++++++++++
> > > > > > > > >  include/linux/virtio.h       |  9 ++++
> > > > > > > > >  2 files changed, 101 insertions(+)
> > > > > > > > >
> > > > > > > > > diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
> > > > > > > > > index cd9364eb2345..855338609c7f 100644
> > > > > > > > > --- a/drivers/virtio/virtio_ring.c
> > > > > > > > > +++ b/drivers/virtio/virtio_ring.c
> > > > > > > > > @@ -3172,4 +3172,96 @@ const struct vring *virtqueue_get_vring(struct virtqueue *vq)
> > > > > > > > >  }
> > > > > > > > >  EXPORT_SYMBOL_GPL(virtqueue_get_vring);
> > > > > > > > >
> > > > > > > > > +/**
> > > > > > > > > + * virtio_dma_map_page - get the DMA addr of the memory for virtio device
> > > > > > > > > + * @dev: virtio device
> > > > > > > > > + * @page: the page of the memory to DMA
> > > > > > > > > + * @offset: the offset of the memory inside page
> > > > > > > > > + * @length: memory length
> > > > > > > > > + * @dir: DMA direction
> > > > > > > > > + *
> > > > > > > > > + * This API is only for pre-mapped buffers, for non premapped buffers virtio
> > > > > > > > > + * core handles DMA API internally.
> > > > > > > > > + *
> > > > > > > > > + * Returns the DMA addr. DMA_MAPPING_ERROR means error.
> > > > > > > > > + */
> > > > > > > > > +dma_addr_t virtio_dma_map_page(struct device *dev, struct page *page, size_t offset,
> > > > > > > > > +                              unsigned int length, enum dma_data_direction dir)
> > > > > > > > > +{
> > > > > > > >
> > > > > > > > This (and the reset) needs to be done per virtqueue instead per device
> > > > > > > > after b0e504e5505d184b0be248b7dcdbe50b79f03758 ("virtio_ring: per
> > > > > > > > virtqueue dma device").
> > > > > > >
> > > > > > >
> > > > > > > YES.
> > > > > > >
> > > > > > >
> > > > > > > >
> > > > > > > > > +       struct virtio_device *vdev = dev_to_virtio(dev);
> > > > > > > > > +
> > > > > > > > > +       if (!vring_use_dma_api(vdev))
> > > > > > > > > +               return page_to_phys(page) + offset;
> > > > > > > > > +
> > > > > > > > > +       return dma_map_page(vdev->dev.parent, page, offset, length, dir);
> > > > > > > > > +}
> > > > > > > >
> > > > > > > > Need either inline or EXPORT_SYMBOL_GPL() here.
> > > > > > >
> > > > > > > Because I did not use this interface, I did not export it.
> > > > > > >
> > > > > > > Thanks.
> > > > > > >
> > > > > > >
> > > > > > > >
> > > > > > > > Thanks
> > > > > > > >
> > > > > > > >
> > > > > > > > > +
> > > > > > > > > +/**
> > > > > > > > > + * virtio_dma_map - get the DMA addr of the memory for virtio device
> > > > > > > > > + * @dev: virtio device
> > > > > > > > > + * @addr: the addr to DMA
> > > > > > > > > + * @length: memory length
> > > > > > > > > + * @dir: DMA direction
> > > > > > > > > + *
> > > > > > > > > + * This API is only for pre-mapped buffers, for non premapped buffers virtio
> > > > > > > > > + * core handles DMA API internally.
> > > > > > > > > + *
> > > > > > > > > + * Returns the DMA addr.
> > > > > > > > > + */
> > > > > > > > > +dma_addr_t virtio_dma_map(struct device *dev, void *addr, unsigned int length,
> > > > > > > > > +                         enum dma_data_direction dir)
> > > > > > > > > +{
> > > > > > > > > +       struct page *page;
> > > > > > > > > +       size_t offset;
> > > > > > > > > +
> > > > > > > > > +       page = virt_to_page(addr);
> > > > > > > > > +       offset = offset_in_page(addr);
> > > > > > > > > +
> > > > > > > > > +       return virtio_dma_map_page(dev, page, offset, length, dir);
> > > > > > > > > +}
> > > > > > > > > +EXPORT_SYMBOL_GPL(virtio_dma_map);
> > > > > > > > > +
> > > > > > > > > +/**
> > > > > > > > > + * virtio_dma_mapping_error - check dma address
> > > > > > > > > + * @dev: virtio device
> > > > > > > > > + * @addr: DMA address
> > > > > > > > > + *
> > > > > > > > > + * This API is only for pre-mapped buffers, for non premapped buffers virtio
> > > > > > > > > + * core handles DMA API internally.
> > > > > > > > > + *
> > > > > > > > > + * Returns 0 means dma valid. Other means invalid dma address.
> > > > > > > > > + */
> > > > > > > > > +int virtio_dma_mapping_error(struct device *dev, dma_addr_t addr)
> > > > > > > > > +{
> > > > > > > > > +       struct virtio_device *vdev = dev_to_virtio(dev);
> > > > > > > > > +
> > > > > > > > > +       if (!vring_use_dma_api(vdev))
> > > > > > > > > +               return 0;
> > > > > > > > > +
> > > > > > > > > +       return dma_mapping_error(vdev->dev.parent, addr);
> > > > > > > > > +}
> > > > > > > > > +EXPORT_SYMBOL_GPL(virtio_dma_mapping_error);
> > > > > > > > > +
> > > > > > > > > +/**
> > > > > > > > > + * virtio_dma_unmap - unmap DMA addr
> > > > > > > > > + * @dev: virtio device
> > > > > > > > > + * @dma: DMA address
> > > > > > > > > + * @length: memory length
> > > > > > > > > + * @dir: DMA direction
> > > > > > > > > + *
> > > > > > > > > + * This API is only for pre-mapped buffers, for non premapped buffers virtio
> > > > > > > > > + * core handles DMA API internally.
> > > > > > > > > + */
> > > > > > > > > +void virtio_dma_unmap(struct device *dev, dma_addr_t dma, unsigned int length,
> > > > > > > > > +                     enum dma_data_direction dir)
> > > > > > > > > +{
> > > > > > > > > +       struct virtio_device *vdev = dev_to_virtio(dev);
> > > > > > > > > +
> > > > > > > > > +       if (!vring_use_dma_api(vdev))
> > > > > > > > > +               return;
> > > > > > > > > +
> > > > > > > > > +       dma_unmap_page(vdev->dev.parent, dma, length, dir);
> > > > > > > > > +}
> > > > > > > > > +EXPORT_SYMBOL_GPL(virtio_dma_unmap);
> > > > > > > > > +
> > > > > > > > >  MODULE_LICENSE("GPL");
> > > > > > > > > diff --git a/include/linux/virtio.h b/include/linux/virtio.h
> > > > > > > > > index 3ebb346ebb7c..b5fa71476737 100644
> > > > > > > > > --- a/include/linux/virtio.h
> > > > > > > > > +++ b/include/linux/virtio.h
> > > > > > > > > @@ -9,6 +9,7 @@
> > > > > > > > >  #include <linux/device.h>
> > > > > > > > >  #include <linux/mod_devicetable.h>
> > > > > > > > >  #include <linux/gfp.h>
> > > > > > > > > +#include <linux/dma-mapping.h>
> > > > > > > > >
> > > > > > > > >  /**
> > > > > > > > >   * struct virtqueue - a queue to register buffers for sending or receiving.
> > > > > > > > > @@ -216,4 +217,12 @@ void unregister_virtio_driver(struct virtio_driver *drv);
> > > > > > > > >  #define module_virtio_driver(__virtio_driver) \
> > > > > > > > >         module_driver(__virtio_driver, register_virtio_driver, \
> > > > > > > > >                         unregister_virtio_driver)
> > > > > > > > > +
> > > > > > > > > +dma_addr_t virtio_dma_map_page(struct device *dev, struct page *page, size_t offset,
> > > > > > > > > +                              unsigned int length, enum dma_data_direction dir);
> > > > > > > > > +dma_addr_t virtio_dma_map(struct device *dev, void *addr, unsigned int length,
> > > > > > > > > +                         enum dma_data_direction dir);
> > > > > > > > > +int virtio_dma_mapping_error(struct device *dev, dma_addr_t addr);
> > > > > > > > > +void virtio_dma_unmap(struct device *dev, dma_addr_t dma, unsigned int length,
> > > > > > > > > +                     enum dma_data_direction dir);
> > > > > > > > >  #endif /* _LINUX_VIRTIO_H */
> > > > > > > > > --
> > > > > > > > > 2.32.0.3.g01195cf9f
> > > > > > > > >
> > > > > > > >
> > > > > > >
> > > > > >
> > > > > _______________________________________________
> > > > > Virtualization mailing list
> > > > > Virtualization@lists.linux-foundation.org
> > > > > https://lists.linuxfoundation.org/mailman/listinfo/virtualization
> > > >
> > >
> >

_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH vhost 07/10] virtio_ring: add api virtio_dma_map() for advance dma
  2023-03-02  6:09                   ` Michael S. Tsirkin
@ 2023-03-02  6:43                     ` Xuan Zhuo
  2023-03-02  6:56                     ` Jason Wang
  1 sibling, 0 replies; 46+ messages in thread
From: Xuan Zhuo @ 2023-03-02  6:43 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: maciej.fijalkowski, virtualization, bjorn, jonathan.lemon,
	magnus.karlsson

On Thu, 2 Mar 2023 01:09:04 -0500, "Michael S. Tsirkin" <mst@redhat.com> wrote:
> On Thu, Mar 02, 2023 at 11:26:53AM +0800, Jason Wang wrote:
> > On Thu, Mar 2, 2023 at 11:24 AM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> > >
> > > On Thu, 2 Mar 2023 11:05:26 +0800, Jason Wang <jasowang@redhat.com> wrote:
> > > > On Thu, Mar 2, 2023 at 10:24 AM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> > > > >
> > > > > On Tue, 28 Feb 2023 19:15:23 +0800, Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> > > > > > On Tue, 21 Feb 2023 09:51:07 +0800, Jason Wang <jasowang@redhat.com> wrote:
> > > > > > > On Mon, Feb 20, 2023 at 3:02 PM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> > > > > > > >
> > > > > > > > On Mon, 20 Feb 2023 13:38:24 +0800, Jason Wang <jasowang@redhat.com> wrote:
> > > > > > > > > On Tue, Feb 14, 2023 at 3:27 PM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> > > > > > > > > >
> > > > > > > > > > Added virtio_dma_map() to map DMA addresses for virtual memory in
> > > > > > > > > > advance. The purpose is to keep memory mapped across multiple add/get
> > > > > > > > > > buf operations.
> > > > > > > > >
> > > > > > > > > I wonder if instead of exporting helpers like this, it might be simple
> > > > > > > > > to just export dma_dev then the upper layer can use DMA API at will?
> > > > > > > >
> > > > > > > >
> > > > > > > > The reason for not doing this: Virtio is not just using DMA_DEV to map, but
> > > > > > > > also checks whether DMA is used.
> > > > > > >
> > > > > > > We should let the DMA API decide by exporting a correct dma_dev. E.g
> > > > > > > when ACCESS_PLATFORM is not negotiated, advertising a DMA dev without
> > > > > > > dma_ops.
> > > > > >
> > > > > >
> > > > > > Do you mean we provide this API?
> > > > > >
> > > > > > virtio_get_dma_dev()
> > > > > >
> > > > > > If it returns NULL, the caller will use the physical memory address directly. If
> > > > > > this func return a dma_dev, the caller should use DMA API.
> > > > >
> > > > >
> > > > > cc the XDP_SOCKET's maintainers.
> > > > >
> > > > > First of all, Jason does not want to encapsulate the API of DMA by Virtio. It is
> > > > > best to pass DMA Device to XSK, XSK uses DMA API for DMA MAP operation directly.
> > > > > I agree with this idea.
> > > > >
> > > > > However, there are several problems under Virtio:
> > > > > 1. In some virtualization scenarios, we do not have to perform DMA operations,
> > > > >    just use the physical address directly.
> > > >
> > > > This is not a problem, we can simply return the virtio device itself
> > > > as the DMA device in this case. Since there's no DMA ops attached, DMA
> > > > API will use physical address in this case.
> > >
> > > Is this like this? So why do we have to deal with it in Virtio Ring? Let me
> > > learn it.
> >
> > It has a long debate and I can't recall too many details. (You can
> > search the archives). Michael may show more thoughts here.
> >
> > One concern is the overhead of the DMA API that needs to be benchmarked.
> >
> > Thanks
>
> Concern with what? This patch does not change devices, they are using
> the existing API. Xuan Zhuo already showed a benchmark result for AF_XDP.
>
>
> > >
> > >
> > > >
> > > > > 2. The latest Virtio Core supports each rx/tx queue with one DMA device.
> > > > >    Generally, the physical network card has only one device. All queues use
> > > > >    it for DMA operation.
> > > >
> > > > I'm not sure this is a big deal, we just need to use the per virtqueue
> > > > dma device to use DMA API.
> > >
> > > Yes.
> > >
> > >
> > > >
> > > > >
> > > > > So I consider this problem again, Virtio Core provides only one API.
> > > > >
> > > > > * virtio_get_dma_dev(queue)
> > > > >
> > > > > If the return value is NULL, it means that there is no DMA operation. If it is
> > > > > not NULL, use DMA API for DMA operation.
> > > > >
> > > > > The modification of XSK is like this. We may pass NULL as dev to xp_dma_map().
> > > > > If dev is NULL, then there is no need to perform DMA and Sync operations.
> > > > > Otherwise, it will perform DMA operations like other devices.
> > > >
> > > > As discussed above, it might be easier:
> > > >
> > > >     if (!virtio_has_feature(VIRTIO_F_ACCESS_PLATFORM))
> > > >         return virtio_device;
> > > >     else
> > > >         return vring_dma_dev();
> > >
> > > Yes, following Jason's suggestion, XSK does not need any modifications.
> > >
> > > Thanks.
>
> Yes AF_XDP does not need the per VQ device hack.
> We should probably rethink it.
>
> But as far as implementation goes, poking at VIRTIO_F_ACCESS_PLATFORM
> is wrong. Please use virtio_has_dma_quirk.

I think the code should look like this:

+struct device *virtqueue_get_dma_dev(struct virtqueue *_vq)
+{
+       struct vring_virtqueue *vq = to_vvq(_vq);
+
+       if (!vq->use_dma_api)
+               return &vq->vq.vdev->dev;
+
+       return vring_dma_dev(vq);
+}
+EXPORT_SYMBOL_GPL(virtqueue_get_dma_dev);

Thanks.


>
>
>
> > > >
> > > > >
> > > > > And if the dma_dev of rx and tx is different, then we can only disable
> > > > > XDP_SOCKET.
> > > >
> > > > We can start with this.
> > > >
> > > > Thanks
> > > >
> > > > >
> > > > > Looking forward to everyone's reply.
> > > > >
> > > > > Thanks.
> > > > >
> > > > > >
> > > > > > Thanks.
> > > > > >
> > > > > >
> > > > > > >
> > > > > > > Thanks
> > > > > > >
> > > > > > > >
> > > > > > > >
> > > > > > > > >
> > > > > > > > > (Otherwise the DMA helpers need to grow/shrink as the DMA API evolves?)
> > > > > > > > >
> > > > > > > > > >
> > > > > > > > > > Added virtio_dma_unmap() to unmap DMA addresses.
> > > > > > > > > >
> > > > > > > > > > Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
> > > > > > > > > > ---
> > > > > > > > > >  drivers/virtio/virtio_ring.c | 92 ++++++++++++++++++++++++++++++++++++
> > > > > > > > > >  include/linux/virtio.h       |  9 ++++
> > > > > > > > > >  2 files changed, 101 insertions(+)
> > > > > > > > > >
> > > > > > > > > > diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
> > > > > > > > > > index cd9364eb2345..855338609c7f 100644
> > > > > > > > > > --- a/drivers/virtio/virtio_ring.c
> > > > > > > > > > +++ b/drivers/virtio/virtio_ring.c
> > > > > > > > > > @@ -3172,4 +3172,96 @@ const struct vring *virtqueue_get_vring(struct virtqueue *vq)
> > > > > > > > > >  }
> > > > > > > > > >  EXPORT_SYMBOL_GPL(virtqueue_get_vring);
> > > > > > > > > >
> > > > > > > > > > +/**
> > > > > > > > > > + * virtio_dma_map_page - get the DMA addr of the memory for virtio device
> > > > > > > > > > + * @dev: virtio device
> > > > > > > > > > + * @page: the page of the memory to DMA
> > > > > > > > > > + * @offset: the offset of the memory inside page
> > > > > > > > > > + * @length: memory length
> > > > > > > > > > + * @dir: DMA direction
> > > > > > > > > > + *
> > > > > > > > > > + * This API is only for pre-mapped buffers, for non premapped buffers virtio
> > > > > > > > > > + * core handles DMA API internally.
> > > > > > > > > > + *
> > > > > > > > > > + * Returns the DMA addr. DMA_MAPPING_ERROR means error.
> > > > > > > > > > + */
> > > > > > > > > > +dma_addr_t virtio_dma_map_page(struct device *dev, struct page *page, size_t offset,
> > > > > > > > > > +                              unsigned int length, enum dma_data_direction dir)
> > > > > > > > > > +{
> > > > > > > > >
> > > > > > > > > This (and the rest) needs to be done per virtqueue instead of per device
> > > > > > > > > after b0e504e5505d184b0be248b7dcdbe50b79f03758 ("virtio_ring: per
> > > > > > > > > virtqueue dma device").
> > > > > > > >
> > > > > > > >
> > > > > > > > YES.
> > > > > > > >
> > > > > > > >
> > > > > > > > >
> > > > > > > > > > +       struct virtio_device *vdev = dev_to_virtio(dev);
> > > > > > > > > > +
> > > > > > > > > > +       if (!vring_use_dma_api(vdev))
> > > > > > > > > > +               return page_to_phys(page) + offset;
> > > > > > > > > > +
> > > > > > > > > > +       return dma_map_page(vdev->dev.parent, page, offset, length, dir);
> > > > > > > > > > +}
> > > > > > > > >
> > > > > > > > > Need either inline or EXPORT_SYMBOL_GPL() here.
> > > > > > > >
> > > > > > > > Because I did not use this interface, I did not  export it.
> > > > > > > >
> > > > > > > > Thanks.
> > > > > > > >
> > > > > > > >
> > > > > > > > >
> > > > > > > > > Thanks
> > > > > > > > >
> > > > > > > > >
> > > > > > > > > > +
> > > > > > > > > > +/**
> > > > > > > > > > + * virtio_dma_map - get the DMA addr of the memory for virtio device
> > > > > > > > > > + * @dev: virtio device
> > > > > > > > > > + * @addr: the addr to DMA
> > > > > > > > > > + * @length: memory length
> > > > > > > > > > + * @dir: DMA direction
> > > > > > > > > > + *
> > > > > > > > > > + * This API is only for pre-mapped buffers, for non premapped buffers virtio
> > > > > > > > > > + * core handles DMA API internally.
> > > > > > > > > > + *
> > > > > > > > > > + * Returns the DMA addr.
> > > > > > > > > > + */
> > > > > > > > > > +dma_addr_t virtio_dma_map(struct device *dev, void *addr, unsigned int length,
> > > > > > > > > > +                         enum dma_data_direction dir)
> > > > > > > > > > +{
> > > > > > > > > > +       struct page *page;
> > > > > > > > > > +       size_t offset;
> > > > > > > > > > +
> > > > > > > > > > +       page = virt_to_page(addr);
> > > > > > > > > > +       offset = offset_in_page(addr);
> > > > > > > > > > +
> > > > > > > > > > +       return virtio_dma_map_page(dev, page, offset, length, dir);
> > > > > > > > > > +}
> > > > > > > > > > +EXPORT_SYMBOL_GPL(virtio_dma_map);
> > > > > > > > > > +
> > > > > > > > > > +/**
> > > > > > > > > > + * virtio_dma_mapping_error - check dma address
> > > > > > > > > > + * @dev: virtio device
> > > > > > > > > > + * @addr: DMA address
> > > > > > > > > > + *
> > > > > > > > > > + * This API is only for pre-mapped buffers, for non premapped buffers virtio
> > > > > > > > > > + * core handles DMA API internally.
> > > > > > > > > > + *
> > > > > > > > > > + * Returns 0 means dma valid. Other means invalid dma address.
> > > > > > > > > > + */
> > > > > > > > > > +int virtio_dma_mapping_error(struct device *dev, dma_addr_t addr)
> > > > > > > > > > +{
> > > > > > > > > > +       struct virtio_device *vdev = dev_to_virtio(dev);
> > > > > > > > > > +
> > > > > > > > > > +       if (!vring_use_dma_api(vdev))
> > > > > > > > > > +               return 0;
> > > > > > > > > > +
> > > > > > > > > > +       return dma_mapping_error(vdev->dev.parent, addr);
> > > > > > > > > > +}
> > > > > > > > > > +EXPORT_SYMBOL_GPL(virtio_dma_mapping_error);
> > > > > > > > > > +
> > > > > > > > > > +/**
> > > > > > > > > > + * virtio_dma_unmap - unmap DMA addr
> > > > > > > > > > + * @dev: virtio device
> > > > > > > > > > + * @dma: DMA address
> > > > > > > > > > + * @length: memory length
> > > > > > > > > > + * @dir: DMA direction
> > > > > > > > > > + *
> > > > > > > > > > + * This API is only for pre-mapped buffers, for non premapped buffers virtio
> > > > > > > > > > + * core handles DMA API internally.
> > > > > > > > > > + */
> > > > > > > > > > +void virtio_dma_unmap(struct device *dev, dma_addr_t dma, unsigned int length,
> > > > > > > > > > +                     enum dma_data_direction dir)
> > > > > > > > > > +{
> > > > > > > > > > +       struct virtio_device *vdev = dev_to_virtio(dev);
> > > > > > > > > > +
> > > > > > > > > > +       if (!vring_use_dma_api(vdev))
> > > > > > > > > > +               return;
> > > > > > > > > > +
> > > > > > > > > > +       dma_unmap_page(vdev->dev.parent, dma, length, dir);
> > > > > > > > > > +}
> > > > > > > > > > +EXPORT_SYMBOL_GPL(virtio_dma_unmap);
> > > > > > > > > > +
> > > > > > > > > >  MODULE_LICENSE("GPL");
> > > > > > > > > > diff --git a/include/linux/virtio.h b/include/linux/virtio.h
> > > > > > > > > > index 3ebb346ebb7c..b5fa71476737 100644
> > > > > > > > > > --- a/include/linux/virtio.h
> > > > > > > > > > +++ b/include/linux/virtio.h
> > > > > > > > > > @@ -9,6 +9,7 @@
> > > > > > > > > >  #include <linux/device.h>
> > > > > > > > > >  #include <linux/mod_devicetable.h>
> > > > > > > > > >  #include <linux/gfp.h>
> > > > > > > > > > +#include <linux/dma-mapping.h>
> > > > > > > > > >
> > > > > > > > > >  /**
> > > > > > > > > >   * struct virtqueue - a queue to register buffers for sending or receiving.
> > > > > > > > > > @@ -216,4 +217,12 @@ void unregister_virtio_driver(struct virtio_driver *drv);
> > > > > > > > > >  #define module_virtio_driver(__virtio_driver) \
> > > > > > > > > >         module_driver(__virtio_driver, register_virtio_driver, \
> > > > > > > > > >                         unregister_virtio_driver)
> > > > > > > > > > +
> > > > > > > > > > +dma_addr_t virtio_dma_map_page(struct device *dev, struct page *page, size_t offset,
> > > > > > > > > > +                              unsigned int length, enum dma_data_direction dir);
> > > > > > > > > > +dma_addr_t virtio_dma_map(struct device *dev, void *addr, unsigned int length,
> > > > > > > > > > +                         enum dma_data_direction dir);
> > > > > > > > > > +int virtio_dma_mapping_error(struct device *dev, dma_addr_t addr);
> > > > > > > > > > +void virtio_dma_unmap(struct device *dev, dma_addr_t dma, unsigned int length,
> > > > > > > > > > +                     enum dma_data_direction dir);
> > > > > > > > > >  #endif /* _LINUX_VIRTIO_H */
> > > > > > > > > > --
> > > > > > > > > > 2.32.0.3.g01195cf9f
> > > > > > > > > >
> > > > > > > > >
> > > > > > > >
> > > > > > >
> > > > > > _______________________________________________
> > > > > > Virtualization mailing list
> > > > > > Virtualization@lists.linux-foundation.org
> > > > > > https://lists.linuxfoundation.org/mailman/listinfo/virtualization
> > > > >
> > > >
> > >
>
_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH vhost 07/10] virtio_ring: add api virtio_dma_map() for advance dma
  2023-03-02  6:09                   ` Michael S. Tsirkin
  2023-03-02  6:43                     ` Xuan Zhuo
@ 2023-03-02  6:56                     ` Jason Wang
  2023-03-02  7:31                       ` Xuan Zhuo
  1 sibling, 1 reply; 46+ messages in thread
From: Jason Wang @ 2023-03-02  6:56 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: maciej.fijalkowski, virtualization, bjorn, jonathan.lemon,
	magnus.karlsson

On Thu, Mar 2, 2023 at 2:09 PM Michael S. Tsirkin <mst@redhat.com> wrote:
>
> On Thu, Mar 02, 2023 at 11:26:53AM +0800, Jason Wang wrote:
> > On Thu, Mar 2, 2023 at 11:24 AM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> > >
> > > On Thu, 2 Mar 2023 11:05:26 +0800, Jason Wang <jasowang@redhat.com> wrote:
> > > > On Thu, Mar 2, 2023 at 10:24 AM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> > > > >
> > > > > On Tue, 28 Feb 2023 19:15:23 +0800, Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> > > > > > On Tue, 21 Feb 2023 09:51:07 +0800, Jason Wang <jasowang@redhat.com> wrote:
> > > > > > > On Mon, Feb 20, 2023 at 3:02 PM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> > > > > > > >
> > > > > > > > On Mon, 20 Feb 2023 13:38:24 +0800, Jason Wang <jasowang@redhat.com> wrote:
> > > > > > > > > On Tue, Feb 14, 2023 at 3:27 PM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> > > > > > > > > >
> > > > > > > > > > Added virtio_dma_map() to map DMA addresses for virtual memory in
> > > > > > > > > > advance. The purpose is to keep memory mapped across multiple add/get
> > > > > > > > > > buf operations.
> > > > > > > > >
> > > > > > > > > I wonder if instead of exporting helpers like this, it might be simple
> > > > > > > > > to just export dma_dev then the upper layer can use DMA API at will?
> > > > > > > >
> > > > > > > >
> > > > > > > > The reason for not doing this: Virtio is not just using DMA_DEV to map, but
> > > > > > > > also checks whether DMA is used.
> > > > > > >
> > > > > > > We should let the DMA API decide by exporting a correct dma_dev. E.g
> > > > > > > when ACCESS_PLATFORM is not negotiated, advertising a DMA dev without
> > > > > > > dma_ops.
> > > > > >
> > > > > >
> > > > > > Do you mean we provide this API?
> > > > > >
> > > > > > virtio_get_dma_dev()
> > > > > >
> > > > > > If it returns NULL, the caller will use the physical memory address directly. If
> > > > > > this func return a dma_dev, the caller should use DMA API.
> > > > >
> > > > >
> > > > > cc the XDP_SOCKET's maintainers.
> > > > >
> > > > > First of all, Jason does not want to encapsulate the API of DMA by Virtio. It is
> > > > > best to pass DMA Device to XSK, XSK uses DMA API for DMA MAP operation directly.
> > > > > I agree with this idea.
> > > > >
> > > > > However, there are several problems under Virtio:
> > > > > 1. In some virtualization scenarios, we do not have to perform DMA operations,
> > > > >    just use the physical address directly.
> > > >
> > > > This is not a problem, we can simply return the virtio device itself
> > > > as the DMA device in this case. Since there's no DMA ops attached, DMA
> > > > API will use physical address in this case.
> > >
> > > Is this like this? So why do we have to deal with it in Virtio Ring? Let me
> > > learn it.
> >
> > It has a long debate and I can't recall too many details. (You can
> > search the archives). Michael may show more thoughts here.
> >
> > One concern is the overhead of the DMA API that needs to be benchmarked.
> >
> > Thanks
>
> Concern with what?

Always use the DMA API for virtio devices by dropping vq->use_dma_api.

For a device that doesn't need the quirk, advertise the virtio device as
its DMA device.

Thanks

> This patch does not change devices, they are using
> the existing API. Xuan Zhuo already showed a benchmark result for AF_XDP.
>
>
> > >
> > >
> > > >
> > > > > 2. The latest Virtio Core supports each rx/tx queue with one DMA device.
> > > > >    Generally, the physical network card has only one device. All queues use
> > > > >    it for DMA operation.
> > > >
> > > > I'm not sure this is a big deal, we just need to use the per virtqueue
> > > > dma device to use DMA API.
> > >
> > > Yes.
> > >
> > >
> > > >
> > > > >
> > > > > So I consider this problem again, Virtio Core provides only one API.
> > > > >
> > > > > * virtio_get_dma_dev(queue)
> > > > >
> > > > > If the return value is NULL, it means that there is no DMA operation. If it is
> > > > > not NULL, use DMA API for DMA operation.
> > > > >
> > > > > The modification of XSK is like this. We may pass NULL as dev to xp_dma_map().
> > > > > If dev is NULL, then there is no need to perform DMA and Sync operations.
> > > > > Otherwise, it will perform DMA operations like other devices.
> > > >
> > > > As discussed above, it might be easier:
> > > >
> > > >     if (!virtio_has_feature(VIRTIO_F_ACCESS_PLATFORM))
> > > >         return virtio_device;
> > > >     else
> > > >         return vring_dma_dev();
> > >
> > > Yes, following Jason's suggestion, XSK does not need any modifications.
> > >
> > > Thanks.
>
> Yes AF_XDP does not need the per VQ device hack.
> We should probably rethink it.
>
> But as far as implementation goes, poking at VIRTIO_F_ACCESS_PLATFORM
> is wrong. Please use virtio_has_dma_quirk.
>
>
>
> > > >
> > > > >
> > > > > And if the dma_dev of rx and tx is different, then we can only disable
> > > > > XDP_SOCKET.
> > > >
> > > > We can start with this.
> > > >
> > > > Thanks
> > > >
> > > > >
> > > > > Looking forward to everyone's reply.
> > > > >
> > > > > Thanks.
> > > > >
> > > > > >
> > > > > > Thanks.
> > > > > >
> > > > > >
> > > > > > >
> > > > > > > Thanks
> > > > > > >
> > > > > > > >
> > > > > > > >
> > > > > > > > >
> > > > > > > > > (Otherwise the DMA helpers need to grow/shrink as the DMA API evolves?)
> > > > > > > > >
> > > > > > > > > >
> > > > > > > > > > Added virtio_dma_unmap() to unmap DMA addresses.
> > > > > > > > > >
> > > > > > > > > > Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
> > > > > > > > > > ---
> > > > > > > > > >  drivers/virtio/virtio_ring.c | 92 ++++++++++++++++++++++++++++++++++++
> > > > > > > > > >  include/linux/virtio.h       |  9 ++++
> > > > > > > > > >  2 files changed, 101 insertions(+)
> > > > > > > > > >
> > > > > > > > > > diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
> > > > > > > > > > index cd9364eb2345..855338609c7f 100644
> > > > > > > > > > --- a/drivers/virtio/virtio_ring.c
> > > > > > > > > > +++ b/drivers/virtio/virtio_ring.c
> > > > > > > > > > @@ -3172,4 +3172,96 @@ const struct vring *virtqueue_get_vring(struct virtqueue *vq)
> > > > > > > > > >  }
> > > > > > > > > >  EXPORT_SYMBOL_GPL(virtqueue_get_vring);
> > > > > > > > > >
> > > > > > > > > > +/**
> > > > > > > > > > + * virtio_dma_map_page - get the DMA addr of the memory for virtio device
> > > > > > > > > > + * @dev: virtio device
> > > > > > > > > > + * @page: the page of the memory to DMA
> > > > > > > > > > + * @offset: the offset of the memory inside page
> > > > > > > > > > + * @length: memory length
> > > > > > > > > > + * @dir: DMA direction
> > > > > > > > > > + *
> > > > > > > > > > + * This API is only for pre-mapped buffers, for non premapped buffers virtio
> > > > > > > > > > + * core handles DMA API internally.
> > > > > > > > > > + *
> > > > > > > > > > + * Returns the DMA addr. DMA_MAPPING_ERROR means error.
> > > > > > > > > > + */
> > > > > > > > > > +dma_addr_t virtio_dma_map_page(struct device *dev, struct page *page, size_t offset,
> > > > > > > > > > +                              unsigned int length, enum dma_data_direction dir)
> > > > > > > > > > +{
> > > > > > > > >
> > > > > > > > > This (and the rest) needs to be done per virtqueue instead of per device
> > > > > > > > > after b0e504e5505d184b0be248b7dcdbe50b79f03758 ("virtio_ring: per
> > > > > > > > > virtqueue dma device").
> > > > > > > >
> > > > > > > >
> > > > > > > > YES.
> > > > > > > >
> > > > > > > >
> > > > > > > > >
> > > > > > > > > > +       struct virtio_device *vdev = dev_to_virtio(dev);
> > > > > > > > > > +
> > > > > > > > > > +       if (!vring_use_dma_api(vdev))
> > > > > > > > > > +               return page_to_phys(page) + offset;
> > > > > > > > > > +
> > > > > > > > > > +       return dma_map_page(vdev->dev.parent, page, offset, length, dir);
> > > > > > > > > > +}
> > > > > > > > >
> > > > > > > > > Need either inline or EXPORT_SYMBOL_GPL() here.
> > > > > > > >
> > > > > > > > Because I did not use this interface, I did not  export it.
> > > > > > > >
> > > > > > > > Thanks.
> > > > > > > >
> > > > > > > >
> > > > > > > > >
> > > > > > > > > Thanks
> > > > > > > > >
> > > > > > > > >
> > > > > > > > > > +
> > > > > > > > > > +/**
> > > > > > > > > > + * virtio_dma_map - get the DMA addr of the memory for virtio device
> > > > > > > > > > + * @dev: virtio device
> > > > > > > > > > + * @addr: the addr to DMA
> > > > > > > > > > + * @length: memory length
> > > > > > > > > > + * @dir: DMA direction
> > > > > > > > > > + *
> > > > > > > > > > + * This API is only for pre-mapped buffers, for non premapped buffers virtio
> > > > > > > > > > + * core handles DMA API internally.
> > > > > > > > > > + *
> > > > > > > > > > + * Returns the DMA addr.
> > > > > > > > > > + */
> > > > > > > > > > +dma_addr_t virtio_dma_map(struct device *dev, void *addr, unsigned int length,
> > > > > > > > > > +                         enum dma_data_direction dir)
> > > > > > > > > > +{
> > > > > > > > > > +       struct page *page;
> > > > > > > > > > +       size_t offset;
> > > > > > > > > > +
> > > > > > > > > > +       page = virt_to_page(addr);
> > > > > > > > > > +       offset = offset_in_page(addr);
> > > > > > > > > > +
> > > > > > > > > > +       return virtio_dma_map_page(dev, page, offset, length, dir);
> > > > > > > > > > +}
> > > > > > > > > > +EXPORT_SYMBOL_GPL(virtio_dma_map);
> > > > > > > > > > +
> > > > > > > > > > +/**
> > > > > > > > > > + * virtio_dma_mapping_error - check dma address
> > > > > > > > > > + * @dev: virtio device
> > > > > > > > > > + * @addr: DMA address
> > > > > > > > > > + *
> > > > > > > > > > + * This API is only for pre-mapped buffers, for non premapped buffers virtio
> > > > > > > > > > + * core handles DMA API internally.
> > > > > > > > > > + *
> > > > > > > > > > + * Returns 0 means dma valid. Other means invalid dma address.
> > > > > > > > > > + */
> > > > > > > > > > +int virtio_dma_mapping_error(struct device *dev, dma_addr_t addr)
> > > > > > > > > > +{
> > > > > > > > > > +       struct virtio_device *vdev = dev_to_virtio(dev);
> > > > > > > > > > +
> > > > > > > > > > +       if (!vring_use_dma_api(vdev))
> > > > > > > > > > +               return 0;
> > > > > > > > > > +
> > > > > > > > > > +       return dma_mapping_error(vdev->dev.parent, addr);
> > > > > > > > > > +}
> > > > > > > > > > +EXPORT_SYMBOL_GPL(virtio_dma_mapping_error);
> > > > > > > > > > +
> > > > > > > > > > +/**
> > > > > > > > > > + * virtio_dma_unmap - unmap DMA addr
> > > > > > > > > > + * @dev: virtio device
> > > > > > > > > > + * @dma: DMA address
> > > > > > > > > > + * @length: memory length
> > > > > > > > > > + * @dir: DMA direction
> > > > > > > > > > + *
> > > > > > > > > > + * This API is only for pre-mapped buffers, for non premapped buffers virtio
> > > > > > > > > > + * core handles DMA API internally.
> > > > > > > > > > + */
> > > > > > > > > > +void virtio_dma_unmap(struct device *dev, dma_addr_t dma, unsigned int length,
> > > > > > > > > > +                     enum dma_data_direction dir)
> > > > > > > > > > +{
> > > > > > > > > > +       struct virtio_device *vdev = dev_to_virtio(dev);
> > > > > > > > > > +
> > > > > > > > > > +       if (!vring_use_dma_api(vdev))
> > > > > > > > > > +               return;
> > > > > > > > > > +
> > > > > > > > > > +       dma_unmap_page(vdev->dev.parent, dma, length, dir);
> > > > > > > > > > +}
> > > > > > > > > > +EXPORT_SYMBOL_GPL(virtio_dma_unmap);
> > > > > > > > > > +
> > > > > > > > > >  MODULE_LICENSE("GPL");
> > > > > > > > > > diff --git a/include/linux/virtio.h b/include/linux/virtio.h
> > > > > > > > > > index 3ebb346ebb7c..b5fa71476737 100644
> > > > > > > > > > --- a/include/linux/virtio.h
> > > > > > > > > > +++ b/include/linux/virtio.h
> > > > > > > > > > @@ -9,6 +9,7 @@
> > > > > > > > > >  #include <linux/device.h>
> > > > > > > > > >  #include <linux/mod_devicetable.h>
> > > > > > > > > >  #include <linux/gfp.h>
> > > > > > > > > > +#include <linux/dma-mapping.h>
> > > > > > > > > >
> > > > > > > > > >  /**
> > > > > > > > > >   * struct virtqueue - a queue to register buffers for sending or receiving.
> > > > > > > > > > @@ -216,4 +217,12 @@ void unregister_virtio_driver(struct virtio_driver *drv);
> > > > > > > > > >  #define module_virtio_driver(__virtio_driver) \
> > > > > > > > > >         module_driver(__virtio_driver, register_virtio_driver, \
> > > > > > > > > >                         unregister_virtio_driver)
> > > > > > > > > > +
> > > > > > > > > > +dma_addr_t virtio_dma_map_page(struct device *dev, struct page *page, size_t offset,
> > > > > > > > > > +                              unsigned int length, enum dma_data_direction dir);
> > > > > > > > > > +dma_addr_t virtio_dma_map(struct device *dev, void *addr, unsigned int length,
> > > > > > > > > > +                         enum dma_data_direction dir);
> > > > > > > > > > +int virtio_dma_mapping_error(struct device *dev, dma_addr_t addr);
> > > > > > > > > > +void virtio_dma_unmap(struct device *dev, dma_addr_t dma, unsigned int length,
> > > > > > > > > > +                     enum dma_data_direction dir);
> > > > > > > > > >  #endif /* _LINUX_VIRTIO_H */
> > > > > > > > > > --
> > > > > > > > > > 2.32.0.3.g01195cf9f
> > > > > > > > > >
> > > > > > > > >
> > > > > > > >
> > > > > > >
> > > > > > _______________________________________________
> > > > > > Virtualization mailing list
> > > > > > Virtualization@lists.linux-foundation.org
> > > > > > https://lists.linuxfoundation.org/mailman/listinfo/virtualization
> > > > >
> > > >
> > >
>

_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH vhost 07/10] virtio_ring: add api virtio_dma_map() for advance dma
  2023-03-02  6:56                     ` Jason Wang
@ 2023-03-02  7:31                       ` Xuan Zhuo
  2023-03-02  7:56                         ` Jason Wang
  0 siblings, 1 reply; 46+ messages in thread
From: Xuan Zhuo @ 2023-03-02  7:31 UTC (permalink / raw)
  To: Jason Wang
  Cc: maciej.fijalkowski, Michael S. Tsirkin, virtualization, bjorn,
	jonathan.lemon, magnus.karlsson

On Thu, 2 Mar 2023 14:56:11 +0800, Jason Wang <jasowang@redhat.com> wrote:
> On Thu, Mar 2, 2023 at 2:09 PM Michael S. Tsirkin <mst@redhat.com> wrote:
> >
> > On Thu, Mar 02, 2023 at 11:26:53AM +0800, Jason Wang wrote:
> > > On Thu, Mar 2, 2023 at 11:24 AM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> > > >
> > > > On Thu, 2 Mar 2023 11:05:26 +0800, Jason Wang <jasowang@redhat.com> wrote:
> > > > > On Thu, Mar 2, 2023 at 10:24 AM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> > > > > >
> > > > > > On Tue, 28 Feb 2023 19:15:23 +0800, Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> > > > > > > On Tue, 21 Feb 2023 09:51:07 +0800, Jason Wang <jasowang@redhat.com> wrote:
> > > > > > > > On Mon, Feb 20, 2023 at 3:02 PM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> > > > > > > > >
> > > > > > > > > On Mon, 20 Feb 2023 13:38:24 +0800, Jason Wang <jasowang@redhat.com> wrote:
> > > > > > > > > > On Tue, Feb 14, 2023 at 3:27 PM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> > > > > > > > > > >
> > > > > > > > > > > Added virtio_dma_map() to map DMA addresses for virtual memory in
> > > > > > > > > > > advance. The purpose is to keep memory mapped across multiple add/get
> > > > > > > > > > > buf operations.
> > > > > > > > > >
> > > > > > > > > > I wonder if instead of exporting helpers like this, it might be simple
> > > > > > > > > > to just export dma_dev then the upper layer can use DMA API at will?
> > > > > > > > >
> > > > > > > > >
> > > > > > > > > The reason for not doing this, Virtio is not just using DMA_DEV to map, but
> > > > > > > > > also check whether DMA is used.
> > > > > > > >
> > > > > > > > We should let the DMA API decide by exporting a correct dma_dev. E.g
> > > > > > > > when ACCESS_PLATFORM is not negotiated, advertising a DMA dev without
> > > > > > > > dma_ops.
> > > > > > >
> > > > > > >
> > > > > > > Do you mean we provide this API?
> > > > > > >
> > > > > > > virtio_get_dma_dev()
> > > > > > >
> > > > > > > If it returns NULL, the caller will use the physical memory address directly. If
> > > > > > > this func return a dma_dev, the caller should use DMA API.
> > > > > >
> > > > > >
> > > > > > cc the XDP_SOCKET's maintainers.
> > > > > >
> > > > > > First of all, Jason does not want to encapsulate the API of DMA by Virtio. It is
> > > > > > best to pass DMA Device to XSK, XSK uses DMA API for DMA MAP operation directly.
> > > > > > I agree with this idea.
> > > > > >
> > > > > > However, there are several problems under Virtio:
> > > > > > 1. In some virtualization scenarios, we do not have to perform DMA operations,
> > > > > >    just use the physical address directly.
> > > > >
> > > > > This is not a problem, we can simply return the virtio device itself
> > > > > as the DMA device in this case. Since there's no DMA ops attached, DMA
> > > > > API will use physical address in this case.
> > > >
> > > > Is this like this? So why do we have to deal with it in Virtio Ring? Let me
> > > > learn it.
> > >
> > > It has a long debate and I can't recall too many details. (You can
> > > search the archives). Michael may show more thoughts here.
> > >
> > > One concern is the overhead of the DMA API that needs to be benchmarked.
> > >
> > > Thanks
> >
> > Concern with what?
>
> Always use the DMA API for virtio devices by drop vq->use_dma_api.

Do you mean the effect on the AF_XDP performance? I think this
may not be a key issue, because DMA is completed in advance, and there is no
DMA operation on the data path. So the overhead is not big.

Thanks.


>
> For the device that doesn't need quirk, advertise the virtio device as
> its dma device.
>
> Thanks
>
> > This patch does not change devices, they are using
> > the existing API. Xuan Zhuo already showed a benchmark result for AF_XDP.
> >
> >
> > > >
> > > >
> > > > >
> > > > > > 2. The latest Virtio Core supports each rx/tx queue with one DMA device.
> > > > > >    Generally, the physical network card has only one device. All queues use
> > > > > >    it for DMA operation.
> > > > >
> > > > > I'm not sure this is a big deal, we just need to use the per virtqueue
> > > > > dma device to use DMA API.
> > > >
> > > > Yes.
> > > >
> > > >
> > > > >
> > > > > >
> > > > > > So I consider this problem again, Virtio Core provides only one API.
> > > > > >
> > > > > > * virtio_get_dma_dev(queue)
> > > > > >
> > > > > > If the return value is NULL, it means that there is no DMA operation. If it is
> > > > > > not NULL, use DMA API for DMA operation.
> > > > > >
> > > > > > The modification of XSK is like this. We may pass NULL as dev to xp_dma_map().
> > > > > > If dev is NULL, then there is no need to perform DMA and Sync operations.
> > > > > > Otherwise, it will perform DMA operations like other devices.
> > > > >
> > > > > As discussed above, it might be easier:
> > > > >
> > > > >     if (!virtio_has_feature(VIRTIO_F_ACCESS_PLATFORM))
> > > > >         return virtio_device;
> > > > >     else
> > > > >         return vring_dma_dev();
> > > >
> > > > Yes, according to Jason's opinion, then XSK does not need to do any modifications.
> > > >
> > > > Thanks.
> >
> > Yes AF_XDP does not need the per VQ device hack.
> > We should probably rethink it.
> >
> > But as far as implementation goes, poking at VIRTIO_F_ACCESS_PLATFORM
> > is wrong. Please use virtio_has_dma_quirk.
> >
> >
> >
> > > > >
> > > > > >
> > > > > > And if the dma_dev of rx and tx is different, then we can only disable
> > > > > > XDP_SOCKET.
> > > > >
> > > > > We can start with this.
> > > > >
> > > > > Thanks
> > > > >
> > > > > >
> > > > > > Looking forward to everyone's reply.
> > > > > >
> > > > > > Thanks.
> > > > > >
> > > > > > >
> > > > > > > Thanks.
> > > > > > >
> > > > > > >
> > > > > > > >
> > > > > > > > Thanks
> > > > > > > >
> > > > > > > > >
> > > > > > > > >
> > > > > > > > > >
> > > > > > > > > > (Otherwise the DMA helpers need to grow/shrink as the DMA API evolves?)
> > > > > > > > > >
> > > > > > > > > > >
> > > > > > > > > > > Added virtio_dma_unmap() for unmap DMA address.
> > > > > > > > > > >
> > > > > > > > > > > Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
> > > > > > > > > > > ---
> > > > > > > > > > >  drivers/virtio/virtio_ring.c | 92 ++++++++++++++++++++++++++++++++++++
> > > > > > > > > > >  include/linux/virtio.h       |  9 ++++
> > > > > > > > > > >  2 files changed, 101 insertions(+)
> > > > > > > > > > >
> > > > > > > > > > > diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
> > > > > > > > > > > index cd9364eb2345..855338609c7f 100644
> > > > > > > > > > > --- a/drivers/virtio/virtio_ring.c
> > > > > > > > > > > +++ b/drivers/virtio/virtio_ring.c
> > > > > > > > > > > @@ -3172,4 +3172,96 @@ const struct vring *virtqueue_get_vring(struct virtqueue *vq)
> > > > > > > > > > >  }
> > > > > > > > > > >  EXPORT_SYMBOL_GPL(virtqueue_get_vring);
> > > > > > > > > > >
> > > > > > > > > > > +/**
> > > > > > > > > > > + * virtio_dma_map_page - get the DMA addr of the memory for virtio device
> > > > > > > > > > > + * @dev: virtio device
> > > > > > > > > > > + * @page: the page of the memory to DMA
> > > > > > > > > > > + * @offset: the offset of the memory inside page
> > > > > > > > > > > + * @length: memory length
> > > > > > > > > > > + * @dir: DMA direction
> > > > > > > > > > > + *
> > > > > > > > > > > + * This API is only for pre-mapped buffers, for non premapped buffers virtio
> > > > > > > > > > > + * core handles DMA API internally.
> > > > > > > > > > > + *
> > > > > > > > > > > + * Returns the DMA addr. DMA_MAPPING_ERROR means error.
> > > > > > > > > > > + */
> > > > > > > > > > > +dma_addr_t virtio_dma_map_page(struct device *dev, struct page *page, size_t offset,
> > > > > > > > > > > +                              unsigned int length, enum dma_data_direction dir)
> > > > > > > > > > > +{
> > > > > > > > > >
> > > > > > > > > > This (and the reset) needs to be done per virtqueue instead per device
> > > > > > > > > > after b0e504e5505d184b0be248b7dcdbe50b79f03758 ("virtio_ring: per
> > > > > > > > > > virtqueue dma device").
> > > > > > > > >
> > > > > > > > >
> > > > > > > > > YES.
> > > > > > > > >
> > > > > > > > >
> > > > > > > > > >
> > > > > > > > > > > +       struct virtio_device *vdev = dev_to_virtio(dev);
> > > > > > > > > > > +
> > > > > > > > > > > +       if (!vring_use_dma_api(vdev))
> > > > > > > > > > > +               return page_to_phys(page) + offset;
> > > > > > > > > > > +
> > > > > > > > > > > +       return dma_map_page(vdev->dev.parent, page, offset, length, dir);
> > > > > > > > > > > +}
> > > > > > > > > >
> > > > > > > > > > Need either inline or EXPORT_SYMBOL_GPL() here.
> > > > > > > > >
> > > > > > > > > Because I did not use this interface, I did not  export it.
> > > > > > > > >
> > > > > > > > > Thanks.
> > > > > > > > >
> > > > > > > > >
> > > > > > > > > >
> > > > > > > > > > Thanks
> > > > > > > > > >
> > > > > > > > > >
> > > > > > > > > > > +
> > > > > > > > > > > +/**
> > > > > > > > > > > + * virtio_dma_map - get the DMA addr of the memory for virtio device
> > > > > > > > > > > + * @dev: virtio device
> > > > > > > > > > > + * @addr: the addr to DMA
> > > > > > > > > > > + * @length: memory length
> > > > > > > > > > > + * @dir: DMA direction
> > > > > > > > > > > + *
> > > > > > > > > > > + * This API is only for pre-mapped buffers, for non premapped buffers virtio
> > > > > > > > > > > + * core handles DMA API internally.
> > > > > > > > > > > + *
> > > > > > > > > > > + * Returns the DMA addr.
> > > > > > > > > > > + */
> > > > > > > > > > > +dma_addr_t virtio_dma_map(struct device *dev, void *addr, unsigned int length,
> > > > > > > > > > > +                         enum dma_data_direction dir)
> > > > > > > > > > > +{
> > > > > > > > > > > +       struct page *page;
> > > > > > > > > > > +       size_t offset;
> > > > > > > > > > > +
> > > > > > > > > > > +       page = virt_to_page(addr);
> > > > > > > > > > > +       offset = offset_in_page(addr);
> > > > > > > > > > > +
> > > > > > > > > > > +       return virtio_dma_map_page(dev, page, offset, length, dir);
> > > > > > > > > > > +}
> > > > > > > > > > > +EXPORT_SYMBOL_GPL(virtio_dma_map);
> > > > > > > > > > > +
> > > > > > > > > > > +/**
> > > > > > > > > > > + * virtio_dma_mapping_error - check dma address
> > > > > > > > > > > + * @dev: virtio device
> > > > > > > > > > > + * @addr: DMA address
> > > > > > > > > > > + *
> > > > > > > > > > > + * This API is only for pre-mapped buffers, for non premapped buffers virtio
> > > > > > > > > > > + * core handles DMA API internally.
> > > > > > > > > > > + *
> > > > > > > > > > > + * Returns 0 means dma valid. Other means invalid dma address.
> > > > > > > > > > > + */
> > > > > > > > > > > +int virtio_dma_mapping_error(struct device *dev, dma_addr_t addr)
> > > > > > > > > > > +{
> > > > > > > > > > > +       struct virtio_device *vdev = dev_to_virtio(dev);
> > > > > > > > > > > +
> > > > > > > > > > > +       if (!vring_use_dma_api(vdev))
> > > > > > > > > > > +               return 0;
> > > > > > > > > > > +
> > > > > > > > > > > +       return dma_mapping_error(vdev->dev.parent, addr);
> > > > > > > > > > > +}
> > > > > > > > > > > +EXPORT_SYMBOL_GPL(virtio_dma_mapping_error);
> > > > > > > > > > > +
> > > > > > > > > > > +/**
> > > > > > > > > > > + * virtio_dma_unmap - unmap DMA addr
> > > > > > > > > > > + * @dev: virtio device
> > > > > > > > > > > + * @dma: DMA address
> > > > > > > > > > > + * @length: memory length
> > > > > > > > > > > + * @dir: DMA direction
> > > > > > > > > > > + *
> > > > > > > > > > > + * This API is only for pre-mapped buffers, for non premapped buffers virtio
> > > > > > > > > > > + * core handles DMA API internally.
> > > > > > > > > > > + */
> > > > > > > > > > > +void virtio_dma_unmap(struct device *dev, dma_addr_t dma, unsigned int length,
> > > > > > > > > > > +                     enum dma_data_direction dir)
> > > > > > > > > > > +{
> > > > > > > > > > > +       struct virtio_device *vdev = dev_to_virtio(dev);
> > > > > > > > > > > +
> > > > > > > > > > > +       if (!vring_use_dma_api(vdev))
> > > > > > > > > > > +               return;
> > > > > > > > > > > +
> > > > > > > > > > > +       dma_unmap_page(vdev->dev.parent, dma, length, dir);
> > > > > > > > > > > +}
> > > > > > > > > > > +EXPORT_SYMBOL_GPL(virtio_dma_unmap);
> > > > > > > > > > > +
> > > > > > > > > > >  MODULE_LICENSE("GPL");
> > > > > > > > > > > diff --git a/include/linux/virtio.h b/include/linux/virtio.h
> > > > > > > > > > > index 3ebb346ebb7c..b5fa71476737 100644
> > > > > > > > > > > --- a/include/linux/virtio.h
> > > > > > > > > > > +++ b/include/linux/virtio.h
> > > > > > > > > > > @@ -9,6 +9,7 @@
> > > > > > > > > > >  #include <linux/device.h>
> > > > > > > > > > >  #include <linux/mod_devicetable.h>
> > > > > > > > > > >  #include <linux/gfp.h>
> > > > > > > > > > > +#include <linux/dma-mapping.h>
> > > > > > > > > > >
> > > > > > > > > > >  /**
> > > > > > > > > > >   * struct virtqueue - a queue to register buffers for sending or receiving.
> > > > > > > > > > > @@ -216,4 +217,12 @@ void unregister_virtio_driver(struct virtio_driver *drv);
> > > > > > > > > > >  #define module_virtio_driver(__virtio_driver) \
> > > > > > > > > > >         module_driver(__virtio_driver, register_virtio_driver, \
> > > > > > > > > > >                         unregister_virtio_driver)
> > > > > > > > > > > +
> > > > > > > > > > > +dma_addr_t virtio_dma_map_page(struct device *dev, struct page *page, size_t offset,
> > > > > > > > > > > +                              unsigned int length, enum dma_data_direction dir);
> > > > > > > > > > > +dma_addr_t virtio_dma_map(struct device *dev, void *addr, unsigned int length,
> > > > > > > > > > > +                         enum dma_data_direction dir);
> > > > > > > > > > > +int virtio_dma_mapping_error(struct device *dev, dma_addr_t addr);
> > > > > > > > > > > +void virtio_dma_unmap(struct device *dev, dma_addr_t dma, unsigned int length,
> > > > > > > > > > > +                     enum dma_data_direction dir);
> > > > > > > > > > >  #endif /* _LINUX_VIRTIO_H */
> > > > > > > > > > > --
> > > > > > > > > > > 2.32.0.3.g01195cf9f
> > > > > > > > > > >
> > > > > > > > > >
> > > > > > > > >
> > > > > > > >
> > > > > > > _______________________________________________
> > > > > > > Virtualization mailing list
> > > > > > > Virtualization@lists.linux-foundation.org
> > > > > > > https://lists.linuxfoundation.org/mailman/listinfo/virtualization
> > > > > >
> > > > >
> > > >
> >
>
_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH vhost 07/10] virtio_ring: add api virtio_dma_map() for advance dma
  2023-03-02  7:31                       ` Xuan Zhuo
@ 2023-03-02  7:56                         ` Jason Wang
  2023-03-02 11:08                           ` Xuan Zhuo
  0 siblings, 1 reply; 46+ messages in thread
From: Jason Wang @ 2023-03-02  7:56 UTC (permalink / raw)
  To: Xuan Zhuo
  Cc: maciej.fijalkowski, Michael S. Tsirkin, virtualization, bjorn,
	jonathan.lemon, magnus.karlsson

On Thu, Mar 2, 2023 at 3:34 PM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
>
> On Thu, 2 Mar 2023 14:56:11 +0800, Jason Wang <jasowang@redhat.com> wrote:
> > On Thu, Mar 2, 2023 at 2:09 PM Michael S. Tsirkin <mst@redhat.com> wrote:
> > >
> > > On Thu, Mar 02, 2023 at 11:26:53AM +0800, Jason Wang wrote:
> > > > On Thu, Mar 2, 2023 at 11:24 AM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> > > > >
> > > > > On Thu, 2 Mar 2023 11:05:26 +0800, Jason Wang <jasowang@redhat.com> wrote:
> > > > > > On Thu, Mar 2, 2023 at 10:24 AM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> > > > > > >
> > > > > > > On Tue, 28 Feb 2023 19:15:23 +0800, Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> > > > > > > > On Tue, 21 Feb 2023 09:51:07 +0800, Jason Wang <jasowang@redhat.com> wrote:
> > > > > > > > > On Mon, Feb 20, 2023 at 3:02 PM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> > > > > > > > > >
> > > > > > > > > > On Mon, 20 Feb 2023 13:38:24 +0800, Jason Wang <jasowang@redhat.com> wrote:
> > > > > > > > > > > On Tue, Feb 14, 2023 at 3:27 PM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> > > > > > > > > > > >
> > > > > > > > > > > > Added virtio_dma_map() to map DMA addresses for virtual memory in
> > > > > > > > > > > > advance. The purpose is to keep memory mapped across multiple add/get
> > > > > > > > > > > > buf operations.
> > > > > > > > > > >
> > > > > > > > > > > I wonder if instead of exporting helpers like this, it might be simple
> > > > > > > > > > > to just export dma_dev then the upper layer can use DMA API at will?
> > > > > > > > > >
> > > > > > > > > >
> > > > > > > > > > The reason for not doing this, Virtio is not just using DMA_DEV to map, but
> > > > > > > > > > also check whether DMA is used.
> > > > > > > > >
> > > > > > > > > We should let the DMA API decide by exporting a correct dma_dev. E.g
> > > > > > > > > when ACCESS_PLATFORM is not negotiated, advertising a DMA dev without
> > > > > > > > > dma_ops.
> > > > > > > >
> > > > > > > >
> > > > > > > > Do you mean we provide this API?
> > > > > > > >
> > > > > > > > virtio_get_dma_dev()
> > > > > > > >
> > > > > > > > If it returns NULL, the caller will use the physical memory address directly. If
> > > > > > > > this func return a dma_dev, the caller should use DMA API.
> > > > > > >
> > > > > > >
> > > > > > > cc the XDP_SOCKET's maintainers.
> > > > > > >
> > > > > > > First of all, Jason does not want to encapsulate the API of DMA by Virtio. It is
> > > > > > > best to pass DMA Device to XSK, XSK uses DMA API for DMA MAP operation directly.
> > > > > > > I agree with this idea.
> > > > > > >
> > > > > > > However, there are several problems under Virtio:
> > > > > > > 1. In some virtualization scenarios, we do not have to perform DMA operations,
> > > > > > >    just use the physical address directly.
> > > > > >
> > > > > > This is not a problem, we can simply return the virtio device itself
> > > > > > as the DMA device in this case. Since there's no DMA ops attached, DMA
> > > > > > API will use physical address in this case.
> > > > >
> > > > > Is this like this? So why do we have to deal with it in Virtio Ring? Let me
> > > > > learn it.
> > > >
> > > > It has a long debate and I can't recall too many details. (You can
> > > > search the archives). Michael may show more thoughts here.
> > > >
> > > > One concern is the overhead of the DMA API that needs to be benchmarked.
> > > >
> > > > Thanks
> > >
> > > Concern with what?
> >
> > Always use the DMA API for virtio devices by drop vq->use_dma_api.
>
> Do you mean the effect on the AF_XDP performance?

No, I was replying to your question:

"
Is this like this? So why do we have to deal with it in Virtio Ring?
"

I was trying to answer why we don't do that at the virtio ring level but instead
have virtio's own dma helper like vring_map_one_sg().

> I think this
> may not be a key issue, because DMA is completed in advance, and there is no
> DMA operation on the data path. So the overhead is not big.

Right.

Thanks

>
> Thanks.
>
>
> >
> > For the device that doesn't need quirk, advertise the virtio device as
> > its dma device.
> >
> > Thanks
> >
> > > This patch does not change devices, they are using
> > > the existing API. Xuan Zhuo already showed a benchmark result for AF_XDP.
> > >
> > >
> > > > >
> > > > >
> > > > > >
> > > > > > > 2. The latest Virtio Core supports each rx/tx queue with one DMA device.
> > > > > > >    Generally, the physical network card has only one device. All queues use
> > > > > > >    it for DMA operation.
> > > > > >
> > > > > > I'm not sure this is a big deal, we just need to use the per virtqueue
> > > > > > dma device to use DMA API.
> > > > >
> > > > > Yes.
> > > > >
> > > > >
> > > > > >
> > > > > > >
> > > > > > > So I consider this problem again, Virtio Core provides only one API.
> > > > > > >
> > > > > > > * virtio_get_dma_dev(queue)
> > > > > > >
> > > > > > > If the return value is NULL, it means that there is no DMA operation. If it is
> > > > > > > not NULL, use DMA API for DMA operation.
> > > > > > >
> > > > > > > The modification of XSK is like this. We may pass NULL as dev to xp_dma_map().
> > > > > > > If dev is NULL, then there is no need to perform DMA and Sync operations.
> > > > > > > Otherwise, it will perform DMA operations like other devices.
> > > > > >
> > > > > > As discussed above, it might be easier:
> > > > > >
> > > > > >     if (!virtio_has_feature(VIRTIO_F_ACCESS_PLATFORM))
> > > > > >         return virtio_device;
> > > > > >     else
> > > > > >         return vring_dma_dev();
> > > > >
> > > > > Yes, according to Jason's opinion, then XSK does not need to do any modifications.
> > > > >
> > > > > Thanks.
> > >
> > > Yes AF_XDP does not need the per VQ device hack.
> > > We should probably rethink it.
> > >
> > > But as far as implementation goes, poking at VIRTIO_F_ACCESS_PLATFORM
> > > is wrong. Please use virtio_has_dma_quirk.
> > >
> > >
> > >
> > > > > >
> > > > > > >
> > > > > > > And if the dma_dev of rx and tx is different, then we can only disable
> > > > > > > XDP_SOCKET.
> > > > > >
> > > > > > We can start with this.
> > > > > >
> > > > > > Thanks
> > > > > >
> > > > > > >
> > > > > > > Looking forward to everyone's reply.
> > > > > > >
> > > > > > > Thanks.
> > > > > > >
> > > > > > > >
> > > > > > > > Thanks.
> > > > > > > >
> > > > > > > >
> > > > > > > > >
> > > > > > > > > Thanks
> > > > > > > > >
> > > > > > > > > >
> > > > > > > > > >
> > > > > > > > > > >
> > > > > > > > > > > (Otherwise the DMA helpers need to grow/shrink as the DMA API evolves?)
> > > > > > > > > > >
> > > > > > > > > > > >
> > > > > > > > > > > > Added virtio_dma_unmap() for unmap DMA address.
> > > > > > > > > > > >
> > > > > > > > > > > > Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
> > > > > > > > > > > > ---
> > > > > > > > > > > >  drivers/virtio/virtio_ring.c | 92 ++++++++++++++++++++++++++++++++++++
> > > > > > > > > > > >  include/linux/virtio.h       |  9 ++++
> > > > > > > > > > > >  2 files changed, 101 insertions(+)
> > > > > > > > > > > >
> > > > > > > > > > > > diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
> > > > > > > > > > > > index cd9364eb2345..855338609c7f 100644
> > > > > > > > > > > > --- a/drivers/virtio/virtio_ring.c
> > > > > > > > > > > > +++ b/drivers/virtio/virtio_ring.c
> > > > > > > > > > > > @@ -3172,4 +3172,96 @@ const struct vring *virtqueue_get_vring(struct virtqueue *vq)
> > > > > > > > > > > >  }
> > > > > > > > > > > >  EXPORT_SYMBOL_GPL(virtqueue_get_vring);
> > > > > > > > > > > >
> > > > > > > > > > > > +/**
> > > > > > > > > > > > + * virtio_dma_map_page - get the DMA addr of the memory for virtio device
> > > > > > > > > > > > + * @dev: virtio device
> > > > > > > > > > > > + * @page: the page of the memory to DMA
> > > > > > > > > > > > + * @offset: the offset of the memory inside page
> > > > > > > > > > > > + * @length: memory length
> > > > > > > > > > > > + * @dir: DMA direction
> > > > > > > > > > > > + *
> > > > > > > > > > > > + * This API is only for pre-mapped buffers, for non premapped buffers virtio
> > > > > > > > > > > > + * core handles DMA API internally.
> > > > > > > > > > > > + *
> > > > > > > > > > > > + * Returns the DMA addr. DMA_MAPPING_ERROR means error.
> > > > > > > > > > > > + */
> > > > > > > > > > > > +dma_addr_t virtio_dma_map_page(struct device *dev, struct page *page, size_t offset,
> > > > > > > > > > > > +                              unsigned int length, enum dma_data_direction dir)
> > > > > > > > > > > > +{
> > > > > > > > > > >
> > > > > > > > > > > This (and the reset) needs to be done per virtqueue instead per device
> > > > > > > > > > > after b0e504e5505d184b0be248b7dcdbe50b79f03758 ("virtio_ring: per
> > > > > > > > > > > virtqueue dma device").
> > > > > > > > > >
> > > > > > > > > >
> > > > > > > > > > YES.
> > > > > > > > > >
> > > > > > > > > >
> > > > > > > > > > >
> > > > > > > > > > > > +       struct virtio_device *vdev = dev_to_virtio(dev);
> > > > > > > > > > > > +
> > > > > > > > > > > > +       if (!vring_use_dma_api(vdev))
> > > > > > > > > > > > +               return page_to_phys(page) + offset;
> > > > > > > > > > > > +
> > > > > > > > > > > > +       return dma_map_page(vdev->dev.parent, page, offset, length, dir);
> > > > > > > > > > > > +}
> > > > > > > > > > >
> > > > > > > > > > > Need either inline or EXPORT_SYMBOL_GPL() here.
> > > > > > > > > >
> > > > > > > > > > Because I did not use this interface, I did not  export it.
> > > > > > > > > >
> > > > > > > > > > Thanks.
> > > > > > > > > >
> > > > > > > > > >
> > > > > > > > > > >
> > > > > > > > > > > Thanks
> > > > > > > > > > >
> > > > > > > > > > >
> > > > > > > > > > > > +
> > > > > > > > > > > > +/**
> > > > > > > > > > > > + * virtio_dma_map - get the DMA addr of the memory for virtio device
> > > > > > > > > > > > + * @dev: virtio device
> > > > > > > > > > > > + * @addr: the addr to DMA
> > > > > > > > > > > > + * @length: memory length
> > > > > > > > > > > > + * @dir: DMA direction
> > > > > > > > > > > > + *
> > > > > > > > > > > > + * This API is only for pre-mapped buffers, for non premapped buffers virtio
> > > > > > > > > > > > + * core handles DMA API internally.
> > > > > > > > > > > > + *
> > > > > > > > > > > > + * Returns the DMA addr.
> > > > > > > > > > > > + */
> > > > > > > > > > > > +dma_addr_t virtio_dma_map(struct device *dev, void *addr, unsigned int length,
> > > > > > > > > > > > +                         enum dma_data_direction dir)
> > > > > > > > > > > > +{
> > > > > > > > > > > > +       struct page *page;
> > > > > > > > > > > > +       size_t offset;
> > > > > > > > > > > > +
> > > > > > > > > > > > +       page = virt_to_page(addr);
> > > > > > > > > > > > +       offset = offset_in_page(addr);
> > > > > > > > > > > > +
> > > > > > > > > > > > +       return virtio_dma_map_page(dev, page, offset, length, dir);
> > > > > > > > > > > > +}
> > > > > > > > > > > > +EXPORT_SYMBOL_GPL(virtio_dma_map);
> > > > > > > > > > > > +
> > > > > > > > > > > > +/**
> > > > > > > > > > > > + * virtio_dma_mapping_error - check dma address
> > > > > > > > > > > > + * @dev: virtio device
> > > > > > > > > > > > + * @addr: DMA address
> > > > > > > > > > > > + *
> > > > > > > > > > > > + * This API is only for pre-mapped buffers, for non premapped buffers virtio
> > > > > > > > > > > > + * core handles DMA API internally.
> > > > > > > > > > > > + *
> > > > > > > > > > > > + * Returns 0 means dma valid. Other means invalid dma address.
> > > > > > > > > > > > + */
> > > > > > > > > > > > +int virtio_dma_mapping_error(struct device *dev, dma_addr_t addr)
> > > > > > > > > > > > +{
> > > > > > > > > > > > +       struct virtio_device *vdev = dev_to_virtio(dev);
> > > > > > > > > > > > +
> > > > > > > > > > > > +       if (!vring_use_dma_api(vdev))
> > > > > > > > > > > > +               return 0;
> > > > > > > > > > > > +
> > > > > > > > > > > > +       return dma_mapping_error(vdev->dev.parent, addr);
> > > > > > > > > > > > +}
> > > > > > > > > > > > +EXPORT_SYMBOL_GPL(virtio_dma_mapping_error);
> > > > > > > > > > > > +
> > > > > > > > > > > > +/**
> > > > > > > > > > > > + * virtio_dma_unmap - unmap DMA addr
> > > > > > > > > > > > + * @dev: virtio device
> > > > > > > > > > > > + * @dma: DMA address
> > > > > > > > > > > > + * @length: memory length
> > > > > > > > > > > > + * @dir: DMA direction
> > > > > > > > > > > > + *
> > > > > > > > > > > > + * This API is only for pre-mapped buffers, for non premapped buffers virtio
> > > > > > > > > > > > + * core handles DMA API internally.
> > > > > > > > > > > > + */
> > > > > > > > > > > > +void virtio_dma_unmap(struct device *dev, dma_addr_t dma, unsigned int length,
> > > > > > > > > > > > +                     enum dma_data_direction dir)
> > > > > > > > > > > > +{
> > > > > > > > > > > > +       struct virtio_device *vdev = dev_to_virtio(dev);
> > > > > > > > > > > > +
> > > > > > > > > > > > +       if (!vring_use_dma_api(vdev))
> > > > > > > > > > > > +               return;
> > > > > > > > > > > > +
> > > > > > > > > > > > +       dma_unmap_page(vdev->dev.parent, dma, length, dir);
> > > > > > > > > > > > +}
> > > > > > > > > > > > +EXPORT_SYMBOL_GPL(virtio_dma_unmap);
> > > > > > > > > > > > +
> > > > > > > > > > > >  MODULE_LICENSE("GPL");
> > > > > > > > > > > > diff --git a/include/linux/virtio.h b/include/linux/virtio.h
> > > > > > > > > > > > index 3ebb346ebb7c..b5fa71476737 100644
> > > > > > > > > > > > --- a/include/linux/virtio.h
> > > > > > > > > > > > +++ b/include/linux/virtio.h
> > > > > > > > > > > > @@ -9,6 +9,7 @@
> > > > > > > > > > > >  #include <linux/device.h>
> > > > > > > > > > > >  #include <linux/mod_devicetable.h>
> > > > > > > > > > > >  #include <linux/gfp.h>
> > > > > > > > > > > > +#include <linux/dma-mapping.h>
> > > > > > > > > > > >
> > > > > > > > > > > >  /**
> > > > > > > > > > > >   * struct virtqueue - a queue to register buffers for sending or receiving.
> > > > > > > > > > > > @@ -216,4 +217,12 @@ void unregister_virtio_driver(struct virtio_driver *drv);
> > > > > > > > > > > >  #define module_virtio_driver(__virtio_driver) \
> > > > > > > > > > > >         module_driver(__virtio_driver, register_virtio_driver, \
> > > > > > > > > > > >                         unregister_virtio_driver)
> > > > > > > > > > > > +
> > > > > > > > > > > > +dma_addr_t virtio_dma_map_page(struct device *dev, struct page *page, size_t offset,
> > > > > > > > > > > > +                              unsigned int length, enum dma_data_direction dir);
> > > > > > > > > > > > +dma_addr_t virtio_dma_map(struct device *dev, void *addr, unsigned int length,
> > > > > > > > > > > > +                         enum dma_data_direction dir);
> > > > > > > > > > > > +int virtio_dma_mapping_error(struct device *dev, dma_addr_t addr);
> > > > > > > > > > > > +void virtio_dma_unmap(struct device *dev, dma_addr_t dma, unsigned int length,
> > > > > > > > > > > > +                     enum dma_data_direction dir);
> > > > > > > > > > > >  #endif /* _LINUX_VIRTIO_H */
> > > > > > > > > > > > --
> > > > > > > > > > > > 2.32.0.3.g01195cf9f
> > > > > > > > > > > >
> > > > > > > > > > >
> > > > > > > > > >
> > > > > > > > >
> > > > > > > > _______________________________________________
> > > > > > > > Virtualization mailing list
> > > > > > > > Virtualization@lists.linux-foundation.org
> > > > > > > > https://lists.linuxfoundation.org/mailman/listinfo/virtualization
> > > > > > >
> > > > > >
> > > > >
> > >
> >
>

_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH vhost 07/10] virtio_ring: add api virtio_dma_map() for advance dma
  2023-03-02  7:56                         ` Jason Wang
@ 2023-03-02 11:08                           ` Xuan Zhuo
  0 siblings, 0 replies; 46+ messages in thread
From: Xuan Zhuo @ 2023-03-02 11:08 UTC (permalink / raw)
  To: Jason Wang
  Cc: maciej.fijalkowski, Michael S. Tsirkin, virtualization, bjorn,
	jonathan.lemon, magnus.karlsson

On Thu, 2 Mar 2023 15:56:42 +0800, Jason Wang <jasowang@redhat.com> wrote:
> On Thu, Mar 2, 2023 at 3:34 PM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> >
> > On Thu, 2 Mar 2023 14:56:11 +0800, Jason Wang <jasowang@redhat.com> wrote:
> > > On Thu, Mar 2, 2023 at 2:09 PM Michael S. Tsirkin <mst@redhat.com> wrote:
> > > >
> > > > On Thu, Mar 02, 2023 at 11:26:53AM +0800, Jason Wang wrote:
> > > > > On Thu, Mar 2, 2023 at 11:24 AM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> > > > > >
> > > > > > On Thu, 2 Mar 2023 11:05:26 +0800, Jason Wang <jasowang@redhat.com> wrote:
> > > > > > > On Thu, Mar 2, 2023 at 10:24 AM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> > > > > > > >
> > > > > > > > On Tue, 28 Feb 2023 19:15:23 +0800, Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> > > > > > > > > On Tue, 21 Feb 2023 09:51:07 +0800, Jason Wang <jasowang@redhat.com> wrote:
> > > > > > > > > > On Mon, Feb 20, 2023 at 3:02 PM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> > > > > > > > > > >
> > > > > > > > > > > On Mon, 20 Feb 2023 13:38:24 +0800, Jason Wang <jasowang@redhat.com> wrote:
> > > > > > > > > > > > On Tue, Feb 14, 2023 at 3:27 PM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> > > > > > > > > > > > >
> > > > > > > > > > > > > Added virtio_dma_map() to map DMA addresses for virtual memory in
> > > > > > > > > > > > > advance. The purpose is to keep memory mapped across multiple add/get
> > > > > > > > > > > > > buf operations.
> > > > > > > > > > > >
> > > > > > > > > > > > I wonder if instead of exporting helpers like this, it might be simple
> > > > > > > > > > > > to just export dma_dev then the upper layer can use DMA API at will?
> > > > > > > > > > >
> > > > > > > > > > >
> > > > > > > > > > > The reason for not doing this is that Virtio does not just use the DMA dev to map,
> > > > > > > > > > > but also checks whether DMA is used.
> > > > > > > > > >
> > > > > > > > > > We should let the DMA API decide by exporting a correct dma_dev. E.g
> > > > > > > > > > when ACCESS_PLATFORM is not negotiated, advertising a DMA dev without
> > > > > > > > > > dma_ops.
> > > > > > > > >
> > > > > > > > >
> > > > > > > > > Do you mean we provide this API?
> > > > > > > > >
> > > > > > > > > virtio_get_dma_dev()
> > > > > > > > >
> > > > > > > > > If it returns NULL, the caller will use the physical memory address directly. If
> > > > > > > > > this func returns a dma_dev, the caller should use the DMA API.
> > > > > > > >
> > > > > > > >
> > > > > > > > cc the XDP_SOCKET's maintainers.
> > > > > > > >
> > > > > > > > First of all, Jason does not want to encapsulate the API of DMA by Virtio. It is
> > > > > > > > best to pass DMA Device to XSK, XSK uses DMA API for DMA MAP operation directly.
> > > > > > > > I agree with this idea.
> > > > > > > >
> > > > > > > > However, there are several problems under Virtio:
> > > > > > > > 1. In some virtualization scenarios, we do not have to perform DMA operations,
> > > > > > > >    just use the physical address directly.
> > > > > > >
> > > > > > > This is not a problem, we can simply return the virtio device itself
> > > > > > > as the DMA device in this case. Since there's no DMA ops attached, DMA
> > > > > > > API will use physical address in this case.
> > > > > >
> > > > > > Is this like this? So why do we have to deal with it in Virtio Ring? Let me
> > > > > > learn it.
> > > > >
> > > > > It has a long debate and I can't recall too many details. (You can
> > > > > search the archives). Michael may show more thoughts here.
> > > > >
> > > > > One concern is the overhead of the DMA API that needs to be benchmarked.
> > > > >
> > > > > Thanks
> > > >
> > > > Concern with what?
> > >
> > > Always use the DMA API for virtio devices by drop vq->use_dma_api.
> >
> > Do you mean the effects on the AF_XDP performance?
>
> No, I was replying to your question:
>
> "
> Is this like this? So why do we have to deal with it in Virtio Ring?
> "
>
> Try to answer why we don't do that in the virtio ring level but try to
> have virtio's own dma helper like vring_map_one_sg().

I see.

Thanks.


>
> > I think this
> > may not be a key issue, because DMA is completed in advance, and there is no
> > DMA operation on the data path. So the overhead is not big.
>
> Right.
>
> Thanks
>
> >
> > Thanks.
> >
> >
> > >
> > > For the device that doesn't need quirk, advertise the virtio device as
> > > its dma device.
> > >
> > > Thanks
> > >
> > > > This patch does not change devices, they are using
> > > > the existing API. Xuan Zhuo already showed a benchmark result for AF_XDP.
> > > >
> > > >
> > > > > >
> > > > > >
> > > > > > >
> > > > > > > > 2. The latest Virtio Core supports each rx/tx queue with one DMA device.
> > > > > > > >    Generally, the physical network card has only one device. All queues use
> > > > > > > >    it for DMA operation.
> > > > > > >
> > > > > > > I'm not sure this is a big deal, we just need to use the per virtqueue
> > > > > > > dma device to use DMA API.
> > > > > >
> > > > > > Yes.
> > > > > >
> > > > > >
> > > > > > >
> > > > > > > >
> > > > > > > > So I consider this problem again, Virtio Core provides only one API.
> > > > > > > >
> > > > > > > > * virtio_get_dma_dev(queue)
> > > > > > > >
> > > > > > > > If the return value is NULL, it means that there is no DMA operation. If it is
> > > > > > > > not NULL, use DMA API for DMA operation.
> > > > > > > >
> > > > > > > > The modification of XSK is like this. We may pass NULL as dev to xp_dma_map().
> > > > > > > > If dev is NULL, then there is no need to perform DMA and Sync operations.
> > > > > > > > Otherwise, it will perform DMA operations like other devices.
> > > > > > >
> > > > > > > As discussed above, it might be easier:
> > > > > > >
> > > > > > >     if (!virtio_has_feature(VIRTIO_F_ACCESS_PLATFORM))
> > > > > > >         return virtio_device;
> > > > > > >     else
> > > > > > >         return vring_dma_dev();
> > > > > >
> > > > > > Yes, according to Jason's opinion, XSK does not need to make any modifications.
> > > > > >
> > > > > > Thanks.
> > > >
> > > > Yes AF_XDP does not need the per VQ device hack.
> > > > We should probably rethink it.
> > > >
> > > > But as far as implementation goes, poking at VIRTIO_F_ACCESS_PLATFORM
> > > > is wrong. Please use virtio_has_dma_quirk.
> > > >
> > > >
> > > >
> > > > > > >
> > > > > > > >
> > > > > > > > And if the dma_dev of rx and tx is different, then we can only disable
> > > > > > > > XDP_SOCKET.
> > > > > > >
> > > > > > > We can start with this.
> > > > > > >
> > > > > > > Thanks
> > > > > > >
> > > > > > > >
> > > > > > > > Looking forward to everyone's reply.
> > > > > > > >
> > > > > > > > Thanks.
> > > > > > > >
> > > > > > > > >
> > > > > > > > > Thanks.
> > > > > > > > >
> > > > > > > > >
> > > > > > > > > >
> > > > > > > > > > Thanks
> > > > > > > > > >
> > > > > > > > > > >
> > > > > > > > > > >
> > > > > > > > > > > >
> > > > > > > > > > > > (Otherwise the DMA helpers need to grow/shrink as the DMA API evolves?)
> > > > > > > > > > > >
> > > > > > > > > > > > >
> > > > > > > > > > > > > Added virtio_dma_unmap() for unmap DMA address.
> > > > > > > > > > > > >
> > > > > > > > > > > > > Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
> > > > > > > > > > > > > ---
> > > > > > > > > > > > >  drivers/virtio/virtio_ring.c | 92 ++++++++++++++++++++++++++++++++++++
> > > > > > > > > > > > >  include/linux/virtio.h       |  9 ++++
> > > > > > > > > > > > >  2 files changed, 101 insertions(+)
> > > > > > > > > > > > >
> > > > > > > > > > > > > diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
> > > > > > > > > > > > > index cd9364eb2345..855338609c7f 100644
> > > > > > > > > > > > > --- a/drivers/virtio/virtio_ring.c
> > > > > > > > > > > > > +++ b/drivers/virtio/virtio_ring.c
> > > > > > > > > > > > > @@ -3172,4 +3172,96 @@ const struct vring *virtqueue_get_vring(struct virtqueue *vq)
> > > > > > > > > > > > >  }
> > > > > > > > > > > > >  EXPORT_SYMBOL_GPL(virtqueue_get_vring);
> > > > > > > > > > > > >
> > > > > > > > > > > > > +/**
> > > > > > > > > > > > > + * virtio_dma_map_page - get the DMA addr of the memory for virtio device
> > > > > > > > > > > > > + * @dev: virtio device
> > > > > > > > > > > > > + * @page: the page of the memory to DMA
> > > > > > > > > > > > > + * @offset: the offset of the memory inside page
> > > > > > > > > > > > > + * @length: memory length
> > > > > > > > > > > > > + * @dir: DMA direction
> > > > > > > > > > > > > + *
> > > > > > > > > > > > > + * This API is only for pre-mapped buffers, for non premapped buffers virtio
> > > > > > > > > > > > > + * core handles DMA API internally.
> > > > > > > > > > > > > + *
> > > > > > > > > > > > > + * Returns the DMA addr. DMA_MAPPING_ERROR means error.
> > > > > > > > > > > > > + */
> > > > > > > > > > > > > +dma_addr_t virtio_dma_map_page(struct device *dev, struct page *page, size_t offset,
> > > > > > > > > > > > > +                              unsigned int length, enum dma_data_direction dir)
> > > > > > > > > > > > > +{
> > > > > > > > > > > >
> > > > > > > > > > > > This (and the reset) needs to be done per virtqueue instead per device
> > > > > > > > > > > > after b0e504e5505d184b0be248b7dcdbe50b79f03758 ("virtio_ring: per
> > > > > > > > > > > > virtqueue dma device").
> > > > > > > > > > >
> > > > > > > > > > >
> > > > > > > > > > > YES.
> > > > > > > > > > >
> > > > > > > > > > >
> > > > > > > > > > > >
> > > > > > > > > > > > > +       struct virtio_device *vdev = dev_to_virtio(dev);
> > > > > > > > > > > > > +
> > > > > > > > > > > > > +       if (!vring_use_dma_api(vdev))
> > > > > > > > > > > > > +               return page_to_phys(page) + offset;
> > > > > > > > > > > > > +
> > > > > > > > > > > > > +       return dma_map_page(vdev->dev.parent, page, offset, length, dir);
> > > > > > > > > > > > > +}
> > > > > > > > > > > >
> > > > > > > > > > > > Need either inline or EXPORT_SYMBOL_GPL() here.
> > > > > > > > > > >
> > > > > > > > > > > Because I did not use this interface, I did not export it.
> > > > > > > > > > >
> > > > > > > > > > > Thanks.
> > > > > > > > > > >
> > > > > > > > > > >
> > > > > > > > > > > >
> > > > > > > > > > > > Thanks
> > > > > > > > > > > >
> > > > > > > > > > > >
> > > > > > > > > > > > > +
> > > > > > > > > > > > > +/**
> > > > > > > > > > > > > + * virtio_dma_map - get the DMA addr of the memory for virtio device
> > > > > > > > > > > > > + * @dev: virtio device
> > > > > > > > > > > > > + * @addr: the addr to DMA
> > > > > > > > > > > > > + * @length: memory length
> > > > > > > > > > > > > + * @dir: DMA direction
> > > > > > > > > > > > > + *
> > > > > > > > > > > > > + * This API is only for pre-mapped buffers, for non premapped buffers virtio
> > > > > > > > > > > > > + * core handles DMA API internally.
> > > > > > > > > > > > > + *
> > > > > > > > > > > > > + * Returns the DMA addr.
> > > > > > > > > > > > > + */
> > > > > > > > > > > > > +dma_addr_t virtio_dma_map(struct device *dev, void *addr, unsigned int length,
> > > > > > > > > > > > > +                         enum dma_data_direction dir)
> > > > > > > > > > > > > +{
> > > > > > > > > > > > > +       struct page *page;
> > > > > > > > > > > > > +       size_t offset;
> > > > > > > > > > > > > +
> > > > > > > > > > > > > +       page = virt_to_page(addr);
> > > > > > > > > > > > > +       offset = offset_in_page(addr);
> > > > > > > > > > > > > +
> > > > > > > > > > > > > +       return virtio_dma_map_page(dev, page, offset, length, dir);
> > > > > > > > > > > > > +}
> > > > > > > > > > > > > +EXPORT_SYMBOL_GPL(virtio_dma_map);
> > > > > > > > > > > > > +
> > > > > > > > > > > > > +/**
> > > > > > > > > > > > > + * virtio_dma_mapping_error - check dma address
> > > > > > > > > > > > > + * @dev: virtio device
> > > > > > > > > > > > > + * @addr: DMA address
> > > > > > > > > > > > > + *
> > > > > > > > > > > > > + * This API is only for pre-mapped buffers, for non premapped buffers virtio
> > > > > > > > > > > > > + * core handles DMA API internally.
> > > > > > > > > > > > > + *
> > > > > > > > > > > > > + * Returns 0 means dma valid. Other means invalid dma address.
> > > > > > > > > > > > > + */
> > > > > > > > > > > > > +int virtio_dma_mapping_error(struct device *dev, dma_addr_t addr)
> > > > > > > > > > > > > +{
> > > > > > > > > > > > > +       struct virtio_device *vdev = dev_to_virtio(dev);
> > > > > > > > > > > > > +
> > > > > > > > > > > > > +       if (!vring_use_dma_api(vdev))
> > > > > > > > > > > > > +               return 0;
> > > > > > > > > > > > > +
> > > > > > > > > > > > > +       return dma_mapping_error(vdev->dev.parent, addr);
> > > > > > > > > > > > > +}
> > > > > > > > > > > > > +EXPORT_SYMBOL_GPL(virtio_dma_mapping_error);
> > > > > > > > > > > > > +
> > > > > > > > > > > > > +/**
> > > > > > > > > > > > > + * virtio_dma_unmap - unmap DMA addr
> > > > > > > > > > > > > + * @dev: virtio device
> > > > > > > > > > > > > + * @dma: DMA address
> > > > > > > > > > > > > + * @length: memory length
> > > > > > > > > > > > > + * @dir: DMA direction
> > > > > > > > > > > > > + *
> > > > > > > > > > > > > + * This API is only for pre-mapped buffers, for non premapped buffers virtio
> > > > > > > > > > > > > + * core handles DMA API internally.
> > > > > > > > > > > > > + */
> > > > > > > > > > > > > +void virtio_dma_unmap(struct device *dev, dma_addr_t dma, unsigned int length,
> > > > > > > > > > > > > +                     enum dma_data_direction dir)
> > > > > > > > > > > > > +{
> > > > > > > > > > > > > +       struct virtio_device *vdev = dev_to_virtio(dev);
> > > > > > > > > > > > > +
> > > > > > > > > > > > > +       if (!vring_use_dma_api(vdev))
> > > > > > > > > > > > > +               return;
> > > > > > > > > > > > > +
> > > > > > > > > > > > > +       dma_unmap_page(vdev->dev.parent, dma, length, dir);
> > > > > > > > > > > > > +}
> > > > > > > > > > > > > +EXPORT_SYMBOL_GPL(virtio_dma_unmap);
> > > > > > > > > > > > > +
> > > > > > > > > > > > >  MODULE_LICENSE("GPL");
> > > > > > > > > > > > > diff --git a/include/linux/virtio.h b/include/linux/virtio.h
> > > > > > > > > > > > > index 3ebb346ebb7c..b5fa71476737 100644
> > > > > > > > > > > > > --- a/include/linux/virtio.h
> > > > > > > > > > > > > +++ b/include/linux/virtio.h
> > > > > > > > > > > > > @@ -9,6 +9,7 @@
> > > > > > > > > > > > >  #include <linux/device.h>
> > > > > > > > > > > > >  #include <linux/mod_devicetable.h>
> > > > > > > > > > > > >  #include <linux/gfp.h>
> > > > > > > > > > > > > +#include <linux/dma-mapping.h>
> > > > > > > > > > > > >
> > > > > > > > > > > > >  /**
> > > > > > > > > > > > >   * struct virtqueue - a queue to register buffers for sending or receiving.
> > > > > > > > > > > > > @@ -216,4 +217,12 @@ void unregister_virtio_driver(struct virtio_driver *drv);
> > > > > > > > > > > > >  #define module_virtio_driver(__virtio_driver) \
> > > > > > > > > > > > >         module_driver(__virtio_driver, register_virtio_driver, \
> > > > > > > > > > > > >                         unregister_virtio_driver)
> > > > > > > > > > > > > +
> > > > > > > > > > > > > +dma_addr_t virtio_dma_map_page(struct device *dev, struct page *page, size_t offset,
> > > > > > > > > > > > > +                              unsigned int length, enum dma_data_direction dir);
> > > > > > > > > > > > > +dma_addr_t virtio_dma_map(struct device *dev, void *addr, unsigned int length,
> > > > > > > > > > > > > +                         enum dma_data_direction dir);
> > > > > > > > > > > > > +int virtio_dma_mapping_error(struct device *dev, dma_addr_t addr);
> > > > > > > > > > > > > +void virtio_dma_unmap(struct device *dev, dma_addr_t dma, unsigned int length,
> > > > > > > > > > > > > +                     enum dma_data_direction dir);
> > > > > > > > > > > > >  #endif /* _LINUX_VIRTIO_H */
> > > > > > > > > > > > > --
> > > > > > > > > > > > > 2.32.0.3.g01195cf9f
> > > > > > > > > > > > >
> > > > > > > > > > > >
> > > > > > > > > > >
> > > > > > > > > >
> > > > > > > > > _______________________________________________
> > > > > > > > > Virtualization mailing list
> > > > > > > > > Virtualization@lists.linux-foundation.org
> > > > > > > > > https://lists.linuxfoundation.org/mailman/listinfo/virtualization
> > > > > > > >
> > > > > > >
> > > > > >
> > > >
> > >
> >
>
_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply	[flat|nested] 46+ messages in thread

end of thread, other threads:[~2023-03-02 11:09 UTC | newest]

Thread overview: 46+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-02-14  7:26 [PATCH vhost 00/10] virtio core prepares for AF_XDP Xuan Zhuo
2023-02-14  7:26 ` [PATCH vhost 01/10] virtio_ring: split: refactor virtqueue_add_split() for premapped Xuan Zhuo
2023-02-20  5:37   ` Jason Wang
2023-02-20  6:57     ` Xuan Zhuo
2023-02-20 12:12     ` Michael S. Tsirkin
2023-02-14  7:26 ` [PATCH vhost 02/10] virtio_ring: packed: separate prepare code from virtuque_add_indirect_packed() Xuan Zhuo
2023-02-20  5:37   ` Jason Wang
2023-02-20  6:56     ` Xuan Zhuo
2023-02-14  7:26 ` [PATCH vhost 03/10] virtio_ring: packed: refactor virtqueue_add_packed() for premapped Xuan Zhuo
2023-02-20  5:37   ` Jason Wang
2023-02-14  7:26 ` [PATCH vhost 04/10] virtio_ring: split: introduce virtqueue_add_split_premapped() Xuan Zhuo
2023-02-20  5:38   ` Jason Wang
2023-02-20  6:43     ` Xuan Zhuo
2023-02-21  1:49       ` Jason Wang
2023-02-14  7:26 ` [PATCH vhost 05/10] virtio_ring: packed: introduce virtqueue_add_packed_premapped() Xuan Zhuo
2023-02-14  7:27 ` [PATCH vhost 06/10] virtio_ring: introduce virtqueue_add_inbuf_premapped() Xuan Zhuo
2023-02-14  7:27 ` [PATCH vhost 07/10] virtio_ring: add api virtio_dma_map() for advance dma Xuan Zhuo
2023-02-20  5:38   ` Jason Wang
2023-02-20  6:59     ` Xuan Zhuo
2023-02-21  1:51       ` Jason Wang
2023-02-28 11:15         ` Xuan Zhuo
2023-03-02  2:04           ` Xuan Zhuo
2023-03-02  3:05             ` Jason Wang
2023-03-02  3:21               ` Xuan Zhuo
2023-03-02  3:26                 ` Jason Wang
2023-03-02  6:09                   ` Michael S. Tsirkin
2023-03-02  6:43                     ` Xuan Zhuo
2023-03-02  6:56                     ` Jason Wang
2023-03-02  7:31                       ` Xuan Zhuo
2023-03-02  7:56                         ` Jason Wang
2023-03-02 11:08                           ` Xuan Zhuo
2023-03-01 11:47         ` Xuan Zhuo
2023-02-14  7:27 ` [PATCH vhost 08/10] virtio_ring: introduce dma sync api for virtio Xuan Zhuo
2023-02-20  5:38   ` Jason Wang
2023-02-20  7:04     ` Xuan Zhuo
2023-02-21  1:52       ` Jason Wang
2023-02-14  7:27 ` [PATCH vhost 09/10] virtio_ring: correct the expression of the description of virtqueue_resize() Xuan Zhuo
2023-02-20  5:38   ` Jason Wang
2023-02-14  7:27 ` [PATCH vhost 10/10] virtio_ring: introduce virtqueue_reset() Xuan Zhuo
2023-02-20  5:38   ` Jason Wang
2023-02-20  7:03     ` Xuan Zhuo
2023-02-21  1:51       ` Jason Wang
2023-02-16  5:27 ` [PATCH vhost 00/10] virtio core prepares for AF_XDP Jason Wang
2023-02-16 11:46   ` Xuan Zhuo
2023-02-17  5:23     ` Jason Wang
2023-02-17  9:02       ` Xuan Zhuo

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.