* [PATCH vhost v12 01/10] virtio_ring: check use_dma_api before unmap desc for indirect
2023-07-19 4:04 [PATCH vhost v12 00/10] virtio core prepares for AF_XDP Xuan Zhuo
@ 2023-07-19 4:04 ` Xuan Zhuo
2023-07-19 4:04 ` [PATCH vhost v12 02/10] virtio_ring: put mapping error check in vring_map_one_sg Xuan Zhuo
` (8 subsequent siblings)
9 siblings, 0 replies; 14+ messages in thread
From: Xuan Zhuo @ 2023-07-19 4:04 UTC (permalink / raw)
To: virtualization
Cc: Michael S. Tsirkin, Jason Wang, Xuan Zhuo, David S. Miller,
Eric Dumazet, Jakub Kicinski, Paolo Abeni, Alexei Starovoitov,
Daniel Borkmann, Jesper Dangaard Brouer, John Fastabend, netdev,
bpf, Christoph Hellwig
Inside detach_buf_split(), if use_dma_api is false,
vring_unmap_one_split_indirect() is still called once per indirect
descriptor even though it does nothing. So check use_dma_api first and
skip the loop entirely.
Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
Acked-by: Jason Wang <jasowang@redhat.com>
---
drivers/virtio/virtio_ring.c | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index c5310eaf8b46..f8754f1d64d3 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -774,8 +774,10 @@ static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head,
VRING_DESC_F_INDIRECT));
BUG_ON(len == 0 || len % sizeof(struct vring_desc));
- for (j = 0; j < len / sizeof(struct vring_desc); j++)
- vring_unmap_one_split_indirect(vq, &indir_desc[j]);
+ if (vq->use_dma_api) {
+ for (j = 0; j < len / sizeof(struct vring_desc); j++)
+ vring_unmap_one_split_indirect(vq, &indir_desc[j]);
+ }
kfree(indir_desc);
vq->split.desc_state[head].indir_desc = NULL;
--
2.32.0.3.g01195cf9f
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [PATCH vhost v12 02/10] virtio_ring: put mapping error check in vring_map_one_sg
2023-07-19 4:04 [PATCH vhost v12 00/10] virtio core prepares for AF_XDP Xuan Zhuo
2023-07-19 4:04 ` [PATCH vhost v12 01/10] virtio_ring: check use_dma_api before unmap desc for indirect Xuan Zhuo
@ 2023-07-19 4:04 ` Xuan Zhuo
2023-07-19 4:04 ` [PATCH vhost v12 03/10] virtio_ring: introduce virtqueue_set_dma_premapped() Xuan Zhuo
` (7 subsequent siblings)
9 siblings, 0 replies; 14+ messages in thread
From: Xuan Zhuo @ 2023-07-19 4:04 UTC (permalink / raw)
To: virtualization
Cc: Michael S. Tsirkin, Jason Wang, Xuan Zhuo, David S. Miller,
Eric Dumazet, Jakub Kicinski, Paolo Abeni, Alexei Starovoitov,
Daniel Borkmann, Jesper Dangaard Brouer, John Fastabend, netdev,
bpf, Christoph Hellwig
This patch moves the DMA address error check into vring_map_one_sg().
The benefits of doing this:
1. It removes one check of vq->use_dma_api.
2. It makes the callers of vring_map_one_sg() simpler: they no longer
need to call vring_mapping_error() on the returned address, which
simplifies subsequent code.
Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
Acked-by: Jason Wang <jasowang@redhat.com>
---
drivers/virtio/virtio_ring.c | 37 +++++++++++++++++++++---------------
1 file changed, 22 insertions(+), 15 deletions(-)
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index f8754f1d64d3..87d7ceeecdbd 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -355,9 +355,8 @@ static struct device *vring_dma_dev(const struct vring_virtqueue *vq)
}
/* Map one sg entry. */
-static dma_addr_t vring_map_one_sg(const struct vring_virtqueue *vq,
- struct scatterlist *sg,
- enum dma_data_direction direction)
+static int vring_map_one_sg(const struct vring_virtqueue *vq, struct scatterlist *sg,
+ enum dma_data_direction direction, dma_addr_t *addr)
{
if (!vq->use_dma_api) {
/*
@@ -366,7 +365,8 @@ static dma_addr_t vring_map_one_sg(const struct vring_virtqueue *vq,
* depending on the direction.
*/
kmsan_handle_dma(sg_page(sg), sg->offset, sg->length, direction);
- return (dma_addr_t)sg_phys(sg);
+ *addr = (dma_addr_t)sg_phys(sg);
+ return 0;
}
/*
@@ -374,9 +374,14 @@ static dma_addr_t vring_map_one_sg(const struct vring_virtqueue *vq,
* the way it expects (we don't guarantee that the scatterlist
* will exist for the lifetime of the mapping).
*/
- return dma_map_page(vring_dma_dev(vq),
+ *addr = dma_map_page(vring_dma_dev(vq),
sg_page(sg), sg->offset, sg->length,
direction);
+
+ if (dma_mapping_error(vring_dma_dev(vq), *addr))
+ return -ENOMEM;
+
+ return 0;
}
static dma_addr_t vring_map_single(const struct vring_virtqueue *vq,
@@ -588,8 +593,9 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,
for (n = 0; n < out_sgs; n++) {
for (sg = sgs[n]; sg; sg = sg_next(sg)) {
- dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_TO_DEVICE);
- if (vring_mapping_error(vq, addr))
+ dma_addr_t addr;
+
+ if (vring_map_one_sg(vq, sg, DMA_TO_DEVICE, &addr))
goto unmap_release;
prev = i;
@@ -603,8 +609,9 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,
}
for (; n < (out_sgs + in_sgs); n++) {
for (sg = sgs[n]; sg; sg = sg_next(sg)) {
- dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_FROM_DEVICE);
- if (vring_mapping_error(vq, addr))
+ dma_addr_t addr;
+
+ if (vring_map_one_sg(vq, sg, DMA_FROM_DEVICE, &addr))
goto unmap_release;
prev = i;
@@ -1281,9 +1288,8 @@ static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq,
for (n = 0; n < out_sgs + in_sgs; n++) {
for (sg = sgs[n]; sg; sg = sg_next(sg)) {
- addr = vring_map_one_sg(vq, sg, n < out_sgs ?
- DMA_TO_DEVICE : DMA_FROM_DEVICE);
- if (vring_mapping_error(vq, addr))
+ if (vring_map_one_sg(vq, sg, n < out_sgs ?
+ DMA_TO_DEVICE : DMA_FROM_DEVICE, &addr))
goto unmap_release;
desc[i].flags = cpu_to_le16(n < out_sgs ?
@@ -1428,9 +1434,10 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq,
c = 0;
for (n = 0; n < out_sgs + in_sgs; n++) {
for (sg = sgs[n]; sg; sg = sg_next(sg)) {
- dma_addr_t addr = vring_map_one_sg(vq, sg, n < out_sgs ?
- DMA_TO_DEVICE : DMA_FROM_DEVICE);
- if (vring_mapping_error(vq, addr))
+ dma_addr_t addr;
+
+ if (vring_map_one_sg(vq, sg, n < out_sgs ?
+ DMA_TO_DEVICE : DMA_FROM_DEVICE, &addr))
goto unmap_release;
flags = cpu_to_le16(vq->packed.avail_used_flags |
--
2.32.0.3.g01195cf9f
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [PATCH vhost v12 03/10] virtio_ring: introduce virtqueue_set_dma_premapped()
2023-07-19 4:04 [PATCH vhost v12 00/10] virtio core prepares for AF_XDP Xuan Zhuo
2023-07-19 4:04 ` [PATCH vhost v12 01/10] virtio_ring: check use_dma_api before unmap desc for indirect Xuan Zhuo
2023-07-19 4:04 ` [PATCH vhost v12 02/10] virtio_ring: put mapping error check in vring_map_one_sg Xuan Zhuo
@ 2023-07-19 4:04 ` Xuan Zhuo
2023-07-19 4:04 ` [PATCH vhost v12 04/10] virtio_ring: support add premapped buf Xuan Zhuo
` (6 subsequent siblings)
9 siblings, 0 replies; 14+ messages in thread
From: Xuan Zhuo @ 2023-07-19 4:04 UTC (permalink / raw)
To: virtualization
Cc: Michael S. Tsirkin, Jason Wang, Xuan Zhuo, David S. Miller,
Eric Dumazet, Jakub Kicinski, Paolo Abeni, Alexei Starovoitov,
Daniel Borkmann, Jesper Dangaard Brouer, John Fastabend, netdev,
bpf, Christoph Hellwig
This helper allows the driver to switch the vq to premapped mode.
In premapped mode, the virtio core does not do DMA mapping
internally.
This only works when use_dma_api is true. If use_dma_api is false,
DMA operations do not go through the DMA API, which is not the standard
way in the Linux kernel.
Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
---
drivers/virtio/virtio_ring.c | 53 ++++++++++++++++++++++++++++++++++++
include/linux/virtio.h | 2 ++
2 files changed, 55 insertions(+)
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 87d7ceeecdbd..8e81b01e0735 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -172,6 +172,9 @@ struct vring_virtqueue {
/* Host publishes avail event idx */
bool event;
+ /* Do DMA mapping by driver */
+ bool premapped;
+
/* Head of free buffer list. */
unsigned int free_head;
/* Number we've added since last sync. */
@@ -2061,6 +2064,7 @@ static struct virtqueue *vring_create_virtqueue_packed(
vq->packed_ring = true;
vq->dma_dev = dma_dev;
vq->use_dma_api = vring_use_dma_api(vdev);
+ vq->premapped = false;
vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) &&
!context;
@@ -2550,6 +2554,7 @@ static struct virtqueue *__vring_new_virtqueue(unsigned int index,
#endif
vq->dma_dev = dma_dev;
vq->use_dma_api = vring_use_dma_api(vdev);
+ vq->premapped = false;
vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) &&
!context;
@@ -2693,6 +2698,54 @@ int virtqueue_resize(struct virtqueue *_vq, u32 num,
}
EXPORT_SYMBOL_GPL(virtqueue_resize);
+/**
+ * virtqueue_set_dma_premapped - set the vring premapped mode
+ * @_vq: the struct virtqueue we're talking about.
+ *
+ * Enable the premapped mode of the vq.
+ *
+ * The vring in premapped mode does not do dma internally, so the driver must
+ * do dma mapping in advance. The driver must pass the dma_address through
+ * dma_address of scatterlist. When the driver got a used buffer from
+ * the vring, it has to unmap the dma address.
+ *
+ * This function must be called immediately after creating the vq, or after vq
+ * reset, and before adding any buffers to it.
+ *
+ * Caller must ensure we don't call this with other virtqueue operations
+ * at the same time (except where noted).
+ *
+ * Returns zero or a negative error.
+ * 0: success.
+ * -EINVAL: vring does not use the dma api, so we can not enable premapped mode.
+ */
+int virtqueue_set_dma_premapped(struct virtqueue *_vq)
+{
+ struct vring_virtqueue *vq = to_vvq(_vq);
+ u32 num;
+
+ START_USE(vq);
+
+ num = vq->packed_ring ? vq->packed.vring.num : vq->split.vring.num;
+
+ if (num != vq->vq.num_free) {
+ END_USE(vq);
+ return -EINVAL;
+ }
+
+ if (!vq->use_dma_api) {
+ END_USE(vq);
+ return -EINVAL;
+ }
+
+ vq->premapped = true;
+
+ END_USE(vq);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(virtqueue_set_dma_premapped);
+
/* Only available for split ring */
struct virtqueue *vring_new_virtqueue(unsigned int index,
unsigned int num,
diff --git a/include/linux/virtio.h b/include/linux/virtio.h
index de6041deee37..8add38038877 100644
--- a/include/linux/virtio.h
+++ b/include/linux/virtio.h
@@ -78,6 +78,8 @@ bool virtqueue_enable_cb(struct virtqueue *vq);
unsigned virtqueue_enable_cb_prepare(struct virtqueue *vq);
+int virtqueue_set_dma_premapped(struct virtqueue *_vq);
+
bool virtqueue_poll(struct virtqueue *vq, unsigned);
bool virtqueue_enable_cb_delayed(struct virtqueue *vq);
--
2.32.0.3.g01195cf9f
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [PATCH vhost v12 04/10] virtio_ring: support add premapped buf
2023-07-19 4:04 [PATCH vhost v12 00/10] virtio core prepares for AF_XDP Xuan Zhuo
` (2 preceding siblings ...)
2023-07-19 4:04 ` [PATCH vhost v12 03/10] virtio_ring: introduce virtqueue_set_dma_premapped() Xuan Zhuo
@ 2023-07-19 4:04 ` Xuan Zhuo
2023-07-19 4:04 ` [PATCH vhost v12 05/10] virtio_ring: introduce virtqueue_dma_dev() Xuan Zhuo
` (5 subsequent siblings)
9 siblings, 0 replies; 14+ messages in thread
From: Xuan Zhuo @ 2023-07-19 4:04 UTC (permalink / raw)
To: virtualization
Cc: Michael S. Tsirkin, Jason Wang, Xuan Zhuo, David S. Miller,
Eric Dumazet, Jakub Kicinski, Paolo Abeni, Alexei Starovoitov,
Daniel Borkmann, Jesper Dangaard Brouer, John Fastabend, netdev,
bpf, Christoph Hellwig
If the vq is in premapped mode, use sg_dma_address() directly.
Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
---
drivers/virtio/virtio_ring.c | 19 +++++++++++++++++--
1 file changed, 17 insertions(+), 2 deletions(-)
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 8e81b01e0735..f9f772e85a38 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -361,6 +361,11 @@ static struct device *vring_dma_dev(const struct vring_virtqueue *vq)
static int vring_map_one_sg(const struct vring_virtqueue *vq, struct scatterlist *sg,
enum dma_data_direction direction, dma_addr_t *addr)
{
+ if (vq->premapped) {
+ *addr = sg_dma_address(sg);
+ return 0;
+ }
+
if (!vq->use_dma_api) {
/*
* If DMA is not used, KMSAN doesn't know that the scatterlist
@@ -639,8 +644,12 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,
dma_addr_t addr = vring_map_single(
vq, desc, total_sg * sizeof(struct vring_desc),
DMA_TO_DEVICE);
- if (vring_mapping_error(vq, addr))
+ if (vring_mapping_error(vq, addr)) {
+ if (vq->premapped)
+ goto free_indirect;
+
goto unmap_release;
+ }
virtqueue_add_desc_split(_vq, vq->split.vring.desc,
head, addr,
@@ -706,6 +715,7 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,
i = vring_unmap_one_split(vq, i);
}
+free_indirect:
if (indirect)
kfree(desc);
@@ -1307,8 +1317,12 @@ static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq,
addr = vring_map_single(vq, desc,
total_sg * sizeof(struct vring_packed_desc),
DMA_TO_DEVICE);
- if (vring_mapping_error(vq, addr))
+ if (vring_mapping_error(vq, addr)) {
+ if (vq->premapped)
+ goto free_desc;
+
goto unmap_release;
+ }
vq->packed.vring.desc[head].addr = cpu_to_le64(addr);
vq->packed.vring.desc[head].len = cpu_to_le32(total_sg *
@@ -1366,6 +1380,7 @@ static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq,
for (i = 0; i < err_idx; i++)
vring_unmap_desc_packed(vq, &desc[i]);
+free_desc:
kfree(desc);
END_USE(vq);
--
2.32.0.3.g01195cf9f
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [PATCH vhost v12 05/10] virtio_ring: introduce virtqueue_dma_dev()
2023-07-19 4:04 [PATCH vhost v12 00/10] virtio core prepares for AF_XDP Xuan Zhuo
` (3 preceding siblings ...)
2023-07-19 4:04 ` [PATCH vhost v12 04/10] virtio_ring: support add premapped buf Xuan Zhuo
@ 2023-07-19 4:04 ` Xuan Zhuo
2023-07-19 4:04 ` [PATCH vhost v12 06/10] virtio_ring: skip unmap for premapped Xuan Zhuo
` (4 subsequent siblings)
9 siblings, 0 replies; 14+ messages in thread
From: Xuan Zhuo @ 2023-07-19 4:04 UTC (permalink / raw)
To: virtualization
Cc: Michael S. Tsirkin, Jason Wang, Xuan Zhuo, David S. Miller,
Eric Dumazet, Jakub Kicinski, Paolo Abeni, Alexei Starovoitov,
Daniel Borkmann, Jesper Dangaard Brouer, John Fastabend, netdev,
bpf, Christoph Hellwig
Add virtqueue_dma_dev() to get the DMA device for virtio, so that the
caller can do DMA operations in advance. The purpose is to keep memory
mapped across multiple add/get buf operations.
Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
Acked-by: Jason Wang <jasowang@redhat.com>
---
drivers/virtio/virtio_ring.c | 17 +++++++++++++++++
include/linux/virtio.h | 2 ++
2 files changed, 19 insertions(+)
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index f9f772e85a38..bb3d73d221cd 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -2265,6 +2265,23 @@ int virtqueue_add_inbuf_ctx(struct virtqueue *vq,
}
EXPORT_SYMBOL_GPL(virtqueue_add_inbuf_ctx);
+/**
+ * virtqueue_dma_dev - get the dma dev
+ * @_vq: the struct virtqueue we're talking about.
+ *
+ * Returns the dma dev. That can be used for the DMA API.
+ */
+struct device *virtqueue_dma_dev(struct virtqueue *_vq)
+{
+ struct vring_virtqueue *vq = to_vvq(_vq);
+
+ if (vq->use_dma_api)
+ return vring_dma_dev(vq);
+ else
+ return NULL;
+}
+EXPORT_SYMBOL_GPL(virtqueue_dma_dev);
+
/**
* virtqueue_kick_prepare - first half of split virtqueue_kick call.
* @_vq: the struct virtqueue
diff --git a/include/linux/virtio.h b/include/linux/virtio.h
index 8add38038877..bd55a05eec04 100644
--- a/include/linux/virtio.h
+++ b/include/linux/virtio.h
@@ -61,6 +61,8 @@ int virtqueue_add_sgs(struct virtqueue *vq,
void *data,
gfp_t gfp);
+struct device *virtqueue_dma_dev(struct virtqueue *vq);
+
bool virtqueue_kick(struct virtqueue *vq);
bool virtqueue_kick_prepare(struct virtqueue *vq);
--
2.32.0.3.g01195cf9f
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [PATCH vhost v12 06/10] virtio_ring: skip unmap for premapped
2023-07-19 4:04 [PATCH vhost v12 00/10] virtio core prepares for AF_XDP Xuan Zhuo
` (4 preceding siblings ...)
2023-07-19 4:04 ` [PATCH vhost v12 05/10] virtio_ring: introduce virtqueue_dma_dev() Xuan Zhuo
@ 2023-07-19 4:04 ` Xuan Zhuo
2023-07-19 4:04 ` [PATCH vhost v12 07/10] virtio_ring: correct the expression of the description of virtqueue_resize() Xuan Zhuo
` (3 subsequent siblings)
9 siblings, 0 replies; 14+ messages in thread
From: Xuan Zhuo @ 2023-07-19 4:04 UTC (permalink / raw)
To: virtualization
Cc: Michael S. Tsirkin, Jason Wang, Xuan Zhuo, David S. Miller,
Eric Dumazet, Jakub Kicinski, Paolo Abeni, Alexei Starovoitov,
Daniel Borkmann, Jesper Dangaard Brouer, John Fastabend, netdev,
bpf, Christoph Hellwig
Now there is a new case in which we skip the DMA unmap: vq->premapped
is true. We can no longer rely on use_dma_api alone to decide whether to
skip the unmap. For convenience, introduce "do_unmap". By default, it
is the same as use_dma_api. If the driver enables premapped mode,
then do_unmap is false.
So whenever do_unmap is false, we skip the DMA unmap operation for the
addr of a desc.
Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
---
drivers/virtio/virtio_ring.c | 42 ++++++++++++++++++++++++------------
1 file changed, 28 insertions(+), 14 deletions(-)
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index bb3d73d221cd..7973814b6e31 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -175,6 +175,11 @@ struct vring_virtqueue {
/* Do DMA mapping by driver */
bool premapped;
+ /* Whether to unmap the addr of a desc. This is true only when
+ * premapped is false and use_dma_api is true.
+ */
+ bool do_unmap;
+
/* Head of free buffer list. */
unsigned int free_head;
/* Number we've added since last sync. */
@@ -440,7 +445,7 @@ static void vring_unmap_one_split_indirect(const struct vring_virtqueue *vq,
{
u16 flags;
- if (!vq->use_dma_api)
+ if (!vq->do_unmap)
return;
flags = virtio16_to_cpu(vq->vq.vdev, desc->flags);
@@ -458,18 +463,21 @@ static unsigned int vring_unmap_one_split(const struct vring_virtqueue *vq,
struct vring_desc_extra *extra = vq->split.desc_extra;
u16 flags;
- if (!vq->use_dma_api)
- goto out;
-
flags = extra[i].flags;
if (flags & VRING_DESC_F_INDIRECT) {
+ if (!vq->use_dma_api)
+ goto out;
+
dma_unmap_single(vring_dma_dev(vq),
extra[i].addr,
extra[i].len,
(flags & VRING_DESC_F_WRITE) ?
DMA_FROM_DEVICE : DMA_TO_DEVICE);
} else {
+ if (!vq->do_unmap)
+ goto out;
+
dma_unmap_page(vring_dma_dev(vq),
extra[i].addr,
extra[i].len,
@@ -635,7 +643,7 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,
}
/* Last one doesn't continue. */
desc[prev].flags &= cpu_to_virtio16(_vq->vdev, ~VRING_DESC_F_NEXT);
- if (!indirect && vq->use_dma_api)
+ if (!indirect && vq->do_unmap)
vq->split.desc_extra[prev & (vq->split.vring.num - 1)].flags &=
~VRING_DESC_F_NEXT;
@@ -794,7 +802,7 @@ static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head,
VRING_DESC_F_INDIRECT));
BUG_ON(len == 0 || len % sizeof(struct vring_desc));
- if (vq->use_dma_api) {
+ if (vq->do_unmap) {
for (j = 0; j < len / sizeof(struct vring_desc); j++)
vring_unmap_one_split_indirect(vq, &indir_desc[j]);
}
@@ -1217,17 +1225,20 @@ static void vring_unmap_extra_packed(const struct vring_virtqueue *vq,
{
u16 flags;
- if (!vq->use_dma_api)
- return;
-
flags = extra->flags;
if (flags & VRING_DESC_F_INDIRECT) {
+ if (!vq->use_dma_api)
+ return;
+
dma_unmap_single(vring_dma_dev(vq),
extra->addr, extra->len,
(flags & VRING_DESC_F_WRITE) ?
DMA_FROM_DEVICE : DMA_TO_DEVICE);
} else {
+ if (!vq->do_unmap)
+ return;
+
dma_unmap_page(vring_dma_dev(vq),
extra->addr, extra->len,
(flags & VRING_DESC_F_WRITE) ?
@@ -1240,7 +1251,7 @@ static void vring_unmap_desc_packed(const struct vring_virtqueue *vq,
{
u16 flags;
- if (!vq->use_dma_api)
+ if (!vq->do_unmap)
return;
flags = le16_to_cpu(desc->flags);
@@ -1329,7 +1340,7 @@ static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq,
sizeof(struct vring_packed_desc));
vq->packed.vring.desc[head].id = cpu_to_le16(id);
- if (vq->use_dma_api) {
+ if (vq->do_unmap) {
vq->packed.desc_extra[id].addr = addr;
vq->packed.desc_extra[id].len = total_sg *
sizeof(struct vring_packed_desc);
@@ -1470,7 +1481,7 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq,
desc[i].len = cpu_to_le32(sg->length);
desc[i].id = cpu_to_le16(id);
- if (unlikely(vq->use_dma_api)) {
+ if (unlikely(vq->do_unmap)) {
vq->packed.desc_extra[curr].addr = addr;
vq->packed.desc_extra[curr].len = sg->length;
vq->packed.desc_extra[curr].flags =
@@ -1604,7 +1615,7 @@ static void detach_buf_packed(struct vring_virtqueue *vq,
vq->free_head = id;
vq->vq.num_free += state->num;
- if (unlikely(vq->use_dma_api)) {
+ if (unlikely(vq->do_unmap)) {
curr = id;
for (i = 0; i < state->num; i++) {
vring_unmap_extra_packed(vq,
@@ -1621,7 +1632,7 @@ static void detach_buf_packed(struct vring_virtqueue *vq,
if (!desc)
return;
- if (vq->use_dma_api) {
+ if (vq->do_unmap) {
len = vq->packed.desc_extra[id].len;
for (i = 0; i < len / sizeof(struct vring_packed_desc);
i++)
@@ -2080,6 +2091,7 @@ static struct virtqueue *vring_create_virtqueue_packed(
vq->dma_dev = dma_dev;
vq->use_dma_api = vring_use_dma_api(vdev);
vq->premapped = false;
+ vq->do_unmap = vq->use_dma_api;
vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) &&
!context;
@@ -2587,6 +2599,7 @@ static struct virtqueue *__vring_new_virtqueue(unsigned int index,
vq->dma_dev = dma_dev;
vq->use_dma_api = vring_use_dma_api(vdev);
vq->premapped = false;
+ vq->do_unmap = vq->use_dma_api;
vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) &&
!context;
@@ -2771,6 +2784,7 @@ int virtqueue_set_dma_premapped(struct virtqueue *_vq)
}
vq->premapped = true;
+ vq->do_unmap = false;
END_USE(vq);
--
2.32.0.3.g01195cf9f
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [PATCH vhost v12 07/10] virtio_ring: correct the expression of the description of virtqueue_resize()
2023-07-19 4:04 [PATCH vhost v12 00/10] virtio core prepares for AF_XDP Xuan Zhuo
` (5 preceding siblings ...)
2023-07-19 4:04 ` [PATCH vhost v12 06/10] virtio_ring: skip unmap for premapped Xuan Zhuo
@ 2023-07-19 4:04 ` Xuan Zhuo
2023-07-19 4:04 ` [PATCH vhost v12 08/10] virtio_ring: separate the logic of reset/enable from virtqueue_resize Xuan Zhuo
` (2 subsequent siblings)
9 siblings, 0 replies; 14+ messages in thread
From: Xuan Zhuo @ 2023-07-19 4:04 UTC (permalink / raw)
To: virtualization
Cc: Michael S. Tsirkin, Jason Wang, Xuan Zhuo, David S. Miller,
Eric Dumazet, Jakub Kicinski, Paolo Abeni, Alexei Starovoitov,
Daniel Borkmann, Jesper Dangaard Brouer, John Fastabend, netdev,
bpf, Christoph Hellwig
Replace "useless" with the more accurate "unused".
Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
Acked-by: Jason Wang <jasowang@redhat.com>
---
drivers/virtio/virtio_ring.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 7973814b6e31..fd9ae020e0a3 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -2678,7 +2678,7 @@ EXPORT_SYMBOL_GPL(vring_create_virtqueue_dma);
* virtqueue_resize - resize the vring of vq
* @_vq: the struct virtqueue we're talking about.
* @num: new ring num
- * @recycle: callback for recycle the useless buffer
+ * @recycle: callback to recycle unused buffers
*
* When it is really necessary to create a new vring, it will set the current vq
* into the reset state. Then call the passed callback to recycle the buffer
--
2.32.0.3.g01195cf9f
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [PATCH vhost v12 08/10] virtio_ring: separate the logic of reset/enable from virtqueue_resize
2023-07-19 4:04 [PATCH vhost v12 00/10] virtio core prepares for AF_XDP Xuan Zhuo
` (6 preceding siblings ...)
2023-07-19 4:04 ` [PATCH vhost v12 07/10] virtio_ring: correct the expression of the description of virtqueue_resize() Xuan Zhuo
@ 2023-07-19 4:04 ` Xuan Zhuo
2023-07-19 4:04 ` [PATCH vhost v12 09/10] virtio_ring: introduce virtqueue_reset() Xuan Zhuo
2023-07-19 4:04 ` [PATCH vhost v12 10/10] virtio_net: merge dma operations when filling mergeable buffers Xuan Zhuo
9 siblings, 0 replies; 14+ messages in thread
From: Xuan Zhuo @ 2023-07-19 4:04 UTC (permalink / raw)
To: virtualization
Cc: Michael S. Tsirkin, Jason Wang, Xuan Zhuo, David S. Miller,
Eric Dumazet, Jakub Kicinski, Paolo Abeni, Alexei Starovoitov,
Daniel Borkmann, Jesper Dangaard Brouer, John Fastabend, netdev,
bpf, Christoph Hellwig
The subsequent reset function will reuse this logic.
Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
Acked-by: Jason Wang <jasowang@redhat.com>
---
drivers/virtio/virtio_ring.c | 58 ++++++++++++++++++++++++------------
1 file changed, 39 insertions(+), 19 deletions(-)
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index fd9ae020e0a3..23172d98e48e 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -2152,6 +2152,43 @@ static int virtqueue_resize_packed(struct virtqueue *_vq, u32 num)
return -ENOMEM;
}
+static int virtqueue_disable_and_recycle(struct virtqueue *_vq,
+ void (*recycle)(struct virtqueue *vq, void *buf))
+{
+ struct vring_virtqueue *vq = to_vvq(_vq);
+ struct virtio_device *vdev = vq->vq.vdev;
+ void *buf;
+ int err;
+
+ if (!vq->we_own_ring)
+ return -EPERM;
+
+ if (!vdev->config->disable_vq_and_reset)
+ return -ENOENT;
+
+ if (!vdev->config->enable_vq_after_reset)
+ return -ENOENT;
+
+ err = vdev->config->disable_vq_and_reset(_vq);
+ if (err)
+ return err;
+
+ while ((buf = virtqueue_detach_unused_buf(_vq)) != NULL)
+ recycle(_vq, buf);
+
+ return 0;
+}
+
+static int virtqueue_enable_after_reset(struct virtqueue *_vq)
+{
+ struct vring_virtqueue *vq = to_vvq(_vq);
+ struct virtio_device *vdev = vq->vq.vdev;
+
+ if (vdev->config->enable_vq_after_reset(_vq))
+ return -EBUSY;
+
+ return 0;
+}
/*
* Generic functions and exported symbols.
@@ -2702,13 +2739,8 @@ int virtqueue_resize(struct virtqueue *_vq, u32 num,
void (*recycle)(struct virtqueue *vq, void *buf))
{
struct vring_virtqueue *vq = to_vvq(_vq);
- struct virtio_device *vdev = vq->vq.vdev;
- void *buf;
int err;
- if (!vq->we_own_ring)
- return -EPERM;
-
if (num > vq->vq.num_max)
return -E2BIG;
@@ -2718,28 +2750,16 @@ int virtqueue_resize(struct virtqueue *_vq, u32 num,
if ((vq->packed_ring ? vq->packed.vring.num : vq->split.vring.num) == num)
return 0;
- if (!vdev->config->disable_vq_and_reset)
- return -ENOENT;
-
- if (!vdev->config->enable_vq_after_reset)
- return -ENOENT;
-
- err = vdev->config->disable_vq_and_reset(_vq);
+ err = virtqueue_disable_and_recycle(_vq, recycle);
if (err)
return err;
- while ((buf = virtqueue_detach_unused_buf(_vq)) != NULL)
- recycle(_vq, buf);
-
if (vq->packed_ring)
err = virtqueue_resize_packed(_vq, num);
else
err = virtqueue_resize_split(_vq, num);
- if (vdev->config->enable_vq_after_reset(_vq))
- return -EBUSY;
-
- return err;
+ return virtqueue_enable_after_reset(_vq);
}
EXPORT_SYMBOL_GPL(virtqueue_resize);
--
2.32.0.3.g01195cf9f
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [PATCH vhost v12 09/10] virtio_ring: introduce virtqueue_reset()
2023-07-19 4:04 [PATCH vhost v12 00/10] virtio core prepares for AF_XDP Xuan Zhuo
` (7 preceding siblings ...)
2023-07-19 4:04 ` [PATCH vhost v12 08/10] virtio_ring: separate the logic of reset/enable from virtqueue_resize Xuan Zhuo
@ 2023-07-19 4:04 ` Xuan Zhuo
2023-07-19 4:04 ` [PATCH vhost v12 10/10] virtio_net: merge dma operations when filling mergeable buffers Xuan Zhuo
9 siblings, 0 replies; 14+ messages in thread
From: Xuan Zhuo @ 2023-07-19 4:04 UTC (permalink / raw)
To: virtualization
Cc: Michael S. Tsirkin, Jason Wang, Xuan Zhuo, David S. Miller,
Eric Dumazet, Jakub Kicinski, Paolo Abeni, Alexei Starovoitov,
Daniel Borkmann, Jesper Dangaard Brouer, John Fastabend, netdev,
bpf, Christoph Hellwig
Introduce virtqueue_reset() to release all buffers inside the vq.
Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
Acked-by: Jason Wang <jasowang@redhat.com>
---
drivers/virtio/virtio_ring.c | 33 +++++++++++++++++++++++++++++++++
include/linux/virtio.h | 2 ++
2 files changed, 35 insertions(+)
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 23172d98e48e..639c20b19e06 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -2812,6 +2812,39 @@ int virtqueue_set_dma_premapped(struct virtqueue *_vq)
}
EXPORT_SYMBOL_GPL(virtqueue_set_dma_premapped);
+/**
+ * virtqueue_reset - detach and recycle all unused buffers
+ * @_vq: the struct virtqueue we're talking about.
+ * @recycle: callback to recycle unused buffers
+ *
+ * Caller must ensure we don't call this with other virtqueue operations
+ * at the same time (except where noted).
+ *
+ * Returns zero or a negative error.
+ * 0: success.
+ * -EBUSY: Failed to sync with device, vq may not work properly
+ * -ENOENT: Transport or device not supported
+ * -EPERM: Operation not permitted
+ */
+int virtqueue_reset(struct virtqueue *_vq,
+ void (*recycle)(struct virtqueue *vq, void *buf))
+{
+ struct vring_virtqueue *vq = to_vvq(_vq);
+ int err;
+
+ err = virtqueue_disable_and_recycle(_vq, recycle);
+ if (err)
+ return err;
+
+ if (vq->packed_ring)
+ virtqueue_reinit_packed(vq);
+ else
+ virtqueue_reinit_split(vq);
+
+ return virtqueue_enable_after_reset(_vq);
+}
+EXPORT_SYMBOL_GPL(virtqueue_reset);
+
/* Only available for split ring */
struct virtqueue *vring_new_virtqueue(unsigned int index,
unsigned int num,
diff --git a/include/linux/virtio.h b/include/linux/virtio.h
index bd55a05eec04..49a640e0a6f4 100644
--- a/include/linux/virtio.h
+++ b/include/linux/virtio.h
@@ -99,6 +99,8 @@ dma_addr_t virtqueue_get_used_addr(const struct virtqueue *vq);
int virtqueue_resize(struct virtqueue *vq, u32 num,
void (*recycle)(struct virtqueue *vq, void *buf));
+int virtqueue_reset(struct virtqueue *vq,
+ void (*recycle)(struct virtqueue *vq, void *buf));
/**
* struct virtio_device - representation of a device using virtio
--
2.32.0.3.g01195cf9f
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [PATCH vhost v12 10/10] virtio_net: merge dma operations when filling mergeable buffers
2023-07-19 4:04 [PATCH vhost v12 00/10] virtio core prepares for AF_XDP Xuan Zhuo
` (8 preceding siblings ...)
2023-07-19 4:04 ` [PATCH vhost v12 09/10] virtio_ring: introduce virtqueue_reset() Xuan Zhuo
@ 2023-07-19 4:04 ` Xuan Zhuo
2023-07-19 10:33 ` kernel test robot
9 siblings, 1 reply; 14+ messages in thread
From: Xuan Zhuo @ 2023-07-19 4:04 UTC (permalink / raw)
To: virtualization
Cc: Michael S. Tsirkin, Jason Wang, Xuan Zhuo, David S. Miller,
Eric Dumazet, Jakub Kicinski, Paolo Abeni, Alexei Starovoitov,
Daniel Borkmann, Jesper Dangaard Brouer, John Fastabend, netdev,
bpf, Christoph Hellwig
Currently, the virtio core performs a DMA operation for each
buffer, even though the same page may be operated on multiple times.
With this patch, the driver does the DMA operations and manages the DMA
addresses itself, based on the premapped feature of the virtio core.
This way, we perform only one DMA operation for the pages of the
alloc frag. This is beneficial for devices behind an IOMMU.
kernel command line: intel_iommu=on iommu.passthrough=0
| strict=0 | strict=1
Before | 775496pps | 428614pps
After | 1109316pps | 742853pps
Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
---
drivers/net/virtio_net.c | 225 ++++++++++++++++++++++++++++++++++-----
1 file changed, 199 insertions(+), 26 deletions(-)
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 486b5849033d..496344468e7c 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -126,6 +126,14 @@ static const struct virtnet_stat_desc virtnet_rq_stats_desc[] = {
#define VIRTNET_SQ_STATS_LEN ARRAY_SIZE(virtnet_sq_stats_desc)
#define VIRTNET_RQ_STATS_LEN ARRAY_SIZE(virtnet_rq_stats_desc)
+/* The dma information of pages allocated at a time. */
+struct virtnet_rq_dma {
+ dma_addr_t addr;
+ u32 ref;
+ u16 len;
+ u16 need_sync;
+};
+
/* Internal representation of a send virtqueue */
struct send_queue {
/* Virtqueue associated with this send _queue */
@@ -175,6 +183,12 @@ struct receive_queue {
char name[16];
struct xdp_rxq_info xdp_rxq;
+
+ /* Record the last dma info to free after new pages is allocated. */
+ struct virtnet_rq_dma *last_dma;
+
+ /* Do dma by self */
+ bool do_dma;
};
/* This structure can contain rss message with maximum settings for indirection table and keysize
@@ -549,6 +563,151 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
return skb;
}
+static void virtnet_rq_unmap(struct receive_queue *rq, void *buf, u32 len)
+{
+ struct page *page = virt_to_head_page(buf);
+ struct virtnet_rq_dma *dma;
+ struct device *dev;
+ void *head;
+ int offset;
+
+ head = page_address(page);
+
+ dma = head;
+
+ --dma->ref;
+
+ if (dma->ref) {
+ if (dma->need_sync && len) {
+ dev = virtqueue_dma_dev(rq->vq);
+
+ offset = buf - (head + sizeof(*dma));
+
+ dma_sync_single_range_for_cpu(dev, dma->addr, offset, len, DMA_FROM_DEVICE);
+ }
+
+ return;
+ }
+
+ dev = virtqueue_dma_dev(rq->vq);
+
+ dma_unmap_single(dev, dma->addr, dma->len, DMA_FROM_DEVICE);
+ put_page(page);
+}
+
+static void *virtnet_rq_get_buf(struct receive_queue *rq, u32 *len, void **ctx)
+{
+ void *buf;
+
+ buf = virtqueue_get_buf_ctx(rq->vq, len, ctx);
+ if (buf && rq->do_dma)
+ virtnet_rq_unmap(rq, buf, *len);
+
+ return buf;
+}
+
+static void *virtnet_rq_detach_unused_buf(struct receive_queue *rq)
+{
+ void *buf;
+
+ buf = virtqueue_detach_unused_buf(rq->vq);
+ if (buf && rq->do_dma)
+ virtnet_rq_unmap(rq, buf, 0);
+
+ return buf;
+}
+
+static void virtnet_rq_init_one_sg(struct receive_queue *rq, void *addr, u32 len)
+{
+ if (rq->do_dma) {
+ sg_init_table(rq->sg, 1);
+ rq->sg[0].dma_address = (dma_addr_t)addr;
+ rq->sg[0].length = len;
+ } else {
+ sg_init_one(rq->sg, addr, len);
+ }
+}
+
+static void *virtnet_rq_alloc(struct receive_queue *rq, u32 size,
+ void **sg_addr, gfp_t gfp)
+{
+ struct page_frag *alloc_frag = &rq->alloc_frag;
+ struct virtnet_rq_dma *dma;
+ struct device *dev;
+ void *buf, *head;
+ dma_addr_t addr;
+
+ if (unlikely(!skb_page_frag_refill(size, alloc_frag, gfp)))
+ return NULL;
+
+ head = (char *)page_address(alloc_frag->page);
+
+ if (rq->do_dma) {
+ dma = head;
+
+ /* new pages */
+ if (!alloc_frag->offset) {
+ if (rq->last_dma) {
+ /* Now, the new page is allocated, the last dma
+ * will not be used. So the dma can be unmapped
+ * if the ref is 0.
+ */
+ virtnet_rq_unmap(rq, rq->last_dma, 0);
+ rq->last_dma = NULL;
+ }
+
+ dev = virtqueue_dma_dev(rq->vq);
+
+ dma->len = alloc_frag->size - sizeof(*dma);
+
+ addr = dma_map_single_attrs(dev, dma + 1, dma->len, DMA_FROM_DEVICE, 0);
+ if (addr == DMA_MAPPING_ERROR)
+ return NULL;
+
+ dma->addr = addr;
+ dma->need_sync = dma_need_sync(dev, addr);
+
+ /* Add a reference to dma to prevent the entire dma from
+ * being released during error handling. This reference
+ * will be freed after the pages are no longer used.
+ */
+ get_page(alloc_frag->page);
+ dma->ref = 1;
+ alloc_frag->offset = sizeof(*dma);
+
+ rq->last_dma = dma;
+ }
+
+ ++dma->ref;
+ *sg_addr = (void *)(dma->addr + alloc_frag->offset - sizeof(*dma));
+ } else {
+ *sg_addr = head + alloc_frag->offset;
+ }
+
+ buf = head + alloc_frag->offset;
+
+ get_page(alloc_frag->page);
+ alloc_frag->offset += size;
+
+ return buf;
+}
+
+static void virtnet_rq_set_premapped(struct virtnet_info *vi)
+{
+ int i;
+
+ /* disable for big mode */
+ if (!vi->mergeable_rx_bufs && vi->big_packets)
+ return;
+
+ for (i = 0; i < vi->max_queue_pairs; i++) {
+ if (virtqueue_set_dma_premapped(vi->rq[i].vq))
+ continue;
+
+ vi->rq[i].do_dma = true;
+ }
+}
+
static void free_old_xmit_skbs(struct send_queue *sq, bool in_napi)
{
unsigned int len;
@@ -835,7 +994,7 @@ static struct page *xdp_linearize_page(struct receive_queue *rq,
void *buf;
int off;
- buf = virtqueue_get_buf(rq->vq, &buflen);
+ buf = virtnet_rq_get_buf(rq, &buflen, NULL);
if (unlikely(!buf))
goto err_buf;
@@ -1126,7 +1285,7 @@ static int virtnet_build_xdp_buff_mrg(struct net_device *dev,
return -EINVAL;
while (--*num_buf > 0) {
- buf = virtqueue_get_buf_ctx(rq->vq, &len, &ctx);
+ buf = virtnet_rq_get_buf(rq, &len, &ctx);
if (unlikely(!buf)) {
pr_debug("%s: rx error: %d buffers out of %d missing\n",
dev->name, *num_buf,
@@ -1351,7 +1510,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
while (--num_buf) {
int num_skb_frags;
- buf = virtqueue_get_buf_ctx(rq->vq, &len, &ctx);
+ buf = virtnet_rq_get_buf(rq, &len, &ctx);
if (unlikely(!buf)) {
pr_debug("%s: rx error: %d buffers out of %d missing\n",
dev->name, num_buf,
@@ -1414,7 +1573,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
err_skb:
put_page(page);
while (num_buf-- > 1) {
- buf = virtqueue_get_buf(rq->vq, &len);
+ buf = virtnet_rq_get_buf(rq, &len, NULL);
if (unlikely(!buf)) {
pr_debug("%s: rx error: %d buffers missing\n",
dev->name, num_buf);
@@ -1524,26 +1683,30 @@ static void receive_buf(struct virtnet_info *vi, struct receive_queue *rq,
static int add_recvbuf_small(struct virtnet_info *vi, struct receive_queue *rq,
gfp_t gfp)
{
- struct page_frag *alloc_frag = &rq->alloc_frag;
char *buf;
unsigned int xdp_headroom = virtnet_get_headroom(vi);
void *ctx = (void *)(unsigned long)xdp_headroom;
int len = vi->hdr_len + VIRTNET_RX_PAD + GOOD_PACKET_LEN + xdp_headroom;
+ void *sg_addr;
int err;
len = SKB_DATA_ALIGN(len) +
SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
- if (unlikely(!skb_page_frag_refill(len, alloc_frag, gfp)))
+
+ buf = virtnet_rq_alloc(rq, len, &sg_addr, gfp);
+ if (unlikely(!buf))
return -ENOMEM;
- buf = (char *)page_address(alloc_frag->page) + alloc_frag->offset;
- get_page(alloc_frag->page);
- alloc_frag->offset += len;
- sg_init_one(rq->sg, buf + VIRTNET_RX_PAD + xdp_headroom,
- vi->hdr_len + GOOD_PACKET_LEN);
+ virtnet_rq_init_one_sg(rq, sg_addr + VIRTNET_RX_PAD + xdp_headroom,
+ vi->hdr_len + GOOD_PACKET_LEN);
+
err = virtqueue_add_inbuf_ctx(rq->vq, rq->sg, 1, buf, ctx, gfp);
- if (err < 0)
+ if (err < 0) {
+ if (rq->do_dma)
+ virtnet_rq_unmap(rq, buf, 0);
put_page(virt_to_head_page(buf));
+ }
+
return err;
}
@@ -1620,23 +1783,23 @@ static int add_recvbuf_mergeable(struct virtnet_info *vi,
unsigned int headroom = virtnet_get_headroom(vi);
unsigned int tailroom = headroom ? sizeof(struct skb_shared_info) : 0;
unsigned int room = SKB_DATA_ALIGN(headroom + tailroom);
- char *buf;
+ unsigned int len, hole;
+ void *sg_addr;
void *ctx;
+ char *buf;
int err;
- unsigned int len, hole;
/* Extra tailroom is needed to satisfy XDP's assumption. This
* means rx frags coalescing won't work, but consider we've
* disabled GSO for XDP, it won't be a big issue.
*/
len = get_mergeable_buf_len(rq, &rq->mrg_avg_pkt_len, room);
- if (unlikely(!skb_page_frag_refill(len + room, alloc_frag, gfp)))
+
+ buf = virtnet_rq_alloc(rq, len + room, &sg_addr, gfp);
+ if (unlikely(!buf))
return -ENOMEM;
- buf = (char *)page_address(alloc_frag->page) + alloc_frag->offset;
buf += headroom; /* advance address leaving hole at front of pkt */
- get_page(alloc_frag->page);
- alloc_frag->offset += len + room;
hole = alloc_frag->size - alloc_frag->offset;
if (hole < len + room) {
/* To avoid internal fragmentation, if there is very likely not
@@ -1650,11 +1813,15 @@ static int add_recvbuf_mergeable(struct virtnet_info *vi,
alloc_frag->offset += hole;
}
- sg_init_one(rq->sg, buf, len);
+ virtnet_rq_init_one_sg(rq, sg_addr + headroom, len);
+
ctx = mergeable_len_to_ctx(len + room, headroom);
err = virtqueue_add_inbuf_ctx(rq->vq, rq->sg, 1, buf, ctx, gfp);
- if (err < 0)
+ if (err < 0) {
+ if (rq->do_dma)
+ virtnet_rq_unmap(rq, buf, 0);
put_page(virt_to_head_page(buf));
+ }
return err;
}
@@ -1775,13 +1942,13 @@ static int virtnet_receive(struct receive_queue *rq, int budget,
void *ctx;
while (stats.packets < budget &&
- (buf = virtqueue_get_buf_ctx(rq->vq, &len, &ctx))) {
+ (buf = virtnet_rq_get_buf(rq, &len, &ctx))) {
receive_buf(vi, rq, buf, len, ctx, xdp_xmit, &stats);
stats.packets++;
}
} else {
while (stats.packets < budget &&
- (buf = virtqueue_get_buf(rq->vq, &len)) != NULL) {
+ (buf = virtnet_rq_get_buf(rq, &len, NULL)) != NULL) {
receive_buf(vi, rq, buf, len, NULL, xdp_xmit, &stats);
stats.packets++;
}
@@ -3553,8 +3720,11 @@ static void free_receive_page_frags(struct virtnet_info *vi)
{
int i;
for (i = 0; i < vi->max_queue_pairs; i++)
- if (vi->rq[i].alloc_frag.page)
+ if (vi->rq[i].alloc_frag.page) {
+ if (vi->rq[i].do_dma && vi->rq[i].last_dma)
+ virtnet_rq_unmap(&vi->rq[i], vi->rq[i].last_dma, 0);
put_page(vi->rq[i].alloc_frag.page);
+ }
}
static void virtnet_sq_free_unused_buf(struct virtqueue *vq, void *buf)
@@ -3591,9 +3761,10 @@ static void free_unused_bufs(struct virtnet_info *vi)
}
for (i = 0; i < vi->max_queue_pairs; i++) {
- struct virtqueue *vq = vi->rq[i].vq;
- while ((buf = virtqueue_detach_unused_buf(vq)) != NULL)
- virtnet_rq_free_unused_buf(vq, buf);
+ struct receive_queue *rq = &vi->rq[i];
+
+ while ((buf = virtnet_rq_detach_unused_buf(rq)) != NULL)
+ virtnet_rq_free_unused_buf(rq->vq, buf);
cond_resched();
}
}
@@ -3767,6 +3938,8 @@ static int init_vqs(struct virtnet_info *vi)
if (ret)
goto err_free;
+ virtnet_rq_set_premapped(vi);
+
cpus_read_lock();
virtnet_set_affinity(vi);
cpus_read_unlock();
--
2.32.0.3.g01195cf9f
^ permalink raw reply related [flat|nested] 14+ messages in thread
* Re: [PATCH vhost v12 10/10] virtio_net: merge dma operations when filling mergeable buffers
2023-07-19 4:04 ` [PATCH vhost v12 10/10] virtio_net: merge dma operations when filling mergeable buffers Xuan Zhuo
@ 2023-07-19 10:33 ` kernel test robot
2023-07-19 11:05 ` Michael S. Tsirkin
0 siblings, 1 reply; 14+ messages in thread
From: kernel test robot @ 2023-07-19 10:33 UTC (permalink / raw)
To: Xuan Zhuo, virtualization
Cc: oe-kbuild-all, Michael S. Tsirkin, Jason Wang, Xuan Zhuo,
Eric Dumazet, Jakub Kicinski, Paolo Abeni, Alexei Starovoitov,
Daniel Borkmann, Jesper Dangaard Brouer, John Fastabend, netdev,
bpf, Christoph Hellwig
Hi Xuan,
kernel test robot noticed the following build warnings:
[auto build test WARNING on v6.4]
[cannot apply to mst-vhost/linux-next linus/master v6.5-rc2 v6.5-rc1 next-20230719]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]
url: https://github.com/intel-lab-lkp/linux/commits/Xuan-Zhuo/virtio_ring-check-use_dma_api-before-unmap-desc-for-indirect/20230719-121424
base: v6.4
patch link: https://lore.kernel.org/r/20230719040422.126357-11-xuanzhuo%40linux.alibaba.com
patch subject: [PATCH vhost v12 10/10] virtio_net: merge dma operations when filling mergeable buffers
config: i386-randconfig-i006-20230718 (https://download.01.org/0day-ci/archive/20230719/202307191819.0tatknWa-lkp@intel.com/config)
compiler: gcc-12 (Debian 12.2.0-14) 12.2.0
reproduce: (https://download.01.org/0day-ci/archive/20230719/202307191819.0tatknWa-lkp@intel.com/reproduce)
If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202307191819.0tatknWa-lkp@intel.com/
All warnings (new ones prefixed by >>):
drivers/net/virtio_net.c: In function 'virtnet_rq_init_one_sg':
>> drivers/net/virtio_net.c:624:41: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
624 | rq->sg[0].dma_address = (dma_addr_t)addr;
| ^
drivers/net/virtio_net.c: In function 'virtnet_rq_alloc':
>> drivers/net/virtio_net.c:682:28: warning: cast to pointer from integer of different size [-Wint-to-pointer-cast]
682 | *sg_addr = (void *)(dma->addr + alloc_frag->offset - sizeof(*dma));
| ^
vim +624 drivers/net/virtio_net.c
619
620 static void virtnet_rq_init_one_sg(struct receive_queue *rq, void *addr, u32 len)
621 {
622 if (rq->do_dma) {
623 sg_init_table(rq->sg, 1);
> 624 rq->sg[0].dma_address = (dma_addr_t)addr;
625 rq->sg[0].length = len;
626 } else {
627 sg_init_one(rq->sg, addr, len);
628 }
629 }
630
631 static void *virtnet_rq_alloc(struct receive_queue *rq, u32 size,
632 void **sg_addr, gfp_t gfp)
633 {
634 struct page_frag *alloc_frag = &rq->alloc_frag;
635 struct virtnet_rq_dma *dma;
636 struct device *dev;
637 void *buf, *head;
638 dma_addr_t addr;
639
640 if (unlikely(!skb_page_frag_refill(size, alloc_frag, gfp)))
641 return NULL;
642
643 head = (char *)page_address(alloc_frag->page);
644
645 if (rq->do_dma) {
646 dma = head;
647
648 /* new pages */
649 if (!alloc_frag->offset) {
650 if (rq->last_dma) {
651 /* Now, the new page is allocated, the last dma
652 * will not be used. So the dma can be unmapped
653 * if the ref is 0.
654 */
655 virtnet_rq_unmap(rq, rq->last_dma, 0);
656 rq->last_dma = NULL;
657 }
658
659 dev = virtqueue_dma_dev(rq->vq);
660
661 dma->len = alloc_frag->size - sizeof(*dma);
662
663 addr = dma_map_single_attrs(dev, dma + 1, dma->len, DMA_FROM_DEVICE, 0);
664 if (addr == DMA_MAPPING_ERROR)
665 return NULL;
666
667 dma->addr = addr;
668 dma->need_sync = dma_need_sync(dev, addr);
669
670 /* Add a reference to dma to prevent the entire dma from
671 * being released during error handling. This reference
672 * will be freed after the pages are no longer used.
673 */
674 get_page(alloc_frag->page);
675 dma->ref = 1;
676 alloc_frag->offset = sizeof(*dma);
677
678 rq->last_dma = dma;
679 }
680
681 ++dma->ref;
> 682 *sg_addr = (void *)(dma->addr + alloc_frag->offset - sizeof(*dma));
683 } else {
684 *sg_addr = head + alloc_frag->offset;
685 }
686
687 buf = head + alloc_frag->offset;
688
689 get_page(alloc_frag->page);
690 alloc_frag->offset += size;
691
692 return buf;
693 }
694
--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH vhost v12 10/10] virtio_net: merge dma operations when filling mergeable buffers
2023-07-19 10:33 ` kernel test robot
@ 2023-07-19 11:05 ` Michael S. Tsirkin
2023-07-20 2:31 ` Xuan Zhuo
0 siblings, 1 reply; 14+ messages in thread
From: Michael S. Tsirkin @ 2023-07-19 11:05 UTC (permalink / raw)
To: kernel test robot
Cc: Xuan Zhuo, virtualization, oe-kbuild-all, Jason Wang,
Eric Dumazet, Jakub Kicinski, Paolo Abeni, Alexei Starovoitov,
Daniel Borkmann, Jesper Dangaard Brouer, John Fastabend, netdev,
bpf, Christoph Hellwig
On Wed, Jul 19, 2023 at 06:33:05PM +0800, kernel test robot wrote:
> Hi Xuan,
>
> kernel test robot noticed the following build warnings:
>
> [auto build test WARNING on v6.4]
> [cannot apply to mst-vhost/linux-next linus/master v6.5-rc2 v6.5-rc1 next-20230719]
> [If your patch is applied to the wrong git tree, kindly drop us a note.
> And when submitting patch, we suggest to use '--base' as documented in
> https://git-scm.com/docs/git-format-patch#_base_tree_information]
>
> url: https://github.com/intel-lab-lkp/linux/commits/Xuan-Zhuo/virtio_ring-check-use_dma_api-before-unmap-desc-for-indirect/20230719-121424
> base: v6.4
> patch link: https://lore.kernel.org/r/20230719040422.126357-11-xuanzhuo%40linux.alibaba.com
> patch subject: [PATCH vhost v12 10/10] virtio_net: merge dma operations when filling mergeable buffers
> config: i386-randconfig-i006-20230718 (https://download.01.org/0day-ci/archive/20230719/202307191819.0tatknWa-lkp@intel.com/config)
> compiler: gcc-12 (Debian 12.2.0-14) 12.2.0
> reproduce: (https://download.01.org/0day-ci/archive/20230719/202307191819.0tatknWa-lkp@intel.com/reproduce)
>
> If you fix the issue in a separate patch/commit (i.e. not just a new version of
> the same patch/commit), kindly add following tags
> | Reported-by: kernel test robot <lkp@intel.com>
> | Closes: https://lore.kernel.org/oe-kbuild-all/202307191819.0tatknWa-lkp@intel.com/
>
> All warnings (new ones prefixed by >>):
>
> drivers/net/virtio_net.c: In function 'virtnet_rq_init_one_sg':
> >> drivers/net/virtio_net.c:624:41: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
> 624 | rq->sg[0].dma_address = (dma_addr_t)addr;
> | ^
> drivers/net/virtio_net.c: In function 'virtnet_rq_alloc':
> >> drivers/net/virtio_net.c:682:28: warning: cast to pointer from integer of different size [-Wint-to-pointer-cast]
> 682 | *sg_addr = (void *)(dma->addr + alloc_frag->offset - sizeof(*dma));
> | ^
yea these casts are pretty creepy. I think it's possible dma_addr_t won't fit in a pointer
or a pointer won't fit in dma_addr_t.
>
> vim +624 drivers/net/virtio_net.c
>
> 619
> 620 static void virtnet_rq_init_one_sg(struct receive_queue *rq, void *addr, u32 len)
> 621 {
> 622 if (rq->do_dma) {
> 623 sg_init_table(rq->sg, 1);
> > 624 rq->sg[0].dma_address = (dma_addr_t)addr;
> 625 rq->sg[0].length = len;
> 626 } else {
> 627 sg_init_one(rq->sg, addr, len);
> 628 }
> 629 }
> 630
> 631 static void *virtnet_rq_alloc(struct receive_queue *rq, u32 size,
> 632 void **sg_addr, gfp_t gfp)
> 633 {
> 634 struct page_frag *alloc_frag = &rq->alloc_frag;
> 635 struct virtnet_rq_dma *dma;
> 636 struct device *dev;
> 637 void *buf, *head;
> 638 dma_addr_t addr;
> 639
> 640 if (unlikely(!skb_page_frag_refill(size, alloc_frag, gfp)))
> 641 return NULL;
> 642
> 643 head = (char *)page_address(alloc_frag->page);
> 644
> 645 if (rq->do_dma) {
> 646 dma = head;
> 647
> 648 /* new pages */
> 649 if (!alloc_frag->offset) {
> 650 if (rq->last_dma) {
> 651 /* Now, the new page is allocated, the last dma
> 652 * will not be used. So the dma can be unmapped
> 653 * if the ref is 0.
> 654 */
> 655 virtnet_rq_unmap(rq, rq->last_dma, 0);
> 656 rq->last_dma = NULL;
> 657 }
> 658
> 659 dev = virtqueue_dma_dev(rq->vq);
> 660
> 661 dma->len = alloc_frag->size - sizeof(*dma);
> 662
> 663 addr = dma_map_single_attrs(dev, dma + 1, dma->len, DMA_FROM_DEVICE, 0);
> 664 if (addr == DMA_MAPPING_ERROR)
> 665 return NULL;
> 666
> 667 dma->addr = addr;
> 668 dma->need_sync = dma_need_sync(dev, addr);
> 669
> 670 /* Add a reference to dma to prevent the entire dma from
> 671 * being released during error handling. This reference
> 672 * will be freed after the pages are no longer used.
> 673 */
> 674 get_page(alloc_frag->page);
> 675 dma->ref = 1;
> 676 alloc_frag->offset = sizeof(*dma);
> 677
> 678 rq->last_dma = dma;
> 679 }
> 680
> 681 ++dma->ref;
> > 682 *sg_addr = (void *)(dma->addr + alloc_frag->offset - sizeof(*dma));
> 683 } else {
> 684 *sg_addr = head + alloc_frag->offset;
> 685 }
> 686
> 687 buf = head + alloc_frag->offset;
> 688
> 689 get_page(alloc_frag->page);
> 690 alloc_frag->offset += size;
> 691
> 692 return buf;
> 693 }
> 694
>
> --
> 0-DAY CI Kernel Test Service
> https://github.com/intel/lkp-tests/wiki
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH vhost v12 10/10] virtio_net: merge dma operations when filling mergeable buffers
2023-07-19 11:05 ` Michael S. Tsirkin
@ 2023-07-20 2:31 ` Xuan Zhuo
0 siblings, 0 replies; 14+ messages in thread
From: Xuan Zhuo @ 2023-07-20 2:31 UTC (permalink / raw)
To: Michael S. Tsirkin
Cc: virtualization, oe-kbuild-all, Jason Wang, Eric Dumazet,
Jakub Kicinski, Paolo Abeni, Alexei Starovoitov, Daniel Borkmann,
Jesper Dangaard Brouer, John Fastabend, netdev, bpf,
Christoph Hellwig, kernel test robot
On Wed, 19 Jul 2023 07:05:50 -0400, "Michael S. Tsirkin" <mst@redhat.com> wrote:
> On Wed, Jul 19, 2023 at 06:33:05PM +0800, kernel test robot wrote:
> > Hi Xuan,
> >
> > kernel test robot noticed the following build warnings:
> >
> > [auto build test WARNING on v6.4]
> > [cannot apply to mst-vhost/linux-next linus/master v6.5-rc2 v6.5-rc1 next-20230719]
> > [If your patch is applied to the wrong git tree, kindly drop us a note.
> > And when submitting patch, we suggest to use '--base' as documented in
> > https://git-scm.com/docs/git-format-patch#_base_tree_information]
> >
> > url: https://github.com/intel-lab-lkp/linux/commits/Xuan-Zhuo/virtio_ring-check-use_dma_api-before-unmap-desc-for-indirect/20230719-121424
> > base: v6.4
> > patch link: https://lore.kernel.org/r/20230719040422.126357-11-xuanzhuo%40linux.alibaba.com
> > patch subject: [PATCH vhost v12 10/10] virtio_net: merge dma operations when filling mergeable buffers
> > config: i386-randconfig-i006-20230718 (https://download.01.org/0day-ci/archive/20230719/202307191819.0tatknWa-lkp@intel.com/config)
> > compiler: gcc-12 (Debian 12.2.0-14) 12.2.0
> > reproduce: (https://download.01.org/0day-ci/archive/20230719/202307191819.0tatknWa-lkp@intel.com/reproduce)
> >
> > If you fix the issue in a separate patch/commit (i.e. not just a new version of
> > the same patch/commit), kindly add following tags
> > | Reported-by: kernel test robot <lkp@intel.com>
> > | Closes: https://lore.kernel.org/oe-kbuild-all/202307191819.0tatknWa-lkp@intel.com/
> >
> > All warnings (new ones prefixed by >>):
> >
> > drivers/net/virtio_net.c: In function 'virtnet_rq_init_one_sg':
> > >> drivers/net/virtio_net.c:624:41: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
> > 624 | rq->sg[0].dma_address = (dma_addr_t)addr;
> > | ^
> > drivers/net/virtio_net.c: In function 'virtnet_rq_alloc':
> > >> drivers/net/virtio_net.c:682:28: warning: cast to pointer from integer of different size [-Wint-to-pointer-cast]
> > 682 | *sg_addr = (void *)(dma->addr + alloc_frag->offset - sizeof(*dma));
> > | ^
>
>
> yea these casts are pretty creepy. I think it's possible dma_addr_t won't fit in a pointer
> or a pointer won't fit in dma_addr_t.
Yes.
I will fix this.
I hope this will not affect the review.
Thanks.
>
> >
> > vim +624 drivers/net/virtio_net.c
> >
> > 619
> > 620 static void virtnet_rq_init_one_sg(struct receive_queue *rq, void *addr, u32 len)
> > 621 {
> > 622 if (rq->do_dma) {
> > 623 sg_init_table(rq->sg, 1);
> > > 624 rq->sg[0].dma_address = (dma_addr_t)addr;
> > 625 rq->sg[0].length = len;
> > 626 } else {
> > 627 sg_init_one(rq->sg, addr, len);
> > 628 }
> > 629 }
> > 630
> > 631 static void *virtnet_rq_alloc(struct receive_queue *rq, u32 size,
> > 632 void **sg_addr, gfp_t gfp)
> > 633 {
> > 634 struct page_frag *alloc_frag = &rq->alloc_frag;
> > 635 struct virtnet_rq_dma *dma;
> > 636 struct device *dev;
> > 637 void *buf, *head;
> > 638 dma_addr_t addr;
> > 639
> > 640 if (unlikely(!skb_page_frag_refill(size, alloc_frag, gfp)))
> > 641 return NULL;
> > 642
> > 643 head = (char *)page_address(alloc_frag->page);
> > 644
> > 645 if (rq->do_dma) {
> > 646 dma = head;
> > 647
> > 648 /* new pages */
> > 649 if (!alloc_frag->offset) {
> > 650 if (rq->last_dma) {
> > 651 /* Now, the new page is allocated, the last dma
> > 652 * will not be used. So the dma can be unmapped
> > 653 * if the ref is 0.
> > 654 */
> > 655 virtnet_rq_unmap(rq, rq->last_dma, 0);
> > 656 rq->last_dma = NULL;
> > 657 }
> > 658
> > 659 dev = virtqueue_dma_dev(rq->vq);
> > 660
> > 661 dma->len = alloc_frag->size - sizeof(*dma);
> > 662
> > 663 addr = dma_map_single_attrs(dev, dma + 1, dma->len, DMA_FROM_DEVICE, 0);
> > 664 if (addr == DMA_MAPPING_ERROR)
> > 665 return NULL;
> > 666
> > 667 dma->addr = addr;
> > 668 dma->need_sync = dma_need_sync(dev, addr);
> > 669
> > 670 /* Add a reference to dma to prevent the entire dma from
> > 671 * being released during error handling. This reference
> > 672 * will be freed after the pages are no longer used.
> > 673 */
> > 674 get_page(alloc_frag->page);
> > 675 dma->ref = 1;
> > 676 alloc_frag->offset = sizeof(*dma);
> > 677
> > 678 rq->last_dma = dma;
> > 679 }
> > 680
> > 681 ++dma->ref;
> > > 682 *sg_addr = (void *)(dma->addr + alloc_frag->offset - sizeof(*dma));
> > 683 } else {
> > 684 *sg_addr = head + alloc_frag->offset;
> > 685 }
> > 686
> > 687 buf = head + alloc_frag->offset;
> > 688
> > 689 get_page(alloc_frag->page);
> > 690 alloc_frag->offset += size;
> > 691
> > 692 return buf;
> > 693 }
> > 694
> >
> > --
> > 0-DAY CI Kernel Test Service
> > https://github.com/intel/lkp-tests/wiki
>
^ permalink raw reply [flat|nested] 14+ messages in thread