* [PATCH v3 1/9] virtio-net: disable the hole mechanism for xdp
2023-01-03 6:40 [PATCH v3 0/9] virtio-net: support multi buffer xdp Heng Qi
@ 2023-01-03 6:40 ` Heng Qi
2023-01-03 6:40 ` [PATCH v3 2/9] virtio-net: set up xdp for multi buffer packets Heng Qi
` (7 subsequent siblings)
8 siblings, 0 replies; 16+ messages in thread
From: Heng Qi @ 2023-01-03 6:40 UTC (permalink / raw)
To: netdev, bpf
Cc: Jason Wang, Michael S . Tsirkin, Paolo Abeni, Jakub Kicinski,
John Fastabend, David S . Miller, Daniel Borkmann,
Alexei Starovoitov, Eric Dumazet, Xuan Zhuo
XDP core assumes that the frame_size of xdp_buff and the length of
the frag are PAGE_SIZE. The hole may cause the processing of xdp to
fail, so we disable the hole mechanism when xdp is set.
Signed-off-by: Heng Qi <hengqi@linux.alibaba.com>
Reviewed-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
Acked-by: Jason Wang <jasowang@redhat.com>
---
drivers/net/virtio_net.c | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 9cce7dec7366..443aa7b8f0ad 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -1419,8 +1419,11 @@ static int add_recvbuf_mergeable(struct virtnet_info *vi,
/* To avoid internal fragmentation, if there is very likely not
* enough space for another buffer, add the remaining space to
* the current buffer.
+ * XDP core assumes that frame_size of xdp_buff and the length
+ * of the frag are PAGE_SIZE, so we disable the hole mechanism.
*/
- len += hole;
+ if (!headroom)
+ len += hole;
alloc_frag->offset += hole;
}
--
2.19.1.6.gb485710b
^ permalink raw reply related [flat|nested] 16+ messages in thread
* [PATCH v3 2/9] virtio-net: set up xdp for multi buffer packets
2023-01-03 6:40 [PATCH v3 0/9] virtio-net: support multi buffer xdp Heng Qi
2023-01-03 6:40 ` [PATCH v3 1/9] virtio-net: disable the hole mechanism for xdp Heng Qi
@ 2023-01-03 6:40 ` Heng Qi
2023-01-09 2:48 ` Heng Qi
2023-01-13 2:49 ` Jason Wang
2023-01-03 6:40 ` [PATCH v3 3/9] virtio-net: update bytes calculation for xdp_frame Heng Qi
` (6 subsequent siblings)
8 siblings, 2 replies; 16+ messages in thread
From: Heng Qi @ 2023-01-03 6:40 UTC (permalink / raw)
To: netdev, bpf
Cc: Jason Wang, Michael S . Tsirkin, Paolo Abeni, Jakub Kicinski,
John Fastabend, David S . Miller, Daniel Borkmann,
Alexei Starovoitov, Eric Dumazet, Xuan Zhuo
When the xdp program sets xdp.frags, it can process multi-buffer
packets over a larger MTU, so we continue to support xdp in that case.
But for single-buffer xdp, we should keep checking the MTU.
Signed-off-by: Heng Qi <hengqi@linux.alibaba.com>
Reviewed-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
---
drivers/net/virtio_net.c | 10 ++++++----
1 file changed, 6 insertions(+), 4 deletions(-)
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 443aa7b8f0ad..60e199811212 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -3074,7 +3074,9 @@ static int virtnet_restore_guest_offloads(struct virtnet_info *vi)
static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog,
struct netlink_ext_ack *extack)
{
- unsigned long int max_sz = PAGE_SIZE - sizeof(struct padded_vnet_hdr);
+ unsigned int room = SKB_DATA_ALIGN(VIRTIO_XDP_HEADROOM +
+ sizeof(struct skb_shared_info));
+ unsigned int max_sz = PAGE_SIZE - room - ETH_HLEN;
struct virtnet_info *vi = netdev_priv(dev);
struct bpf_prog *old_prog;
u16 xdp_qp = 0, curr_qp;
@@ -3095,9 +3097,9 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog,
return -EINVAL;
}
- if (dev->mtu > max_sz) {
- NL_SET_ERR_MSG_MOD(extack, "MTU too large to enable XDP");
- netdev_warn(dev, "XDP requires MTU less than %lu\n", max_sz);
+ if (prog && !prog->aux->xdp_has_frags && dev->mtu > max_sz) {
+ NL_SET_ERR_MSG_MOD(extack, "MTU too large to enable XDP without frags");
+ netdev_warn(dev, "single-buffer XDP requires MTU less than %u\n", max_sz);
return -EINVAL;
}
--
2.19.1.6.gb485710b
^ permalink raw reply related [flat|nested] 16+ messages in thread
* Re: [PATCH v3 2/9] virtio-net: set up xdp for multi buffer packets
2023-01-03 6:40 ` [PATCH v3 2/9] virtio-net: set up xdp for multi buffer packets Heng Qi
@ 2023-01-09 2:48 ` Heng Qi
2023-01-09 8:56 ` Jason Wang
2023-01-13 2:49 ` Jason Wang
1 sibling, 1 reply; 16+ messages in thread
From: Heng Qi @ 2023-01-09 2:48 UTC (permalink / raw)
To: Jason Wang, netdev, bpf
Cc: Michael S . Tsirkin, Paolo Abeni, Jakub Kicinski, John Fastabend,
David S . Miller, Daniel Borkmann, Alexei Starovoitov,
Eric Dumazet, Xuan Zhuo
在 2023/1/3 下午2:40, Heng Qi 写道:
> When the xdp program sets xdp.frags, which means it can process
> multi-buffer packets over larger MTU, so we continue to support xdp.
> But for single-buffer xdp, we should keep checking for MTU.
>
> Signed-off-by: Heng Qi <hengqi@linux.alibaba.com>
> Reviewed-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
> ---
> drivers/net/virtio_net.c | 10 ++++++----
> 1 file changed, 6 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> index 443aa7b8f0ad..60e199811212 100644
> --- a/drivers/net/virtio_net.c
> +++ b/drivers/net/virtio_net.c
> @@ -3074,7 +3074,9 @@ static int virtnet_restore_guest_offloads(struct virtnet_info *vi)
> static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog,
> struct netlink_ext_ack *extack)
> {
> - unsigned long int max_sz = PAGE_SIZE - sizeof(struct padded_vnet_hdr);
> + unsigned int room = SKB_DATA_ALIGN(VIRTIO_XDP_HEADROOM +
> + sizeof(struct skb_shared_info));
> + unsigned int max_sz = PAGE_SIZE - room - ETH_HLEN;
Hi Jason, I've updated the calculation of 'max_sz' in this patch instead
of a separate bugfix, since doing so also seemed clear.
Thanks.
> struct virtnet_info *vi = netdev_priv(dev);
> struct bpf_prog *old_prog;
> u16 xdp_qp = 0, curr_qp;
> @@ -3095,9 +3097,9 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog,
> return -EINVAL;
> }
>
> - if (dev->mtu > max_sz) {
> - NL_SET_ERR_MSG_MOD(extack, "MTU too large to enable XDP");
> - netdev_warn(dev, "XDP requires MTU less than %lu\n", max_sz);
> + if (prog && !prog->aux->xdp_has_frags && dev->mtu > max_sz) {
> + NL_SET_ERR_MSG_MOD(extack, "MTU too large to enable XDP without frags");
> + netdev_warn(dev, "single-buffer XDP requires MTU less than %u\n", max_sz);
> return -EINVAL;
> }
>
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH v3 2/9] virtio-net: set up xdp for multi buffer packets
2023-01-09 2:48 ` Heng Qi
@ 2023-01-09 8:56 ` Jason Wang
0 siblings, 0 replies; 16+ messages in thread
From: Jason Wang @ 2023-01-09 8:56 UTC (permalink / raw)
To: Heng Qi
Cc: netdev, bpf, Michael S . Tsirkin, Paolo Abeni, Jakub Kicinski,
John Fastabend, David S . Miller, Daniel Borkmann,
Alexei Starovoitov, Eric Dumazet, Xuan Zhuo
On Mon, Jan 9, 2023 at 10:48 AM Heng Qi <hengqi@linux.alibaba.com> wrote:
>
>
>
> 在 2023/1/3 下午2:40, Heng Qi 写道:
> > When the xdp program sets xdp.frags, which means it can process
> > multi-buffer packets over larger MTU, so we continue to support xdp.
> > But for single-buffer xdp, we should keep checking for MTU.
> >
> > Signed-off-by: Heng Qi <hengqi@linux.alibaba.com>
> > Reviewed-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
> > ---
> > drivers/net/virtio_net.c | 10 ++++++----
> > 1 file changed, 6 insertions(+), 4 deletions(-)
> >
> > diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> > index 443aa7b8f0ad..60e199811212 100644
> > --- a/drivers/net/virtio_net.c
> > +++ b/drivers/net/virtio_net.c
> > @@ -3074,7 +3074,9 @@ static int virtnet_restore_guest_offloads(struct virtnet_info *vi)
> > static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog,
> > struct netlink_ext_ack *extack)
> > {
> > - unsigned long int max_sz = PAGE_SIZE - sizeof(struct padded_vnet_hdr);
> > + unsigned int room = SKB_DATA_ALIGN(VIRTIO_XDP_HEADROOM +
> > + sizeof(struct skb_shared_info));
> > + unsigned int max_sz = PAGE_SIZE - room - ETH_HLEN;
>
> Hi Jason, I've updated the calculation of 'max_sz' in this patch instead
> of a separate bugfix, since doing so also seemed clear.
Sure, I will review it with this series no later than the end of this week.
Thanks
>
> Thanks.
>
> > struct virtnet_info *vi = netdev_priv(dev);
> > struct bpf_prog *old_prog;
> > u16 xdp_qp = 0, curr_qp;
> > @@ -3095,9 +3097,9 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog,
> > return -EINVAL;
> > }
> >
> > - if (dev->mtu > max_sz) {
> > - NL_SET_ERR_MSG_MOD(extack, "MTU too large to enable XDP");
> > - netdev_warn(dev, "XDP requires MTU less than %lu\n", max_sz);
> > + if (prog && !prog->aux->xdp_has_frags && dev->mtu > max_sz) {
> > + NL_SET_ERR_MSG_MOD(extack, "MTU too large to enable XDP without frags");
> > + netdev_warn(dev, "single-buffer XDP requires MTU less than %u\n", max_sz);
> > return -EINVAL;
> > }
> >
>
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH v3 2/9] virtio-net: set up xdp for multi buffer packets
2023-01-03 6:40 ` [PATCH v3 2/9] virtio-net: set up xdp for multi buffer packets Heng Qi
2023-01-09 2:48 ` Heng Qi
@ 2023-01-13 2:49 ` Jason Wang
2023-01-13 2:59 ` Heng Qi
1 sibling, 1 reply; 16+ messages in thread
From: Jason Wang @ 2023-01-13 2:49 UTC (permalink / raw)
To: Heng Qi, netdev, bpf
Cc: Michael S . Tsirkin, Paolo Abeni, Jakub Kicinski, John Fastabend,
David S . Miller, Daniel Borkmann, Alexei Starovoitov,
Eric Dumazet, Xuan Zhuo
在 2023/1/3 14:40, Heng Qi 写道:
> When the xdp program sets xdp.frags, which means it can process
> multi-buffer packets over larger MTU, so we continue to support xdp.
> But for single-buffer xdp, we should keep checking for MTU.
>
> Signed-off-by: Heng Qi <hengqi@linux.alibaba.com>
> Reviewed-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
> ---
> drivers/net/virtio_net.c | 10 ++++++----
> 1 file changed, 6 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> index 443aa7b8f0ad..60e199811212 100644
> --- a/drivers/net/virtio_net.c
> +++ b/drivers/net/virtio_net.c
> @@ -3074,7 +3074,9 @@ static int virtnet_restore_guest_offloads(struct virtnet_info *vi)
> static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog,
> struct netlink_ext_ack *extack)
> {
> - unsigned long int max_sz = PAGE_SIZE - sizeof(struct padded_vnet_hdr);
> + unsigned int room = SKB_DATA_ALIGN(VIRTIO_XDP_HEADROOM +
> + sizeof(struct skb_shared_info));
> + unsigned int max_sz = PAGE_SIZE - room - ETH_HLEN;
> struct virtnet_info *vi = netdev_priv(dev);
> struct bpf_prog *old_prog;
> u16 xdp_qp = 0, curr_qp;
> @@ -3095,9 +3097,9 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog,
> return -EINVAL;
> }
>
> - if (dev->mtu > max_sz) {
> - NL_SET_ERR_MSG_MOD(extack, "MTU too large to enable XDP");
> - netdev_warn(dev, "XDP requires MTU less than %lu\n", max_sz);
> + if (prog && !prog->aux->xdp_has_frags && dev->mtu > max_sz) {
> + NL_SET_ERR_MSG_MOD(extack, "MTU too large to enable XDP without frags");
> + netdev_warn(dev, "single-buffer XDP requires MTU less than %u\n", max_sz);
> return -EINVAL;
> }
I think we probably need to backport this to -stable. So I suggest to
move/squash the check of !prog->aux->xdp_has_frags to one of the
following patch.
With this,
Acked-by: Jason Wang <jasowang@redhat.com>
Thanks
>
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH v3 2/9] virtio-net: set up xdp for multi buffer packets
2023-01-13 2:49 ` Jason Wang
@ 2023-01-13 2:59 ` Heng Qi
0 siblings, 0 replies; 16+ messages in thread
From: Heng Qi @ 2023-01-13 2:59 UTC (permalink / raw)
To: Jason Wang, netdev, bpf
Cc: Michael S . Tsirkin, Paolo Abeni, Jakub Kicinski, John Fastabend,
David S . Miller, Daniel Borkmann, Alexei Starovoitov,
Eric Dumazet, Xuan Zhuo
在 2023/1/13 上午10:49, Jason Wang 写道:
>
> 在 2023/1/3 14:40, Heng Qi 写道:
>> When the xdp program sets xdp.frags, which means it can process
>> multi-buffer packets over larger MTU, so we continue to support xdp.
>> But for single-buffer xdp, we should keep checking for MTU.
>>
>> Signed-off-by: Heng Qi <hengqi@linux.alibaba.com>
>> Reviewed-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
>> ---
>> drivers/net/virtio_net.c | 10 ++++++----
>> 1 file changed, 6 insertions(+), 4 deletions(-)
>>
>> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
>> index 443aa7b8f0ad..60e199811212 100644
>> --- a/drivers/net/virtio_net.c
>> +++ b/drivers/net/virtio_net.c
>> @@ -3074,7 +3074,9 @@ static int
>> virtnet_restore_guest_offloads(struct virtnet_info *vi)
>> static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog
>> *prog,
>> struct netlink_ext_ack *extack)
>> {
>> - unsigned long int max_sz = PAGE_SIZE - sizeof(struct
>> padded_vnet_hdr);
>> + unsigned int room = SKB_DATA_ALIGN(VIRTIO_XDP_HEADROOM +
>> + sizeof(struct skb_shared_info));
>> + unsigned int max_sz = PAGE_SIZE - room - ETH_HLEN;
>> struct virtnet_info *vi = netdev_priv(dev);
>> struct bpf_prog *old_prog;
>> u16 xdp_qp = 0, curr_qp;
>> @@ -3095,9 +3097,9 @@ static int virtnet_xdp_set(struct net_device
>> *dev, struct bpf_prog *prog,
>> return -EINVAL;
>> }
>> - if (dev->mtu > max_sz) {
>> - NL_SET_ERR_MSG_MOD(extack, "MTU too large to enable XDP");
>> - netdev_warn(dev, "XDP requires MTU less than %lu\n", max_sz);
>> + if (prog && !prog->aux->xdp_has_frags && dev->mtu > max_sz) {
>> + NL_SET_ERR_MSG_MOD(extack, "MTU too large to enable XDP
>> without frags");
>> + netdev_warn(dev, "single-buffer XDP requires MTU less than
>> %u\n", max_sz);
>> return -EINVAL;
>> }
>
>
> I think we probably need to backport this to -stable. So I suggest to
> move/squash the check of !prog->aux->xdp_has_frags to one of the
> following patch.
Sure, and you are right.
Thanks.
>
> With this,
>
> Acked-by: Jason Wang <jasowang@redhat.com>
>
> Thanks
>
>
^ permalink raw reply [flat|nested] 16+ messages in thread
* [PATCH v3 3/9] virtio-net: update bytes calculation for xdp_frame
2023-01-03 6:40 [PATCH v3 0/9] virtio-net: support multi buffer xdp Heng Qi
2023-01-03 6:40 ` [PATCH v3 1/9] virtio-net: disable the hole mechanism for xdp Heng Qi
2023-01-03 6:40 ` [PATCH v3 2/9] virtio-net: set up xdp for multi buffer packets Heng Qi
@ 2023-01-03 6:40 ` Heng Qi
2023-01-03 6:40 ` [PATCH v3 4/9] virtio-net: build xdp_buff with multi buffers Heng Qi
` (5 subsequent siblings)
8 siblings, 0 replies; 16+ messages in thread
From: Heng Qi @ 2023-01-03 6:40 UTC (permalink / raw)
To: netdev, bpf
Cc: Jason Wang, Michael S . Tsirkin, Paolo Abeni, Jakub Kicinski,
John Fastabend, David S . Miller, Daniel Borkmann,
Alexei Starovoitov, Eric Dumazet, Xuan Zhuo
Use xdp_get_frame_len() instead of frame->len when accounting the
bytes of an xdp_frame, as the basis for multi-buffer xdp transmission.
Signed-off-by: Heng Qi <hengqi@linux.alibaba.com>
Reviewed-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
Acked-by: Jason Wang <jasowang@redhat.com>
---
drivers/net/virtio_net.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 60e199811212..6fc5302ca5ff 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -658,7 +658,7 @@ static int virtnet_xdp_xmit(struct net_device *dev,
if (likely(is_xdp_frame(ptr))) {
struct xdp_frame *frame = ptr_to_xdp(ptr);
- bytes += frame->len;
+ bytes += xdp_get_frame_len(frame);
xdp_return_frame(frame);
} else {
struct sk_buff *skb = ptr;
@@ -1604,7 +1604,7 @@ static void free_old_xmit_skbs(struct send_queue *sq, bool in_napi)
} else {
struct xdp_frame *frame = ptr_to_xdp(ptr);
- bytes += frame->len;
+ bytes += xdp_get_frame_len(frame);
xdp_return_frame(frame);
}
packets++;
--
2.19.1.6.gb485710b
^ permalink raw reply related [flat|nested] 16+ messages in thread
* [PATCH v3 4/9] virtio-net: build xdp_buff with multi buffers
2023-01-03 6:40 [PATCH v3 0/9] virtio-net: support multi buffer xdp Heng Qi
` (2 preceding siblings ...)
2023-01-03 6:40 ` [PATCH v3 3/9] virtio-net: update bytes calculation for xdp_frame Heng Qi
@ 2023-01-03 6:40 ` Heng Qi
2023-01-13 2:58 ` Jason Wang
2023-01-03 6:40 ` [PATCH v3 5/9] virtio-net: construct multi-buffer xdp in mergeable Heng Qi
` (4 subsequent siblings)
8 siblings, 1 reply; 16+ messages in thread
From: Heng Qi @ 2023-01-03 6:40 UTC (permalink / raw)
To: netdev, bpf
Cc: Jason Wang, Michael S . Tsirkin, Paolo Abeni, Jakub Kicinski,
John Fastabend, David S . Miller, Daniel Borkmann,
Alexei Starovoitov, Eric Dumazet, Xuan Zhuo
Support xdp for multi buffer packets in mergeable mode.
Put the first buffer in the linear part of the xdp_buff, and add the
rest of the buffers as non-linear fragments to the struct
skb_shared_info in the tailroom belonging to the xdp_buff.
Let 'truesize' return to its literal meaning, that is, when
xdp is set, it includes the length of headroom and tailroom.
Signed-off-by: Heng Qi <hengqi@linux.alibaba.com>
Reviewed-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
---
drivers/net/virtio_net.c | 108 ++++++++++++++++++++++++++++++++++++---
1 file changed, 100 insertions(+), 8 deletions(-)
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 6fc5302ca5ff..699e376b8f8b 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -931,6 +931,91 @@ static struct sk_buff *receive_big(struct net_device *dev,
return NULL;
}
+/* TODO: build xdp in big mode */
+static int virtnet_build_xdp_buff_mrg(struct net_device *dev,
+ struct virtnet_info *vi,
+ struct receive_queue *rq,
+ struct xdp_buff *xdp,
+ void *buf,
+ unsigned int len,
+ unsigned int frame_sz,
+ u16 *num_buf,
+ unsigned int *xdp_frags_truesize,
+ struct virtnet_rq_stats *stats)
+{
+ struct virtio_net_hdr_mrg_rxbuf *hdr = buf;
+ unsigned int headroom, tailroom, room;
+ unsigned int truesize, cur_frag_size;
+ struct skb_shared_info *shinfo;
+ unsigned int xdp_frags_truesz = 0;
+ struct page *page;
+ skb_frag_t *frag;
+ int offset;
+ void *ctx;
+
+ xdp_init_buff(xdp, frame_sz, &rq->xdp_rxq);
+ xdp_prepare_buff(xdp, buf - VIRTIO_XDP_HEADROOM,
+ VIRTIO_XDP_HEADROOM + vi->hdr_len, len - vi->hdr_len, true);
+
+ if (*num_buf > 1) {
+ /* If we want to build multi-buffer xdp, we need
+ * to specify that the flags of xdp_buff have the
+ * XDP_FLAGS_HAS_FRAG bit.
+ */
+ if (!xdp_buff_has_frags(xdp))
+ xdp_buff_set_frags_flag(xdp);
+
+ shinfo = xdp_get_shared_info_from_buff(xdp);
+ shinfo->nr_frags = 0;
+ shinfo->xdp_frags_size = 0;
+ }
+
+ if ((*num_buf - 1) > MAX_SKB_FRAGS)
+ return -EINVAL;
+
+ while ((--*num_buf) >= 1) {
+ buf = virtqueue_get_buf_ctx(rq->vq, &len, &ctx);
+ if (unlikely(!buf)) {
+ pr_debug("%s: rx error: %d buffers out of %d missing\n",
+ dev->name, *num_buf,
+ virtio16_to_cpu(vi->vdev, hdr->num_buffers));
+ dev->stats.rx_length_errors++;
+ return -EINVAL;
+ }
+
+ stats->bytes += len;
+ page = virt_to_head_page(buf);
+ offset = buf - page_address(page);
+
+ truesize = mergeable_ctx_to_truesize(ctx);
+ headroom = mergeable_ctx_to_headroom(ctx);
+ tailroom = headroom ? sizeof(struct skb_shared_info) : 0;
+ room = SKB_DATA_ALIGN(headroom + tailroom);
+
+ cur_frag_size = truesize;
+ xdp_frags_truesz += cur_frag_size;
+ if (unlikely(len > truesize - room || cur_frag_size > PAGE_SIZE)) {
+ put_page(page);
+ pr_debug("%s: rx error: len %u exceeds truesize %lu\n",
+ dev->name, len, (unsigned long)(truesize - room));
+ dev->stats.rx_length_errors++;
+ return -EINVAL;
+ }
+
+ frag = &shinfo->frags[shinfo->nr_frags++];
+ __skb_frag_set_page(frag, page);
+ skb_frag_off_set(frag, offset);
+ skb_frag_size_set(frag, len);
+ if (page_is_pfmemalloc(page))
+ xdp_buff_set_frag_pfmemalloc(xdp);
+
+ shinfo->xdp_frags_size += len;
+ }
+
+ *xdp_frags_truesize = xdp_frags_truesz;
+ return 0;
+}
+
static struct sk_buff *receive_mergeable(struct net_device *dev,
struct virtnet_info *vi,
struct receive_queue *rq,
@@ -949,15 +1034,17 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
unsigned int truesize = mergeable_ctx_to_truesize(ctx);
unsigned int headroom = mergeable_ctx_to_headroom(ctx);
unsigned int metasize = 0;
+ unsigned int tailroom = headroom ? sizeof(struct skb_shared_info) : 0;
+ unsigned int room = SKB_DATA_ALIGN(headroom + tailroom);
unsigned int frame_sz;
int err;
head_skb = NULL;
stats->bytes += len - vi->hdr_len;
- if (unlikely(len > truesize)) {
+ if (unlikely(len > truesize - room)) {
pr_debug("%s: rx error: len %u exceeds truesize %lu\n",
- dev->name, len, (unsigned long)ctx);
+ dev->name, len, (unsigned long)(truesize - room));
dev->stats.rx_length_errors++;
goto err_skb;
}
@@ -983,10 +1070,12 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
if (unlikely(hdr->hdr.gso_type))
goto err_xdp;
- /* Buffers with headroom use PAGE_SIZE as alloc size,
- * see add_recvbuf_mergeable() + get_mergeable_buf_len()
+ /* Now XDP core assumes frag size is PAGE_SIZE, but buffers
+ * with headroom may add hole in truesize, which
+ * make their length exceed PAGE_SIZE. So we disabled the
+ * hole mechanism for xdp. See add_recvbuf_mergeable().
*/
- frame_sz = headroom ? PAGE_SIZE : truesize;
+ frame_sz = truesize;
/* This happens when rx buffer size is underestimated
* or headroom is not enough because of the buffer
@@ -1139,9 +1228,12 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
page = virt_to_head_page(buf);
truesize = mergeable_ctx_to_truesize(ctx);
- if (unlikely(len > truesize)) {
+ headroom = mergeable_ctx_to_headroom(ctx);
+ tailroom = headroom ? sizeof(struct skb_shared_info) : 0;
+ room = SKB_DATA_ALIGN(headroom + tailroom);
+ if (unlikely(len > truesize - room)) {
pr_debug("%s: rx error: len %u exceeds truesize %lu\n",
- dev->name, len, (unsigned long)ctx);
+ dev->name, len, (unsigned long)(truesize - room));
dev->stats.rx_length_errors++;
goto err_skb;
}
@@ -1428,7 +1520,7 @@ static int add_recvbuf_mergeable(struct virtnet_info *vi,
}
sg_init_one(rq->sg, buf, len);
- ctx = mergeable_len_to_ctx(len, headroom);
+ ctx = mergeable_len_to_ctx(len + room, headroom);
err = virtqueue_add_inbuf_ctx(rq->vq, rq->sg, 1, buf, ctx, gfp);
if (err < 0)
put_page(virt_to_head_page(buf));
--
2.19.1.6.gb485710b
^ permalink raw reply related [flat|nested] 16+ messages in thread
* Re: [PATCH v3 4/9] virtio-net: build xdp_buff with multi buffers
2023-01-03 6:40 ` [PATCH v3 4/9] virtio-net: build xdp_buff with multi buffers Heng Qi
@ 2023-01-13 2:58 ` Jason Wang
0 siblings, 0 replies; 16+ messages in thread
From: Jason Wang @ 2023-01-13 2:58 UTC (permalink / raw)
To: Heng Qi, netdev, bpf
Cc: Michael S . Tsirkin, Paolo Abeni, Jakub Kicinski, John Fastabend,
David S . Miller, Daniel Borkmann, Alexei Starovoitov,
Eric Dumazet, Xuan Zhuo
在 2023/1/3 14:40, Heng Qi 写道:
> Support xdp for multi buffer packets in mergeable mode.
>
> Putting the first buffer as the linear part for xdp_buff,
> and the rest of the buffers as non-linear fragments to struct
> skb_shared_info in the tailroom belonging to xdp_buff.
>
> Let 'truesize' return to its literal meaning, that is, when
> xdp is set, it includes the length of headroom and tailroom.
>
> Signed-off-by: Heng Qi <hengqi@linux.alibaba.com>
> Reviewed-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
Acked-by: Jason Wang <jasowang@redhat.com>
Thanks
> ---
> drivers/net/virtio_net.c | 108 ++++++++++++++++++++++++++++++++++++---
> 1 file changed, 100 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> index 6fc5302ca5ff..699e376b8f8b 100644
> --- a/drivers/net/virtio_net.c
> +++ b/drivers/net/virtio_net.c
> @@ -931,6 +931,91 @@ static struct sk_buff *receive_big(struct net_device *dev,
> return NULL;
> }
>
> +/* TODO: build xdp in big mode */
> +static int virtnet_build_xdp_buff_mrg(struct net_device *dev,
> + struct virtnet_info *vi,
> + struct receive_queue *rq,
> + struct xdp_buff *xdp,
> + void *buf,
> + unsigned int len,
> + unsigned int frame_sz,
> + u16 *num_buf,
> + unsigned int *xdp_frags_truesize,
> + struct virtnet_rq_stats *stats)
> +{
> + struct virtio_net_hdr_mrg_rxbuf *hdr = buf;
> + unsigned int headroom, tailroom, room;
> + unsigned int truesize, cur_frag_size;
> + struct skb_shared_info *shinfo;
> + unsigned int xdp_frags_truesz = 0;
> + struct page *page;
> + skb_frag_t *frag;
> + int offset;
> + void *ctx;
> +
> + xdp_init_buff(xdp, frame_sz, &rq->xdp_rxq);
> + xdp_prepare_buff(xdp, buf - VIRTIO_XDP_HEADROOM,
> + VIRTIO_XDP_HEADROOM + vi->hdr_len, len - vi->hdr_len, true);
> +
> + if (*num_buf > 1) {
> + /* If we want to build multi-buffer xdp, we need
> + * to specify that the flags of xdp_buff have the
> + * XDP_FLAGS_HAS_FRAG bit.
> + */
> + if (!xdp_buff_has_frags(xdp))
> + xdp_buff_set_frags_flag(xdp);
> +
> + shinfo = xdp_get_shared_info_from_buff(xdp);
> + shinfo->nr_frags = 0;
> + shinfo->xdp_frags_size = 0;
> + }
> +
> + if ((*num_buf - 1) > MAX_SKB_FRAGS)
> + return -EINVAL;
> +
> + while ((--*num_buf) >= 1) {
> + buf = virtqueue_get_buf_ctx(rq->vq, &len, &ctx);
> + if (unlikely(!buf)) {
> + pr_debug("%s: rx error: %d buffers out of %d missing\n",
> + dev->name, *num_buf,
> + virtio16_to_cpu(vi->vdev, hdr->num_buffers));
> + dev->stats.rx_length_errors++;
> + return -EINVAL;
> + }
> +
> + stats->bytes += len;
> + page = virt_to_head_page(buf);
> + offset = buf - page_address(page);
> +
> + truesize = mergeable_ctx_to_truesize(ctx);
> + headroom = mergeable_ctx_to_headroom(ctx);
> + tailroom = headroom ? sizeof(struct skb_shared_info) : 0;
> + room = SKB_DATA_ALIGN(headroom + tailroom);
> +
> + cur_frag_size = truesize;
> + xdp_frags_truesz += cur_frag_size;
> + if (unlikely(len > truesize - room || cur_frag_size > PAGE_SIZE)) {
> + put_page(page);
> + pr_debug("%s: rx error: len %u exceeds truesize %lu\n",
> + dev->name, len, (unsigned long)(truesize - room));
> + dev->stats.rx_length_errors++;
> + return -EINVAL;
> + }
> +
> + frag = &shinfo->frags[shinfo->nr_frags++];
> + __skb_frag_set_page(frag, page);
> + skb_frag_off_set(frag, offset);
> + skb_frag_size_set(frag, len);
> + if (page_is_pfmemalloc(page))
> + xdp_buff_set_frag_pfmemalloc(xdp);
> +
> + shinfo->xdp_frags_size += len;
> + }
> +
> + *xdp_frags_truesize = xdp_frags_truesz;
> + return 0;
> +}
> +
> static struct sk_buff *receive_mergeable(struct net_device *dev,
> struct virtnet_info *vi,
> struct receive_queue *rq,
> @@ -949,15 +1034,17 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
> unsigned int truesize = mergeable_ctx_to_truesize(ctx);
> unsigned int headroom = mergeable_ctx_to_headroom(ctx);
> unsigned int metasize = 0;
> + unsigned int tailroom = headroom ? sizeof(struct skb_shared_info) : 0;
> + unsigned int room = SKB_DATA_ALIGN(headroom + tailroom);
> unsigned int frame_sz;
> int err;
>
> head_skb = NULL;
> stats->bytes += len - vi->hdr_len;
>
> - if (unlikely(len > truesize)) {
> + if (unlikely(len > truesize - room)) {
> pr_debug("%s: rx error: len %u exceeds truesize %lu\n",
> - dev->name, len, (unsigned long)ctx);
> + dev->name, len, (unsigned long)(truesize - room));
> dev->stats.rx_length_errors++;
> goto err_skb;
> }
> @@ -983,10 +1070,12 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
> if (unlikely(hdr->hdr.gso_type))
> goto err_xdp;
>
> - /* Buffers with headroom use PAGE_SIZE as alloc size,
> - * see add_recvbuf_mergeable() + get_mergeable_buf_len()
> + /* Now XDP core assumes frag size is PAGE_SIZE, but buffers
> + * with headroom may add hole in truesize, which
> + * make their length exceed PAGE_SIZE. So we disabled the
> + * hole mechanism for xdp. See add_recvbuf_mergeable().
> */
> - frame_sz = headroom ? PAGE_SIZE : truesize;
> + frame_sz = truesize;
>
> /* This happens when rx buffer size is underestimated
> * or headroom is not enough because of the buffer
> @@ -1139,9 +1228,12 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
> page = virt_to_head_page(buf);
>
> truesize = mergeable_ctx_to_truesize(ctx);
> - if (unlikely(len > truesize)) {
> + headroom = mergeable_ctx_to_headroom(ctx);
> + tailroom = headroom ? sizeof(struct skb_shared_info) : 0;
> + room = SKB_DATA_ALIGN(headroom + tailroom);
> + if (unlikely(len > truesize - room)) {
> pr_debug("%s: rx error: len %u exceeds truesize %lu\n",
> - dev->name, len, (unsigned long)ctx);
> + dev->name, len, (unsigned long)(truesize - room));
> dev->stats.rx_length_errors++;
> goto err_skb;
> }
> @@ -1428,7 +1520,7 @@ static int add_recvbuf_mergeable(struct virtnet_info *vi,
> }
>
> sg_init_one(rq->sg, buf, len);
> - ctx = mergeable_len_to_ctx(len, headroom);
> + ctx = mergeable_len_to_ctx(len + room, headroom);
> err = virtqueue_add_inbuf_ctx(rq->vq, rq->sg, 1, buf, ctx, gfp);
> if (err < 0)
> put_page(virt_to_head_page(buf));
^ permalink raw reply [flat|nested] 16+ messages in thread
* [PATCH v3 5/9] virtio-net: construct multi-buffer xdp in mergeable
2023-01-03 6:40 [PATCH v3 0/9] virtio-net: support multi buffer xdp Heng Qi
` (3 preceding siblings ...)
2023-01-03 6:40 ` [PATCH v3 4/9] virtio-net: build xdp_buff with multi buffers Heng Qi
@ 2023-01-03 6:40 ` Heng Qi
2023-01-13 3:01 ` Jason Wang
2023-01-03 6:40 ` [PATCH v3 6/9] virtio-net: transmit the multi-buffer xdp Heng Qi
` (3 subsequent siblings)
8 siblings, 1 reply; 16+ messages in thread
From: Heng Qi @ 2023-01-03 6:40 UTC (permalink / raw)
To: netdev, bpf
Cc: Jason Wang, Michael S . Tsirkin, Paolo Abeni, Jakub Kicinski,
John Fastabend, David S . Miller, Daniel Borkmann,
Alexei Starovoitov, Eric Dumazet, Xuan Zhuo
Build multi-buffer xdp using virtnet_build_xdp_buff_mrg().
For buffers that were prefilled before xdp was set, we will probably
use vq reset in the future. Also, virtio-net currently uses compound
pages, and bpf_xdp_frags_increase_tail() needs to calculate the
tailroom of the last frag; that calculation involves the offset within
the corresponding page and can yield a negative value, so we disable
tail increase by not setting xdp_rxq->frag_size.
Signed-off-by: Heng Qi <hengqi@linux.alibaba.com>
Reviewed-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
---
drivers/net/virtio_net.c | 58 ++++++++++++++++++++++++++++++----------
1 file changed, 44 insertions(+), 14 deletions(-)
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 699e376b8f8b..ab01cf3855bc 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -1036,7 +1036,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
unsigned int metasize = 0;
unsigned int tailroom = headroom ? sizeof(struct skb_shared_info) : 0;
unsigned int room = SKB_DATA_ALIGN(headroom + tailroom);
- unsigned int frame_sz;
+ unsigned int frame_sz, xdp_room;
int err;
head_skb = NULL;
@@ -1057,11 +1057,14 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
rcu_read_lock();
xdp_prog = rcu_dereference(rq->xdp_prog);
if (xdp_prog) {
+ unsigned int xdp_frags_truesz = 0;
+ struct skb_shared_info *shinfo;
struct xdp_frame *xdpf;
struct page *xdp_page;
struct xdp_buff xdp;
void *data;
u32 act;
+ int i;
/* Transient failure which in theory could occur if
* in-flight packets from before XDP was enabled reach
@@ -1077,14 +1080,16 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
*/
frame_sz = truesize;
- /* This happens when rx buffer size is underestimated
- * or headroom is not enough because of the buffer
- * was refilled before XDP is set. This should only
- * happen for the first several packets, so we don't
- * care much about its performance.
+ /* This happens when headroom is not enough because
+ * of the buffer was prefilled before XDP is set.
+ * This should only happen for the first several packets.
+ * In fact, vq reset can be used here to help us clean up
+ * the prefilled buffers, but many existing devices do not
+ * support it, and we don't want to bother users who are
+ * using xdp normally.
*/
- if (unlikely(num_buf > 1 ||
- headroom < virtnet_get_headroom(vi))) {
+ if (!xdp_prog->aux->xdp_has_frags &&
+ (num_buf > 1 || headroom < virtnet_get_headroom(vi))) {
/* linearize data for XDP */
xdp_page = xdp_linearize_page(rq, &num_buf,
page, offset,
@@ -1095,17 +1100,29 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
if (!xdp_page)
goto err_xdp;
offset = VIRTIO_XDP_HEADROOM;
+ } else if (unlikely(headroom < virtnet_get_headroom(vi))) {
+ xdp_room = SKB_DATA_ALIGN(VIRTIO_XDP_HEADROOM +
+ sizeof(struct skb_shared_info));
+ if (len + xdp_room > PAGE_SIZE)
+ goto err_xdp;
+
+ xdp_page = alloc_page(GFP_ATOMIC);
+ if (!xdp_page)
+ goto err_xdp;
+
+ memcpy(page_address(xdp_page) + VIRTIO_XDP_HEADROOM,
+ page_address(page) + offset, len);
+ frame_sz = PAGE_SIZE;
+ offset = VIRTIO_XDP_HEADROOM;
} else {
xdp_page = page;
}
- /* Allow consuming headroom but reserve enough space to push
- * the descriptor on if we get an XDP_TX return code.
- */
data = page_address(xdp_page) + offset;
- xdp_init_buff(&xdp, frame_sz - vi->hdr_len, &rq->xdp_rxq);
- xdp_prepare_buff(&xdp, data - VIRTIO_XDP_HEADROOM + vi->hdr_len,
- VIRTIO_XDP_HEADROOM, len - vi->hdr_len, true);
+ err = virtnet_build_xdp_buff_mrg(dev, vi, rq, &xdp, data, len, frame_sz,
+ &num_buf, &xdp_frags_truesz, stats);
+ if (unlikely(err))
+ goto err_xdp_frags;
act = bpf_prog_run_xdp(xdp_prog, &xdp);
stats->xdp_packets++;
@@ -1201,6 +1218,19 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
__free_pages(xdp_page, 0);
goto err_xdp;
}
+err_xdp_frags:
+ if (unlikely(xdp_page != page))
+ __free_pages(xdp_page, 0);
+
+ if (xdp_buff_has_frags(&xdp)) {
+ shinfo = xdp_get_shared_info_from_buff(&xdp);
+ for (i = 0; i < shinfo->nr_frags; i++) {
+ xdp_page = skb_frag_page(&shinfo->frags[i]);
+ put_page(xdp_page);
+ }
+ }
+
+ goto err_xdp;
}
rcu_read_unlock();
--
2.19.1.6.gb485710b
^ permalink raw reply related [flat|nested] 16+ messages in thread
* Re: [PATCH v3 5/9] virtio-net: construct multi-buffer xdp in mergeable
2023-01-03 6:40 ` [PATCH v3 5/9] virtio-net: construct multi-buffer xdp in mergeable Heng Qi
@ 2023-01-13 3:01 ` Jason Wang
0 siblings, 0 replies; 16+ messages in thread
From: Jason Wang @ 2023-01-13 3:01 UTC (permalink / raw)
To: Heng Qi, netdev, bpf
Cc: Michael S . Tsirkin, Paolo Abeni, Jakub Kicinski, John Fastabend,
David S . Miller, Daniel Borkmann, Alexei Starovoitov,
Eric Dumazet, Xuan Zhuo
在 2023/1/3 14:40, Heng Qi 写道:
> Build multi-buffer xdp using virtnet_build_xdp_buff_mrg().
>
> For the prefilled buffer before xdp is set, we will probably use
> vq reset in the future. At the same time, virtio net currently
> uses comp pages, and bpf_xdp_frags_increase_tail() needs to calculate
> the tailroom of the last frag, which will involve the offset of the
> corresponding page and cause a negative value, so we disable tail
> increase by not setting xdp_rxq->frag_size.
>
> Signed-off-by: Heng Qi <hengqi@linux.alibaba.com>
> Reviewed-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
Acked-by: Jason Wang <jasowang@redhat.com>
Thanks
> ---
> drivers/net/virtio_net.c | 58 ++++++++++++++++++++++++++++++----------
> 1 file changed, 44 insertions(+), 14 deletions(-)
>
> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> index 699e376b8f8b..ab01cf3855bc 100644
> --- a/drivers/net/virtio_net.c
> +++ b/drivers/net/virtio_net.c
> @@ -1036,7 +1036,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
> unsigned int metasize = 0;
> unsigned int tailroom = headroom ? sizeof(struct skb_shared_info) : 0;
> unsigned int room = SKB_DATA_ALIGN(headroom + tailroom);
> - unsigned int frame_sz;
> + unsigned int frame_sz, xdp_room;
> int err;
>
> head_skb = NULL;
> @@ -1057,11 +1057,14 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
> rcu_read_lock();
> xdp_prog = rcu_dereference(rq->xdp_prog);
> if (xdp_prog) {
> + unsigned int xdp_frags_truesz = 0;
> + struct skb_shared_info *shinfo;
> struct xdp_frame *xdpf;
> struct page *xdp_page;
> struct xdp_buff xdp;
> void *data;
> u32 act;
> + int i;
>
> /* Transient failure which in theory could occur if
> * in-flight packets from before XDP was enabled reach
> @@ -1077,14 +1080,16 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
> */
> frame_sz = truesize;
>
> - /* This happens when rx buffer size is underestimated
> - * or headroom is not enough because of the buffer
> - * was refilled before XDP is set. This should only
> - * happen for the first several packets, so we don't
> - * care much about its performance.
> + /* This happens when headroom is not enough because
> + * of the buffer was prefilled before XDP is set.
> + * This should only happen for the first several packets.
> + * In fact, vq reset can be used here to help us clean up
> + * the prefilled buffers, but many existing devices do not
> + * support it, and we don't want to bother users who are
> + * using xdp normally.
> */
> - if (unlikely(num_buf > 1 ||
> - headroom < virtnet_get_headroom(vi))) {
> + if (!xdp_prog->aux->xdp_has_frags &&
> + (num_buf > 1 || headroom < virtnet_get_headroom(vi))) {
> /* linearize data for XDP */
> xdp_page = xdp_linearize_page(rq, &num_buf,
> page, offset,
> @@ -1095,17 +1100,29 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
> if (!xdp_page)
> goto err_xdp;
> offset = VIRTIO_XDP_HEADROOM;
> + } else if (unlikely(headroom < virtnet_get_headroom(vi))) {
> + xdp_room = SKB_DATA_ALIGN(VIRTIO_XDP_HEADROOM +
> + sizeof(struct skb_shared_info));
> + if (len + xdp_room > PAGE_SIZE)
> + goto err_xdp;
> +
> + xdp_page = alloc_page(GFP_ATOMIC);
> + if (!xdp_page)
> + goto err_xdp;
> +
> + memcpy(page_address(xdp_page) + VIRTIO_XDP_HEADROOM,
> + page_address(page) + offset, len);
> + frame_sz = PAGE_SIZE;
> + offset = VIRTIO_XDP_HEADROOM;
> } else {
> xdp_page = page;
> }
>
> - /* Allow consuming headroom but reserve enough space to push
> - * the descriptor on if we get an XDP_TX return code.
> - */
> data = page_address(xdp_page) + offset;
> - xdp_init_buff(&xdp, frame_sz - vi->hdr_len, &rq->xdp_rxq);
> - xdp_prepare_buff(&xdp, data - VIRTIO_XDP_HEADROOM + vi->hdr_len,
> - VIRTIO_XDP_HEADROOM, len - vi->hdr_len, true);
> + err = virtnet_build_xdp_buff_mrg(dev, vi, rq, &xdp, data, len, frame_sz,
> + &num_buf, &xdp_frags_truesz, stats);
> + if (unlikely(err))
> + goto err_xdp_frags;
>
> act = bpf_prog_run_xdp(xdp_prog, &xdp);
> stats->xdp_packets++;
> @@ -1201,6 +1218,19 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
> __free_pages(xdp_page, 0);
> goto err_xdp;
> }
> +err_xdp_frags:
> + if (unlikely(xdp_page != page))
> + __free_pages(xdp_page, 0);
> +
> + if (xdp_buff_has_frags(&xdp)) {
> + shinfo = xdp_get_shared_info_from_buff(&xdp);
> + for (i = 0; i < shinfo->nr_frags; i++) {
> + xdp_page = skb_frag_page(&shinfo->frags[i]);
> + put_page(xdp_page);
> + }
> + }
> +
> + goto err_xdp;
> }
> rcu_read_unlock();
>
^ permalink raw reply [flat|nested] 16+ messages in thread
* [PATCH v3 6/9] virtio-net: transmit the multi-buffer xdp
2023-01-03 6:40 [PATCH v3 0/9] virtio-net: support multi buffer xdp Heng Qi
` (4 preceding siblings ...)
2023-01-03 6:40 ` [PATCH v3 5/9] virtio-net: construct multi-buffer xdp in mergeable Heng Qi
@ 2023-01-03 6:40 ` Heng Qi
2023-01-03 6:40 ` [PATCH v3 7/9] virtio-net: build skb from " Heng Qi
` (2 subsequent siblings)
8 siblings, 0 replies; 16+ messages in thread
From: Heng Qi @ 2023-01-03 6:40 UTC (permalink / raw)
To: netdev, bpf
Cc: Jason Wang, Michael S . Tsirkin, Paolo Abeni, Jakub Kicinski,
John Fastabend, David S . Miller, Daniel Borkmann,
Alexei Starovoitov, Eric Dumazet, Xuan Zhuo
This serves as the basis for XDP_TX and XDP_REDIRECT
to send a multi-buffer xdp_frame.
Signed-off-by: Heng Qi <hengqi@linux.alibaba.com>
Reviewed-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
Acked-by: Jason Wang <jasowang@redhat.com>
---
drivers/net/virtio_net.c | 31 ++++++++++++++++++++++++++-----
1 file changed, 26 insertions(+), 5 deletions(-)
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index ab01cf3855bc..fee9ce31f6c7 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -563,22 +563,43 @@ static int __virtnet_xdp_xmit_one(struct virtnet_info *vi,
struct xdp_frame *xdpf)
{
struct virtio_net_hdr_mrg_rxbuf *hdr;
- int err;
+ struct skb_shared_info *shinfo;
+ u8 nr_frags = 0;
+ int err, i;
if (unlikely(xdpf->headroom < vi->hdr_len))
return -EOVERFLOW;
- /* Make room for virtqueue hdr (also change xdpf->headroom?) */
+ if (unlikely(xdp_frame_has_frags(xdpf))) {
+ shinfo = xdp_get_shared_info_from_frame(xdpf);
+ nr_frags = shinfo->nr_frags;
+ }
+
+ /* In wrapping function virtnet_xdp_xmit(), we need to free
+ * up the pending old buffers, where we need to calculate the
+ * position of skb_shared_info in xdp_get_frame_len() and
+ * xdp_return_frame(), which will involve to xdpf->data and
+ * xdpf->headroom. Therefore, we need to update the value of
+ * headroom synchronously here.
+ */
+ xdpf->headroom -= vi->hdr_len;
xdpf->data -= vi->hdr_len;
/* Zero header and leave csum up to XDP layers */
hdr = xdpf->data;
memset(hdr, 0, vi->hdr_len);
xdpf->len += vi->hdr_len;
- sg_init_one(sq->sg, xdpf->data, xdpf->len);
+ sg_init_table(sq->sg, nr_frags + 1);
+ sg_set_buf(sq->sg, xdpf->data, xdpf->len);
+ for (i = 0; i < nr_frags; i++) {
+ skb_frag_t *frag = &shinfo->frags[i];
+
+ sg_set_page(&sq->sg[i + 1], skb_frag_page(frag),
+ skb_frag_size(frag), skb_frag_off(frag));
+ }
- err = virtqueue_add_outbuf(sq->vq, sq->sg, 1, xdp_to_ptr(xdpf),
- GFP_ATOMIC);
+ err = virtqueue_add_outbuf(sq->vq, sq->sg, nr_frags + 1,
+ xdp_to_ptr(xdpf), GFP_ATOMIC);
if (unlikely(err))
return -ENOSPC; /* Caller handle free/refcnt */
--
2.19.1.6.gb485710b
^ permalink raw reply related [flat|nested] 16+ messages in thread
* [PATCH v3 7/9] virtio-net: build skb from multi-buffer xdp
2023-01-03 6:40 [PATCH v3 0/9] virtio-net: support multi buffer xdp Heng Qi
` (5 preceding siblings ...)
2023-01-03 6:40 ` [PATCH v3 6/9] virtio-net: transmit the multi-buffer xdp Heng Qi
@ 2023-01-03 6:40 ` Heng Qi
2023-01-03 6:40 ` [PATCH v3 8/9] virtio-net: remove xdp related info from page_to_skb() Heng Qi
2023-01-03 6:40 ` [PATCH v3 9/9] virtio-net: support multi-buffer xdp Heng Qi
8 siblings, 0 replies; 16+ messages in thread
From: Heng Qi @ 2023-01-03 6:40 UTC (permalink / raw)
To: netdev, bpf
Cc: Jason Wang, Michael S . Tsirkin, Paolo Abeni, Jakub Kicinski,
John Fastabend, David S . Miller, Daniel Borkmann,
Alexei Starovoitov, Eric Dumazet, Xuan Zhuo
This converts the xdp_buff directly to an skb, for both
multi-buffer and single-buffer xdp. It separates the construction
of an skb from an xdp_buff out of page_to_skb().
Signed-off-by: Heng Qi <hengqi@linux.alibaba.com>
Reviewed-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
Acked-by: Jason Wang <jasowang@redhat.com>
---
drivers/net/virtio_net.c | 49 ++++++++++++++++++++++++++++++++++++++++
1 file changed, 49 insertions(+)
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index fee9ce31f6c7..87d65b7a5033 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -952,6 +952,55 @@ static struct sk_buff *receive_big(struct net_device *dev,
return NULL;
}
+/* Why not use xdp_build_skb_from_frame() ?
+ * XDP core assumes that xdp frags are PAGE_SIZE in length, while in
+ * virtio-net there are 2 points that do not match its requirements:
+ * 1. The size of the prefilled buffer is not fixed before xdp is set.
+ * 2. xdp_build_skb_from_frame() does more checks that we don't need,
+ * like eth_type_trans() (which virtio-net does in receive_buf()).
+ */
+static struct sk_buff *build_skb_from_xdp_buff(struct net_device *dev,
+ struct virtnet_info *vi,
+ struct xdp_buff *xdp,
+ unsigned int xdp_frags_truesz)
+{
+ struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
+ unsigned int headroom, data_len;
+ struct sk_buff *skb;
+ int metasize;
+ u8 nr_frags;
+
+ if (unlikely(xdp->data_end > xdp_data_hard_end(xdp))) {
+ pr_debug("Error building skb as missing reserved tailroom for xdp");
+ return NULL;
+ }
+
+ if (unlikely(xdp_buff_has_frags(xdp)))
+ nr_frags = sinfo->nr_frags;
+
+ skb = build_skb(xdp->data_hard_start, xdp->frame_sz);
+ if (unlikely(!skb))
+ return NULL;
+
+ headroom = xdp->data - xdp->data_hard_start;
+ data_len = xdp->data_end - xdp->data;
+ skb_reserve(skb, headroom);
+ __skb_put(skb, data_len);
+
+ metasize = xdp->data - xdp->data_meta;
+ metasize = metasize > 0 ? metasize : 0;
+ if (metasize)
+ skb_metadata_set(skb, metasize);
+
+ if (unlikely(xdp_buff_has_frags(xdp)))
+ xdp_update_skb_shared_info(skb, nr_frags,
+ sinfo->xdp_frags_size,
+ xdp_frags_truesz,
+ xdp_buff_is_frag_pfmemalloc(xdp));
+
+ return skb;
+}
+
/* TODO: build xdp in big mode */
static int virtnet_build_xdp_buff_mrg(struct net_device *dev,
struct virtnet_info *vi,
--
2.19.1.6.gb485710b
^ permalink raw reply related [flat|nested] 16+ messages in thread
* [PATCH v3 8/9] virtio-net: remove xdp related info from page_to_skb()
2023-01-03 6:40 [PATCH v3 0/9] virtio-net: support multi buffer xdp Heng Qi
` (6 preceding siblings ...)
2023-01-03 6:40 ` [PATCH v3 7/9] virtio-net: build skb from " Heng Qi
@ 2023-01-03 6:40 ` Heng Qi
2023-01-03 6:40 ` [PATCH v3 9/9] virtio-net: support multi-buffer xdp Heng Qi
8 siblings, 0 replies; 16+ messages in thread
From: Heng Qi @ 2023-01-03 6:40 UTC (permalink / raw)
To: netdev, bpf
Cc: Jason Wang, Michael S . Tsirkin, Paolo Abeni, Jakub Kicinski,
John Fastabend, David S . Miller, Daniel Borkmann,
Alexei Starovoitov, Eric Dumazet, Xuan Zhuo
To make the construction of xdp_buff clearer, we remove the xdp
processing that was interleaved with page_to_skb(). Now the xdp logic
and the building of an skb from xdp are separate and independent.
Signed-off-by: Heng Qi <hengqi@linux.alibaba.com>
Reviewed-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
Acked-by: Jason Wang <jasowang@redhat.com>
---
drivers/net/virtio_net.c | 41 +++++++++-------------------------------
1 file changed, 9 insertions(+), 32 deletions(-)
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 87d65b7a5033..2c7dcad049fb 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -439,9 +439,7 @@ static unsigned int mergeable_ctx_to_truesize(void *mrg_ctx)
static struct sk_buff *page_to_skb(struct virtnet_info *vi,
struct receive_queue *rq,
struct page *page, unsigned int offset,
- unsigned int len, unsigned int truesize,
- bool hdr_valid, unsigned int metasize,
- unsigned int headroom)
+ unsigned int len, unsigned int truesize)
{
struct sk_buff *skb;
struct virtio_net_hdr_mrg_rxbuf *hdr;
@@ -459,21 +457,11 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
else
hdr_padded_len = sizeof(struct padded_vnet_hdr);
- /* If headroom is not 0, there is an offset between the beginning of the
- * data and the allocated space, otherwise the data and the allocated
- * space are aligned.
- *
- * Buffers with headroom use PAGE_SIZE as alloc size, see
- * add_recvbuf_mergeable() + get_mergeable_buf_len()
- */
- truesize = headroom ? PAGE_SIZE : truesize;
- tailroom = truesize - headroom;
- buf = p - headroom;
-
+ buf = p;
len -= hdr_len;
offset += hdr_padded_len;
p += hdr_padded_len;
- tailroom -= hdr_padded_len + len;
+ tailroom = truesize - hdr_padded_len - len;
shinfo_size = SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
@@ -503,7 +491,7 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
if (len <= skb_tailroom(skb))
copy = len;
else
- copy = ETH_HLEN + metasize;
+ copy = ETH_HLEN;
skb_put_data(skb, p, copy);
len -= copy;
@@ -542,19 +530,11 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
give_pages(rq, page);
ok:
- /* hdr_valid means no XDP, so we can copy the vnet header */
- if (hdr_valid) {
- hdr = skb_vnet_hdr(skb);
- memcpy(hdr, hdr_p, hdr_len);
- }
+ hdr = skb_vnet_hdr(skb);
+ memcpy(hdr, hdr_p, hdr_len);
if (page_to_free)
put_page(page_to_free);
- if (metasize) {
- __skb_pull(skb, metasize);
- skb_metadata_set(skb, metasize);
- }
-
return skb;
}
@@ -938,7 +918,7 @@ static struct sk_buff *receive_big(struct net_device *dev,
{
struct page *page = buf;
struct sk_buff *skb =
- page_to_skb(vi, rq, page, 0, len, PAGE_SIZE, true, 0, 0);
+ page_to_skb(vi, rq, page, 0, len, PAGE_SIZE);
stats->bytes += len - vi->hdr_len;
if (unlikely(!skb))
@@ -1236,9 +1216,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
rcu_read_unlock();
put_page(page);
head_skb = page_to_skb(vi, rq, xdp_page, offset,
- len, PAGE_SIZE, false,
- metasize,
- headroom);
+ len, PAGE_SIZE);
return head_skb;
}
break;
@@ -1305,8 +1283,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
rcu_read_unlock();
skip_xdp:
- head_skb = page_to_skb(vi, rq, page, offset, len, truesize, !xdp_prog,
- metasize, headroom);
+ head_skb = page_to_skb(vi, rq, page, offset, len, truesize);
curr_skb = head_skb;
if (unlikely(!curr_skb))
--
2.19.1.6.gb485710b
^ permalink raw reply related [flat|nested] 16+ messages in thread
* [PATCH v3 9/9] virtio-net: support multi-buffer xdp
2023-01-03 6:40 [PATCH v3 0/9] virtio-net: support multi buffer xdp Heng Qi
` (7 preceding siblings ...)
2023-01-03 6:40 ` [PATCH v3 8/9] virtio-net: remove xdp related info from page_to_skb() Heng Qi
@ 2023-01-03 6:40 ` Heng Qi
8 siblings, 0 replies; 16+ messages in thread
From: Heng Qi @ 2023-01-03 6:40 UTC (permalink / raw)
To: netdev, bpf
Cc: Jason Wang, Michael S . Tsirkin, Paolo Abeni, Jakub Kicinski,
John Fastabend, David S . Miller, Daniel Borkmann,
Alexei Starovoitov, Eric Dumazet, Xuan Zhuo
The driver can pass the skb to the stack via build_skb_from_xdp_buff().
The driver forwards multi-buffer packets using the send queue on
XDP_TX and XDP_REDIRECT, and releases the references to the packet's
pages on XDP_DROP.
Signed-off-by: Heng Qi <hengqi@linux.alibaba.com>
Reviewed-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
Acked-by: Jason Wang <jasowang@redhat.com>
---
drivers/net/virtio_net.c | 65 +++++++---------------------------------
1 file changed, 10 insertions(+), 55 deletions(-)
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 2c7dcad049fb..aaa6fe9b214a 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -1083,7 +1083,6 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
struct bpf_prog *xdp_prog;
unsigned int truesize = mergeable_ctx_to_truesize(ctx);
unsigned int headroom = mergeable_ctx_to_headroom(ctx);
- unsigned int metasize = 0;
unsigned int tailroom = headroom ? sizeof(struct skb_shared_info) : 0;
unsigned int room = SKB_DATA_ALIGN(headroom + tailroom);
unsigned int frame_sz, xdp_room;
@@ -1179,63 +1178,24 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
switch (act) {
case XDP_PASS:
- metasize = xdp.data - xdp.data_meta;
-
- /* recalculate offset to account for any header
- * adjustments and minus the metasize to copy the
- * metadata in page_to_skb(). Note other cases do not
- * build an skb and avoid using offset
- */
- offset = xdp.data - page_address(xdp_page) -
- vi->hdr_len - metasize;
-
- /* recalculate len if xdp.data, xdp.data_end or
- * xdp.data_meta were adjusted
- */
- len = xdp.data_end - xdp.data + vi->hdr_len + metasize;
-
- /* recalculate headroom if xdp.data or xdp_data_meta
- * were adjusted, note that offset should always point
- * to the start of the reserved bytes for virtio_net
- * header which are followed by xdp.data, that means
- * that offset is equal to the headroom (when buf is
- * starting at the beginning of the page, otherwise
- * there is a base offset inside the page) but it's used
- * with a different starting point (buf start) than
- * xdp.data (buf start + vnet hdr size). If xdp.data or
- * data_meta were adjusted by the xdp prog then the
- * headroom size has changed and so has the offset, we
- * can use data_hard_start, which points at buf start +
- * vnet hdr size, to calculate the new headroom and use
- * it later to compute buf start in page_to_skb()
- */
- headroom = xdp.data - xdp.data_hard_start - metasize;
-
- /* We can only create skb based on xdp_page. */
- if (unlikely(xdp_page != page)) {
- rcu_read_unlock();
+ if (unlikely(xdp_page != page))
put_page(page);
- head_skb = page_to_skb(vi, rq, xdp_page, offset,
- len, PAGE_SIZE);
- return head_skb;
- }
- break;
+ head_skb = build_skb_from_xdp_buff(dev, vi, &xdp, xdp_frags_truesz);
+ rcu_read_unlock();
+ return head_skb;
case XDP_TX:
stats->xdp_tx++;
xdpf = xdp_convert_buff_to_frame(&xdp);
if (unlikely(!xdpf)) {
- if (unlikely(xdp_page != page))
- put_page(xdp_page);
- goto err_xdp;
+ netdev_dbg(dev, "convert buff to frame failed for xdp\n");
+ goto err_xdp_frags;
}
err = virtnet_xdp_xmit(dev, 1, &xdpf, 0);
if (unlikely(!err)) {
xdp_return_frame_rx_napi(xdpf);
} else if (unlikely(err < 0)) {
trace_xdp_exception(vi->dev, xdp_prog, act);
- if (unlikely(xdp_page != page))
- put_page(xdp_page);
- goto err_xdp;
+ goto err_xdp_frags;
}
*xdp_xmit |= VIRTIO_XDP_TX;
if (unlikely(xdp_page != page))
@@ -1245,11 +1205,8 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
case XDP_REDIRECT:
stats->xdp_redirects++;
err = xdp_do_redirect(dev, &xdp, xdp_prog);
- if (err) {
- if (unlikely(xdp_page != page))
- put_page(xdp_page);
- goto err_xdp;
- }
+ if (err)
+ goto err_xdp_frags;
*xdp_xmit |= VIRTIO_XDP_REDIR;
if (unlikely(xdp_page != page))
put_page(page);
@@ -1262,9 +1219,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
trace_xdp_exception(vi->dev, xdp_prog, act);
fallthrough;
case XDP_DROP:
- if (unlikely(xdp_page != page))
- __free_pages(xdp_page, 0);
- goto err_xdp;
+ goto err_xdp_frags;
}
err_xdp_frags:
if (unlikely(xdp_page != page))
--
2.19.1.6.gb485710b
^ permalink raw reply related [flat|nested] 16+ messages in thread