* [PATCH v2 bpf-next 1/3] net: veth: account total xdp_frame len running ndo_xdp_xmit
2022-02-15 13:08 [PATCH v2 bpf-next 0/3] introduce xdp frags support to veth driver Lorenzo Bianconi
@ 2022-02-15 13:08 ` Lorenzo Bianconi
2022-02-15 13:08 ` [PATCH v2 bpf-next 2/3] veth: rework veth_xdp_rcv_skb in order to accept non-linear skb Lorenzo Bianconi
2022-02-15 13:08 ` [PATCH v2 bpf-next 3/3] veth: allow jumbo frames in xdp mode Lorenzo Bianconi
2 siblings, 0 replies; 6+ messages in thread
From: Lorenzo Bianconi @ 2022-02-15 13:08 UTC (permalink / raw)
To: bpf, netdev
Cc: davem, kuba, ast, daniel, brouer, toke, pabeni, echaudro,
lorenzo.bianconi, toshiaki.makita1, andrii
Even though this is currently a theoretical issue, since it is not
possible to perform XDP_REDIRECT on a non-linear xdp_frame, the veth
driver does not account for the paged area in its ndo_xdp_xmit callback.
Introduce the xdp_get_frame_len utility routine to get the full length
of an xdp_frame, and use it to account for the total frame size when a
non-linear xdp frame is redirected (XDP_REDIRECT) into a veth device.
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
---
drivers/net/veth.c | 4 ++--
include/net/xdp.h | 14 ++++++++++++++
2 files changed, 16 insertions(+), 2 deletions(-)
diff --git a/drivers/net/veth.c b/drivers/net/veth.c
index 354a963075c5..22ecaf8b8f98 100644
--- a/drivers/net/veth.c
+++ b/drivers/net/veth.c
@@ -493,7 +493,7 @@ static int veth_xdp_xmit(struct net_device *dev, int n,
struct xdp_frame *frame = frames[i];
void *ptr = veth_xdp_to_ptr(frame);
- if (unlikely(frame->len > max_len ||
+ if (unlikely(xdp_get_frame_len(frame) > max_len ||
__ptr_ring_produce(&rq->xdp_ring, ptr)))
break;
nxmit++;
@@ -854,7 +854,7 @@ static int veth_xdp_rcv(struct veth_rq *rq, int budget,
/* ndo_xdp_xmit */
struct xdp_frame *frame = veth_ptr_to_xdp(ptr);
- stats->xdp_bytes += frame->len;
+ stats->xdp_bytes += xdp_get_frame_len(frame);
frame = veth_xdp_rcv_one(rq, frame, bq, stats);
if (frame) {
/* XDP_PASS */
diff --git a/include/net/xdp.h b/include/net/xdp.h
index b7721c3e4d1f..04c852c7a77f 100644
--- a/include/net/xdp.h
+++ b/include/net/xdp.h
@@ -343,6 +343,20 @@ static inline void xdp_release_frame(struct xdp_frame *xdpf)
__xdp_release_frame(xdpf->data, mem);
}
+static __always_inline unsigned int xdp_get_frame_len(struct xdp_frame *xdpf)
+{
+ struct skb_shared_info *sinfo;
+ unsigned int len = xdpf->len;
+
+ if (likely(!xdp_frame_has_frags(xdpf)))
+ goto out;
+
+ sinfo = xdp_get_shared_info_from_frame(xdpf);
+ len += sinfo->xdp_frags_size;
+out:
+ return len;
+}
+
int __xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq,
struct net_device *dev, u32 queue_index,
unsigned int napi_id, u32 frag_size);
--
2.35.1
^ permalink raw reply related [flat|nested] 6+ messages in thread
* [PATCH v2 bpf-next 2/3] veth: rework veth_xdp_rcv_skb in order to accept non-linear skb
2022-02-15 13:08 [PATCH v2 bpf-next 0/3] introduce xdp frags support to veth driver Lorenzo Bianconi
2022-02-15 13:08 ` [PATCH v2 bpf-next 1/3] net: veth: account total xdp_frame len running ndo_xdp_xmit Lorenzo Bianconi
@ 2022-02-15 13:08 ` Lorenzo Bianconi
2022-02-15 13:08 ` [PATCH v2 bpf-next 3/3] veth: allow jumbo frames in xdp mode Lorenzo Bianconi
2 siblings, 0 replies; 6+ messages in thread
From: Lorenzo Bianconi @ 2022-02-15 13:08 UTC (permalink / raw)
To: bpf, netdev
Cc: davem, kuba, ast, daniel, brouer, toke, pabeni, echaudro,
lorenzo.bianconi, toshiaki.makita1, andrii
Introduce the veth_convert_xdp_buff_from_skb routine in order to
convert a non-linear skb into an xdp buffer. If the received skb
is cloned or shared, veth_convert_xdp_buff_from_skb will copy it
into a new skb composed of order-0 pages for the linear and the
fragmented area. Moreover, veth_convert_xdp_buff_from_skb guarantees
that we have enough headroom for xdp.
This is a preliminary patch to allow attaching xdp programs with frags
support on veth devices.
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
---
drivers/net/veth.c | 174 ++++++++++++++++++++++++++++++---------------
net/core/xdp.c | 1 +
2 files changed, 119 insertions(+), 56 deletions(-)
diff --git a/drivers/net/veth.c b/drivers/net/veth.c
index 22ecaf8b8f98..a45aaaecc21f 100644
--- a/drivers/net/veth.c
+++ b/drivers/net/veth.c
@@ -432,21 +432,6 @@ static void veth_set_multicast_list(struct net_device *dev)
{
}
-static struct sk_buff *veth_build_skb(void *head, int headroom, int len,
- int buflen)
-{
- struct sk_buff *skb;
-
- skb = build_skb(head, buflen);
- if (!skb)
- return NULL;
-
- skb_reserve(skb, headroom);
- skb_put(skb, len);
-
- return skb;
-}
-
static int veth_select_rxq(struct net_device *dev)
{
return smp_processor_id() % dev->real_num_rx_queues;
@@ -694,72 +679,143 @@ static void veth_xdp_rcv_bulk_skb(struct veth_rq *rq, void **frames,
}
}
-static struct sk_buff *veth_xdp_rcv_skb(struct veth_rq *rq,
- struct sk_buff *skb,
- struct veth_xdp_tx_bq *bq,
- struct veth_stats *stats)
+static void veth_xdp_get(struct xdp_buff *xdp)
{
- u32 pktlen, headroom, act, metalen, frame_sz;
- void *orig_data, *orig_data_end;
- struct bpf_prog *xdp_prog;
- int mac_len, delta, off;
- struct xdp_buff xdp;
+ struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
+ int i;
- skb_prepare_for_gro(skb);
+ get_page(virt_to_page(xdp->data));
+ if (likely(!xdp_buff_has_frags(xdp)))
+ return;
- rcu_read_lock();
- xdp_prog = rcu_dereference(rq->xdp_prog);
- if (unlikely(!xdp_prog)) {
- rcu_read_unlock();
- goto out;
- }
+ for (i = 0; i < sinfo->nr_frags; i++)
+ __skb_frag_ref(&sinfo->frags[i]);
+}
- mac_len = skb->data - skb_mac_header(skb);
- pktlen = skb->len + mac_len;
- headroom = skb_headroom(skb) - mac_len;
+static int veth_convert_xdp_buff_from_skb(struct veth_rq *rq,
+ struct xdp_buff *xdp,
+ struct sk_buff **pskb)
+{
+ struct sk_buff *skb = *pskb;
+ u32 frame_sz;
if (skb_shared(skb) || skb_head_is_locked(skb) ||
- skb_is_nonlinear(skb) || headroom < XDP_PACKET_HEADROOM) {
+ skb_shinfo(skb)->nr_frags) {
+ u32 size, len, max_head_size, off;
struct sk_buff *nskb;
- int size, head_off;
- void *head, *start;
struct page *page;
+ int i, head_off;
- size = SKB_DATA_ALIGN(VETH_XDP_HEADROOM + pktlen) +
- SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
- if (size > PAGE_SIZE)
+ /* We need a private copy of the skb and data buffers since
+ * the ebpf program can modify it. We segment the original skb
+ * into order-0 pages without linearize it.
+ *
+ * Make sure we have enough space for linear and paged area
+ */
+ max_head_size = SKB_WITH_OVERHEAD(PAGE_SIZE -
+ VETH_XDP_HEADROOM);
+ if (skb->len > PAGE_SIZE * MAX_SKB_FRAGS + max_head_size)
goto drop;
+ /* Allocate skb head */
page = alloc_page(GFP_ATOMIC | __GFP_NOWARN);
if (!page)
goto drop;
- head = page_address(page);
- start = head + VETH_XDP_HEADROOM;
- if (skb_copy_bits(skb, -mac_len, start, pktlen)) {
- page_frag_free(head);
+ nskb = build_skb(page_address(page), PAGE_SIZE);
+ if (!nskb) {
+ put_page(page);
goto drop;
}
- nskb = veth_build_skb(head, VETH_XDP_HEADROOM + mac_len,
- skb->len, PAGE_SIZE);
- if (!nskb) {
- page_frag_free(head);
+ skb_reserve(nskb, VETH_XDP_HEADROOM);
+ size = min_t(u32, skb->len, max_head_size);
+ if (skb_copy_bits(skb, 0, nskb->data, size)) {
+ consume_skb(nskb);
goto drop;
}
+ skb_put(nskb, size);
skb_copy_header(nskb, skb);
head_off = skb_headroom(nskb) - skb_headroom(skb);
skb_headers_offset_update(nskb, head_off);
+
+ /* Allocate paged area of new skb */
+ off = size;
+ len = skb->len - off;
+
+ for (i = 0; i < MAX_SKB_FRAGS && off < skb->len; i++) {
+ page = alloc_page(GFP_ATOMIC | __GFP_NOWARN);
+ if (!page) {
+ consume_skb(nskb);
+ goto drop;
+ }
+
+ size = min_t(u32, len, PAGE_SIZE);
+ skb_add_rx_frag(nskb, i, page, 0, size, PAGE_SIZE);
+ if (skb_copy_bits(skb, off, page_address(page),
+ size)) {
+ consume_skb(nskb);
+ goto drop;
+ }
+
+ len -= size;
+ off += size;
+ }
+
consume_skb(skb);
skb = nskb;
+ } else if (skb_headroom(skb) < XDP_PACKET_HEADROOM &&
+ pskb_expand_head(skb, VETH_XDP_HEADROOM, 0, GFP_ATOMIC)) {
+ goto drop;
}
/* SKB "head" area always have tailroom for skb_shared_info */
frame_sz = skb_end_pointer(skb) - skb->head;
frame_sz += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
- xdp_init_buff(&xdp, frame_sz, &rq->xdp_rxq);
- xdp_prepare_buff(&xdp, skb->head, skb->mac_header, pktlen, true);
+ xdp_init_buff(xdp, frame_sz, &rq->xdp_rxq);
+ xdp_prepare_buff(xdp, skb->head, skb_headroom(skb),
+ skb_headlen(skb), true);
+
+ if (skb_is_nonlinear(skb)) {
+ skb_shinfo(skb)->xdp_frags_size = skb->data_len;
+ xdp_buff_set_frags_flag(xdp);
+ } else {
+ xdp_buff_clear_frags_flag(xdp);
+ }
+ *pskb = skb;
+
+ return 0;
+drop:
+ consume_skb(skb);
+ *pskb = NULL;
+
+ return -ENOMEM;
+}
+
+static struct sk_buff *veth_xdp_rcv_skb(struct veth_rq *rq,
+ struct sk_buff *skb,
+ struct veth_xdp_tx_bq *bq,
+ struct veth_stats *stats)
+{
+ void *orig_data, *orig_data_end;
+ struct bpf_prog *xdp_prog;
+ struct xdp_buff xdp;
+ u32 act, metalen;
+ int off;
+
+ skb_prepare_for_gro(skb);
+
+ rcu_read_lock();
+ xdp_prog = rcu_dereference(rq->xdp_prog);
+ if (unlikely(!xdp_prog)) {
+ rcu_read_unlock();
+ goto out;
+ }
+
+ __skb_push(skb, skb->data - skb_mac_header(skb));
+ if (veth_convert_xdp_buff_from_skb(rq, &xdp, &skb))
+ goto drop;
orig_data = xdp.data;
orig_data_end = xdp.data_end;
@@ -770,7 +826,7 @@ static struct sk_buff *veth_xdp_rcv_skb(struct veth_rq *rq,
case XDP_PASS:
break;
case XDP_TX:
- get_page(virt_to_page(xdp.data));
+ veth_xdp_get(&xdp);
consume_skb(skb);
xdp.rxq->mem = rq->xdp_mem;
if (unlikely(veth_xdp_tx(rq, &xdp, bq) < 0)) {
@@ -782,7 +838,7 @@ static struct sk_buff *veth_xdp_rcv_skb(struct veth_rq *rq,
rcu_read_unlock();
goto xdp_xmit;
case XDP_REDIRECT:
- get_page(virt_to_page(xdp.data));
+ veth_xdp_get(&xdp);
consume_skb(skb);
xdp.rxq->mem = rq->xdp_mem;
if (xdp_do_redirect(rq->dev, &xdp, xdp_prog)) {
@@ -805,18 +861,24 @@ static struct sk_buff *veth_xdp_rcv_skb(struct veth_rq *rq,
rcu_read_unlock();
/* check if bpf_xdp_adjust_head was used */
- delta = orig_data - xdp.data;
- off = mac_len + delta;
+ off = orig_data - xdp.data;
if (off > 0)
__skb_push(skb, off);
else if (off < 0)
__skb_pull(skb, -off);
- skb->mac_header -= delta;
+
+ skb_reset_mac_header(skb);
/* check if bpf_xdp_adjust_tail was used */
off = xdp.data_end - orig_data_end;
if (off != 0)
__skb_put(skb, off); /* positive on grow, negative on shrink */
+
+ if (xdp_buff_has_frags(&xdp))
+ skb->data_len = skb_shinfo(skb)->xdp_frags_size;
+ else
+ skb->data_len = 0;
+
skb->protocol = eth_type_trans(skb, rq->dev);
metalen = xdp.data - xdp.data_meta;
@@ -832,7 +894,7 @@ static struct sk_buff *veth_xdp_rcv_skb(struct veth_rq *rq,
return NULL;
err_xdp:
rcu_read_unlock();
- page_frag_free(xdp.data);
+ xdp_return_buff(&xdp);
xdp_xmit:
return NULL;
}
diff --git a/net/core/xdp.c b/net/core/xdp.c
index 361df312ee7f..b5f2d428d856 100644
--- a/net/core/xdp.c
+++ b/net/core/xdp.c
@@ -528,6 +528,7 @@ void xdp_return_buff(struct xdp_buff *xdp)
out:
__xdp_return(xdp->data, &xdp->rxq->mem, true, xdp);
}
+EXPORT_SYMBOL_GPL(xdp_return_buff);
/* Only called for MEM_TYPE_PAGE_POOL see xdp.h */
void __xdp_release_frame(void *data, struct xdp_mem_info *mem)
--
2.35.1
^ permalink raw reply related [flat|nested] 6+ messages in thread
* [PATCH v2 bpf-next 3/3] veth: allow jumbo frames in xdp mode
2022-02-15 13:08 [PATCH v2 bpf-next 0/3] introduce xdp frags support to veth driver Lorenzo Bianconi
2022-02-15 13:08 ` [PATCH v2 bpf-next 1/3] net: veth: account total xdp_frame len running ndo_xdp_xmit Lorenzo Bianconi
2022-02-15 13:08 ` [PATCH v2 bpf-next 2/3] veth: rework veth_xdp_rcv_skb in order to accept non-linear skb Lorenzo Bianconi
@ 2022-02-15 13:08 ` Lorenzo Bianconi
2022-02-22 17:21 ` Paolo Abeni
2 siblings, 1 reply; 6+ messages in thread
From: Lorenzo Bianconi @ 2022-02-15 13:08 UTC (permalink / raw)
To: bpf, netdev
Cc: davem, kuba, ast, daniel, brouer, toke, pabeni, echaudro,
lorenzo.bianconi, toshiaki.makita1, andrii
Allow increasing the MTU over page boundaries on veth devices
if the attached xdp program declares support for xdp fragments.
Enable NETIF_F_ALL_TSO when the device is running in xdp mode.
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
---
drivers/net/veth.c | 26 +++++++++++---------------
1 file changed, 11 insertions(+), 15 deletions(-)
diff --git a/drivers/net/veth.c b/drivers/net/veth.c
index a45aaaecc21f..2e048f957bc6 100644
--- a/drivers/net/veth.c
+++ b/drivers/net/veth.c
@@ -292,8 +292,6 @@ static int veth_forward_skb(struct net_device *dev, struct sk_buff *skb,
/* return true if the specified skb has chances of GRO aggregation
* Don't strive for accuracy, but try to avoid GRO overhead in the most
* common scenarios.
- * When XDP is enabled, all traffic is considered eligible, as the xmit
- * device has TSO off.
* When TSO is enabled on the xmit device, we are likely interested only
* in UDP aggregation, explicitly check for that if the skb is suspected
* - the sock_wfree destructor is used by UDP, ICMP and XDP sockets -
@@ -334,7 +332,8 @@ static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev)
* Don't bother with napi/GRO if the skb can't be aggregated
*/
use_napi = rcu_access_pointer(rq->napi) &&
- veth_skb_is_eligible_for_gro(dev, rcv, skb);
+ (rcu_access_pointer(rq->xdp_prog) ||
+ veth_skb_is_eligible_for_gro(dev, rcv, skb));
}
skb_tx_timestamp(skb);
@@ -1508,7 +1507,6 @@ static int veth_xdp_set(struct net_device *dev, struct bpf_prog *prog,
struct veth_priv *priv = netdev_priv(dev);
struct bpf_prog *old_prog;
struct net_device *peer;
- unsigned int max_mtu;
int err;
old_prog = priv->_xdp_prog;
@@ -1516,6 +1514,8 @@ static int veth_xdp_set(struct net_device *dev, struct bpf_prog *prog,
peer = rtnl_dereference(priv->peer);
if (prog) {
+ unsigned int max_mtu;
+
if (!peer) {
NL_SET_ERR_MSG_MOD(extack, "Cannot set XDP when peer is detached");
err = -ENOTCONN;
@@ -1525,9 +1525,9 @@ static int veth_xdp_set(struct net_device *dev, struct bpf_prog *prog,
max_mtu = PAGE_SIZE - VETH_XDP_HEADROOM -
peer->hard_header_len -
SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
- if (peer->mtu > max_mtu) {
- NL_SET_ERR_MSG_MOD(extack, "Peer MTU is too large to set XDP");
- err = -ERANGE;
+ if (!prog->aux->xdp_has_frags && peer->mtu > max_mtu) {
+ NL_SET_ERR_MSG_MOD(extack, "prog does not support XDP frags");
+ err = -EOPNOTSUPP;
goto err;
}
@@ -1545,10 +1545,8 @@ static int veth_xdp_set(struct net_device *dev, struct bpf_prog *prog,
}
}
- if (!old_prog) {
- peer->hw_features &= ~NETIF_F_GSO_SOFTWARE;
- peer->max_mtu = max_mtu;
- }
+ if (!old_prog)
+ peer->hw_features &= ~NETIF_F_GSO_FRAGLIST;
}
if (old_prog) {
@@ -1556,10 +1554,8 @@ static int veth_xdp_set(struct net_device *dev, struct bpf_prog *prog,
if (dev->flags & IFF_UP)
veth_disable_xdp(dev);
- if (peer) {
- peer->hw_features |= NETIF_F_GSO_SOFTWARE;
- peer->max_mtu = ETH_MAX_MTU;
- }
+ if (peer)
+ peer->hw_features |= NETIF_F_GSO_FRAGLIST;
}
bpf_prog_put(old_prog);
}
--
2.35.1
^ permalink raw reply related [flat|nested] 6+ messages in thread