bpf.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
To: bpf@vger.kernel.org, ast@kernel.org, daniel@iogearbox.net,
	andrii@kernel.org
Cc: netdev@vger.kernel.org, magnus.karlsson@intel.com,
	bjorn@kernel.org, tirthendu.sarkar@intel.com
Subject: [PATCH v2 bpf-next 05/22] xsk: add support for AF_XDP multi-buffer on Rx path
Date: Mon, 29 May 2023 17:50:07 +0200	[thread overview]
Message-ID: <20230529155024.222213-6-maciej.fijalkowski@intel.com> (raw)
In-Reply-To: <20230529155024.222213-1-maciej.fijalkowski@intel.com>

From: Tirthendu Sarkar <tirthendu.sarkar@intel.com>

Add multi-buffer support for AF_XDP by extending the XDP multi-buffer
support to be reflected in user-space when a packet is redirected to
an AF_XDP socket.

In the XDP implementation, the NIC driver builds the xdp_buff from the
first frag of the packet and adds any subsequent frags in the skb_shinfo
area of the xdp_buff. In AF_XDP core, XDP buffers are allocated from
xdp_sock's pool and data is copied from the driver's xdp_buff and frags.

Once an allocated XDP buffer is full and there is still data to be
copied, the 'XDP_PKT_CONTD' flag in'options' field of the corresponding
xdp ring descriptor is set and passed to the application. When application
sees the aforementioned flag set it knows there is pending data for this
packet that will be carried in the following descriptors. If there is no
more data to be copied, the flag in 'options' field is cleared for that
descriptor signalling EOP to the application.

If application reads a batch of descriptors using for example the libxdp
interfaces, it is not guaranteed that the batch will end with a full
packet. It might end in the middle of a packet and the rest of the frames
of that packet will arrive at the beginning of the next batch.

AF_XDP ensures that only a complete packet (along with all its frags) is
sent to application.

Signed-off-by: Tirthendu Sarkar <tirthendu.sarkar@intel.com>
---
 net/core/filter.c |   7 +--
 net/xdp/xsk.c     | 110 ++++++++++++++++++++++++++++++++++++----------
 2 files changed, 88 insertions(+), 29 deletions(-)

diff --git a/net/core/filter.c b/net/core/filter.c
index 968139f4a1ac..07c2ad8f3df9 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -4344,13 +4344,8 @@ int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp,
 	struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
 	enum bpf_map_type map_type = ri->map_type;
 
-	if (map_type == BPF_MAP_TYPE_XSKMAP) {
-		/* XDP_REDIRECT is not supported AF_XDP yet. */
-		if (unlikely(xdp_buff_has_frags(xdp)))
-			return -EOPNOTSUPP;
-
+	if (map_type == BPF_MAP_TYPE_XSKMAP)
 		return __xdp_do_redirect_xsk(ri, dev, xdp, xdp_prog);
-	}
 
 	return __xdp_do_redirect_frame(ri, dev, xdp_convert_buff_to_frame(xdp),
 				       xdp_prog);
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index 22eeb7f6ac05..86d8b23ae0a7 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -159,43 +159,107 @@ static int xsk_rcv_zc(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len)
 	return __xsk_rcv_zc(xs, xskb, len, 0);
 }
 
-static void xsk_copy_xdp(struct xdp_buff *to, struct xdp_buff *from, u32 len)
+static void *xsk_copy_xdp_start(struct xdp_buff *from)
 {
-	void *from_buf, *to_buf;
-	u32 metalen;
+	if (unlikely(xdp_data_meta_unsupported(from)))
+		return from->data;
+	else
+		return from->data_meta;
+}
 
-	if (unlikely(xdp_data_meta_unsupported(from))) {
-		from_buf = from->data;
-		to_buf = to->data;
-		metalen = 0;
-	} else {
-		from_buf = from->data_meta;
-		metalen = from->data - from->data_meta;
-		to_buf = to->data - metalen;
-	}
+static u32 xsk_copy_xdp(void *to, void **from, u32 to_len,
+			u32 *from_len, skb_frag_t **frag, u32 rem)
+{
+	u32 copied = 0;
+
+	while (1) {
+		u32 copy_len = min_t(u32, *from_len, to_len);
+
+		memcpy(to, *from, copy_len);
+		copied += copy_len;
+		if (rem == copied)
+			return copied;
+
+		if (*from_len == copy_len) {
+			*from = skb_frag_address(*frag);
+			*from_len = skb_frag_size((*frag)++);
+		} else {
+			*from += copy_len;
+			*from_len -= copy_len;
+		}
+		if (to_len == copy_len)
+			return copied;
 
-	memcpy(to_buf, from_buf, len + metalen);
+		to_len -= copy_len;
+		to += copy_len;
+	}
 }
 
 static int __xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len)
 {
+	u32 frame_size = xsk_pool_get_rx_frame_size(xs->pool);
+	void *copy_from = xsk_copy_xdp_start(xdp), *copy_to;
+	u32 from_len, meta_len, rem, num_desc;
 	struct xdp_buff_xsk *xskb;
 	struct xdp_buff *xsk_xdp;
-	int err;
+	skb_frag_t *frag;
+
+	from_len = xdp->data_end - copy_from;
+	meta_len = xdp->data - copy_from;
+	rem = len + meta_len;
+
+	if (len <= frame_size && !xdp_buff_has_frags(xdp)) {
+		int err;
 
-	xsk_xdp = xsk_buff_alloc(xs->pool);
-	if (!xsk_xdp) {
+		xsk_xdp = xsk_buff_alloc(xs->pool);
+		if (!xsk_xdp) {
+			xs->rx_dropped++;
+			return -ENOMEM;
+		}
+		memcpy(xsk_xdp->data - meta_len, copy_from, rem);
+		xskb = container_of(xsk_xdp, struct xdp_buff_xsk, xdp);
+		err = __xsk_rcv_zc(xs, xskb, len, 0);
+		if (err) {
+			xsk_buff_free(xsk_xdp);
+			return err;
+		}
+
+		return 0;
+	}
+
+	num_desc = (len - 1) / frame_size + 1;
+
+	if (!xsk_buff_can_alloc(xs->pool, num_desc)) {
 		xs->rx_dropped++;
 		return -ENOMEM;
 	}
+	if (xskq_prod_nb_free(xs->rx, num_desc) < num_desc) {
+		xs->rx_queue_full++;
+		return -ENOBUFS;
+	}
 
-	xsk_copy_xdp(xsk_xdp, xdp, len);
-	xskb = container_of(xsk_xdp, struct xdp_buff_xsk, xdp);
-	err = __xsk_rcv_zc(xs, xskb, len, 0);
-	if (err) {
-		xsk_buff_free(xsk_xdp);
-		return err;
+	if (xdp_buff_has_frags(xdp)) {
+		struct skb_shared_info *sinfo;
+
+		sinfo = xdp_get_shared_info_from_buff(xdp);
+		frag =  &sinfo->frags[0];
 	}
+
+	do {
+		u32 to_len = frame_size + meta_len;
+		u32 copied;
+
+		xsk_xdp = xsk_buff_alloc(xs->pool);
+		copy_to = xsk_xdp->data - meta_len;
+
+		copied = xsk_copy_xdp(copy_to, &copy_from, to_len, &from_len, &frag, rem);
+		rem -= copied;
+
+		xskb = container_of(xsk_xdp, struct xdp_buff_xsk, xdp);
+		__xsk_rcv_zc(xs, xskb, copied - meta_len, rem ? XDP_PKT_CONTD : 0);
+		meta_len = 0;
+	} while (rem);
+
 	return 0;
 }
 
@@ -225,7 +289,7 @@ static int xsk_rcv_check(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len)
 	if (xs->dev != xdp->rxq->dev || xs->queue_id != xdp->rxq->queue_index)
 		return -EINVAL;
 
-	if (len > xsk_pool_get_rx_frame_size(xs->pool)) {
+	if (len > xsk_pool_get_rx_frame_size(xs->pool) && !xs->sg) {
 		xs->rx_dropped++;
 		return -ENOSPC;
 	}
-- 
2.35.3


  parent reply	other threads:[~2023-05-29 15:50 UTC|newest]

Thread overview: 29+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-05-29 15:50 [PATCH v2 bpf-next 00/22] xsk: multi-buffer support Maciej Fijalkowski
2023-05-29 15:50 ` [PATCH v2 bpf-next 01/22] xsk: prepare 'options' in xdp_desc for multi-buffer use Maciej Fijalkowski
2023-05-29 15:50 ` [PATCH v2 bpf-next 02/22] xsk: introduce XSK_USE_SG bind flag for xsk socket Maciej Fijalkowski
2023-05-29 15:50 ` [PATCH v2 bpf-next 03/22] xsk: prepare both copy and zero-copy modes to co-exist Maciej Fijalkowski
2023-05-29 15:50 ` [PATCH v2 bpf-next 04/22] xsk: move xdp_buff's data length check to xsk_rcv_check Maciej Fijalkowski
2023-05-29 15:50 ` Maciej Fijalkowski [this message]
2023-05-29 15:50 ` [PATCH v2 bpf-next 06/22] xsk: introduce wrappers and helpers for supporting multi-buffer in Tx path Maciej Fijalkowski
2023-05-29 15:50 ` [PATCH v2 bpf-next 07/22] xsk: allow core/drivers to test EOP bit Maciej Fijalkowski
2023-05-29 15:50 ` [PATCH v2 bpf-next 08/22] xsk: add support for AF_XDP multi-buffer on Tx path Maciej Fijalkowski
2023-05-29 15:50 ` [PATCH v2 bpf-next 09/22] xsk: discard zero length descriptors in " Maciej Fijalkowski
2023-05-29 15:50 ` [PATCH v2 bpf-next 10/22] xsk: support mbuf on ZC RX Maciej Fijalkowski
2023-05-29 15:50 ` [PATCH v2 bpf-next 11/22] ice: xsk: add RX multi-buffer support Maciej Fijalkowski
2023-05-29 15:50 ` [PATCH v2 bpf-next 12/22] xsk: support ZC Tx multi-buffer in batch API Maciej Fijalkowski
2023-05-29 15:50 ` [PATCH v2 bpf-next 13/22] xsk: report ZC multi-buffer capability via xdp_features Maciej Fijalkowski
2023-05-30 11:55   ` Simon Horman
2023-05-30 12:31     ` Maciej Fijalkowski
2023-05-30 19:57       ` Simon Horman
2023-05-31 10:16   ` Magnus Karlsson
2023-05-31 13:20     ` Maciej Fijalkowski
2023-05-29 15:50 ` [PATCH v2 bpf-next 14/22] ice: xsk: Tx multi-buffer support Maciej Fijalkowski
2023-05-29 15:50 ` [PATCH v2 bpf-next 15/22] xsk: add multi-buffer documentation Maciej Fijalkowski
2023-05-29 15:50 ` [PATCH v2 bpf-next 16/22] selftests/xsk: transmit and receive multi-buffer packets Maciej Fijalkowski
2023-05-29 15:50 ` [PATCH v2 bpf-next 17/22] selftests/xsk: add basic multi-buffer test Maciej Fijalkowski
2023-06-01  5:46   ` kernel test robot
2023-05-29 15:50 ` [PATCH v2 bpf-next 18/22] selftests/xsk: add unaligned mode test for multi-buffer Maciej Fijalkowski
2023-05-29 15:50 ` [PATCH v2 bpf-next 19/22] selftests/xsk: add invalid descriptor " Maciej Fijalkowski
2023-05-29 15:50 ` [PATCH v2 bpf-next 20/22] selftests/xsk: add metadata copy test for multi-buff Maciej Fijalkowski
2023-05-29 15:50 ` [PATCH v2 bpf-next 21/22] selftests/xsk: add test for too many frags Maciej Fijalkowski
2023-05-29 15:50 ` [PATCH v2 bpf-next 22/22] selftests/xsk: reset NIC settings to default after running test suite Maciej Fijalkowski

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20230529155024.222213-6-maciej.fijalkowski@intel.com \
    --to=maciej.fijalkowski@intel.com \
    --cc=andrii@kernel.org \
    --cc=ast@kernel.org \
    --cc=bjorn@kernel.org \
    --cc=bpf@vger.kernel.org \
    --cc=daniel@iogearbox.net \
    --cc=magnus.karlsson@intel.com \
    --cc=netdev@vger.kernel.org \
    --cc=tirthendu.sarkar@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).