From: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
To: bpf@vger.kernel.org, ast@kernel.org, daniel@iogearbox.net,
	andrii@kernel.org
Cc: netdev@vger.kernel.org, magnus.karlsson@intel.com,
	bjorn@kernel.org, toke@kernel.org, kuba@kernel.org,
	horms@kernel.org, tirthendu.sarkar@intel.com
Subject: [PATCH v7 bpf-next 06/24] xsk: introduce wrappers and helpers for supporting multi-buffer in Tx path
Date: Wed, 19 Jul 2023 15:24:03 +0200
Message-ID: <20230719132421.584801-7-maciej.fijalkowski@intel.com>
In-Reply-To: <20230719132421.584801-1-maciej.fijalkowski@intel.com>

From: Tirthendu Sarkar <tirthendu.sarkar@intel.com>

In the Tx path, the xsk core reserves space in the completion queue for
each descriptor to be transmitted, and the descriptor's address is
stored in the skb destructor arg. After a successful transmission, the
skb destructor submits that address, marking the completion.

To handle multiple descriptors per packet, along with reserving space
for each descriptor, the corresponding address is now also stored in
the completion queue. The number of pending descriptors is stored in
the skb destructor arg and is used by the skb destructor to update the
completions.

Introduce 'skb' in xdp_sock to store a partially built packet when
__xsk_generic_xmit() must return before it sees the EOP descriptor for
the current packet, so that packet building can resume in the next call
of __xsk_generic_xmit().
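
As a rough sketch of the intent (hedged; the actual multi-buffer Tx
handling that uses this field lands later in the series, so the names
below are illustrative only, not code from this patch):

	skb = xs->skb;		/* resume a partially built packet, if any */
	...
	if (!eop_seen)		/* EOP descriptor not reached yet */
		xs->skb = skb;	/* keep it for the next __xsk_generic_xmit() */
	else
		xs->skb = NULL;	/* packet fully built and handed off */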

Helper functions are introduced to set and get the number of pending
descriptors in the skb destructor arg. Also, wrappers are introduced
for storing descriptor addresses, and for submitting and cancelling
(for unsuccessful transmissions) a given number of completions.
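
For reference, a condensed view of how these wrappers interact in the
generic Tx path (lifted from the diff below, with error handling and
the surrounding loop trimmed; not a complete listing):

	/* per descriptor: reserve a completion entry, recording its addr */
	if (xsk_cq_reserve_addr_locked(xs, desc.addr))
		goto out;

	skb = xsk_build_skb(xs, &desc);	/* bumps count in destructor_arg */
	if (IS_ERR(skb)) {
		xsk_cq_cancel_locked(xs, 1);	/* undo the reservation */
		goto out;
	}

	err = __dev_direct_xmit(skb, xs->queue_id);
	if (err == NETDEV_TX_BUSY)
		xsk_consume_skb(skb);	/* cancels all pending completions */

	/* on Tx completion, xsk_destruct_skb() submits as many entries
	 * as descriptors were consumed for this skb
	 */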

Signed-off-by: Tirthendu Sarkar <tirthendu.sarkar@intel.com>
---
 include/net/xdp_sock.h |  6 ++++
 net/xdp/xsk.c          | 74 ++++++++++++++++++++++++++++++------------
 net/xdp/xsk_queue.h    | 19 ++++-------
 3 files changed, 67 insertions(+), 32 deletions(-)

diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h
index 36b0411a0d1b..1617af380162 100644
--- a/include/net/xdp_sock.h
+++ b/include/net/xdp_sock.h
@@ -68,6 +68,12 @@ struct xdp_sock {
 	u64 rx_dropped;
 	u64 rx_queue_full;
 
+	/* When __xsk_generic_xmit() must return before it sees the EOP descriptor for the current
+	 * packet, the partially built skb is saved here so that packet building can resume in next
+	 * call of __xsk_generic_xmit().
+	 */
+	struct sk_buff *skb;
+
 	struct list_head map_list;
 	/* Protects map_list */
 	spinlock_t map_list_lock;
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index 01c134b1186e..ed7f86300087 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -480,19 +480,65 @@ static int xsk_wakeup(struct xdp_sock *xs, u8 flags)
 	return dev->netdev_ops->ndo_xsk_wakeup(dev, xs->queue_id, flags);
 }
 
-static void xsk_destruct_skb(struct sk_buff *skb)
+static int xsk_cq_reserve_addr_locked(struct xdp_sock *xs, u64 addr)
+{
+	unsigned long flags;
+	int ret;
+
+	spin_lock_irqsave(&xs->pool->cq_lock, flags);
+	ret = xskq_prod_reserve_addr(xs->pool->cq, addr);
+	spin_unlock_irqrestore(&xs->pool->cq_lock, flags);
+
+	return ret;
+}
+
+static void xsk_cq_submit_locked(struct xdp_sock *xs, u32 n)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&xs->pool->cq_lock, flags);
+	xskq_prod_submit_n(xs->pool->cq, n);
+	spin_unlock_irqrestore(&xs->pool->cq_lock, flags);
+}
+
+static void xsk_cq_cancel_locked(struct xdp_sock *xs, u32 n)
 {
-	u64 addr = (u64)(long)skb_shinfo(skb)->destructor_arg;
-	struct xdp_sock *xs = xdp_sk(skb->sk);
 	unsigned long flags;
 
 	spin_lock_irqsave(&xs->pool->cq_lock, flags);
-	xskq_prod_submit_addr(xs->pool->cq, addr);
+	xskq_prod_cancel_n(xs->pool->cq, n);
 	spin_unlock_irqrestore(&xs->pool->cq_lock, flags);
+}
+
+static u32 xsk_get_num_desc(struct sk_buff *skb)
+{
+	return skb ? (long)skb_shinfo(skb)->destructor_arg : 0;
+}
 
+static void xsk_destruct_skb(struct sk_buff *skb)
+{
+	xsk_cq_submit_locked(xdp_sk(skb->sk), xsk_get_num_desc(skb));
 	sock_wfree(skb);
 }
 
+static void xsk_set_destructor_arg(struct sk_buff *skb)
+{
+	long num = xsk_get_num_desc(xdp_sk(skb->sk)->skb) + 1;
+
+	skb_shinfo(skb)->destructor_arg = (void *)num;
+}
+
+static void xsk_consume_skb(struct sk_buff *skb)
+{
+	struct xdp_sock *xs = xdp_sk(skb->sk);
+
+	skb->destructor = sock_wfree;
+	xsk_cq_cancel_locked(xs, xsk_get_num_desc(skb));
+	/* Free skb without triggering the perf drop trace */
+	consume_skb(skb);
+	xs->skb = NULL;
+}
+
 static struct sk_buff *xsk_build_skb_zerocopy(struct xdp_sock *xs,
 					      struct xdp_desc *desc)
 {
@@ -578,8 +624,8 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs,
 	skb->dev = dev;
 	skb->priority = xs->sk.sk_priority;
 	skb->mark = xs->sk.sk_mark;
-	skb_shinfo(skb)->destructor_arg = (void *)(long)desc->addr;
 	skb->destructor = xsk_destruct_skb;
+	xsk_set_destructor_arg(skb);
 
 	return skb;
 }
@@ -591,7 +637,6 @@ static int __xsk_generic_xmit(struct sock *sk)
 	bool sent_frame = false;
 	struct xdp_desc desc;
 	struct sk_buff *skb;
-	unsigned long flags;
 	int err = 0;
 
 	mutex_lock(&xs->mutex);
@@ -616,31 +661,20 @@ static int __xsk_generic_xmit(struct sock *sk)
 		 * if there is space in it. This avoids having to implement
 		 * any buffering in the Tx path.
 		 */
-		spin_lock_irqsave(&xs->pool->cq_lock, flags);
-		if (xskq_prod_reserve(xs->pool->cq)) {
-			spin_unlock_irqrestore(&xs->pool->cq_lock, flags);
+		if (xsk_cq_reserve_addr_locked(xs, desc.addr))
 			goto out;
-		}
-		spin_unlock_irqrestore(&xs->pool->cq_lock, flags);
 
 		skb = xsk_build_skb(xs, &desc);
 		if (IS_ERR(skb)) {
 			err = PTR_ERR(skb);
-			spin_lock_irqsave(&xs->pool->cq_lock, flags);
-			xskq_prod_cancel(xs->pool->cq);
-			spin_unlock_irqrestore(&xs->pool->cq_lock, flags);
+			xsk_cq_cancel_locked(xs, 1);
 			goto out;
 		}
 
 		err = __dev_direct_xmit(skb, xs->queue_id);
 		if  (err == NETDEV_TX_BUSY) {
 			/* Tell user-space to retry the send */
-			skb->destructor = sock_wfree;
-			spin_lock_irqsave(&xs->pool->cq_lock, flags);
-			xskq_prod_cancel(xs->pool->cq);
-			spin_unlock_irqrestore(&xs->pool->cq_lock, flags);
-			/* Free skb without triggering the perf drop trace */
-			consume_skb(skb);
+			xsk_consume_skb(skb);
 			err = -EAGAIN;
 			goto out;
 		}
diff --git a/net/xdp/xsk_queue.h b/net/xdp/xsk_queue.h
index 2f1aacae81af..423ad7368fa2 100644
--- a/net/xdp/xsk_queue.h
+++ b/net/xdp/xsk_queue.h
@@ -297,6 +297,11 @@ static inline void xskq_cons_release(struct xsk_queue *q)
 	q->cached_cons++;
 }
 
+static inline void xskq_cons_cancel_n(struct xsk_queue *q, u32 cnt)
+{
+	q->cached_cons -= cnt;
+}
+
 static inline u32 xskq_cons_present_entries(struct xsk_queue *q)
 {
 	/* No barriers needed since data is not accessed */
@@ -324,9 +329,9 @@ static inline bool xskq_prod_is_full(struct xsk_queue *q)
 	return xskq_prod_nb_free(q, 1) ? false : true;
 }
 
-static inline void xskq_prod_cancel(struct xsk_queue *q)
+static inline void xskq_prod_cancel_n(struct xsk_queue *q, u32 cnt)
 {
-	q->cached_prod--;
+	q->cached_prod -= cnt;
 }
 
 static inline int xskq_prod_reserve(struct xsk_queue *q)
@@ -392,16 +397,6 @@ static inline void xskq_prod_submit(struct xsk_queue *q)
 	__xskq_prod_submit(q, q->cached_prod);
 }
 
-static inline void xskq_prod_submit_addr(struct xsk_queue *q, u64 addr)
-{
-	struct xdp_umem_ring *ring = (struct xdp_umem_ring *)q->ring;
-	u32 idx = q->ring->producer;
-
-	ring->desc[idx++ & q->ring_mask] = addr;
-
-	__xskq_prod_submit(q, idx);
-}
-
 static inline void xskq_prod_submit_n(struct xsk_queue *q, u32 nb_entries)
 {
 	__xskq_prod_submit(q, q->ring->producer + nb_entries);
-- 
2.34.1

