netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
To: magnus.karlsson@intel.com
Cc: "Björn Töpel" <bjorn.topel@intel.com>,
	"Jonathan Lemon" <jonathan.lemon@gmail.com>,
	"David S. Miller" <davem@davemloft.net>,
	"Jakub Kicinski" <kuba@kernel.org>,
	"Alexei Starovoitov" <ast@kernel.org>,
	"Daniel Borkmann" <daniel@iogearbox.net>,
	"Jesper Dangaard Brouer" <hawk@kernel.org>,
	"John Fastabend" <john.fastabend@gmail.com>,
	"Andrii Nakryiko" <andrii@kernel.org>,
	"Martin KaFai Lau" <kafai@fb.com>,
	"Song Liu" <songliubraving@fb.com>, "Yonghong Song" <yhs@fb.com>,
	"KP Singh" <kpsingh@kernel.org>,
	netdev@vger.kernel.org (open list:XDP SOCKETS (AF_XDP)),
	bpf@vger.kernel.org (open list:XDP SOCKETS (AF_XDP)),
	linux-kernel@vger.kernel.org (open list)
Subject: [PATCH bpf-next] xsk: build skb by page
Date: Tue, 29 Dec 2020 16:32:41 +0800	[thread overview]
Message-ID: <3fab080e36b322b6749190ad6054d53c67658050.1609230615.git.xuanzhuo@linux.alibaba.com> (raw)
In-Reply-To: <CAJ8uoz2Enx-WwY6RmCp0RXBG2U3BUpagw-X8hQChPResHCM-XA@mail.gmail.com>

This patch is used to construct skb based on page to save memory copy
overhead.

Taking into account the problem of addr unaligned, and the
possibility of frame size greater than page in the future.

The test environment is Aliyun ECS server.
Test cmd:
```
xdpsock -i eth0 -t  -S -s <msg size>
```

Test result data:

size 	64	512	1024	1500
copy	1916747	1775988	1600203	1440054
page	1974058	1953655	1945463	1904478
percent	3.0%	10.0%	21.58%	32.3%

Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
---
 net/xdp/xsk.c | 68 ++++++++++++++++++++++++++++++++++++++++++++---------------
 1 file changed, 51 insertions(+), 17 deletions(-)

diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index ac4a317..7cab40f 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -430,6 +430,55 @@ static void xsk_destruct_skb(struct sk_buff *skb)
 	sock_wfree(skb);
 }
 
+static struct sk_buff *xsk_build_skb_bypage(struct xdp_sock *xs, struct xdp_desc *desc)
+{
+	char *buffer;
+	u64 addr;
+	u32 len, offset, copy, copied;
+	int err, i;
+	struct page *page;
+	struct sk_buff *skb;
+
+	skb = sock_alloc_send_skb(&xs->sk, 0, 1, &err);
+	if (unlikely(!skb))
+		return NULL;
+
+	addr = desc->addr;
+	len = desc->len;
+
+	buffer = xsk_buff_raw_get_data(xs->pool, addr);
+	offset = offset_in_page(buffer);
+	addr = buffer - (char *)xs->pool->addrs;
+
+	for (copied = 0, i = 0; copied < len; ++i) {
+		page = xs->pool->umem->pgs[addr >> PAGE_SHIFT];
+
+		get_page(page);
+
+		copy = min((u32)(PAGE_SIZE - offset), len - copied);
+
+		skb_fill_page_desc(skb, i, page, offset, copy);
+
+		copied += copy;
+		addr += copy;
+		offset = 0;
+	}
+
+	skb->len += len;
+	skb->data_len += len;
+	skb->truesize += len;
+
+	refcount_add(len, &xs->sk.sk_wmem_alloc);
+
+	skb->dev = xs->dev;
+	skb->priority = xs->sk.sk_priority;
+	skb->mark = xs->sk.sk_mark;
+	skb_shinfo(skb)->destructor_arg = (void *)(long)addr;
+	skb->destructor = xsk_destruct_skb;
+
+	return skb;
+}
+
 static int xsk_generic_xmit(struct sock *sk)
 {
 	struct xdp_sock *xs = xdp_sk(sk);
@@ -445,40 +494,25 @@ static int xsk_generic_xmit(struct sock *sk)
 		goto out;
 
 	while (xskq_cons_peek_desc(xs->tx, &desc, xs->pool)) {
-		char *buffer;
-		u64 addr;
-		u32 len;
-
 		if (max_batch-- == 0) {
 			err = -EAGAIN;
 			goto out;
 		}
 
-		len = desc.len;
-		skb = sock_alloc_send_skb(sk, len, 1, &err);
+		skb = xsk_build_skb_bypage(xs, &desc);
 		if (unlikely(!skb))
 			goto out;
 
-		skb_put(skb, len);
-		addr = desc.addr;
-		buffer = xsk_buff_raw_get_data(xs->pool, addr);
-		err = skb_store_bits(skb, 0, buffer, len);
 		/* This is the backpressure mechanism for the Tx path.
 		 * Reserve space in the completion queue and only proceed
 		 * if there is space in it. This avoids having to implement
 		 * any buffering in the Tx path.
 		 */
-		if (unlikely(err) || xskq_prod_reserve(xs->pool->cq)) {
+		if (xskq_prod_reserve(xs->pool->cq)) {
 			kfree_skb(skb);
 			goto out;
 		}
 
-		skb->dev = xs->dev;
-		skb->priority = sk->sk_priority;
-		skb->mark = sk->sk_mark;
-		skb_shinfo(skb)->destructor_arg = (void *)(long)desc.addr;
-		skb->destructor = xsk_destruct_skb;
-
 		err = __dev_direct_xmit(skb, xs->queue_id);
 		if  (err == NETDEV_TX_BUSY) {
 			/* Tell user-space to retry the send */
-- 
1.8.3.1


  reply	other threads:[~2020-12-29  8:33 UTC|newest]

Thread overview: 19+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-12-23  8:56 [PATCH bpf-next] xsk: build skb by page Xuan Zhuo
2020-12-23 10:04 ` Magnus Karlsson
2020-12-29  8:32   ` Xuan Zhuo [this message]
2020-12-31 16:29 ` John Fastabend
2021-01-16  2:44 Xuan Zhuo
2021-01-16  5:24 ` kernel test robot
2021-01-16  8:15 ` kernel test robot
2021-01-17 21:55 ` John Fastabend
2021-01-18  9:25 ` Magnus Karlsson
2021-01-18 11:57 ` Michael S. Tsirkin
2021-01-18 12:37 ` Alexander Lobakin
2021-01-18 12:40 ` Yunsheng Lin
2021-01-18 13:00   ` Alexander Lobakin
2021-01-18 14:40     ` Alexander Lobakin
2021-01-18 15:03       ` Magnus Karlsson
2021-01-18 15:10       ` Magnus Karlsson
2021-01-18 16:38         ` Alexander Lobakin
2021-01-19  7:01           ` Magnus Karlsson
2021-01-19 12:44 ` Alexander Lobakin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=3fab080e36b322b6749190ad6054d53c67658050.1609230615.git.xuanzhuo@linux.alibaba.com \
    --to=xuanzhuo@linux.alibaba.com \
    --cc=andrii@kernel.org \
    --cc=ast@kernel.org \
    --cc=bjorn.topel@intel.com \
    --cc=bpf@vger.kernel.org \
    --cc=daniel@iogearbox.net \
    --cc=davem@davemloft.net \
    --cc=hawk@kernel.org \
    --cc=john.fastabend@gmail.com \
    --cc=jonathan.lemon@gmail.com \
    --cc=kafai@fb.com \
    --cc=kpsingh@kernel.org \
    --cc=kuba@kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=magnus.karlsson@intel.com \
    --cc=netdev@vger.kernel.org \
    --cc=songliubraving@fb.com \
    --cc=yhs@fb.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).