bpf.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH net-next] virtio-net: Refactor the code related to page_to_skb()
@ 2021-05-19  9:00 Xuan Zhuo
  2021-05-31  6:24 ` Jason Wang
  0 siblings, 1 reply; 2+ messages in thread
From: Xuan Zhuo @ 2021-05-19  9:00 UTC (permalink / raw)
  To: netdev
  Cc: Michael S. Tsirkin, Jason Wang, David S. Miller, Jakub Kicinski,
	Alexei Starovoitov, Daniel Borkmann, Jesper Dangaard Brouer,
	John Fastabend, Andrii Nakryiko, Martin KaFai Lau, Song Liu,
	Yonghong Song, KP Singh, virtualization, bpf

Due to long-term development, the current code structure of
page_to_skb() and the semantic expression of variables are rather
chaotic, so it is necessary to reconstruct this piece of logic.

The code has been tested with the following test code.
Call test_merge and test_big to test "merge" and "big" modes
respectively.

================================= test.sh ========================
function test()
{
    flag=$1
    max=$2
    inc=$3
    if [ "x$max" == "x" ]
    then
        max=4096
    fi
    echo $max

    for s in $(seq 64 $inc $max)
    do
        echo >> log
        echo $flag :UDP_STREAM $s >> log
        netperf -H 192.168.122.202 -l 5 -t UDP_STREAM -- -m $s >> log
        echo $flag :UDP_STREAM $s $?
    done

    for s in $(seq 64 $inc $max)
    do
        echo >> log
        echo $flag :TCP_STREAM $s >> log
        netperf -H 192.168.122.202 -l 5 -t TCP_STREAM -- -m $s >> log
        echo $flag :TCP_STREAM $s $?
    done
}

function test_merge()
{
    XDP='no-xdp'

    ssh root@192.168.122.202 ip link set dev eth0 xdp off
    test $XDP

    XDP='xdp-pass'

    ssh root@192.168.122.202 ip link set dev eth0 xdp off
    ssh root@192.168.122.202 ip link set dev eth0 xdp object xdp.o sec xdp
    test $XDP

    XDP='xdp-meta'

    ssh root@192.168.122.202 ip link set dev eth0 xdp off
    ssh root@192.168.122.202 ip link set dev eth0 xdp object xdp_meta.o sec xdp_mark
    test $XDP
}

function test_big()
{
    # set host net dev mtu to 60000
    test "big" 5000
    test "big" 60000 10
}

ssh root@192.168.122.202 ./netserver
echo '' > log

test_merge # or test_big
================================== xdp.c =================================

SEC("xdp")
int _xdp(struct xdp_md *xdp)
{
        return XDP_PASS;
}

char _license[] SEC("license") = "GPL";

================================== xdp_meta.c =================================

static long (*bpf_xdp_adjust_meta)(struct xdp_md *xdp_md, int delta) = (void *) 54;

struct meta_info {
        __u32 mark;
} __attribute__((aligned(4)));

SEC("xdp_mark")
int _xdp_mark(struct xdp_md *ctx)
{
        struct meta_info *meta;
        void *data, *data_end;
        int ret;

        ret = bpf_xdp_adjust_meta(ctx, -(int)sizeof(*meta));
        if (ret < 0)
                return XDP_ABORTED;

        data = (void *)(unsigned long)ctx->data;

        /* Check data_meta have room for meta_info struct */
        meta = (void *)(unsigned long)ctx->data_meta;
        if ((void *)(meta + 1) > data)
                return XDP_ABORTED;

        meta->mark = 42;

        return XDP_PASS;
}

char _license[] SEC("license") = "GPL";
===================================================================

Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
---
 drivers/net/virtio_net.c | 309 +++++++++++++++++++++++----------------
 1 file changed, 185 insertions(+), 124 deletions(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 7fda2ae4c40f..a117b3496653 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -249,6 +249,35 @@ struct padded_vnet_hdr {
 	char padding[4];
 };
 
+struct virtnet_page_info {
+	struct virtnet_info *vi;
+	struct receive_queue *rq;
+
+	/* this may be the head_page, buf not starts with this page */
+	struct page *page;
+
+	/* the allcated buf. this may point to the headroom */
+	char *buf;
+
+	/* the size of the buf */
+	unsigned int buf_size;
+
+	/* OUT. the offset of the remaining data in the page */
+	unsigned int offset;
+
+	char *virtnet_hdr;
+
+	/* packet data. generally point to eth header */
+	char *packet;
+
+	/* IN. packet len without virtnet hdr
+	 * OUT. the size of the remaining data
+	 */
+	unsigned int len;
+
+	unsigned int metasize;
+};
+
 static bool is_xdp_frame(void *ptr)
 {
 	return (unsigned long)ptr & VIRTIO_XDP_FLAG;
@@ -357,6 +386,89 @@ static void skb_xmit_done(struct virtqueue *vq)
 		netif_wake_subqueue(vi->dev, vq2txq(vq));
 }
 
+static struct sk_buff *virtnet_page_to_skb(struct virtnet_page_info *pinfo)
+{
+	int shinfo_size = SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+	struct virtio_net_hdr_mrg_rxbuf *hdr;
+	struct sk_buff *skb;
+	int tailroom, copy;
+
+	/* In the case of "big", tailroom may be negative, because len can be
+	 * greater than PAGE_SIZE.
+	 */
+	tailroom = pinfo->buf + pinfo->buf_size - (pinfo->packet + pinfo->len);
+
+	if (!NET_IP_ALIGN && tailroom >= shinfo_size) {
+		skb = build_skb(pinfo->buf, pinfo->buf_size);
+		if (unlikely(!skb))
+			return NULL;
+
+		skb_reserve(skb, pinfo->packet - pinfo->buf);
+		skb_put(skb, pinfo->len);
+
+		/* mark. page has been used. */
+		pinfo->page = NULL;
+	} else {
+		/* copy small data so we can reuse these pages for small data
+		 *
+		 * GOOD_COPY_LEN is used to save network headers, such as eth
+		 * header, ip header, tcp header. If you want to save metadata
+		 * information, we should apply for a larger space. Prevent the
+		 * network header cannot fit in the linear space.
+		 */
+		skb = napi_alloc_skb(&pinfo->rq->napi,
+				     pinfo->metasize + GOOD_COPY_LEN);
+		if (unlikely(!skb))
+			return NULL;
+
+		/* Copy all frame if it fits skb->head, otherwise
+		 * we let virtio_net_hdr_to_skb() and GRO pull headers as needed.
+		 */
+		if (pinfo->len <= GOOD_COPY_LEN)
+			copy = pinfo->len;
+		else
+			copy = ETH_HLEN;
+
+		skb_put_data(skb, pinfo->packet - pinfo->metasize,
+			     copy + pinfo->metasize);
+		__skb_pull(skb, pinfo->metasize);
+		pinfo->len -= copy;
+		pinfo->offset = pinfo->packet + copy -
+				(char *)page_address(pinfo->page);
+	}
+
+	if (pinfo->metasize)
+		skb_metadata_set(skb, pinfo->metasize);
+
+	if (pinfo->virtnet_hdr) {
+		hdr = skb_vnet_hdr(skb);
+		memcpy(hdr, pinfo->virtnet_hdr, pinfo->vi->hdr_len);
+	}
+
+	return skb;
+}
+
+static struct sk_buff *virtnet_merge_page_to_skb(struct virtnet_page_info *pinfo)
+{
+	struct sk_buff *skb;
+
+	skb = virtnet_page_to_skb(pinfo);
+	if (unlikely(!skb))
+		return NULL;
+
+	/* page has been used by build_skb() */
+	if (!pinfo->page)
+		return skb;
+
+	if (pinfo->len)
+		skb_add_rx_frag(skb, 0, pinfo->page, pinfo->offset, pinfo->len,
+				pinfo->buf_size);
+	else
+		put_page(pinfo->page);
+
+	return skb;
+}
+
 #define MRG_CTX_HEADER_SHIFT 22
 static void *mergeable_len_to_ctx(unsigned int truesize,
 				  unsigned int headroom)
@@ -375,86 +487,30 @@ static unsigned int mergeable_ctx_to_truesize(void *mrg_ctx)
 }
 
 /* Called from bottom half context */
-static struct sk_buff *page_to_skb(struct virtnet_info *vi,
-				   struct receive_queue *rq,
-				   struct page *page, unsigned int offset,
-				   unsigned int len, unsigned int truesize,
-				   bool hdr_valid, unsigned int metasize,
-				   unsigned int headroom)
+static struct sk_buff *virtnet_big_page_to_skb(struct virtnet_page_info *pinfo)
 {
+	unsigned int len, offset, truesize;
+	struct receive_queue *rq;
 	struct sk_buff *skb;
-	struct virtio_net_hdr_mrg_rxbuf *hdr;
-	unsigned int copy, hdr_len, hdr_padded_len;
-	struct page *page_to_free = NULL;
-	int tailroom, shinfo_size;
-	char *p, *hdr_p, *buf;
+	struct page *page;
 
-	p = page_address(page) + offset;
-	hdr_p = p;
+	/* save next page */
+	page = (struct page *)pinfo->page->private;
 
-	hdr_len = vi->hdr_len;
-	if (vi->mergeable_rx_bufs)
-		hdr_padded_len = sizeof(*hdr);
-	else
-		hdr_padded_len = sizeof(struct padded_vnet_hdr);
-
-	/* If headroom is not 0, there is an offset between the beginning of the
-	 * data and the allocated space, otherwise the data and the allocated
-	 * space are aligned.
-	 */
-	if (headroom) {
-		/* Buffers with headroom use PAGE_SIZE as alloc size,
-		 * see add_recvbuf_mergeable() + get_mergeable_buf_len()
-		 */
-		truesize = PAGE_SIZE;
-		tailroom = truesize - len - offset;
-		buf = page_address(page);
-	} else {
-		tailroom = truesize - len;
-		buf = p;
-	}
-
-	len -= hdr_len;
-	offset += hdr_padded_len;
-	p += hdr_padded_len;
-
-	shinfo_size = SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
-
-	/* copy small packet so we can reuse these pages */
-	if (!NET_IP_ALIGN && len > GOOD_COPY_LEN && tailroom >= shinfo_size) {
-		skb = build_skb(buf, truesize);
-		if (unlikely(!skb))
-			return NULL;
-
-		skb_reserve(skb, p - buf);
-		skb_put(skb, len);
-		goto ok;
-	}
-
-	/* copy small packet so we can reuse these pages for small data */
-	skb = napi_alloc_skb(&rq->napi, GOOD_COPY_LEN);
+	skb = virtnet_page_to_skb(pinfo);
 	if (unlikely(!skb))
 		return NULL;
 
-	/* Copy all frame if it fits skb->head, otherwise
-	 * we let virtio_net_hdr_to_skb() and GRO pull headers as needed.
-	 */
-	if (len <= skb_tailroom(skb))
-		copy = len;
-	else
-		copy = ETH_HLEN + metasize;
-	skb_put_data(skb, p, copy);
+	rq = pinfo->rq;
 
-	len -= copy;
-	offset += copy;
+	/* page has been used by build_skb() */
+	if (!pinfo->page)
+		goto end;
 
-	if (vi->mergeable_rx_bufs) {
-		if (len)
-			skb_add_rx_frag(skb, 0, page, offset, len, truesize);
-		else
-			page_to_free = page;
-		goto ok;
-	}
+	page     = pinfo->page;
+	len      = pinfo->len;
+	offset   = pinfo->offset;
+	truesize = pinfo->buf_size;
 
 	/*
 	 * Verify that we can indeed put this data into a skb.
@@ -477,23 +533,10 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
 		offset = 0;
 	}
 
+end:
 	if (page)
 		give_pages(rq, page);
 
-ok:
-	/* hdr_valid means no XDP, so we can copy the vnet header */
-	if (hdr_valid) {
-		hdr = skb_vnet_hdr(skb);
-		memcpy(hdr, hdr_p, hdr_len);
-	}
-	if (page_to_free)
-		put_page(page_to_free);
-
-	if (metasize) {
-		__skb_pull(skb, metasize);
-		skb_metadata_set(skb, metasize);
-	}
-
 	return skb;
 }
 
@@ -654,17 +697,17 @@ static unsigned int virtnet_get_headroom(struct virtnet_info *vi)
  */
 static struct page *xdp_linearize_page(struct receive_queue *rq,
 				       u16 *num_buf,
-				       struct page *p,
-				       int offset,
+				       void *buf,
 				       int page_off,
 				       unsigned int *len)
 {
 	struct page *page = alloc_page(GFP_ATOMIC);
+	struct page *p;
 
 	if (!page)
 		return NULL;
 
-	memcpy(page_address(page) + page_off, page_address(p) + offset, *len);
+	memcpy(page_address(page) + page_off, buf, *len);
 	page_off += *len;
 
 	while (--*num_buf) {
@@ -739,18 +782,18 @@ static struct sk_buff *receive_small(struct net_device *dev,
 			goto err_xdp;
 
 		if (unlikely(xdp_headroom < virtnet_get_headroom(vi))) {
-			int offset = buf - page_address(page) + header_offset;
 			unsigned int tlen = len + vi->hdr_len;
 			u16 num_buf = 1;
 
+			buf += header_offset;
+
 			xdp_headroom = virtnet_get_headroom(vi);
 			header_offset = VIRTNET_RX_PAD + xdp_headroom;
 			headroom = vi->hdr_len + header_offset;
 			buflen = SKB_DATA_ALIGN(GOOD_PACKET_LEN + headroom) +
 				 SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
-			xdp_page = xdp_linearize_page(rq, &num_buf, page,
-						      offset, header_offset,
-						      &tlen);
+			xdp_page = xdp_linearize_page(rq, &num_buf, buf,
+						      header_offset, &tlen);
 			if (!xdp_page)
 				goto err_xdp;
 
@@ -842,9 +885,21 @@ static struct sk_buff *receive_big(struct net_device *dev,
 				   unsigned int len,
 				   struct virtnet_rq_stats *stats)
 {
+	struct virtnet_page_info pinfo;
 	struct page *page = buf;
-	struct sk_buff *skb =
-		page_to_skb(vi, rq, page, 0, len, PAGE_SIZE, true, 0, 0);
+	struct sk_buff *skb;
+
+	pinfo.rq          = rq;
+	pinfo.vi          = vi;
+	pinfo.page        = page;
+	pinfo.buf         = page_address(page);
+	pinfo.buf_size    = PAGE_SIZE;
+	pinfo.virtnet_hdr = pinfo.buf;
+	pinfo.packet      = pinfo.virtnet_hdr + sizeof(struct padded_vnet_hdr);
+	pinfo.len         = len - vi->hdr_len;
+	pinfo.metasize    = 0;
+
+	skb = virtnet_big_page_to_skb(&pinfo);
 
 	stats->bytes += len - vi->hdr_len;
 	if (unlikely(!skb))
@@ -870,12 +925,11 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
 	struct virtio_net_hdr_mrg_rxbuf *hdr = buf;
 	u16 num_buf = virtio16_to_cpu(vi->vdev, hdr->num_buffers);
 	struct page *page = virt_to_head_page(buf);
-	int offset = buf - page_address(page);
 	struct sk_buff *head_skb, *curr_skb;
+	struct virtnet_page_info pinfo;
 	struct bpf_prog *xdp_prog;
 	unsigned int truesize = mergeable_ctx_to_truesize(ctx);
 	unsigned int headroom = mergeable_ctx_to_headroom(ctx);
-	unsigned int metasize = 0;
 	unsigned int frame_sz;
 	int err;
 
@@ -887,8 +941,8 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
 	if (xdp_prog) {
 		struct xdp_frame *xdpf;
 		struct page *xdp_page;
+		void *hard_start;
 		struct xdp_buff xdp;
-		void *data;
 		u32 act;
 
 		/* Transient failure which in theory could occur if
@@ -912,54 +966,47 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
 		if (unlikely(num_buf > 1 ||
 			     headroom < virtnet_get_headroom(vi))) {
 			/* linearize data for XDP */
-			xdp_page = xdp_linearize_page(rq, &num_buf,
-						      page, offset,
+			xdp_page = xdp_linearize_page(rq, &num_buf, buf,
 						      VIRTIO_XDP_HEADROOM,
 						      &len);
 			frame_sz = PAGE_SIZE;
 
 			if (!xdp_page)
 				goto err_xdp;
-			offset = VIRTIO_XDP_HEADROOM;
+
+			hard_start = page_address(xdp_page) + vi->hdr_len;
 		} else {
 			xdp_page = page;
+			hard_start = buf + vi->hdr_len - VIRTIO_XDP_HEADROOM;
 		}
 
 		/* Allow consuming headroom but reserve enough space to push
 		 * the descriptor on if we get an XDP_TX return code.
 		 */
-		data = page_address(xdp_page) + offset;
 		xdp_init_buff(&xdp, frame_sz - vi->hdr_len, &rq->xdp_rxq);
-		xdp_prepare_buff(&xdp, data - VIRTIO_XDP_HEADROOM + vi->hdr_len,
-				 VIRTIO_XDP_HEADROOM, len - vi->hdr_len, true);
+		xdp_prepare_buff(&xdp, hard_start, VIRTIO_XDP_HEADROOM,
+				 len - vi->hdr_len, true);
 
 		act = bpf_prog_run_xdp(xdp_prog, &xdp);
 		stats->xdp_packets++;
 
 		switch (act) {
 		case XDP_PASS:
-			metasize = xdp.data - xdp.data_meta;
-
-			/* recalculate offset to account for any header
-			 * adjustments and minus the metasize to copy the
-			 * metadata in page_to_skb(). Note other cases do not
-			 * build an skb and avoid using offset
-			 */
-			offset = xdp.data - page_address(xdp_page) -
-				 vi->hdr_len - metasize;
-
-			/* recalculate len if xdp.data, xdp.data_end or
-			 * xdp.data_meta were adjusted
-			 */
-			len = xdp.data_end - xdp.data + vi->hdr_len + metasize;
+			pinfo.rq          = rq;
+			pinfo.vi          = vi;
+			pinfo.page        = xdp_page;
+			pinfo.buf         = xdp.data_hard_start - vi->hdr_len;
+			pinfo.buf_size    = PAGE_SIZE;
+			pinfo.virtnet_hdr = NULL;
+			pinfo.packet      = xdp.data;
+			pinfo.len         = xdp.data_end - xdp.data;
+			pinfo.metasize    = xdp.data - xdp.data_meta;
 			/* We can only create skb based on xdp_page. */
 			if (unlikely(xdp_page != page)) {
 				rcu_read_unlock();
 				put_page(page);
-				head_skb = page_to_skb(vi, rq, xdp_page, offset,
-						       len, PAGE_SIZE, false,
-						       metasize, headroom);
-				return head_skb;
+
+				return virtnet_merge_page_to_skb(&pinfo);
 			}
 			break;
 		case XDP_TX:
@@ -1005,8 +1052,22 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
 				__free_pages(xdp_page, 0);
 			goto err_xdp;
 		}
+		rcu_read_unlock();
+
+		/* pinfo has been filled inside XDP_PASS */
+	} else {
+		rcu_read_unlock();
+
+		pinfo.rq          = rq;
+		pinfo.vi          = vi;
+		pinfo.page        = page;
+		pinfo.buf         = buf - headroom;
+		pinfo.buf_size    = headroom ? PAGE_SIZE : truesize;
+		pinfo.virtnet_hdr = buf;
+		pinfo.packet      = buf + sizeof(*hdr);
+		pinfo.len         = len - sizeof(*hdr);
+		pinfo.metasize    = 0;
 	}
-	rcu_read_unlock();
 
 	if (unlikely(len > truesize)) {
 		pr_debug("%s: rx error: len %u exceeds truesize %lu\n",
@@ -1015,14 +1076,14 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
 		goto err_skb;
 	}
 
-	head_skb = page_to_skb(vi, rq, page, offset, len, truesize, !xdp_prog,
-			       metasize, headroom);
+	head_skb = virtnet_merge_page_to_skb(&pinfo);
 	curr_skb = head_skb;
 
 	if (unlikely(!curr_skb))
 		goto err_skb;
 	while (--num_buf) {
 		int num_skb_frags;
+		int offset;
 
 		buf = virtqueue_get_buf_ctx(rq->vq, &len, &ctx);
 		if (unlikely(!buf)) {
-- 
2.31.0


^ permalink raw reply related	[flat|nested] 2+ messages in thread

* Re: [PATCH net-next] virtio-net: Refactor the code related to page_to_skb()
  2021-05-19  9:00 [PATCH net-next] virtio-net: Refactor the code related to page_to_skb() Xuan Zhuo
@ 2021-05-31  6:24 ` Jason Wang
  0 siblings, 0 replies; 2+ messages in thread
From: Jason Wang @ 2021-05-31  6:24 UTC (permalink / raw)
  To: Xuan Zhuo, netdev
  Cc: Michael S. Tsirkin, David S. Miller, Jakub Kicinski,
	Alexei Starovoitov, Daniel Borkmann, Jesper Dangaard Brouer,
	John Fastabend, Andrii Nakryiko, Martin KaFai Lau, Song Liu,
	Yonghong Song, KP Singh, virtualization, bpf


在 2021/5/19 下午5:00, Xuan Zhuo 写道:
> Due to long-term development, the current code structure of
> page_to_skb() and the semantic expression of variables are rather
> chaotic, so it is necessary to reconstruct this piece of logic.
>
> The code has been tested with the following test code.
> Call test_merge and test_big to test "merge" and "big" modes
> respectively.
>
> ================================= test.sh ========================
> function test()
> {
>      flag=$1
>      max=$2
>      inc=$3
>      if [ "x$max" == "x" ]
>      then
>          max=4096
>      fi
>      echo $max
>
>      for s in $(seq 64 $inc $max)
>      do
>          echo >> log
>          echo $flag :UDP_STREAM $s >> log
>          netperf -H 192.168.122.202 -l 5 -t UDP_STREAM -- -m $s >> log
>          echo $flag :UDP_STREAM $s $?
>      done
>
>      for s in $(seq 64 $inc $max)
>      do
>          echo >> log
>          echo $flag :TCP_STREAM $s >> log
>          netperf -H 192.168.122.202 -l 5 -t TCP_STREAM -- -m $s >> log
>          echo $flag :TCP_STREAM $s $?
>      done
> }
>
> function test_merge()
> {
>      XDP='no-xdp'
>
>      ssh root@192.168.122.202 ip link set dev eth0 xdp off
>      test $XDP
>
>      XDP='xdp-pass'
>
>      ssh root@192.168.122.202 ip link set dev eth0 xdp off
>      ssh root@192.168.122.202 ip link set dev eth0 xdp object xdp.o sec xdp
>      test $XDP
>
>      XDP='xdp-meta'
>
>      ssh root@192.168.122.202 ip link set dev eth0 xdp off
>      ssh root@192.168.122.202 ip link set dev eth0 xdp object xdp_meta.o sec xdp_mark
>      test $XDP
> }
>
> function test_big()
> {
>      # set host net dev mtu to 60000
>      test "big" 5000
>      test "big" 60000 10
> }
>
> ssh root@192.168.122.202 ./netserver
> echo '' > log
>
> test_merge # or test_big
> ================================== xdp.c =================================
>
> SEC("xdp")
> int _xdp(struct xdp_md *xdp)
> {
>          return XDP_PASS;
> }
>
> char _license[] SEC("license") = "GPL";
>
> ================================== xdp_meta.c =================================
>
> static long (*bpf_xdp_adjust_meta)(struct xdp_md *xdp_md, int delta) = (void *) 54;
>
> struct meta_info {
>          __u32 mark;
> } __attribute__((aligned(4)));
>
> SEC("xdp_mark")
> int _xdp_mark(struct xdp_md *ctx)
> {
>          struct meta_info *meta;
>          void *data, *data_end;
>          int ret;
>
>          ret = bpf_xdp_adjust_meta(ctx, -(int)sizeof(*meta));
>          if (ret < 0)
>                  return XDP_ABORTED;
>
>          data = (void *)(unsigned long)ctx->data;
>
>          /* Check data_meta have room for meta_info struct */
>          meta = (void *)(unsigned long)ctx->data_meta;
>          if ((void *)(meta + 1) > data)
>                  return XDP_ABORTED;
>
>          meta->mark = 42;
>
>          return XDP_PASS;
> }
>
> char _license[] SEC("license") = "GPL";
> ===================================================================


I'm not sure virtio-net needs such a huge refactoring that just do the 
simplification of parameters passing for page_to_skb(). This will 
complicate stable backports in the future.

I suspect it may have performance regression.

Do you have any perf numbers? E.g pps.

Thanks


>
> Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
> ---
>   drivers/net/virtio_net.c | 309 +++++++++++++++++++++++----------------
>   1 file changed, 185 insertions(+), 124 deletions(-)
>
> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> index 7fda2ae4c40f..a117b3496653 100644
> --- a/drivers/net/virtio_net.c
> +++ b/drivers/net/virtio_net.c
> @@ -249,6 +249,35 @@ struct padded_vnet_hdr {
>   	char padding[4];
>   };
>   
> +struct virtnet_page_info {
> +	struct virtnet_info *vi;
> +	struct receive_queue *rq;
> +
> +	/* this may be the head_page, buf not starts with this page */
> +	struct page *page;
> +
> +	/* the allcated buf. this may point to the headroom */
> +	char *buf;
> +
> +	/* the size of the buf */
> +	unsigned int buf_size;
> +
> +	/* OUT. the offset of the remaining data in the page */
> +	unsigned int offset;
> +
> +	char *virtnet_hdr;
> +
> +	/* packet data. generally point to eth header */
> +	char *packet;
> +
> +	/* IN. packet len without virtnet hdr
> +	 * OUT. the size of the remaining data
> +	 */
> +	unsigned int len;
> +
> +	unsigned int metasize;
> +};
> +
>   static bool is_xdp_frame(void *ptr)
>   {
>   	return (unsigned long)ptr & VIRTIO_XDP_FLAG;
> @@ -357,6 +386,89 @@ static void skb_xmit_done(struct virtqueue *vq)
>   		netif_wake_subqueue(vi->dev, vq2txq(vq));
>   }
>   
> +static struct sk_buff *virtnet_page_to_skb(struct virtnet_page_info *pinfo)
> +{
> +	int shinfo_size = SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
> +	struct virtio_net_hdr_mrg_rxbuf *hdr;
> +	struct sk_buff *skb;
> +	int tailroom, copy;
> +
> +	/* In the case of "big", tailroom may be negative, because len can be
> +	 * greater than PAGE_SIZE.
> +	 */
> +	tailroom = pinfo->buf + pinfo->buf_size - (pinfo->packet + pinfo->len);
> +
> +	if (!NET_IP_ALIGN && tailroom >= shinfo_size) {
> +		skb = build_skb(pinfo->buf, pinfo->buf_size);
> +		if (unlikely(!skb))
> +			return NULL;
> +
> +		skb_reserve(skb, pinfo->packet - pinfo->buf);
> +		skb_put(skb, pinfo->len);
> +
> +		/* mark. page has been used. */
> +		pinfo->page = NULL;
> +	} else {
> +		/* copy small data so we can reuse these pages for small data
> +		 *
> +		 * GOOD_COPY_LEN is used to save network headers, such as eth
> +		 * header, ip header, tcp header. If you want to save metadata
> +		 * information, we should apply for a larger space. Prevent the
> +		 * network header cannot fit in the linear space.
> +		 */
> +		skb = napi_alloc_skb(&pinfo->rq->napi,
> +				     pinfo->metasize + GOOD_COPY_LEN);
> +		if (unlikely(!skb))
> +			return NULL;
> +
> +		/* Copy all frame if it fits skb->head, otherwise
> +		 * we let virtio_net_hdr_to_skb() and GRO pull headers as needed.
> +		 */
> +		if (pinfo->len <= GOOD_COPY_LEN)
> +			copy = pinfo->len;
> +		else
> +			copy = ETH_HLEN;
> +
> +		skb_put_data(skb, pinfo->packet - pinfo->metasize,
> +			     copy + pinfo->metasize);
> +		__skb_pull(skb, pinfo->metasize);
> +		pinfo->len -= copy;
> +		pinfo->offset = pinfo->packet + copy -
> +				(char *)page_address(pinfo->page);
> +	}
> +
> +	if (pinfo->metasize)
> +		skb_metadata_set(skb, pinfo->metasize);
> +
> +	if (pinfo->virtnet_hdr) {
> +		hdr = skb_vnet_hdr(skb);
> +		memcpy(hdr, pinfo->virtnet_hdr, pinfo->vi->hdr_len);
> +	}
> +
> +	return skb;
> +}
> +
> +static struct sk_buff *virtnet_merge_page_to_skb(struct virtnet_page_info *pinfo)
> +{
> +	struct sk_buff *skb;
> +
> +	skb = virtnet_page_to_skb(pinfo);
> +	if (unlikely(!skb))
> +		return NULL;
> +
> +	/* page has been used by build_skb() */
> +	if (!pinfo->page)
> +		return skb;
> +
> +	if (pinfo->len)
> +		skb_add_rx_frag(skb, 0, pinfo->page, pinfo->offset, pinfo->len,
> +				pinfo->buf_size);
> +	else
> +		put_page(pinfo->page);
> +
> +	return skb;
> +}
> +
>   #define MRG_CTX_HEADER_SHIFT 22
>   static void *mergeable_len_to_ctx(unsigned int truesize,
>   				  unsigned int headroom)
> @@ -375,86 +487,30 @@ static unsigned int mergeable_ctx_to_truesize(void *mrg_ctx)
>   }
>   
>   /* Called from bottom half context */
> -static struct sk_buff *page_to_skb(struct virtnet_info *vi,
> -				   struct receive_queue *rq,
> -				   struct page *page, unsigned int offset,
> -				   unsigned int len, unsigned int truesize,
> -				   bool hdr_valid, unsigned int metasize,
> -				   unsigned int headroom)
> +static struct sk_buff *virtnet_big_page_to_skb(struct virtnet_page_info *pinfo)
>   {
> +	unsigned int len, offset, truesize;
> +	struct receive_queue *rq;
>   	struct sk_buff *skb;
> -	struct virtio_net_hdr_mrg_rxbuf *hdr;
> -	unsigned int copy, hdr_len, hdr_padded_len;
> -	struct page *page_to_free = NULL;
> -	int tailroom, shinfo_size;
> -	char *p, *hdr_p, *buf;
> +	struct page *page;
>   
> -	p = page_address(page) + offset;
> -	hdr_p = p;
> +	/* save next page */
> +	page = (struct page *)pinfo->page->private;
>   
> -	hdr_len = vi->hdr_len;
> -	if (vi->mergeable_rx_bufs)
> -		hdr_padded_len = sizeof(*hdr);
> -	else
> -		hdr_padded_len = sizeof(struct padded_vnet_hdr);
> -
> -	/* If headroom is not 0, there is an offset between the beginning of the
> -	 * data and the allocated space, otherwise the data and the allocated
> -	 * space are aligned.
> -	 */
> -	if (headroom) {
> -		/* Buffers with headroom use PAGE_SIZE as alloc size,
> -		 * see add_recvbuf_mergeable() + get_mergeable_buf_len()
> -		 */
> -		truesize = PAGE_SIZE;
> -		tailroom = truesize - len - offset;
> -		buf = page_address(page);
> -	} else {
> -		tailroom = truesize - len;
> -		buf = p;
> -	}
> -
> -	len -= hdr_len;
> -	offset += hdr_padded_len;
> -	p += hdr_padded_len;
> -
> -	shinfo_size = SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
> -
> -	/* copy small packet so we can reuse these pages */
> -	if (!NET_IP_ALIGN && len > GOOD_COPY_LEN && tailroom >= shinfo_size) {
> -		skb = build_skb(buf, truesize);
> -		if (unlikely(!skb))
> -			return NULL;
> -
> -		skb_reserve(skb, p - buf);
> -		skb_put(skb, len);
> -		goto ok;
> -	}
> -
> -	/* copy small packet so we can reuse these pages for small data */
> -	skb = napi_alloc_skb(&rq->napi, GOOD_COPY_LEN);
> +	skb = virtnet_page_to_skb(pinfo);
>   	if (unlikely(!skb))
>   		return NULL;
>   
> -	/* Copy all frame if it fits skb->head, otherwise
> -	 * we let virtio_net_hdr_to_skb() and GRO pull headers as needed.
> -	 */
> -	if (len <= skb_tailroom(skb))
> -		copy = len;
> -	else
> -		copy = ETH_HLEN + metasize;
> -	skb_put_data(skb, p, copy);
> +	rq = pinfo->rq;
>   
> -	len -= copy;
> -	offset += copy;
> +	/* page has been used by build_skb() */
> +	if (!pinfo->page)
> +		goto end;
>   
> -	if (vi->mergeable_rx_bufs) {
> -		if (len)
> -			skb_add_rx_frag(skb, 0, page, offset, len, truesize);
> -		else
> -			page_to_free = page;
> -		goto ok;
> -	}
> +	page     = pinfo->page;
> +	len      = pinfo->len;
> +	offset   = pinfo->offset;
> +	truesize = pinfo->buf_size;
>   
>   	/*
>   	 * Verify that we can indeed put this data into a skb.
> @@ -477,23 +533,10 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
>   		offset = 0;
>   	}
>   
> +end:
>   	if (page)
>   		give_pages(rq, page);
>   
> -ok:
> -	/* hdr_valid means no XDP, so we can copy the vnet header */
> -	if (hdr_valid) {
> -		hdr = skb_vnet_hdr(skb);
> -		memcpy(hdr, hdr_p, hdr_len);
> -	}
> -	if (page_to_free)
> -		put_page(page_to_free);
> -
> -	if (metasize) {
> -		__skb_pull(skb, metasize);
> -		skb_metadata_set(skb, metasize);
> -	}
> -
>   	return skb;
>   }
>   
> @@ -654,17 +697,17 @@ static unsigned int virtnet_get_headroom(struct virtnet_info *vi)
>    */
>   static struct page *xdp_linearize_page(struct receive_queue *rq,
>   				       u16 *num_buf,
> -				       struct page *p,
> -				       int offset,
> +				       void *buf,
>   				       int page_off,
>   				       unsigned int *len)
>   {
>   	struct page *page = alloc_page(GFP_ATOMIC);
> +	struct page *p;
>   
>   	if (!page)
>   		return NULL;
>   
> -	memcpy(page_address(page) + page_off, page_address(p) + offset, *len);
> +	memcpy(page_address(page) + page_off, buf, *len);
>   	page_off += *len;
>   
>   	while (--*num_buf) {
> @@ -739,18 +782,18 @@ static struct sk_buff *receive_small(struct net_device *dev,
>   			goto err_xdp;
>   
>   		if (unlikely(xdp_headroom < virtnet_get_headroom(vi))) {
> -			int offset = buf - page_address(page) + header_offset;
>   			unsigned int tlen = len + vi->hdr_len;
>   			u16 num_buf = 1;
>   
> +			buf += header_offset;
> +
>   			xdp_headroom = virtnet_get_headroom(vi);
>   			header_offset = VIRTNET_RX_PAD + xdp_headroom;
>   			headroom = vi->hdr_len + header_offset;
>   			buflen = SKB_DATA_ALIGN(GOOD_PACKET_LEN + headroom) +
>   				 SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
> -			xdp_page = xdp_linearize_page(rq, &num_buf, page,
> -						      offset, header_offset,
> -						      &tlen);
> +			xdp_page = xdp_linearize_page(rq, &num_buf, buf,
> +						      header_offset, &tlen);
>   			if (!xdp_page)
>   				goto err_xdp;
>   
> @@ -842,9 +885,21 @@ static struct sk_buff *receive_big(struct net_device *dev,
>   				   unsigned int len,
>   				   struct virtnet_rq_stats *stats)
>   {
> +	struct virtnet_page_info pinfo;
>   	struct page *page = buf;
> -	struct sk_buff *skb =
> -		page_to_skb(vi, rq, page, 0, len, PAGE_SIZE, true, 0, 0);
> +	struct sk_buff *skb;
> +
> +	pinfo.rq          = rq;
> +	pinfo.vi          = vi;
> +	pinfo.page        = page;
> +	pinfo.buf         = page_address(page);
> +	pinfo.buf_size    = PAGE_SIZE;
> +	pinfo.virtnet_hdr = pinfo.buf;
> +	pinfo.packet      = pinfo.virtnet_hdr + sizeof(struct padded_vnet_hdr);
> +	pinfo.len         = len - vi->hdr_len;
> +	pinfo.metasize    = 0;
> +
> +	skb = virtnet_big_page_to_skb(&pinfo);
>   
>   	stats->bytes += len - vi->hdr_len;
>   	if (unlikely(!skb))
> @@ -870,12 +925,11 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
>   	struct virtio_net_hdr_mrg_rxbuf *hdr = buf;
>   	u16 num_buf = virtio16_to_cpu(vi->vdev, hdr->num_buffers);
>   	struct page *page = virt_to_head_page(buf);
> -	int offset = buf - page_address(page);
>   	struct sk_buff *head_skb, *curr_skb;
> +	struct virtnet_page_info pinfo;
>   	struct bpf_prog *xdp_prog;
>   	unsigned int truesize = mergeable_ctx_to_truesize(ctx);
>   	unsigned int headroom = mergeable_ctx_to_headroom(ctx);
> -	unsigned int metasize = 0;
>   	unsigned int frame_sz;
>   	int err;
>   
> @@ -887,8 +941,8 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
>   	if (xdp_prog) {
>   		struct xdp_frame *xdpf;
>   		struct page *xdp_page;
> +		void *hard_start;
>   		struct xdp_buff xdp;
> -		void *data;
>   		u32 act;
>   
>   		/* Transient failure which in theory could occur if
> @@ -912,54 +966,47 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
>   		if (unlikely(num_buf > 1 ||
>   			     headroom < virtnet_get_headroom(vi))) {
>   			/* linearize data for XDP */
> -			xdp_page = xdp_linearize_page(rq, &num_buf,
> -						      page, offset,
> +			xdp_page = xdp_linearize_page(rq, &num_buf, buf,
>   						      VIRTIO_XDP_HEADROOM,
>   						      &len);
>   			frame_sz = PAGE_SIZE;
>   
>   			if (!xdp_page)
>   				goto err_xdp;
> -			offset = VIRTIO_XDP_HEADROOM;
> +
> +			hard_start = page_address(xdp_page) + vi->hdr_len;
>   		} else {
>   			xdp_page = page;
> +			hard_start = buf + vi->hdr_len - VIRTIO_XDP_HEADROOM;
>   		}
>   
>   		/* Allow consuming headroom but reserve enough space to push
>   		 * the descriptor on if we get an XDP_TX return code.
>   		 */
> -		data = page_address(xdp_page) + offset;
>   		xdp_init_buff(&xdp, frame_sz - vi->hdr_len, &rq->xdp_rxq);
> -		xdp_prepare_buff(&xdp, data - VIRTIO_XDP_HEADROOM + vi->hdr_len,
> -				 VIRTIO_XDP_HEADROOM, len - vi->hdr_len, true);
> +		xdp_prepare_buff(&xdp, hard_start, VIRTIO_XDP_HEADROOM,
> +				 len - vi->hdr_len, true);
>   
>   		act = bpf_prog_run_xdp(xdp_prog, &xdp);
>   		stats->xdp_packets++;
>   
>   		switch (act) {
>   		case XDP_PASS:
> -			metasize = xdp.data - xdp.data_meta;
> -
> -			/* recalculate offset to account for any header
> -			 * adjustments and minus the metasize to copy the
> -			 * metadata in page_to_skb(). Note other cases do not
> -			 * build an skb and avoid using offset
> -			 */
> -			offset = xdp.data - page_address(xdp_page) -
> -				 vi->hdr_len - metasize;
> -
> -			/* recalculate len if xdp.data, xdp.data_end or
> -			 * xdp.data_meta were adjusted
> -			 */
> -			len = xdp.data_end - xdp.data + vi->hdr_len + metasize;
> +			pinfo.rq          = rq;
> +			pinfo.vi          = vi;
> +			pinfo.page        = xdp_page;
> +			pinfo.buf         = xdp.data_hard_start - vi->hdr_len;
> +			pinfo.buf_size    = PAGE_SIZE;
> +			pinfo.virtnet_hdr = NULL;
> +			pinfo.packet      = xdp.data;
> +			pinfo.len         = xdp.data_end - xdp.data;
> +			pinfo.metasize    = xdp.data - xdp.data_meta;
>   			/* We can only create skb based on xdp_page. */
>   			if (unlikely(xdp_page != page)) {
>   				rcu_read_unlock();
>   				put_page(page);
> -				head_skb = page_to_skb(vi, rq, xdp_page, offset,
> -						       len, PAGE_SIZE, false,
> -						       metasize, headroom);
> -				return head_skb;
> +
> +				return virtnet_merge_page_to_skb(&pinfo);
>   			}
>   			break;
>   		case XDP_TX:
> @@ -1005,8 +1052,22 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
>   				__free_pages(xdp_page, 0);
>   			goto err_xdp;
>   		}
> +		rcu_read_unlock();
> +
> +		/* pinfo has been filled inside XDP_PASS */
> +	} else {
> +		rcu_read_unlock();
> +
> +		pinfo.rq          = rq;
> +		pinfo.vi          = vi;
> +		pinfo.page        = page;
> +		pinfo.buf         = buf - headroom;
> +		pinfo.buf_size    = headroom ? PAGE_SIZE : truesize;
> +		pinfo.virtnet_hdr = buf;
> +		pinfo.packet      = buf + sizeof(*hdr);
> +		pinfo.len         = len - sizeof(*hdr);
> +		pinfo.metasize    = 0;
>   	}
> -	rcu_read_unlock();
>   
>   	if (unlikely(len > truesize)) {
>   		pr_debug("%s: rx error: len %u exceeds truesize %lu\n",
> @@ -1015,14 +1076,14 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
>   		goto err_skb;
>   	}
>   
> -	head_skb = page_to_skb(vi, rq, page, offset, len, truesize, !xdp_prog,
> -			       metasize, headroom);
> +	head_skb = virtnet_merge_page_to_skb(&pinfo);
>   	curr_skb = head_skb;
>   
>   	if (unlikely(!curr_skb))
>   		goto err_skb;
>   	while (--num_buf) {
>   		int num_skb_frags;
> +		int offset;
>   
>   		buf = virtqueue_get_buf_ctx(rq->vq, &len, &ctx);
>   		if (unlikely(!buf)) {


^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2021-05-31  6:24 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-05-19  9:00 [PATCH net-next] virtio-net: Refactor the code related to page_to_skb() Xuan Zhuo
2021-05-31  6:24 ` Jason Wang

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).