linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Jason Wang <jasowang@redhat.com>
To: mst@redhat.com, jasowang@redhat.com
Cc: kvm@vger.kernel.org, virtualization@lists.linux-foundation.org,
	netdev@vger.kernel.org, linux-kernel@vger.kernel.org
Subject: [RFC PATCH net-next 12/12] vhost_net: batch submitting XDP buffers to underlayer sockets
Date: Mon, 21 May 2018 17:04:33 +0800	[thread overview]
Message-ID: <1526893473-20128-13-git-send-email-jasowang@redhat.com> (raw)
In-Reply-To: <1526893473-20128-1-git-send-email-jasowang@redhat.com>

This patch implements XDP batching for vhost_net with tun. This is
done by batching XDP buffs in vhost and submit them when:

- vhost_net can not build XDP buff (mostly because of the size of packet)
- #batched exceeds the limitation (VHOST_NET_RX_BATCH).
- tun accept a batch of XDP buff through msg_control and process them
  in a batch

With this tun XDP can benefit from e.g batch transmission during
XDP_REDIRECT or XDP_TX.

Tests shows 21% improvement on TX pps (from ~3.2Mpps to ~3.9Mpps)
while transmitting through testpmd from guest to host by
xdp_redirect_map between tap0 and ixgbe.

Signed-off-by: Jason Wang <jasowang@redhat.com>
---
 drivers/net/tun.c   | 36 +++++++++++++++++----------
 drivers/vhost/net.c | 71 ++++++++++++++++++++++++++++++++++++-----------------
 2 files changed, 71 insertions(+), 36 deletions(-)

diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index b586b3f..5d16d18 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -1616,7 +1616,6 @@ static u32 tun_do_xdp(struct tun_struct *tun,
 	switch (act) {
 	case XDP_REDIRECT:
 		*err = xdp_do_redirect(tun->dev, xdp, xdp_prog);
-		xdp_do_flush_map();
 		if (*err)
 			break;
 		goto out;
@@ -1624,7 +1623,6 @@ static u32 tun_do_xdp(struct tun_struct *tun,
 		*err = tun_xdp_tx(tun->dev, xdp);
 		if (*err)
 			break;
-		tun_xdp_flush(tun->dev);
 		goto out;
 	case XDP_PASS:
 		goto out;
@@ -2400,9 +2398,6 @@ static int tun_xdp_one(struct tun_struct *tun,
 	int err = 0;
 	bool skb_xdp = false;
 
-	preempt_disable();
-	rcu_read_lock();
-
 	xdp_prog = rcu_dereference(tun->xdp_prog);
 	if (xdp_prog) {
 		if (gso->gso_type) {
@@ -2461,15 +2456,12 @@ static int tun_xdp_one(struct tun_struct *tun,
 		tun_flow_update(tun, rxhash, tfile);
 
 out:
-	rcu_read_unlock();
-	preempt_enable();
-
 	return err;
 }
 
 static int tun_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len)
 {
-	int ret;
+	int ret, i;
 	struct tun_file *tfile = container_of(sock, struct tun_file, socket);
 	struct tun_struct *tun = tun_get(tfile);
 	struct tun_msg_ctl *ctl = m->msg_control;
@@ -2477,10 +2469,28 @@ static int tun_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len)
 	if (!tun)
 		return -EBADFD;
 
-	if (ctl && ctl->type == TUN_MSG_PTR) {
-		ret = tun_xdp_one(tun, tfile, ctl->ptr);
-		if (!ret)
-			ret = total_len;
+	if (ctl && ((ctl->type & 0xF) == TUN_MSG_PTR)) {
+		int n = ctl->type >> 16;
+
+		preempt_disable();
+		rcu_read_lock();
+
+		for (i = 0; i < n; i++) {
+			struct xdp_buff *x = (struct xdp_buff *)ctl->ptr;
+			struct xdp_buff *xdp = &x[i];
+
+			xdp_set_data_meta_invalid(xdp);
+			xdp->rxq = &tfile->xdp_rxq;
+			tun_xdp_one(tun, tfile, xdp);
+		}
+
+		xdp_do_flush_map();
+		tun_xdp_flush(tun->dev);
+
+		rcu_read_unlock();
+		preempt_enable();
+
+		ret = total_len;
 		goto out;
 	}
 
diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index 0d84de6..bec4109 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -118,6 +118,7 @@ struct vhost_net_virtqueue {
 	struct ptr_ring *rx_ring;
 	struct vhost_net_buf rxq;
 	struct xdp_buff xdp[VHOST_RX_BATCH];
+	struct vring_used_elem heads[VHOST_RX_BATCH];
 };
 
 struct vhost_net {
@@ -511,7 +512,7 @@ static int vhost_net_build_xdp(struct vhost_net_virtqueue *nvq,
 	void *buf;
 	int copied;
 
-	if (len < nvq->sock_hlen)
+	if (unlikely(len < nvq->sock_hlen))
 		return -EFAULT;
 
 	if (SKB_DATA_ALIGN(len + pad) +
@@ -567,11 +568,37 @@ static int vhost_net_build_xdp(struct vhost_net_virtqueue *nvq,
 	return 0;
 }
 
+static void vhost_tx_batch(struct vhost_net *net,
+			   struct vhost_net_virtqueue *nvq,
+			   struct socket *sock,
+			   struct msghdr *msghdr, int n)
+{
+	struct tun_msg_ctl ctl = {
+		.type = n << 16 | TUN_MSG_PTR,
+		.ptr = nvq->xdp,
+	};
+	int err;
+
+	if (n == 0)
+		return;
+
+	msghdr->msg_control = &ctl;
+	err = sock->ops->sendmsg(sock, msghdr, 0);
+
+	if (unlikely(err < 0)) {
+		/* FIXME vq_err() */
+		vq_err(&nvq->vq, "sendmsg err!\n");
+		return;
+	}
+	vhost_add_used_and_signal_n(&net->dev, &nvq->vq, nvq->vq.heads, n);
+}
+
+/* Expects to be always run from workqueue - which acts as
+ * read-size critical section for our kind of RCU. */
 static void handle_tx_copy(struct vhost_net *net)
 {
 	struct vhost_net_virtqueue *nvq = &net->vqs[VHOST_NET_VQ_TX];
 	struct vhost_virtqueue *vq = &nvq->vq;
-	struct xdp_buff xdp;
 	unsigned out, in;
 	int head;
 	struct msghdr msg = {
@@ -586,7 +613,6 @@ static void handle_tx_copy(struct vhost_net *net)
 	size_t hdr_size;
 	struct socket *sock;
 	struct vhost_net_ubuf_ref *uninitialized_var(ubufs);
-	struct tun_msg_ctl ctl;
 	int sent_pkts = 0;
 	s16 nheads = 0;
 
@@ -631,22 +657,24 @@ static void handle_tx_copy(struct vhost_net *net)
 		vq->heads[nheads].id = cpu_to_vhost32(vq, head);
 		vq->heads[nheads].len = 0;
 
-		err = vhost_net_build_xdp(nvq, &msg.msg_iter, &xdp);
-		if (!err) {
-			ctl.type = TUN_MSG_PTR;
-			ctl.ptr = &xdp;
-			msg.msg_control = &ctl;
-		} else
-			msg.msg_control = NULL;
-
 		total_len += len;
-		if (total_len < VHOST_NET_WEIGHT &&
-		    vhost_has_more_pkts(net, vq)) {
-			msg.msg_flags |= MSG_MORE;
-		} else {
-			msg.msg_flags &= ~MSG_MORE;
+		err = vhost_net_build_xdp(nvq, &msg.msg_iter,
+					  &nvq->xdp[nheads]);
+		if (!err) {
+			if (++nheads == VHOST_RX_BATCH) {
+				vhost_tx_batch(net, nvq, sock, &msg, nheads);
+				nheads = 0;
+			}
+			goto done;
+		} else if (unlikely(err != -ENOSPC)) {
+			vq_err(vq, "Fail to build XDP buffer\n");
+			break;
 		}
 
+		vhost_tx_batch(net, nvq, sock, &msg, nheads);
+		msg.msg_control = NULL;
+		nheads = 0;
+
 		/* TODO: Check specific error and bomb out unless ENOBUFS? */
 		err = sock->ops->sendmsg(sock, &msg, len);
 		if (unlikely(err < 0)) {
@@ -657,11 +685,9 @@ static void handle_tx_copy(struct vhost_net *net)
 		if (err != len)
 			pr_debug("Truncated TX packet: "
 				 " len %d != %zd\n", err, len);
-		if (++nheads == VHOST_RX_BATCH) {
-			vhost_add_used_and_signal_n(&net->dev, vq, vq->heads,
-						    nheads);
-			nheads = 0;
-		}
+
+		vhost_add_used_and_signal(&net->dev, vq, head, 0);
+done:
 		if (vhost_exceeds_weight(++sent_pkts, total_len)) {
 			vhost_poll_queue(&vq->poll);
 			break;
@@ -669,8 +695,7 @@ static void handle_tx_copy(struct vhost_net *net)
 	}
 out:
 	if (nheads)
-		vhost_add_used_and_signal_n(&net->dev, vq, vq->heads,
-					    nheads);
+		vhost_tx_batch(net, nvq, sock, &msg, nheads);
 	mutex_unlock(&vq->mutex);
 }
 
-- 
2.7.4

  parent reply	other threads:[~2018-05-21  9:05 UTC|newest]

Thread overview: 27+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-05-21  9:04 [RFC PATCH net-next 00/12] XDP batching for TUN/vhost_net Jason Wang
2018-05-21  9:04 ` [RFC PATCH net-next 01/12] vhost_net: introduce helper to initialize tx iov iter Jason Wang
2018-05-21 16:24   ` Jesse Brandeburg
2018-05-22 12:26     ` Jason Wang
2018-05-21  9:04 ` [RFC PATCH net-next 02/12] vhost_net: introduce vhost_exceeds_weight() Jason Wang
2018-05-21 16:29   ` Jesse Brandeburg
2018-05-22 12:27     ` Jason Wang
2018-05-21  9:04 ` [RFC PATCH net-next 03/12] vhost_net: introduce vhost_has_more_pkts() Jason Wang
2018-05-21 16:39   ` Jesse Brandeburg
2018-05-22 12:31     ` Jason Wang
2018-05-21  9:04 ` [RFC PATCH net-next 04/12] vhost_net: split out datacopy logic Jason Wang
2018-05-21 16:46   ` Jesse Brandeburg
2018-05-22 12:39     ` Jason Wang
2018-05-21  9:04 ` [RFC PATCH net-next 05/12] vhost_net: batch update used ring for datacopy TX Jason Wang
2018-05-21  9:04 ` [RFC PATCH net-next 06/12] tuntap: enable premmption early Jason Wang
2018-05-21 14:32   ` Michael S. Tsirkin
2018-05-21  9:04 ` [RFC PATCH net-next 07/12] tuntap: simplify error handling in tun_build_skb() Jason Wang
2018-05-21  9:04 ` [RFC PATCH net-next 08/12] tuntap: tweak on the path of non-xdp case " Jason Wang
2018-05-21  9:04 ` [RFC PATCH net-next 09/12] tuntap: split out XDP logic Jason Wang
2018-05-21  9:04 ` [RFC PATCH net-next 10/12] vhost_net: build xdp buff Jason Wang
2018-05-21 16:56   ` Jesse Brandeburg
2018-05-21 22:21     ` Michael S. Tsirkin
2018-05-22 12:41     ` Jason Wang
2018-05-21  9:04 ` [RFC PATCH net-next 11/12] vhost_net: passing raw xdp buff to tun Jason Wang
2018-05-21  9:04 ` Jason Wang [this message]
2018-05-21 14:33   ` [RFC PATCH net-next 12/12] vhost_net: batch submitting XDP buffers to underlayer sockets Michael S. Tsirkin
2018-05-25 17:53 ` [RFC PATCH net-next 00/12] XDP batching for TUN/vhost_net Michael S. Tsirkin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1526893473-20128-13-git-send-email-jasowang@redhat.com \
    --to=jasowang@redhat.com \
    --cc=kvm@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mst@redhat.com \
    --cc=netdev@vger.kernel.org \
    --cc=virtualization@lists.linux-foundation.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).