From: Jason Wang <jasowang@redhat.com>
To: mst@redhat.com, jasowang@redhat.com
Cc: kvm@vger.kernel.org, virtualization@lists.linux-foundation.org,
	netdev@vger.kernel.org, linux-kernel@vger.kernel.org
Subject: [RFC PATCH net-next 11/12] vhost_net: passing raw xdp buff to tun
Date: Mon, 21 May 2018 17:04:32 +0800
Message-ID: <1526893473-20128-12-git-send-email-jasowang@redhat.com>
In-Reply-To: <1526893473-20128-1-git-send-email-jasowang@redhat.com>

This patch implements a TUN-specific msg_control:

#define TUN_MSG_UBUF 1
#define TUN_MSG_PTR  2
struct tun_msg_ctl {
       int type;
       void *ptr;
};

The first supported type is ubuf, which is already used by the
vhost_net zerocopy code. The second is an XDP buff, which allows
vhost_net to pass an XDP buff to TUN. This will be used by the
following patches to accept an array of XDP buffs from vhost_net.
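
For illustration only (not part of the patch), a minimal sketch of the
sender side, assuming the xdp_buff has already been built as in the
previous patch; sock, err and total_len stand in for the caller's
context in vhost_net:

	struct xdp_buff xdp;	/* prepared by the caller, e.g. vhost_net_build_xdp() */
	struct tun_msg_ctl ctl;
	struct msghdr msg = {};

	ctl.type = TUN_MSG_PTR;	/* ctl.ptr carries a raw XDP buff */
	ctl.ptr = &xdp;
	msg.msg_control = &ctl;

	/* tun_sendmsg() dispatches on ctl->type and feeds the buff
	 * to tun_xdp_one() instead of tun_get_user().
	 */
	err = sock->ops->sendmsg(sock, &msg, total_len);

TUN_MSG_UBUF keeps the existing zerocopy path: ctl.ptr then points to
the struct ubuf_info, as the converted handle_tx_zerocopy() below does.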

Signed-off-by: Jason Wang <jasowang@redhat.com>
---
 drivers/net/tun.c      | 91 +++++++++++++++++++++++++++++++++++++++++++++++++-
 drivers/vhost/net.c    | 21 ++++++++++--
 include/linux/if_tun.h |  7 ++++
 3 files changed, 116 insertions(+), 3 deletions(-)

diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 2560378..b586b3f 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -2387,18 +2387,107 @@ static void tun_sock_write_space(struct sock *sk)
 	kill_fasync(&tfile->fasync, SIGIO, POLL_OUT);
 }
 
+static int tun_xdp_one(struct tun_struct *tun,
+		       struct tun_file *tfile,
+		       struct xdp_buff *xdp)
+{
+	struct virtio_net_hdr *gso = xdp->data_hard_start + sizeof(int);
+	struct tun_pcpu_stats *stats;
+	struct bpf_prog *xdp_prog;
+	struct sk_buff *skb = NULL;
+	u32 rxhash = 0, act;
+	int buflen = *(int *)xdp->data_hard_start;
+	int err = 0;
+	bool skb_xdp = false;
+
+	preempt_disable();
+	rcu_read_lock();
+
+	xdp_prog = rcu_dereference(tun->xdp_prog);
+	if (xdp_prog) {
+		if (gso->gso_type) {
+			skb_xdp = true;
+			goto build;
+		}
+		xdp_set_data_meta_invalid(xdp);
+		xdp->rxq = &tfile->xdp_rxq;
+		act = tun_do_xdp(tun, tfile, xdp_prog, xdp, &err);
+		if (err)
+			goto out;
+		if (act != XDP_PASS)
+			goto out;
+	}
+
+build:
+	skb = build_skb(xdp->data_hard_start, buflen);
+	if (!skb) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	if (skb_xdp) {
+		err = do_xdp_generic(xdp_prog, skb);
+		if (err != XDP_PASS)
+			goto out;
+	}
+
+	skb_reserve(skb, xdp->data - xdp->data_hard_start);
+	skb_put(skb, xdp->data_end - xdp->data);
+
+	if (virtio_net_hdr_to_skb(skb, gso, tun_is_little_endian(tun))) {
+		this_cpu_inc(tun->pcpu_stats->rx_frame_errors);
+		kfree_skb(skb);
+		err = -EINVAL;
+		goto out;
+	}
+
+	skb->protocol = eth_type_trans(skb, tun->dev);
+	skb_reset_network_header(skb);
+	skb_probe_transport_header(skb, 0);
+
+	if (!rcu_dereference(tun->steering_prog))
+		rxhash = __skb_get_hash_symmetric(skb);
+
+	netif_receive_skb(skb);
+
+	stats = get_cpu_ptr(tun->pcpu_stats);
+	u64_stats_update_begin(&stats->syncp);
+	stats->rx_packets++;
+	stats->rx_bytes += skb->len;
+	u64_stats_update_end(&stats->syncp);
+	put_cpu_ptr(stats);
+
+	if (rxhash)
+		tun_flow_update(tun, rxhash, tfile);
+
+out:
+	rcu_read_unlock();
+	preempt_enable();
+
+	return err;
+}
+
 static int tun_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len)
 {
 	int ret;
 	struct tun_file *tfile = container_of(sock, struct tun_file, socket);
 	struct tun_struct *tun = tun_get(tfile);
+	struct tun_msg_ctl *ctl = m->msg_control;
 
 	if (!tun)
 		return -EBADFD;
 
-	ret = tun_get_user(tun, tfile, m->msg_control, &m->msg_iter,
+	if (ctl && ctl->type == TUN_MSG_PTR) {
+		ret = tun_xdp_one(tun, tfile, ctl->ptr);
+		if (!ret)
+			ret = total_len;
+		goto out;
+	}
+
+	ret = tun_get_user(tun, tfile, ctl ? ctl->ptr : NULL, &m->msg_iter,
 			   m->msg_flags & MSG_DONTWAIT,
 			   m->msg_flags & MSG_MORE);
+out:
 	tun_put(tun);
 	return ret;
 }
diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index 1209e84..0d84de6 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -117,6 +117,7 @@ struct vhost_net_virtqueue {
 	struct vhost_net_ubuf_ref *ubufs;
 	struct ptr_ring *rx_ring;
 	struct vhost_net_buf rxq;
+	struct xdp_buff xdp[VHOST_RX_BATCH];
 };
 
 struct vhost_net {
@@ -570,6 +571,7 @@ static void handle_tx_copy(struct vhost_net *net)
 {
 	struct vhost_net_virtqueue *nvq = &net->vqs[VHOST_NET_VQ_TX];
 	struct vhost_virtqueue *vq = &nvq->vq;
+	struct xdp_buff xdp;
 	unsigned out, in;
 	int head;
 	struct msghdr msg = {
@@ -584,6 +586,7 @@ static void handle_tx_copy(struct vhost_net *net)
 	size_t hdr_size;
 	struct socket *sock;
 	struct vhost_net_ubuf_ref *uninitialized_var(ubufs);
+	struct tun_msg_ctl ctl;
 	int sent_pkts = 0;
 	s16 nheads = 0;
 
@@ -628,6 +631,14 @@ static void handle_tx_copy(struct vhost_net *net)
 		vq->heads[nheads].id = cpu_to_vhost32(vq, head);
 		vq->heads[nheads].len = 0;
 
+		err = vhost_net_build_xdp(nvq, &msg.msg_iter, &xdp);
+		if (!err) {
+			ctl.type = TUN_MSG_PTR;
+			ctl.ptr = &xdp;
+			msg.msg_control = &ctl;
+		} else
+			msg.msg_control = NULL;
+
 		total_len += len;
 		if (total_len < VHOST_NET_WEIGHT &&
 		    vhost_has_more_pkts(net, vq)) {
@@ -734,16 +745,21 @@ static void handle_tx_zerocopy(struct vhost_net *net)
 		/* use msg_control to pass vhost zerocopy ubuf info to skb */
 		if (zcopy_used) {
 			struct ubuf_info *ubuf;
+			struct tun_msg_ctl ctl;
+
 			ubuf = nvq->ubuf_info + nvq->upend_idx;
 
+			ctl.type = TUN_MSG_UBUF;
+			ctl.ptr = ubuf;
+
 			vq->heads[nvq->upend_idx].id = cpu_to_vhost32(vq, head);
 			vq->heads[nvq->upend_idx].len = VHOST_DMA_IN_PROGRESS;
 			ubuf->callback = vhost_zerocopy_callback;
 			ubuf->ctx = nvq->ubufs;
 			ubuf->desc = nvq->upend_idx;
 			refcount_set(&ubuf->refcnt, 1);
-			msg.msg_control = ubuf;
-			msg.msg_controllen = sizeof(ubuf);
+			msg.msg_control = &ctl;
+			msg.msg_controllen = sizeof(ctl);
 			ubufs = nvq->ubufs;
 			atomic_inc(&ubufs->refcount);
 			nvq->upend_idx = (nvq->upend_idx + 1) % UIO_MAXIOV;
@@ -751,6 +767,7 @@ static void handle_tx_zerocopy(struct vhost_net *net)
 			msg.msg_control = NULL;
 			ubufs = NULL;
 		}
+
 		total_len += len;
 		if (total_len < VHOST_NET_WEIGHT &&
 		    vhost_has_more_pkts(net, vq)) {
diff --git a/include/linux/if_tun.h b/include/linux/if_tun.h
index 3d2996d..ba46dce 100644
--- a/include/linux/if_tun.h
+++ b/include/linux/if_tun.h
@@ -19,6 +19,13 @@
 
 #define TUN_XDP_FLAG 0x1UL
 
+#define TUN_MSG_UBUF 1
+#define TUN_MSG_PTR  2
+struct tun_msg_ctl {
+	int type;
+	void *ptr;
+};
+
 #if defined(CONFIG_TUN) || defined(CONFIG_TUN_MODULE)
 struct socket *tun_get_socket(struct file *);
 struct ptr_ring *tun_get_tx_ring(struct file *file);
-- 
2.7.4
