From: Jason Wang <jasowang@redhat.com>
To: mst@redhat.com, jasowang@redhat.com
Cc: kvm@vger.kernel.org, virtualization@lists.linux-foundation.org,
netdev@vger.kernel.org, linux-kernel@vger.kernel.org
Subject: [RFC PATCH net-next 11/12] vhost_net: passing raw xdp buff to tun
Date: Mon, 21 May 2018 17:04:32 +0800 [thread overview]
Message-ID: <1526893473-20128-12-git-send-email-jasowang@redhat.com> (raw)
In-Reply-To: <1526893473-20128-1-git-send-email-jasowang@redhat.com>
This patch implements a TUN-specific msg_control:
#define TUN_MSG_UBUF 1
#define TUN_MSG_PTR 2
struct tun_msg_ctl {
int type;
void *ptr;
};
The first supported type is ubuf, which is already used by the vhost_net
zerocopy code. The second is an XDP buff, which allows vhost_net to pass
an XDP buff to TUN. This will be used by the following patches to
implement accepting an array of XDP buffs from vhost_net.
Signed-off-by: Jason Wang <jasowang@redhat.com>
---
drivers/net/tun.c | 91 +++++++++++++++++++++++++++++++++++++++++++++++++-
drivers/vhost/net.c | 21 ++++++++++--
include/linux/if_tun.h | 7 ++++
3 files changed, 116 insertions(+), 3 deletions(-)
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 2560378..b586b3f 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -2387,18 +2387,107 @@ static void tun_sock_write_space(struct sock *sk)
kill_fasync(&tfile->fasync, SIGIO, POLL_OUT);
}
+/*
+ * tun_xdp_one - feed one XDP buffer received through TUN_MSG_PTR to the stack
+ * @tun: tun device the packet is injected into
+ * @tfile: tun queue the buffer arrived on; supplies the XDP rxq info
+ * @xdp: buffer to process. The area at xdp->data_hard_start starts with an
+ *	 int holding the buffer length passed to build_skb(), immediately
+ *	 followed by the packet's struct virtio_net_hdr.
+ *
+ * If an XDP program is attached it is run on @xdp directly, unless the
+ * virtio-net header announces GSO, in which case the program is run through
+ * do_xdp_generic() on the skb built from the buffer. Packets that get an
+ * XDP_PASS verdict (or all packets when no program is attached) are handed
+ * to netif_receive_skb().
+ *
+ * The body runs with preemption disabled and inside rcu_read_lock().
+ *
+ * Return: 0 if the packet was delivered or consumed by XDP, -ENOMEM if no
+ * skb could be built, -EINVAL if virtio_net_hdr_to_skb() rejects the header,
+ * or the error reported by tun_do_xdp().
+ */
+static int tun_xdp_one(struct tun_struct *tun,
+		       struct tun_file *tfile,
+		       struct xdp_buff *xdp)
+{
+	struct virtio_net_hdr *gso = xdp->data_hard_start + sizeof(int);
+	struct tun_pcpu_stats *stats;
+	struct bpf_prog *xdp_prog;
+	struct sk_buff *skb = NULL;
+	u32 rxhash = 0, act;
+	int buflen = *(int *)xdp->data_hard_start;
+	unsigned int rx_bytes;
+	int err = 0;
+	bool skb_xdp = false;
+
+	preempt_disable();
+	rcu_read_lock();
+
+	xdp_prog = rcu_dereference(tun->xdp_prog);
+	if (xdp_prog) {
+		if (gso->gso_type) {
+			/* GSO packets take the skb based (generic) XDP path. */
+			skb_xdp = true;
+			goto build;
+		}
+		xdp_set_data_meta_invalid(xdp);
+		xdp->rxq = &tfile->xdp_rxq;
+		act = tun_do_xdp(tun, tfile, xdp_prog, xdp, &err);
+		if (err)
+			goto out;
+		if (act != XDP_PASS)
+			goto out;
+	}
+
+build:
+	skb = build_skb(xdp->data_hard_start, buflen);
+	if (!skb) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	skb_reserve(skb, xdp->data - xdp->data_hard_start);
+	skb_put(skb, xdp->data_end - xdp->data);
+
+	if (virtio_net_hdr_to_skb(skb, gso, tun_is_little_endian(tun))) {
+		this_cpu_inc(tun->pcpu_stats->rx_frame_errors);
+		kfree_skb(skb);
+		err = -EINVAL;
+		goto out;
+	}
+
+	skb->protocol = eth_type_trans(skb, tun->dev);
+	skb_reset_network_header(skb);
+	skb_probe_transport_header(skb, 0);
+
+	if (skb_xdp) {
+		/*
+		 * Generic XDP must only run once the skb describes the packet
+		 * (data/len and MAC header are set up above). On any verdict
+		 * other than XDP_PASS the skb has been consumed; that is not
+		 * an error, so leave err at 0 instead of leaking the XDP
+		 * action code to the caller.
+		 */
+		if (do_xdp_generic(xdp_prog, skb) != XDP_PASS)
+			goto out;
+	}
+
+	if (!rcu_dereference(tun->steering_prog))
+		rxhash = __skb_get_hash_symmetric(skb);
+
+	/*
+	 * netif_receive_skb() takes ownership of the skb, so it must not be
+	 * dereferenced afterwards; sample the length for the stats first.
+	 */
+	rx_bytes = skb->len;
+	netif_receive_skb(skb);
+
+	stats = get_cpu_ptr(tun->pcpu_stats);
+	u64_stats_update_begin(&stats->syncp);
+	stats->rx_packets++;
+	stats->rx_bytes += rx_bytes;
+	u64_stats_update_end(&stats->syncp);
+	put_cpu_ptr(stats);
+
+	if (rxhash)
+		tun_flow_update(tun, rxhash, tfile);
+
+out:
+	rcu_read_unlock();
+	preempt_enable();
+
+	return err;
+}
+
+
static int tun_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len)
{
int ret;
struct tun_file *tfile = container_of(sock, struct tun_file, socket);
struct tun_struct *tun = tun_get(tfile);
+ struct tun_msg_ctl *ctl = m->msg_control; /* may be NULL; msg_control is now a typed tun_msg_ctl */
if (!tun)
return -EBADFD;
- ret = tun_get_user(tun, tfile, m->msg_control, &m->msg_iter,
+ if (ctl && ctl->type == TUN_MSG_PTR) { /* ctl->ptr is handed to tun_xdp_one() as an xdp_buff */
+ ret = tun_xdp_one(tun, tfile, ctl->ptr);
+ if (!ret) /* 0 from tun_xdp_one() is reported as the whole message sent */
+ ret = total_len;
+ goto out; /* msg_iter is not consumed on this path */
+ }
+
+ ret = tun_get_user(tun, tfile, ctl ? ctl->ptr : NULL, &m->msg_iter, /* any other type: ctl->ptr takes the place of the old raw msg_control argument */
m->msg_flags & MSG_DONTWAIT,
m->msg_flags & MSG_MORE);
+out: /* common exit: drop the reference taken by tun_get() */
tun_put(tun);
return ret;
}
diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index 1209e84..0d84de6 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -117,6 +117,7 @@ struct vhost_net_virtqueue {
struct vhost_net_ubuf_ref *ubufs;
struct ptr_ring *rx_ring;
struct vhost_net_buf rxq;
+ struct xdp_buff xdp[VHOST_RX_BATCH];
};
struct vhost_net {
@@ -570,6 +571,7 @@ static void handle_tx_copy(struct vhost_net *net)
{
struct vhost_net_virtqueue *nvq = &net->vqs[VHOST_NET_VQ_TX];
struct vhost_virtqueue *vq = &nvq->vq;
+ struct xdp_buff xdp;
unsigned out, in;
int head;
struct msghdr msg = {
@@ -584,6 +586,7 @@ static void handle_tx_copy(struct vhost_net *net)
size_t hdr_size;
struct socket *sock;
struct vhost_net_ubuf_ref *uninitialized_var(ubufs);
+ struct tun_msg_ctl ctl;
int sent_pkts = 0;
s16 nheads = 0;
@@ -628,6 +631,14 @@ static void handle_tx_copy(struct vhost_net *net)
vq->heads[nheads].id = cpu_to_vhost32(vq, head);
vq->heads[nheads].len = 0;
+ err = vhost_net_build_xdp(nvq, &msg.msg_iter, &xdp);
+ if (!err) {
+ ctl.type = TUN_MSG_PTR;
+ ctl.ptr = &xdp;
+ msg.msg_control = &ctl;
+ } else
+ msg.msg_control = NULL;
+
total_len += len;
if (total_len < VHOST_NET_WEIGHT &&
vhost_has_more_pkts(net, vq)) {
@@ -734,16 +745,21 @@ static void handle_tx_zerocopy(struct vhost_net *net)
/* use msg_control to pass vhost zerocopy ubuf info to skb */
if (zcopy_used) {
struct ubuf_info *ubuf;
+ struct tun_msg_ctl ctl;
+
ubuf = nvq->ubuf_info + nvq->upend_idx;
+ ctl.type = TUN_MSG_UBUF;
+ ctl.ptr = ubuf;
+
vq->heads[nvq->upend_idx].id = cpu_to_vhost32(vq, head);
vq->heads[nvq->upend_idx].len = VHOST_DMA_IN_PROGRESS;
ubuf->callback = vhost_zerocopy_callback;
ubuf->ctx = nvq->ubufs;
ubuf->desc = nvq->upend_idx;
refcount_set(&ubuf->refcnt, 1);
- msg.msg_control = ubuf;
- msg.msg_controllen = sizeof(ubuf);
+ msg.msg_control = &ctl;
+ msg.msg_controllen = sizeof(ctl);
ubufs = nvq->ubufs;
atomic_inc(&ubufs->refcount);
nvq->upend_idx = (nvq->upend_idx + 1) % UIO_MAXIOV;
@@ -751,6 +767,7 @@ static void handle_tx_zerocopy(struct vhost_net *net)
msg.msg_control = NULL;
ubufs = NULL;
}
+
total_len += len;
if (total_len < VHOST_NET_WEIGHT &&
vhost_has_more_pkts(net, vq)) {
diff --git a/include/linux/if_tun.h b/include/linux/if_tun.h
index 3d2996d..ba46dce 100644
--- a/include/linux/if_tun.h
+++ b/include/linux/if_tun.h
@@ -19,6 +19,13 @@
#define TUN_XDP_FLAG 0x1UL
+#define TUN_MSG_UBUF 1 /* ptr carries the zerocopy ubuf set up by vhost_net's handle_tx_zerocopy() */
+#define TUN_MSG_PTR 2 /* ptr carries a struct xdp_buff, consumed by tun_xdp_one() */
+struct tun_msg_ctl { /* passed to tun's sendmsg() through msghdr.msg_control */
+ int type; /* TUN_MSG_UBUF or TUN_MSG_PTR; selects how ptr is interpreted */
+ void *ptr; /* payload pointer, meaning depends on type */
+};
+
#if defined(CONFIG_TUN) || defined(CONFIG_TUN_MODULE)
struct socket *tun_get_socket(struct file *);
struct ptr_ring *tun_get_tx_ring(struct file *file);
--
2.7.4
next prev parent reply other threads:[~2018-05-21 9:05 UTC|newest]
Thread overview: 27+ messages / expand[flat|nested] mbox.gz Atom feed top
2018-05-21 9:04 [RFC PATCH net-next 00/12] XDP batching for TUN/vhost_net Jason Wang
2018-05-21 9:04 ` [RFC PATCH net-next 01/12] vhost_net: introduce helper to initialize tx iov iter Jason Wang
2018-05-21 16:24 ` Jesse Brandeburg
2018-05-22 12:26 ` Jason Wang
2018-05-21 9:04 ` [RFC PATCH net-next 02/12] vhost_net: introduce vhost_exceeds_weight() Jason Wang
2018-05-21 16:29 ` Jesse Brandeburg
2018-05-22 12:27 ` Jason Wang
2018-05-21 9:04 ` [RFC PATCH net-next 03/12] vhost_net: introduce vhost_has_more_pkts() Jason Wang
2018-05-21 16:39 ` Jesse Brandeburg
2018-05-22 12:31 ` Jason Wang
2018-05-21 9:04 ` [RFC PATCH net-next 04/12] vhost_net: split out datacopy logic Jason Wang
2018-05-21 16:46 ` Jesse Brandeburg
2018-05-22 12:39 ` Jason Wang
2018-05-21 9:04 ` [RFC PATCH net-next 05/12] vhost_net: batch update used ring for datacopy TX Jason Wang
2018-05-21 9:04 ` [RFC PATCH net-next 06/12] tuntap: enable premmption early Jason Wang
2018-05-21 14:32 ` Michael S. Tsirkin
2018-05-21 9:04 ` [RFC PATCH net-next 07/12] tuntap: simplify error handling in tun_build_skb() Jason Wang
2018-05-21 9:04 ` [RFC PATCH net-next 08/12] tuntap: tweak on the path of non-xdp case " Jason Wang
2018-05-21 9:04 ` [RFC PATCH net-next 09/12] tuntap: split out XDP logic Jason Wang
2018-05-21 9:04 ` [RFC PATCH net-next 10/12] vhost_net: build xdp buff Jason Wang
2018-05-21 16:56 ` Jesse Brandeburg
2018-05-21 22:21 ` Michael S. Tsirkin
2018-05-22 12:41 ` Jason Wang
2018-05-21 9:04 ` Jason Wang [this message]
2018-05-21 9:04 ` [RFC PATCH net-next 12/12] vhost_net: batch submitting XDP buffers to underlayer sockets Jason Wang
2018-05-21 14:33 ` Michael S. Tsirkin
2018-05-25 17:53 ` [RFC PATCH net-next 00/12] XDP batching for TUN/vhost_net Michael S. Tsirkin
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1526893473-20128-12-git-send-email-jasowang@redhat.com \
--to=jasowang@redhat.com \
--cc=kvm@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=mst@redhat.com \
--cc=netdev@vger.kernel.org \
--cc=virtualization@lists.linux-foundation.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).