On Wed, 2021-06-23 at 11:45 +0800, Jason Wang wrote:
>
> As replied in previous version, it would be better if we can unify
> similar logic in tun_get_user().

So that ends up looking something like this (incremental).

Note the '/* XXX: frags && */' part in tun_skb_set_protocol(), because
the 'frags &&' was there in tun_get_user() and it wasn't obvious to me
whether I should be lifting that out as a separate argument to
tun_skb_set_protocol() or if there's a better way.

Either way, in my judgement this is less suitable for a stable fix and
more appropriate for a follow-on cleanup. But I don't feel that
strongly; I'm more than happy for you to overrule me on that.
Especially if you fix the above XXX part while you're at it :)

I tested this with vhost-net and !IFF_NO_PI, and TX works. RX is still
hosed on the vhost-net side, for the same reason that a bunch of test
cases were already listed in #if 0, but I'll address that in a separate
email. It's not part of *this* patch.

--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -1641,6 +1641,40 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun,
 	return NULL;
 }

+static int tun_skb_set_protocol(struct tun_struct *tun, struct sk_buff *skb,
+				__be16 pi_proto)
+{
+	switch (tun->flags & TUN_TYPE_MASK) {
+	case IFF_TUN:
+		if (tun->flags & IFF_NO_PI) {
+			u8 ip_version = skb->len ? (skb->data[0] >> 4) : 0;
+
+			switch (ip_version) {
+			case 4:
+				pi_proto = htons(ETH_P_IP);
+				break;
+			case 6:
+				pi_proto = htons(ETH_P_IPV6);
+				break;
+			default:
+				return -EINVAL;
+			}
+		}
+
+		skb_reset_mac_header(skb);
+		skb->protocol = pi_proto;
+		skb->dev = tun->dev;
+		break;
+	case IFF_TAP:
+		if (/* XXX frags && */!pskb_may_pull(skb, ETH_HLEN))
+			return -ENOMEM;
+
+		skb->protocol = eth_type_trans(skb, tun->dev);
+		break;
+	}
+	return 0;
+}
+
 /* Get packet from user space buffer */
 static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
 			    void *msg_control, struct iov_iter *from,
@@ -1784,37 +1818,9 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
 		return -EINVAL;
 	}

-	switch (tun->flags & TUN_TYPE_MASK) {
-	case IFF_TUN:
-		if (tun->flags & IFF_NO_PI) {
-			u8 ip_version = skb->len ? (skb->data[0] >> 4) : 0;
-
-			switch (ip_version) {
-			case 4:
-				pi.proto = htons(ETH_P_IP);
-				break;
-			case 6:
-				pi.proto = htons(ETH_P_IPV6);
-				break;
-			default:
-				atomic_long_inc(&tun->dev->rx_dropped);
-				kfree_skb(skb);
-				return -EINVAL;
-			}
-		}
-
-		skb_reset_mac_header(skb);
-		skb->protocol = pi.proto;
-		skb->dev = tun->dev;
-		break;
-	case IFF_TAP:
-		if (frags && !pskb_may_pull(skb, ETH_HLEN)) {
-			err = -ENOMEM;
-			goto drop;
-		}
-		skb->protocol = eth_type_trans(skb, tun->dev);
-		break;
-	}
+	err = tun_skb_set_protocol(tun, skb, pi.proto);
+	if (err)
+		goto drop;

 	/* copy skb_ubuf_info for callback when skb has no error */
 	if (zerocopy) {
@@ -2334,8 +2340,10 @@ static int tun_xdp_one(struct tun_struct *tun,
 	struct virtio_net_hdr *gso = NULL;
 	struct bpf_prog *xdp_prog;
 	struct sk_buff *skb = NULL;
+	__be16 proto = 0;
 	u32 rxhash = 0, act;
 	int buflen = hdr->buflen;
+	int reservelen = xdp->data - xdp->data_hard_start;
 	int err = 0;
 	bool skb_xdp = false;
 	struct page *page;
@@ -2343,6 +2351,17 @@ static int tun_xdp_one(struct tun_struct *tun,
 	if (tun->flags & IFF_VNET_HDR)
 		gso = &hdr->gso;

+	if (!(tun->flags & IFF_NO_PI)) {
+		struct tun_pi *pi = xdp->data;
+		if (datasize < sizeof(*pi)) {
+			atomic_long_inc(&tun->rx_frame_errors);
+			return -EINVAL;
+		}
+		proto = pi->proto;
+		reservelen += sizeof(*pi);
+		datasize -= sizeof(*pi);
+	}
+
 	xdp_prog = rcu_dereference(tun->xdp_prog);
 	if (xdp_prog) {
 		if (gso && gso->gso_type) {
@@ -2388,8 +2407,8 @@ static int tun_xdp_one(struct tun_struct *tun,
 		goto out;
 	}

-	skb_reserve(skb, xdp->data - xdp->data_hard_start);
-	skb_put(skb, xdp->data_end - xdp->data);
+	skb_reserve(skb, reservelen);
+	skb_put(skb, datasize);

 	if (gso && virtio_net_hdr_to_skb(skb, gso, tun_is_little_endian(tun))) {
 		atomic_long_inc(&tun->rx_frame_errors);
@@ -2397,48 +2416,12 @@ static int tun_xdp_one(struct tun_struct *tun,
 		err = -EINVAL;
 		goto out;
 	}
-	switch (tun->flags & TUN_TYPE_MASK) {
-	case IFF_TUN:
-		if (tun->flags & IFF_NO_PI) {
-			u8 ip_version = skb->len ? (skb->data[0] >> 4) : 0;
-			switch (ip_version) {
-			case 4:
-				skb->protocol = htons(ETH_P_IP);
-				break;
-			case 6:
-				skb->protocol = htons(ETH_P_IPV6);
-				break;
-			default:
-				atomic_long_inc(&tun->dev->rx_dropped);
-				kfree_skb(skb);
-				err = -EINVAL;
-				goto out;
-			}
-		} else {
-			struct tun_pi *pi = (struct tun_pi *)skb->data;
-			if (!pskb_may_pull(skb, sizeof(*pi))) {
-				atomic_long_inc(&tun->dev->rx_dropped);
-				kfree_skb(skb);
-				err = -ENOMEM;
-				goto out;
-			}
-			skb_pull_inline(skb, sizeof(*pi));
-			skb->protocol = pi->proto;
-		}
-
-		skb_reset_mac_header(skb);
-		skb->dev = tun->dev;
-		break;
-	case IFF_TAP:
-		if (!pskb_may_pull(skb, ETH_HLEN)) {
-			atomic_long_inc(&tun->dev->rx_dropped);
-			kfree_skb(skb);
-			err = -ENOMEM;
-			goto out;
-		}
-		skb->protocol = eth_type_trans(skb, tun->dev);
-		break;
+	err = tun_skb_set_protocol(tun, skb, proto);
+	if (err) {
+		atomic_long_inc(&tun->dev->rx_dropped);
+		kfree_skb(skb);
+		goto out;
 	}
 	skb_reset_network_header(skb);