All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] net/packet: support vhost mrg_rxbuf
@ 2018-10-27 12:04 Jianfeng Tan
  2018-10-29  2:54 ` Jason Wang
  0 siblings, 1 reply; 3+ messages in thread
From: Jianfeng Tan @ 2018-10-27 12:04 UTC (permalink / raw)
  To: netdev; +Cc: davem, jasowang, mst

Previously, the virtio net header size was hardcoded to 10, which makes
the mrg_rxbuf feature unavailable.

We redefine the PACKET_VNET_HDR socket option: it used to treat user
input as a boolean, but now treats it as an int. 0, 10 and 12 are used
as-is; any other value is treated as 10.

Exactly one input is now handled differently: if the user passes 12,
the header size used to be 10, but is now 12.

Signed-off-by: Jianfeng Tan <jianfeng.tan@linux.alibaba.com>
---
 net/packet/af_packet.c | 97 ++++++++++++++++++++++++++----------------
 net/packet/diag.c      |  2 +-
 net/packet/internal.h  |  2 +-
 3 files changed, 63 insertions(+), 38 deletions(-)

diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index ec3095f13aae..1bd7f4cdcc80 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -1999,18 +1999,24 @@ static unsigned int run_filter(struct sk_buff *skb,
 }
 
 static int packet_rcv_vnet(struct msghdr *msg, const struct sk_buff *skb,
-			   size_t *len)
+			   size_t *len, int vnet_hdr_len)
 {
+	int res;
 	struct virtio_net_hdr vnet_hdr;
 
-	if (*len < sizeof(vnet_hdr))
+	if (*len < vnet_hdr_len)
 		return -EINVAL;
-	*len -= sizeof(vnet_hdr);
+	*len -= vnet_hdr_len;
 
 	if (virtio_net_hdr_from_skb(skb, &vnet_hdr, vio_le(), true, 0))
 		return -EINVAL;
 
-	return memcpy_to_msg(msg, (void *)&vnet_hdr, sizeof(vnet_hdr));
+	res = memcpy_to_msg(msg, (void *)&vnet_hdr, sizeof(vnet_hdr));
+	if (res == 0)
+		iov_iter_advance(&msg->msg_iter,
+				 vnet_hdr_len - sizeof(vnet_hdr));
+
+	return res;
 }
 
 /*
@@ -2206,11 +2212,13 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
 				  po->tp_reserve;
 	} else {
 		unsigned int maclen = skb_network_offset(skb);
+		int vnet_hdr_sz = READ_ONCE(po->vnet_hdr_sz);
+
 		netoff = TPACKET_ALIGN(po->tp_hdrlen +
 				       (maclen < 16 ? 16 : maclen)) +
 				       po->tp_reserve;
-		if (po->has_vnet_hdr) {
-			netoff += sizeof(struct virtio_net_hdr);
+		if (vnet_hdr_sz) {
+			netoff += vnet_hdr_sz;
 			do_vnet = true;
 		}
 		macoff = netoff - maclen;
@@ -2429,19 +2437,6 @@ static int __packet_snd_vnet_parse(struct virtio_net_hdr *vnet_hdr, size_t len)
 	return 0;
 }
 
-static int packet_snd_vnet_parse(struct msghdr *msg, size_t *len,
-				 struct virtio_net_hdr *vnet_hdr)
-{
-	if (*len < sizeof(*vnet_hdr))
-		return -EINVAL;
-	*len -= sizeof(*vnet_hdr);
-
-	if (!copy_from_iter_full(vnet_hdr, sizeof(*vnet_hdr), &msg->msg_iter))
-		return -EFAULT;
-
-	return __packet_snd_vnet_parse(vnet_hdr, *len);
-}
-
 static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
 		void *frame, struct net_device *dev, void *data, int tp_len,
 		__be16 proto, unsigned char *addr, int hlen, int copylen,
@@ -2609,6 +2604,7 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
 	int len_sum = 0;
 	int status = TP_STATUS_AVAILABLE;
 	int hlen, tlen, copylen = 0;
+	int vnet_hdr_sz;
 
 	mutex_lock(&po->pg_vec_lock);
 
@@ -2648,7 +2644,8 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
 	size_max = po->tx_ring.frame_size
 		- (po->tp_hdrlen - sizeof(struct sockaddr_ll));
 
-	if ((size_max > dev->mtu + reserve + VLAN_HLEN) && !po->has_vnet_hdr)
+	vnet_hdr_sz = READ_ONCE(po->vnet_hdr_sz);
+	if ((size_max > dev->mtu + reserve + VLAN_HLEN) && !vnet_hdr_sz)
 		size_max = dev->mtu + reserve + VLAN_HLEN;
 
 	do {
@@ -2668,10 +2665,10 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
 		status = TP_STATUS_SEND_REQUEST;
 		hlen = LL_RESERVED_SPACE(dev);
 		tlen = dev->needed_tailroom;
-		if (po->has_vnet_hdr) {
+		if (vnet_hdr_sz) {
 			vnet_hdr = data;
-			data += sizeof(*vnet_hdr);
-			tp_len -= sizeof(*vnet_hdr);
+			data += vnet_hdr_sz;
+			tp_len -= vnet_hdr_sz;
 			if (tp_len < 0 ||
 			    __packet_snd_vnet_parse(vnet_hdr, tp_len)) {
 				tp_len = -EINVAL;
@@ -2696,7 +2693,7 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
 					  addr, hlen, copylen, &sockc);
 		if (likely(tp_len >= 0) &&
 		    tp_len > dev->mtu + reserve &&
-		    !po->has_vnet_hdr &&
+		    !vnet_hdr_sz &&
 		    !packet_extra_vlan_len_allowed(dev, skb))
 			tp_len = -EMSGSIZE;
 
@@ -2715,7 +2712,7 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
 			}
 		}
 
-		if (po->has_vnet_hdr) {
+		if (vnet_hdr_sz) {
 			if (virtio_net_hdr_to_skb(skb, vnet_hdr, vio_le())) {
 				tp_len = -EINVAL;
 				goto tpacket_error;
@@ -2802,9 +2799,9 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
 	int err, reserve = 0;
 	struct sockcm_cookie sockc;
 	struct virtio_net_hdr vnet_hdr = { 0 };
+	int vnet_hdr_sz;
 	int offset = 0;
 	struct packet_sock *po = pkt_sk(sk);
-	bool has_vnet_hdr = false;
 	int hlen, tlen, linear;
 	int extra_len = 0;
 
@@ -2844,11 +2841,29 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
 
 	if (sock->type == SOCK_RAW)
 		reserve = dev->hard_header_len;
-	if (po->has_vnet_hdr) {
-		err = packet_snd_vnet_parse(msg, &len, &vnet_hdr);
-		if (err)
+
+	vnet_hdr_sz = READ_ONCE(po->vnet_hdr_sz);
+	if (vnet_hdr_sz) {
+		if (len < vnet_hdr_sz) {
+			err = -EINVAL;
 			goto out_unlock;
-		has_vnet_hdr = true;
+		}
+		len -= vnet_hdr_sz;
+
+		if (!copy_from_iter_full(&vnet_hdr, sizeof(vnet_hdr),
+					 &msg->msg_iter)) {
+			err = -EFAULT;
+			goto out_unlock;
+		}
+
+		if (__packet_snd_vnet_parse(&vnet_hdr, len)) {
+			err = -EINVAL;
+			goto out_unlock;
+		}
+
+		/* TODO: check hdr_len with len? */
+
+		iov_iter_advance(&msg->msg_iter, vnet_hdr_sz - sizeof(vnet_hdr));
 	}
 
 	if (unlikely(sock_flag(sk, SOCK_NOFCS))) {
@@ -2912,7 +2927,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
 	skb->mark = sockc.mark;
 	skb->tstamp = sockc.transmit_time;
 
-	if (has_vnet_hdr) {
+	if (vnet_hdr_sz) {
 		err = virtio_net_hdr_to_skb(skb, &vnet_hdr, vio_le());
 		if (err)
 			goto out_free;
@@ -3307,11 +3322,11 @@ static int packet_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
 	if (pkt_sk(sk)->pressure)
 		packet_rcv_has_room(pkt_sk(sk), NULL);
 
-	if (pkt_sk(sk)->has_vnet_hdr) {
-		err = packet_rcv_vnet(msg, skb, &len);
+	vnet_hdr_len = READ_ONCE(pkt_sk(sk)->vnet_hdr_sz);
+	if (vnet_hdr_len) {
+		err = packet_rcv_vnet(msg, skb, &len, vnet_hdr_len);
 		if (err)
 			goto out_free;
-		vnet_hdr_len = sizeof(struct virtio_net_hdr);
 	}
 
 	/* You lose any data beyond the buffer you gave. If it worries
@@ -3772,7 +3787,17 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv
 		if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) {
 			ret = -EBUSY;
 		} else {
-			po->has_vnet_hdr = !!val;
+			/* Previously user input was treated as boolean (!!val);
+			 * now it is treated as int. After the correction below,
+			 * the only input that behaves differently is 12, which
+			 * gives a vnet header size of 12 instead of 10.
+			 */
+			if (val &&
+			    val != sizeof(struct virtio_net_hdr) &&
+			    val != sizeof(struct virtio_net_hdr_mrg_rxbuf))
+				val = sizeof(struct virtio_net_hdr);
+
+			po->vnet_hdr_sz = val;
 			ret = 0;
 		}
 		release_sock(sk);
@@ -3903,7 +3928,7 @@ static int packet_getsockopt(struct socket *sock, int level, int optname,
 		val = po->origdev;
 		break;
 	case PACKET_VNET_HDR:
-		val = po->has_vnet_hdr;
+		val = po->vnet_hdr_sz;
 		break;
 	case PACKET_VERSION:
 		val = po->tp_version;
diff --git a/net/packet/diag.c b/net/packet/diag.c
index 7ef1c881ae74..950015b6704f 100644
--- a/net/packet/diag.c
+++ b/net/packet/diag.c
@@ -26,7 +26,7 @@ static int pdiag_put_info(const struct packet_sock *po, struct sk_buff *nlskb)
 		pinfo.pdi_flags |= PDI_AUXDATA;
 	if (po->origdev)
 		pinfo.pdi_flags |= PDI_ORIGDEV;
-	if (po->has_vnet_hdr)
+	if (po->vnet_hdr_sz)
 		pinfo.pdi_flags |= PDI_VNETHDR;
 	if (po->tp_loss)
 		pinfo.pdi_flags |= PDI_LOSS;
diff --git a/net/packet/internal.h b/net/packet/internal.h
index 3bb7c5fb3bff..11bc75950f28 100644
--- a/net/packet/internal.h
+++ b/net/packet/internal.h
@@ -115,9 +115,9 @@ struct packet_sock {
 	unsigned int		running;	/* bind_lock must be held */
 	unsigned int		auxdata:1,	/* writer must hold sock lock */
 				origdev:1,
-				has_vnet_hdr:1,
 				tp_loss:1,
 				tp_tx_has_off:1;
+	int			vnet_hdr_sz;
 	int			pressure;
 	int			ifindex;	/* bound device		*/
 	__be16			num;
-- 
2.17.1

^ permalink raw reply related	[flat|nested] 3+ messages in thread

* Re: [PATCH] net/packet: support vhost mrg_rxbuf
  2018-10-27 12:04 [PATCH] net/packet: support vhost mrg_rxbuf Jianfeng Tan
@ 2018-10-29  2:54 ` Jason Wang
  2018-10-29  4:19   ` Jianfeng Tan
  0 siblings, 1 reply; 3+ messages in thread
From: Jason Wang @ 2018-10-29  2:54 UTC (permalink / raw)
  To: Jianfeng Tan, netdev; +Cc: davem, mst


On 2018/10/27 下午8:04, Jianfeng Tan wrote:
> Previouly, virtio net header size is hardcoded to be 10, which makes
> the feature mrg_rxbuf not available.
>
> We redefine PACKET_VNET_HDR ioctl which treats user input as boolean,
> but now as int, 0, 10, 12, or everything else be treated as 10.
>
> There will be one case which is treated differently: if user input is
> 12, previously, the header size will be 10; but now it's 12.
>
> Signed-off-by: Jianfeng Tan <jianfeng.tan@linux.alibaba.com>


This should go to net-next, which is currently closed. Please consider
resubmitting once it reopens.


> ---
>   net/packet/af_packet.c | 97 ++++++++++++++++++++++++++----------------
>   net/packet/diag.c      |  2 +-
>   net/packet/internal.h  |  2 +-
>   3 files changed, 63 insertions(+), 38 deletions(-)
>
> diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
> index ec3095f13aae..1bd7f4cdcc80 100644
> --- a/net/packet/af_packet.c
> +++ b/net/packet/af_packet.c
> @@ -1999,18 +1999,24 @@ static unsigned int run_filter(struct sk_buff *skb,
>   }
>   
>   static int packet_rcv_vnet(struct msghdr *msg, const struct sk_buff *skb,
> -			   size_t *len)
> +			   size_t *len, int vnet_hdr_len)
>   {
> +	int res;
>   	struct virtio_net_hdr vnet_hdr;
>   
> -	if (*len < sizeof(vnet_hdr))
> +	if (*len < vnet_hdr_len)
>   		return -EINVAL;
> -	*len -= sizeof(vnet_hdr);
> +	*len -= vnet_hdr_len;
>   
>   	if (virtio_net_hdr_from_skb(skb, &vnet_hdr, vio_le(), true, 0))
>   		return -EINVAL;
>   
> -	return memcpy_to_msg(msg, (void *)&vnet_hdr, sizeof(vnet_hdr));
> +	res = memcpy_to_msg(msg, (void *)&vnet_hdr, sizeof(vnet_hdr));
> +	if (res == 0)
> +		iov_iter_advance(&msg->msg_iter,
> +				 vnet_hdr_len - sizeof(vnet_hdr));
> +
> +	return res;
>   }
>   
>   /*
> @@ -2206,11 +2212,13 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
>   				  po->tp_reserve;
>   	} else {
>   		unsigned int maclen = skb_network_offset(skb);
> +		int vnet_hdr_sz = READ_ONCE(po->vnet_hdr_sz);
> +
>   		netoff = TPACKET_ALIGN(po->tp_hdrlen +
>   				       (maclen < 16 ? 16 : maclen)) +
>   				       po->tp_reserve;
> -		if (po->has_vnet_hdr) {
> -			netoff += sizeof(struct virtio_net_hdr);
> +		if (vnet_hdr_sz) {
> +			netoff += vnet_hdr_sz;
>   			do_vnet = true;
>   		}
>   		macoff = netoff - maclen;
> @@ -2429,19 +2437,6 @@ static int __packet_snd_vnet_parse(struct virtio_net_hdr *vnet_hdr, size_t len)
>   	return 0;
>   }
>   
> -static int packet_snd_vnet_parse(struct msghdr *msg, size_t *len,
> -				 struct virtio_net_hdr *vnet_hdr)
> -{
> -	if (*len < sizeof(*vnet_hdr))
> -		return -EINVAL;
> -	*len -= sizeof(*vnet_hdr);
> -
> -	if (!copy_from_iter_full(vnet_hdr, sizeof(*vnet_hdr), &msg->msg_iter))
> -		return -EFAULT;
> -
> -	return __packet_snd_vnet_parse(vnet_hdr, *len);
> -}
> -
>   static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
>   		void *frame, struct net_device *dev, void *data, int tp_len,
>   		__be16 proto, unsigned char *addr, int hlen, int copylen,
> @@ -2609,6 +2604,7 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
>   	int len_sum = 0;
>   	int status = TP_STATUS_AVAILABLE;
>   	int hlen, tlen, copylen = 0;
> +	int vnet_hdr_sz;
>   
>   	mutex_lock(&po->pg_vec_lock);
>   
> @@ -2648,7 +2644,8 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
>   	size_max = po->tx_ring.frame_size
>   		- (po->tp_hdrlen - sizeof(struct sockaddr_ll));
>   
> -	if ((size_max > dev->mtu + reserve + VLAN_HLEN) && !po->has_vnet_hdr)
> +	vnet_hdr_sz = READ_ONCE(po->vnet_hdr_sz);
> +	if ((size_max > dev->mtu + reserve + VLAN_HLEN) && !vnet_hdr_sz)
>   		size_max = dev->mtu + reserve + VLAN_HLEN;
>   
>   	do {
> @@ -2668,10 +2665,10 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
>   		status = TP_STATUS_SEND_REQUEST;
>   		hlen = LL_RESERVED_SPACE(dev);
>   		tlen = dev->needed_tailroom;
> -		if (po->has_vnet_hdr) {
> +		if (vnet_hdr_sz) {
>   			vnet_hdr = data;
> -			data += sizeof(*vnet_hdr);
> -			tp_len -= sizeof(*vnet_hdr);
> +			data += vnet_hdr_sz;
> +			tp_len -= vnet_hdr_sz;
>   			if (tp_len < 0 ||
>   			    __packet_snd_vnet_parse(vnet_hdr, tp_len)) {
>   				tp_len = -EINVAL;
> @@ -2696,7 +2693,7 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
>   					  addr, hlen, copylen, &sockc);
>   		if (likely(tp_len >= 0) &&
>   		    tp_len > dev->mtu + reserve &&
> -		    !po->has_vnet_hdr &&
> +		    !vnet_hdr_sz &&
>   		    !packet_extra_vlan_len_allowed(dev, skb))
>   			tp_len = -EMSGSIZE;
>   
> @@ -2715,7 +2712,7 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
>   			}
>   		}
>   
> -		if (po->has_vnet_hdr) {
> +		if (vnet_hdr_sz) {
>   			if (virtio_net_hdr_to_skb(skb, vnet_hdr, vio_le())) {
>   				tp_len = -EINVAL;
>   				goto tpacket_error;
> @@ -2802,9 +2799,9 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
>   	int err, reserve = 0;
>   	struct sockcm_cookie sockc;
>   	struct virtio_net_hdr vnet_hdr = { 0 };
> +	int vnet_hdr_sz;
>   	int offset = 0;
>   	struct packet_sock *po = pkt_sk(sk);
> -	bool has_vnet_hdr = false;
>   	int hlen, tlen, linear;
>   	int extra_len = 0;
>   
> @@ -2844,11 +2841,29 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
>   
>   	if (sock->type == SOCK_RAW)
>   		reserve = dev->hard_header_len;
> -	if (po->has_vnet_hdr) {
> -		err = packet_snd_vnet_parse(msg, &len, &vnet_hdr);
> -		if (err)
> +
> +	vnet_hdr_sz = READ_ONCE(po->vnet_hdr_sz);
> +	if (vnet_hdr_sz) {
> +		if (len < vnet_hdr_sz) {
> +			err = -EINVAL;
>   			goto out_unlock;
> -		has_vnet_hdr = true;
> +		}
> +		len -= vnet_hdr_sz;
> +
> +		if (!copy_from_iter_full(&vnet_hdr, sizeof(vnet_hdr),
> +					 &msg->msg_iter)) {
> +			err = -EFAULT;
> +			goto out_unlock;
> +		}
> +
> +		if (__packet_snd_vnet_parse(&vnet_hdr, len)) {
> +			err = -EINVAL;
> +			goto out_unlock;
> +		}


Any reason to open code packet_snd_vnet_parse() here?


> +
> +		/* TODO: check hdr_len with len? */
> +
> +		iov_iter_advance(&msg->msg_iter, vnet_hdr_sz - sizeof(vnet_hdr));
>   	}
>   
>   	if (unlikely(sock_flag(sk, SOCK_NOFCS))) {
> @@ -2912,7 +2927,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
>   	skb->mark = sockc.mark;
>   	skb->tstamp = sockc.transmit_time;
>   
> -	if (has_vnet_hdr) {
> +	if (vnet_hdr_sz) {
>   		err = virtio_net_hdr_to_skb(skb, &vnet_hdr, vio_le());
>   		if (err)
>   			goto out_free;
> @@ -3307,11 +3322,11 @@ static int packet_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
>   	if (pkt_sk(sk)->pressure)
>   		packet_rcv_has_room(pkt_sk(sk), NULL);
>   
> -	if (pkt_sk(sk)->has_vnet_hdr) {
> -		err = packet_rcv_vnet(msg, skb, &len);
> +	vnet_hdr_len = READ_ONCE(pkt_sk(sk)->vnet_hdr_sz);
> +	if (vnet_hdr_len) {
> +		err = packet_rcv_vnet(msg, skb, &len, vnet_hdr_len);
>   		if (err)
>   			goto out_free;
> -		vnet_hdr_len = sizeof(struct virtio_net_hdr);
>   	}
>   
>   	/* You lose any data beyond the buffer you gave. If it worries
> @@ -3772,7 +3787,17 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv
>   		if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) {
>   			ret = -EBUSY;
>   		} else {
> -			po->has_vnet_hdr = !!val;
> +			/* Previouly we treat user input as boolean (!!val),
> +			 * now we treat it as int. After the below correction,
> +			 * the only violation case is 12, which results in
> +			 * vnet header size of 12 instead of 10.
> +			 */
> +			if (val &&
> +			    val != sizeof(struct virtio_net_hdr) &&
> +			    val != sizeof(struct virtio_net_hdr_mrg_rxbuf))
> +				val = sizeof(struct virtio_net_hdr);
> +
> +			po->vnet_hdr_sz = val;
>   			ret = 0;
>   		}
>   		release_sock(sk);
> @@ -3903,7 +3928,7 @@ static int packet_getsockopt(struct socket *sock, int level, int optname,
>   		val = po->origdev;
>   		break;
>   	case PACKET_VNET_HDR:
> -		val = po->has_vnet_hdr;
> +		val = po->vnet_hdr_sz;


So the change here is noticeable by userspace. Maybe we need a new opt 
for this?

Thanks


>   		break;
>   	case PACKET_VERSION:
>   		val = po->tp_version;
> diff --git a/net/packet/diag.c b/net/packet/diag.c
> index 7ef1c881ae74..950015b6704f 100644
> --- a/net/packet/diag.c
> +++ b/net/packet/diag.c
> @@ -26,7 +26,7 @@ static int pdiag_put_info(const struct packet_sock *po, struct sk_buff *nlskb)
>   		pinfo.pdi_flags |= PDI_AUXDATA;
>   	if (po->origdev)
>   		pinfo.pdi_flags |= PDI_ORIGDEV;
> -	if (po->has_vnet_hdr)
> +	if (po->vnet_hdr_sz)
>   		pinfo.pdi_flags |= PDI_VNETHDR;
>   	if (po->tp_loss)
>   		pinfo.pdi_flags |= PDI_LOSS;
> diff --git a/net/packet/internal.h b/net/packet/internal.h
> index 3bb7c5fb3bff..11bc75950f28 100644
> --- a/net/packet/internal.h
> +++ b/net/packet/internal.h
> @@ -115,9 +115,9 @@ struct packet_sock {
>   	unsigned int		running;	/* bind_lock must be held */
>   	unsigned int		auxdata:1,	/* writer must hold sock lock */
>   				origdev:1,
> -				has_vnet_hdr:1,
>   				tp_loss:1,
>   				tp_tx_has_off:1;
> +	int			vnet_hdr_sz;
>   	int			pressure;
>   	int			ifindex;	/* bound device		*/
>   	__be16			num;

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH] net/packet: support vhost mrg_rxbuf
  2018-10-29  2:54 ` Jason Wang
@ 2018-10-29  4:19   ` Jianfeng Tan
  0 siblings, 0 replies; 3+ messages in thread
From: Jianfeng Tan @ 2018-10-29  4:19 UTC (permalink / raw)
  To: Jason Wang, netdev; +Cc: davem, mst


On 10/29/2018 10:54 AM, Jason Wang wrote:
>
> On 2018/10/27 下午8:04, Jianfeng Tan wrote:
>> Previouly, virtio net header size is hardcoded to be 10, which makes
>> the feature mrg_rxbuf not available.
>>
>> We redefine PACKET_VNET_HDR ioctl which treats user input as boolean,
>> but now as int, 0, 10, 12, or everything else be treated as 10.
>>
>> There will be one case which is treated differently: if user input is
>> 12, previously, the header size will be 10; but now it's 12.
>>
>> Signed-off-by: Jianfeng Tan <jianfeng.tan@linux.alibaba.com>
>
>
> This should go for net-next which is closed. You may consider to 
> re-submit when it was open.

Thank you for the reminder. We'll re-evaluate the necessity of this patch.

>
>
>> ---
>>   net/packet/af_packet.c | 97 ++++++++++++++++++++++++++----------------
>>   net/packet/diag.c      |  2 +-
>>   net/packet/internal.h  |  2 +-
>>   3 files changed, 63 insertions(+), 38 deletions(-)
>>
>> diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
>> index ec3095f13aae..1bd7f4cdcc80 100644
>> --- a/net/packet/af_packet.c
>> +++ b/net/packet/af_packet.c
>> @@ -1999,18 +1999,24 @@ static unsigned int run_filter(struct sk_buff 
>> *skb,
>>   }
>>     static int packet_rcv_vnet(struct msghdr *msg, const struct 
>> sk_buff *skb,
>> -               size_t *len)
>> +               size_t *len, int vnet_hdr_len)
>>   {
>> +    int res;
>>       struct virtio_net_hdr vnet_hdr;
>>   -    if (*len < sizeof(vnet_hdr))
>> +    if (*len < vnet_hdr_len)
>>           return -EINVAL;
>> -    *len -= sizeof(vnet_hdr);
>> +    *len -= vnet_hdr_len;
>>         if (virtio_net_hdr_from_skb(skb, &vnet_hdr, vio_le(), true, 0))
>>           return -EINVAL;
>>   -    return memcpy_to_msg(msg, (void *)&vnet_hdr, sizeof(vnet_hdr));
>> +    res = memcpy_to_msg(msg, (void *)&vnet_hdr, sizeof(vnet_hdr));
>> +    if (res == 0)
>> +        iov_iter_advance(&msg->msg_iter,
>> +                 vnet_hdr_len - sizeof(vnet_hdr));
>> +
>> +    return res;
>>   }
>>     /*
>> @@ -2206,11 +2212,13 @@ static int tpacket_rcv(struct sk_buff *skb, 
>> struct net_device *dev,
>>                     po->tp_reserve;
>>       } else {
>>           unsigned int maclen = skb_network_offset(skb);
>> +        int vnet_hdr_sz = READ_ONCE(po->vnet_hdr_sz);
>> +
>>           netoff = TPACKET_ALIGN(po->tp_hdrlen +
>>                          (maclen < 16 ? 16 : maclen)) +
>>                          po->tp_reserve;
>> -        if (po->has_vnet_hdr) {
>> -            netoff += sizeof(struct virtio_net_hdr);
>> +        if (vnet_hdr_sz) {
>> +            netoff += vnet_hdr_sz;
>>               do_vnet = true;
>>           }
>>           macoff = netoff - maclen;
>> @@ -2429,19 +2437,6 @@ static int __packet_snd_vnet_parse(struct 
>> virtio_net_hdr *vnet_hdr, size_t len)
>>       return 0;
>>   }
>>   -static int packet_snd_vnet_parse(struct msghdr *msg, size_t *len,
>> -                 struct virtio_net_hdr *vnet_hdr)
>> -{
>> -    if (*len < sizeof(*vnet_hdr))
>> -        return -EINVAL;
>> -    *len -= sizeof(*vnet_hdr);
>> -
>> -    if (!copy_from_iter_full(vnet_hdr, sizeof(*vnet_hdr), 
>> &msg->msg_iter))
>> -        return -EFAULT;
>> -
>> -    return __packet_snd_vnet_parse(vnet_hdr, *len);
>> -}
>> -
>>   static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff 
>> *skb,
>>           void *frame, struct net_device *dev, void *data, int tp_len,
>>           __be16 proto, unsigned char *addr, int hlen, int copylen,
>> @@ -2609,6 +2604,7 @@ static int tpacket_snd(struct packet_sock *po, 
>> struct msghdr *msg)
>>       int len_sum = 0;
>>       int status = TP_STATUS_AVAILABLE;
>>       int hlen, tlen, copylen = 0;
>> +    int vnet_hdr_sz;
>>         mutex_lock(&po->pg_vec_lock);
>>   @@ -2648,7 +2644,8 @@ static int tpacket_snd(struct packet_sock 
>> *po, struct msghdr *msg)
>>       size_max = po->tx_ring.frame_size
>>           - (po->tp_hdrlen - sizeof(struct sockaddr_ll));
>>   -    if ((size_max > dev->mtu + reserve + VLAN_HLEN) && 
>> !po->has_vnet_hdr)
>> +    vnet_hdr_sz = READ_ONCE(po->vnet_hdr_sz);
>> +    if ((size_max > dev->mtu + reserve + VLAN_HLEN) && !vnet_hdr_sz)
>>           size_max = dev->mtu + reserve + VLAN_HLEN;
>>         do {
>> @@ -2668,10 +2665,10 @@ static int tpacket_snd(struct packet_sock 
>> *po, struct msghdr *msg)
>>           status = TP_STATUS_SEND_REQUEST;
>>           hlen = LL_RESERVED_SPACE(dev);
>>           tlen = dev->needed_tailroom;
>> -        if (po->has_vnet_hdr) {
>> +        if (vnet_hdr_sz) {
>>               vnet_hdr = data;
>> -            data += sizeof(*vnet_hdr);
>> -            tp_len -= sizeof(*vnet_hdr);
>> +            data += vnet_hdr_sz;
>> +            tp_len -= vnet_hdr_sz;
>>               if (tp_len < 0 ||
>>                   __packet_snd_vnet_parse(vnet_hdr, tp_len)) {
>>                   tp_len = -EINVAL;
>> @@ -2696,7 +2693,7 @@ static int tpacket_snd(struct packet_sock *po, 
>> struct msghdr *msg)
>>                         addr, hlen, copylen, &sockc);
>>           if (likely(tp_len >= 0) &&
>>               tp_len > dev->mtu + reserve &&
>> -            !po->has_vnet_hdr &&
>> +            !vnet_hdr_sz &&
>>               !packet_extra_vlan_len_allowed(dev, skb))
>>               tp_len = -EMSGSIZE;
>>   @@ -2715,7 +2712,7 @@ static int tpacket_snd(struct packet_sock 
>> *po, struct msghdr *msg)
>>               }
>>           }
>>   -        if (po->has_vnet_hdr) {
>> +        if (vnet_hdr_sz) {
>>               if (virtio_net_hdr_to_skb(skb, vnet_hdr, vio_le())) {
>>                   tp_len = -EINVAL;
>>                   goto tpacket_error;
>> @@ -2802,9 +2799,9 @@ static int packet_snd(struct socket *sock, 
>> struct msghdr *msg, size_t len)
>>       int err, reserve = 0;
>>       struct sockcm_cookie sockc;
>>       struct virtio_net_hdr vnet_hdr = { 0 };
>> +    int vnet_hdr_sz;
>>       int offset = 0;
>>       struct packet_sock *po = pkt_sk(sk);
>> -    bool has_vnet_hdr = false;
>>       int hlen, tlen, linear;
>>       int extra_len = 0;
>>   @@ -2844,11 +2841,29 @@ static int packet_snd(struct socket *sock, 
>> struct msghdr *msg, size_t len)
>>         if (sock->type == SOCK_RAW)
>>           reserve = dev->hard_header_len;
>> -    if (po->has_vnet_hdr) {
>> -        err = packet_snd_vnet_parse(msg, &len, &vnet_hdr);
>> -        if (err)
>> +
>> +    vnet_hdr_sz = READ_ONCE(po->vnet_hdr_sz);
>> +    if (vnet_hdr_sz) {
>> +        if (len < vnet_hdr_sz) {
>> +            err = -EINVAL;
>>               goto out_unlock;
>> -        has_vnet_hdr = true;
>> +        }
>> +        len -= vnet_hdr_sz;
>> +
>> +        if (!copy_from_iter_full(&vnet_hdr, sizeof(vnet_hdr),
>> +                     &msg->msg_iter)) {
>> +            err = -EFAULT;
>> +            goto out_unlock;
>> +        }
>> +
>> +        if (__packet_snd_vnet_parse(&vnet_hdr, len)) {
>> +            err = -EINVAL;
>> +            goto out_unlock;
>> +        }
>
>
> Any reason to open code packet_snd_vnet_parse() here?

No particular reason. I will try to add a parameter and keep the
vnet-related code inside that function if the patch is resubmitted.

>
>
>> +
>> +        /* TODO: check hdr_len with len? */
>> +
>> +        iov_iter_advance(&msg->msg_iter, vnet_hdr_sz - 
>> sizeof(vnet_hdr));
>>       }
>>         if (unlikely(sock_flag(sk, SOCK_NOFCS))) {
>> @@ -2912,7 +2927,7 @@ static int packet_snd(struct socket *sock, 
>> struct msghdr *msg, size_t len)
>>       skb->mark = sockc.mark;
>>       skb->tstamp = sockc.transmit_time;
>>   -    if (has_vnet_hdr) {
>> +    if (vnet_hdr_sz) {
>>           err = virtio_net_hdr_to_skb(skb, &vnet_hdr, vio_le());
>>           if (err)
>>               goto out_free;
>> @@ -3307,11 +3322,11 @@ static int packet_recvmsg(struct socket 
>> *sock, struct msghdr *msg, size_t len,
>>       if (pkt_sk(sk)->pressure)
>>           packet_rcv_has_room(pkt_sk(sk), NULL);
>>   -    if (pkt_sk(sk)->has_vnet_hdr) {
>> -        err = packet_rcv_vnet(msg, skb, &len);
>> +    vnet_hdr_len = READ_ONCE(pkt_sk(sk)->vnet_hdr_sz);
>> +    if (vnet_hdr_len) {
>> +        err = packet_rcv_vnet(msg, skb, &len, vnet_hdr_len);
>>           if (err)
>>               goto out_free;
>> -        vnet_hdr_len = sizeof(struct virtio_net_hdr);
>>       }
>>         /* You lose any data beyond the buffer you gave. If it worries
>> @@ -3772,7 +3787,17 @@ packet_setsockopt(struct socket *sock, int 
>> level, int optname, char __user *optv
>>           if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) {
>>               ret = -EBUSY;
>>           } else {
>> -            po->has_vnet_hdr = !!val;
>> +            /* Previouly we treat user input as boolean (!!val),
>> +             * now we treat it as int. After the below correction,
>> +             * the only violation case is 12, which results in
>> +             * vnet header size of 12 instead of 10.
>> +             */
>> +            if (val &&
>> +                val != sizeof(struct virtio_net_hdr) &&
>> +                val != sizeof(struct virtio_net_hdr_mrg_rxbuf))
>> +                val = sizeof(struct virtio_net_hdr);
>> +
>> +            po->vnet_hdr_sz = val;
>>               ret = 0;
>>           }
>>           release_sock(sk);
>> @@ -3903,7 +3928,7 @@ static int packet_getsockopt(struct socket 
>> *sock, int level, int optname,
>>           val = po->origdev;
>>           break;
>>       case PACKET_VNET_HDR:
>> -        val = po->has_vnet_hdr;
>> +        val = po->vnet_hdr_sz;
>
>
> So the change here is noticeable by userspace. Maybe we need a new opt 
> for this?

Nice catch, users may assume that only 0 or 1 is returned.

Thanks,
Jianfeng

>
> Thanks
>
>
>>           break;
>>       case PACKET_VERSION:
>>           val = po->tp_version;
>> diff --git a/net/packet/diag.c b/net/packet/diag.c
>> index 7ef1c881ae74..950015b6704f 100644
>> --- a/net/packet/diag.c
>> +++ b/net/packet/diag.c
>> @@ -26,7 +26,7 @@ static int pdiag_put_info(const struct packet_sock 
>> *po, struct sk_buff *nlskb)
>>           pinfo.pdi_flags |= PDI_AUXDATA;
>>       if (po->origdev)
>>           pinfo.pdi_flags |= PDI_ORIGDEV;
>> -    if (po->has_vnet_hdr)
>> +    if (po->vnet_hdr_sz)
>>           pinfo.pdi_flags |= PDI_VNETHDR;
>>       if (po->tp_loss)
>>           pinfo.pdi_flags |= PDI_LOSS;
>> diff --git a/net/packet/internal.h b/net/packet/internal.h
>> index 3bb7c5fb3bff..11bc75950f28 100644
>> --- a/net/packet/internal.h
>> +++ b/net/packet/internal.h
>> @@ -115,9 +115,9 @@ struct packet_sock {
>>       unsigned int        running;    /* bind_lock must be held */
>>       unsigned int        auxdata:1,    /* writer must hold sock lock */
>>                   origdev:1,
>> -                has_vnet_hdr:1,
>>                   tp_loss:1,
>>                   tp_tx_has_off:1;
>> +    int            vnet_hdr_sz;
>>       int            pressure;
>>       int            ifindex;    /* bound device        */
>>       __be16            num;

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2018-10-29 13:06 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-10-27 12:04 [PATCH] net/packet: support vhost mrg_rxbuf Jianfeng Tan
2018-10-29  2:54 ` Jason Wang
2018-10-29  4:19   ` Jianfeng Tan

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.