All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/2 v2] af-packet:  Use existing netdev reference for bound sockets.
@ 2011-05-26 23:55 greearb
  2011-05-26 23:55 ` [PATCH 2/2 v2] af-packet: Add flag to distinguish VID 0 from no-vlan greearb
  2011-05-27  3:42 ` [PATCH 1/2 v2] af-packet: Use existing netdev reference for bound sockets Eric Dumazet
  0 siblings, 2 replies; 10+ messages in thread
From: greearb @ 2011-05-26 23:55 UTC (permalink / raw)
  To: netdev; +Cc: Ben Greear

From: Ben Greear <greearb@candelatech.com>

This saves a network device lookup on each packet transmitted,
for sockets that are bound to a network device.

Signed-off-by: Ben Greear <greearb@candelatech.com>
---

v2:  Remove un-used ifindex, lookup dev in else branch.

:100644 100644 4005b24... f9b807d... M	net/packet/af_packet.c
 net/packet/af_packet.c |   27 +++++++++++++++------------
 1 files changed, 15 insertions(+), 12 deletions(-)

diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 4005b24..f9b807d 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -989,7 +989,8 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
 	struct sk_buff *skb;
 	struct net_device *dev;
 	__be16 proto;
-	int ifindex, err, reserve = 0;
+	bool need_rls_dev = false;
+	int err, reserve = 0;
 	void *ph;
 	struct sockaddr_ll *saddr = (struct sockaddr_ll *)msg->msg_name;
 	int tp_len, size_max;
@@ -1001,7 +1002,7 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
 
 	err = -EBUSY;
 	if (saddr == NULL) {
-		ifindex	= po->ifindex;
+		dev = po->prot_hook.dev;
 		proto	= po->num;
 		addr	= NULL;
 	} else {
@@ -1012,12 +1013,12 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
 					+ offsetof(struct sockaddr_ll,
 						sll_addr)))
 			goto out;
-		ifindex	= saddr->sll_ifindex;
 		proto	= saddr->sll_protocol;
 		addr	= saddr->sll_addr;
+		dev = dev_get_by_index(sock_net(&po->sk), saddr->sll_ifindex);
+		need_rls_dev = true;
 	}
 
-	dev = dev_get_by_index(sock_net(&po->sk), ifindex);
 	err = -ENXIO;
 	if (unlikely(dev == NULL))
 		goto out;
@@ -1103,7 +1104,8 @@ out_status:
 	__packet_set_status(po, ph, status);
 	kfree_skb(skb);
 out_put:
-	dev_put(dev);
+	if (need_rls_dev)
+		dev_put(dev);
 out:
 	mutex_unlock(&po->pg_vec_lock);
 	return err;
@@ -1141,8 +1143,9 @@ static int packet_snd(struct socket *sock,
 	struct sk_buff *skb;
 	struct net_device *dev;
 	__be16 proto;
+	bool need_rls_dev = false;
 	unsigned char *addr;
-	int ifindex, err, reserve = 0;
+	int err, reserve = 0;
 	struct virtio_net_hdr vnet_hdr = { 0 };
 	int offset = 0;
 	int vnet_hdr_len;
@@ -1160,7 +1163,7 @@ static int packet_snd(struct socket *sock,
 	 */
 
 	if (saddr == NULL) {
-		ifindex	= po->ifindex;
+		dev = po->prot_hook.dev;
 		proto	= po->num;
 		addr	= NULL;
 	} else {
@@ -1169,13 +1172,12 @@ static int packet_snd(struct socket *sock,
 			goto out;
 		if (msg->msg_namelen < (saddr->sll_halen + offsetof(struct sockaddr_ll, sll_addr)))
 			goto out;
-		ifindex	= saddr->sll_ifindex;
 		proto	= saddr->sll_protocol;
 		addr	= saddr->sll_addr;
+		dev = dev_get_by_index(sock_net(sk), saddr->sll_ifindex);
+		need_rls_dev = true;
 	}
 
-
-	dev = dev_get_by_index(sock_net(sk), ifindex);
 	err = -ENXIO;
 	if (dev == NULL)
 		goto out_unlock;
@@ -1315,14 +1317,15 @@ static int packet_snd(struct socket *sock,
 	if (err > 0 && (err = net_xmit_errno(err)) != 0)
 		goto out_unlock;
 
-	dev_put(dev);
+	if (need_rls_dev)
+		dev_put(dev);
 
 	return len;
 
 out_free:
 	kfree_skb(skb);
 out_unlock:
-	if (dev)
+	if (dev && need_rls_dev)
 		dev_put(dev);
 out:
 	return err;
-- 
1.7.3.4


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [PATCH 2/2 v2] af-packet:  Add flag to distinguish VID 0 from no-vlan.
  2011-05-26 23:55 [PATCH 1/2 v2] af-packet: Use existing netdev reference for bound sockets greearb
@ 2011-05-26 23:55 ` greearb
  2011-05-27  3:46   ` Eric Dumazet
  2011-05-27  3:42 ` [PATCH 1/2 v2] af-packet: Use existing netdev reference for bound sockets Eric Dumazet
  1 sibling, 1 reply; 10+ messages in thread
From: greearb @ 2011-05-26 23:55 UTC (permalink / raw)
  To: netdev; +Cc: Ben Greear

From: Ben Greear <greearb@candelatech.com>

Currently, user-space cannot determine if a 0 tcp_vlan_tci
means there is no VLAN tag or the VLAN ID was zero.

Add flag to make this explicit.  User-space can check for
TP_STATUS_VLAN_VALID || tp_vlan_tci > 0, which will be backwards
compatible. Older could would have just checked for tp_vlan_tci,
so it will work no worse than before.

Signed-off-by: Ben Greear <greearb@candelatech.com>
---
v2:  add logic to tpacket_rcv too.

:100644 100644 72bfa5a... 6d66ce1... M	include/linux/if_packet.h
:100644 100644 f9b807d... 9c2d068... M	net/packet/af_packet.c
 include/linux/if_packet.h |    1 +
 net/packet/af_packet.c    |   14 ++++++++++++--
 2 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/include/linux/if_packet.h b/include/linux/if_packet.h
index 72bfa5a..6d66ce1 100644
--- a/include/linux/if_packet.h
+++ b/include/linux/if_packet.h
@@ -70,6 +70,7 @@ struct tpacket_auxdata {
 #define TP_STATUS_COPY		0x2
 #define TP_STATUS_LOSING	0x4
 #define TP_STATUS_CSUMNOTREADY	0x8
+#define TP_STATUS_VLAN_VALID   0x10 /* auxdata has valid tp_vlan_tci */
 
 /* Tx ring - header status */
 #define TP_STATUS_AVAILABLE	0x0
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index f9b807d..9c2d068 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -818,7 +818,12 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
 			getnstimeofday(&ts);
 		h.h2->tp_sec = ts.tv_sec;
 		h.h2->tp_nsec = ts.tv_nsec;
-		h.h2->tp_vlan_tci = vlan_tx_tag_get(skb);
+		if (vlan_tx_tag_present(skb)) {
+			h.h2->tp_vlan_tci = vlan_tx_tag_get(skb);
+			status |= TP_STATUS_VLAN_VALID;
+		}
+		else
+			h.h2->tp_vlan_tci = 0;
 		hdrlen = sizeof(*h.h2);
 		break;
 	default:
@@ -1763,7 +1768,12 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock,
 		aux.tp_snaplen = skb->len;
 		aux.tp_mac = 0;
 		aux.tp_net = skb_network_offset(skb);
-		aux.tp_vlan_tci = vlan_tx_tag_get(skb);
+		if (vlan_tx_tag_present(skb)) {
+			aux.tp_vlan_tci = vlan_tx_tag_get(skb);
+			aux.tp_status |= TP_STATUS_VLAN_VALID;
+		}
+		else
+			aux.tp_vlan_tci = 0;
 
 		put_cmsg(msg, SOL_PACKET, PACKET_AUXDATA, sizeof(aux), &aux);
 	}
-- 
1.7.3.4


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [PATCH 1/2 v2] af-packet:  Use existing netdev reference for bound sockets.
  2011-05-26 23:55 [PATCH 1/2 v2] af-packet: Use existing netdev reference for bound sockets greearb
  2011-05-26 23:55 ` [PATCH 2/2 v2] af-packet: Add flag to distinguish VID 0 from no-vlan greearb
@ 2011-05-27  3:42 ` Eric Dumazet
  2011-05-27  4:11   ` Ben Greear
  1 sibling, 1 reply; 10+ messages in thread
From: Eric Dumazet @ 2011-05-27  3:42 UTC (permalink / raw)
  To: greearb; +Cc: netdev

Le jeudi 26 mai 2011 à 16:55 -0700, greearb@candelatech.com a écrit :
> From: Ben Greear <greearb@candelatech.com>
> 
> This saves a network device lookup on each packet transmitted,
> for sockets that are bound to a network device.
> 
> Signed-off-by: Ben Greear <greearb@candelatech.com>
> ---
> 
> v2:  Remove un-used ifindex, lookup dev in else branch.
> 
> :100644 100644 4005b24... f9b807d... M	net/packet/af_packet.c
>  net/packet/af_packet.c |   27 +++++++++++++++------------
>  1 files changed, 15 insertions(+), 12 deletions(-)
> 
> diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
> index 4005b24..f9b807d 100644
> --- a/net/packet/af_packet.c
> +++ b/net/packet/af_packet.c
> @@ -989,7 +989,8 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
>  	struct sk_buff *skb;
>  	struct net_device *dev;
>  	__be16 proto;
> -	int ifindex, err, reserve = 0;
> +	bool need_rls_dev = false;
> +	int err, reserve = 0;
>  	void *ph;
>  	struct sockaddr_ll *saddr = (struct sockaddr_ll *)msg->msg_name;
>  	int tp_len, size_max;
> @@ -1001,7 +1002,7 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
>  
>  	err = -EBUSY;
>  	if (saddr == NULL) {
> -		ifindex	= po->ifindex;
> +		dev = po->prot_hook.dev;
>  		proto	= po->num;
>  		addr	= NULL;
>  	} else {
> @@ -1012,12 +1013,12 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
>  					+ offsetof(struct sockaddr_ll,
>  						sll_addr)))
>  			goto out;
> -		ifindex	= saddr->sll_ifindex;
>  		proto	= saddr->sll_protocol;
>  		addr	= saddr->sll_addr;
> +		dev = dev_get_by_index(sock_net(&po->sk), saddr->sll_ifindex);
> +		need_rls_dev = true;
>  	}
>  
> -	dev = dev_get_by_index(sock_net(&po->sk), ifindex);
>  	err = -ENXIO;
>  	if (unlikely(dev == NULL))
>  		goto out;
> @@ -1103,7 +1104,8 @@ out_status:
>  	__packet_set_status(po, ph, status);
>  	kfree_skb(skb);
>  out_put:
> -	dev_put(dev);
> +	if (need_rls_dev)
> +		dev_put(dev);
>  out:
>  	mutex_unlock(&po->pg_vec_lock);
>  	return err;
> @@ -1141,8 +1143,9 @@ static int packet_snd(struct socket *sock,
>  	struct sk_buff *skb;
>  	struct net_device *dev;
>  	__be16 proto;
> +	bool need_rls_dev = false;
>  	unsigned char *addr;
> -	int ifindex, err, reserve = 0;
> +	int err, reserve = 0;
>  	struct virtio_net_hdr vnet_hdr = { 0 };
>  	int offset = 0;
>  	int vnet_hdr_len;
> @@ -1160,7 +1163,7 @@ static int packet_snd(struct socket *sock,
>  	 */
>  
>  	if (saddr == NULL) {
> -		ifindex	= po->ifindex;
> +		dev = po->prot_hook.dev;
>  		proto	= po->num;
>  		addr	= NULL;
>  	} else {
> @@ -1169,13 +1172,12 @@ static int packet_snd(struct socket *sock,
>  			goto out;
>  		if (msg->msg_namelen < (saddr->sll_halen + offsetof(struct sockaddr_ll, sll_addr)))
>  			goto out;
> -		ifindex	= saddr->sll_ifindex;
>  		proto	= saddr->sll_protocol;
>  		addr	= saddr->sll_addr;
> +		dev = dev_get_by_index(sock_net(sk), saddr->sll_ifindex);
> +		need_rls_dev = true;
>  	}
>  
> -
> -	dev = dev_get_by_index(sock_net(sk), ifindex);
>  	err = -ENXIO;
>  	if (dev == NULL)
>  		goto out_unlock;
> @@ -1315,14 +1317,15 @@ static int packet_snd(struct socket *sock,
>  	if (err > 0 && (err = net_xmit_errno(err)) != 0)
>  		goto out_unlock;
>  
> -	dev_put(dev);
> +	if (need_rls_dev)
> +		dev_put(dev);
>  
>  	return len;
>  
>  out_free:
>  	kfree_skb(skb);
>  out_unlock:
> -	if (dev)
> +	if (dev && need_rls_dev)
>  		dev_put(dev);
>  out:
>  	return err;

Hmmm, I wonder why you want this Ben.

IMHO this is buggy, because we can sleep in this function.

We must take a ref on device (its really cheap these days, now we have a
percpu device refcnt)




^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 2/2 v2] af-packet:  Add flag to distinguish VID 0 from no-vlan.
  2011-05-26 23:55 ` [PATCH 2/2 v2] af-packet: Add flag to distinguish VID 0 from no-vlan greearb
@ 2011-05-27  3:46   ` Eric Dumazet
  0 siblings, 0 replies; 10+ messages in thread
From: Eric Dumazet @ 2011-05-27  3:46 UTC (permalink / raw)
  To: greearb; +Cc: netdev

Le jeudi 26 mai 2011 à 16:55 -0700, greearb@candelatech.com a écrit :
> From: Ben Greear <greearb@candelatech.com>
> 
> Currently, user-space cannot determine if a 0 tcp_vlan_tci
> means there is no VLAN tag or the VLAN ID was zero.
> 
> Add flag to make this explicit.  User-space can check for
> TP_STATUS_VLAN_VALID || tp_vlan_tci > 0, which will be backwards
> compatible. Older could would have just checked for tp_vlan_tci,
> so it will work no worse than before.
> 
> Signed-off-by: Ben Greear <greearb@candelatech.com>
> ---
> v2:  add logic to tpacket_rcv too.
> 
> :100644 100644 72bfa5a... 6d66ce1... M	include/linux/if_packet.h
> :100644 100644 f9b807d... 9c2d068... M	net/packet/af_packet.c
>  include/linux/if_packet.h |    1 +
>  net/packet/af_packet.c    |   14 ++++++++++++--
>  2 files changed, 13 insertions(+), 2 deletions(-)
> 
> diff --git a/include/linux/if_packet.h b/include/linux/if_packet.h
> index 72bfa5a..6d66ce1 100644
> --- a/include/linux/if_packet.h
> +++ b/include/linux/if_packet.h
> @@ -70,6 +70,7 @@ struct tpacket_auxdata {
>  #define TP_STATUS_COPY		0x2
>  #define TP_STATUS_LOSING	0x4
>  #define TP_STATUS_CSUMNOTREADY	0x8
> +#define TP_STATUS_VLAN_VALID   0x10 /* auxdata has valid tp_vlan_tci */
>  
>  /* Tx ring - header status */
>  #define TP_STATUS_AVAILABLE	0x0
> diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
> index f9b807d..9c2d068 100644
> --- a/net/packet/af_packet.c
> +++ b/net/packet/af_packet.c
> @@ -818,7 +818,12 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
>  			getnstimeofday(&ts);
>  		h.h2->tp_sec = ts.tv_sec;
>  		h.h2->tp_nsec = ts.tv_nsec;
> -		h.h2->tp_vlan_tci = vlan_tx_tag_get(skb);
> +		if (vlan_tx_tag_present(skb)) {
> +			h.h2->tp_vlan_tci = vlan_tx_tag_get(skb);
> +			status |= TP_STATUS_VLAN_VALID;
> +		}
> +		else
> +			h.h2->tp_vlan_tci = 0;

	if (...) {
		...
	} else {
		...
	}

>  		hdrlen = sizeof(*h.h2);
>  		break;
>  	default:
> @@ -1763,7 +1768,12 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock,
>  		aux.tp_snaplen = skb->len;
>  		aux.tp_mac = 0;
>  		aux.tp_net = skb_network_offset(skb);
> -		aux.tp_vlan_tci = vlan_tx_tag_get(skb);
> +		if (vlan_tx_tag_present(skb)) {
> +			aux.tp_vlan_tci = vlan_tx_tag_get(skb);
> +			aux.tp_status |= TP_STATUS_VLAN_VALID;
> +		}
> +		else
> +			aux.tp_vlan_tci = 0;

Same codingstyle comment here.

Reviewed-by: Eric Dumazet <eric.dumazet@gmail.com>

Thanks



^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 1/2 v2] af-packet:  Use existing netdev reference for bound sockets.
  2011-05-27  3:42 ` [PATCH 1/2 v2] af-packet: Use existing netdev reference for bound sockets Eric Dumazet
@ 2011-05-27  4:11   ` Ben Greear
  2011-05-27 20:08     ` Eric Dumazet
  0 siblings, 1 reply; 10+ messages in thread
From: Ben Greear @ 2011-05-27  4:11 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: netdev

On 05/26/2011 08:42 PM, Eric Dumazet wrote:
> Le jeudi 26 mai 2011 à 16:55 -0700, greearb@candelatech.com a écrit :

>>   out_free:
>>   	kfree_skb(skb);
>>   out_unlock:
>> -	if (dev)
>> +	if (dev&&  need_rls_dev)
>>   		dev_put(dev);
>>   out:
>>   	return err;
>
> Hmmm, I wonder why you want this Ben.
>
> IMHO this is buggy, because we can sleep in this function.
>
> We must take a ref on device (its really cheap these days, now we have a
> percpu device refcnt)

Why must you take the reference?  And if we must, why isn't the
current code that assigns the prot_hook.dev without taking a
reference OK?

It seems a waste to do the lookup and free if we don't have to,
and with thousands of devices, the lookup might take a reasonable
amount of effort?

Thanks,
Ben


-- 
Ben Greear <greearb@candelatech.com>
Candela Technologies Inc  http://www.candelatech.com

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 1/2 v2] af-packet:  Use existing netdev reference for bound sockets.
  2011-05-27  4:11   ` Ben Greear
@ 2011-05-27 20:08     ` Eric Dumazet
  2011-05-27 20:15       ` David Miller
  0 siblings, 1 reply; 10+ messages in thread
From: Eric Dumazet @ 2011-05-27 20:08 UTC (permalink / raw)
  To: Ben Greear; +Cc: netdev

Le jeudi 26 mai 2011 à 21:11 -0700, Ben Greear a écrit :
> On 05/26/2011 08:42 PM, Eric Dumazet wrote:
> > Le jeudi 26 mai 2011 à 16:55 -0700, greearb@candelatech.com a écrit :
> 
> >>   out_free:
> >>   	kfree_skb(skb);
> >>   out_unlock:
> >> -	if (dev)
> >> +	if (dev&&  need_rls_dev)
> >>   		dev_put(dev);
> >>   out:
> >>   	return err;
> >
> > Hmmm, I wonder why you want this Ben.
> >
> > IMHO this is buggy, because we can sleep in this function.
> >
> > We must take a ref on device (its really cheap these days, now we have a
> > percpu device refcnt)
> 
> Why must you take the reference?  And if we must, why isn't the
> current code that assigns the prot_hook.dev without taking a
> reference OK?
> 

If we sleep, device can disappear under us.

The only way to not take a reference is to hold rcu_read_lock(), but
you're not allowed to sleep under rcu_read_lock().


> It seems a waste to do the lookup and free if we don't have to,
> and with thousands of devices, the lookup might take a reasonable
> amount of effort?

I understand you want to avoid the lookup, this part is fine for me, but
you need to take a reference on the device before eventual sleep.

Nowadays its a single "inc" instruction on x86, without even a lock
prefix (this on a percpu integer)




^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 1/2 v2] af-packet: Use existing netdev reference for bound sockets.
  2011-05-27 20:08     ` Eric Dumazet
@ 2011-05-27 20:15       ` David Miller
  2011-05-27 20:18         ` Ben Greear
  0 siblings, 1 reply; 10+ messages in thread
From: David Miller @ 2011-05-27 20:15 UTC (permalink / raw)
  To: eric.dumazet; +Cc: greearb, netdev

From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Fri, 27 May 2011 22:08:41 +0200

> Le jeudi 26 mai 2011 à 21:11 -0700, Ben Greear a écrit :
>> On 05/26/2011 08:42 PM, Eric Dumazet wrote:
>> > Le jeudi 26 mai 2011 à 16:55 -0700, greearb@candelatech.com a écrit :
>> 
>> >>   out_free:
>> >>   	kfree_skb(skb);
>> >>   out_unlock:
>> >> -	if (dev)
>> >> +	if (dev&&  need_rls_dev)
>> >>   		dev_put(dev);
>> >>   out:
>> >>   	return err;
>> >
>> > Hmmm, I wonder why you want this Ben.
>> >
>> > IMHO this is buggy, because we can sleep in this function.
>> >
>> > We must take a ref on device (its really cheap these days, now we have a
>> > percpu device refcnt)
>> 
>> Why must you take the reference?  And if we must, why isn't the
>> current code that assigns the prot_hook.dev without taking a
>> reference OK?
>> 
> 
> If we sleep, device can disappear under us.
> 
> The only way to not take a reference is to hold rcu_read_lock(), but
> you're not allowed to sleep under rcu_read_lock().

You still have not addresses Ben's point.

Why is it ok for the po->prot_hook.dev handling to not take a
reference?  It's been doing this forever.  Ben is just borrowing this
behavior for his uses.

After some more research I think it happens to be OK because
->prot_hook.dev is used _only_ for pointer comparisons, it is never
actually dereferenced or used in any other way.  Probably, we should
just use ->ifindex for this.


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 1/2 v2] af-packet: Use existing netdev reference for bound sockets.
  2011-05-27 20:15       ` David Miller
@ 2011-05-27 20:18         ` Ben Greear
  2011-05-28  6:20           ` Eric Dumazet
  0 siblings, 1 reply; 10+ messages in thread
From: Ben Greear @ 2011-05-27 20:18 UTC (permalink / raw)
  To: David Miller; +Cc: eric.dumazet, netdev

On 05/27/2011 01:15 PM, David Miller wrote:
> From: Eric Dumazet<eric.dumazet@gmail.com>
> Date: Fri, 27 May 2011 22:08:41 +0200
>
>> Le jeudi 26 mai 2011 à 21:11 -0700, Ben Greear a écrit :
>>> On 05/26/2011 08:42 PM, Eric Dumazet wrote:
>>>> Le jeudi 26 mai 2011 à 16:55 -0700, greearb@candelatech.com a écrit :
>>>
>>>>>    out_free:
>>>>>    	kfree_skb(skb);
>>>>>    out_unlock:
>>>>> -	if (dev)
>>>>> +	if (dev&&   need_rls_dev)
>>>>>    		dev_put(dev);
>>>>>    out:
>>>>>    	return err;
>>>>
>>>> Hmmm, I wonder why you want this Ben.
>>>>
>>>> IMHO this is buggy, because we can sleep in this function.
>>>>
>>>> We must take a ref on device (its really cheap these days, now we have a
>>>> percpu device refcnt)
>>>
>>> Why must you take the reference?  And if we must, why isn't the
>>> current code that assigns the prot_hook.dev without taking a
>>> reference OK?
>>>
>>
>> If we sleep, device can disappear under us.
>>
>> The only way to not take a reference is to hold rcu_read_lock(), but
>> you're not allowed to sleep under rcu_read_lock().
>
> You still have not addresses Ben's point.
>
> Why is it ok for the po->prot_hook.dev handling to not take a
> reference?  It's been doing this forever.  Ben is just borrowing this
> behavior for his uses.
>
> After some more research I think it happens to be OK because
> ->prot_hook.dev is used _only_ for pointer comparisons, it is never
> actually dereferenced or used in any other way.  Probably, we should
> just use ->ifindex for this.

It's easy enough to add a dev_hold() when I assign the skb instead
of looking it up in my patch, but perhaps it would be cleaner over all to
just hold a ref on the prot_hook.dev when it is originally assigned?

Thanks,
Ben

-- 
Ben Greear <greearb@candelatech.com>
Candela Technologies Inc  http://www.candelatech.com


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 1/2 v2] af-packet: Use existing netdev reference for bound sockets.
  2011-05-27 20:18         ` Ben Greear
@ 2011-05-28  6:20           ` Eric Dumazet
  2011-05-28 17:01             ` Ben Greear
  0 siblings, 1 reply; 10+ messages in thread
From: Eric Dumazet @ 2011-05-28  6:20 UTC (permalink / raw)
  To: Ben Greear; +Cc: David Miller, netdev

Le vendredi 27 mai 2011 à 13:18 -0700, Ben Greear a écrit :
> On 05/27/2011 01:15 PM, David Miller wrote:
> > From: Eric Dumazet<eric.dumazet@gmail.com>
> > Date: Fri, 27 May 2011 22:08:41 +0200
> >
> >> Le jeudi 26 mai 2011 à 21:11 -0700, Ben Greear a écrit :
> >>> On 05/26/2011 08:42 PM, Eric Dumazet wrote:
> >>>> Le jeudi 26 mai 2011 à 16:55 -0700, greearb@candelatech.com a écrit :
> >>>
> >>>>>    out_free:
> >>>>>    	kfree_skb(skb);
> >>>>>    out_unlock:
> >>>>> -	if (dev)
> >>>>> +	if (dev&&   need_rls_dev)
> >>>>>    		dev_put(dev);
> >>>>>    out:
> >>>>>    	return err;
> >>>>
> >>>> Hmmm, I wonder why you want this Ben.
> >>>>
> >>>> IMHO this is buggy, because we can sleep in this function.
> >>>>
> >>>> We must take a ref on device (its really cheap these days, now we have a
> >>>> percpu device refcnt)
> >>>
> >>> Why must you take the reference?  And if we must, why isn't the
> >>> current code that assigns the prot_hook.dev without taking a
> >>> reference OK?
> >>>
> >>
> >> If we sleep, device can disappear under us.
> >>
> >> The only way to not take a reference is to hold rcu_read_lock(), but
> >> you're not allowed to sleep under rcu_read_lock().
> >
> > You still have not addresses Ben's point.
> >
> > Why is it ok for the po->prot_hook.dev handling to not take a
> > reference?  It's been doing this forever.  Ben is just borrowing this
> > behavior for his uses.
> >
> > After some more research I think it happens to be OK because
> > ->prot_hook.dev is used _only_ for pointer comparisons, it is never
> > actually dereferenced or used in any other way.  Probably, we should
> > just use ->ifindex for this.
> 
> It's easy enough to add a dev_hold() when I assign the skb instead
> of looking it up in my patch, but perhaps it would be cleaner over all to
> just hold a ref on the prot_hook.dev when it is originally assigned?


Problem is : if packet_notifier(NETDEV_DOWN|UNREGISTER) is run while we
sleep, what happens then ?

Normally, if we sleep a long time in tpacket_snd() after device ref
increment, and before dev_queue_xmit(), the unregister process can enter
the infamous msleep(250) loop in netdev_wait_allrefs(), but at least we
dont crash.

But if you dont take the reference, we can crash in dev_queue_xmit()
when dereferencing the freed netdev structure.

Please check commit 1a35ca80c1db7 (packet: dont call sleeping functions
while holding rcu_read_lock()) for reference on possible problems.

Thanks !



^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 1/2 v2] af-packet: Use existing netdev reference for bound sockets.
  2011-05-28  6:20           ` Eric Dumazet
@ 2011-05-28 17:01             ` Ben Greear
  0 siblings, 0 replies; 10+ messages in thread
From: Ben Greear @ 2011-05-28 17:01 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: David Miller, netdev

On 05/27/2011 11:20 PM, Eric Dumazet wrote:
> Le vendredi 27 mai 2011 à 13:18 -0700, Ben Greear a écrit :
>> On 05/27/2011 01:15 PM, David Miller wrote:
>>> From: Eric Dumazet<eric.dumazet@gmail.com>
>>> Date: Fri, 27 May 2011 22:08:41 +0200
>>>
>>>> Le jeudi 26 mai 2011 à 21:11 -0700, Ben Greear a écrit :
>>>>> On 05/26/2011 08:42 PM, Eric Dumazet wrote:
>>>>>> Le jeudi 26 mai 2011 à 16:55 -0700, greearb@candelatech.com a écrit :
>>>>>
>>>>>>>     out_free:
>>>>>>>     	kfree_skb(skb);
>>>>>>>     out_unlock:
>>>>>>> -	if (dev)
>>>>>>> +	if (dev&&    need_rls_dev)
>>>>>>>     		dev_put(dev);
>>>>>>>     out:
>>>>>>>     	return err;
>>>>>>
>>>>>> Hmmm, I wonder why you want this Ben.
>>>>>>
>>>>>> IMHO this is buggy, because we can sleep in this function.
>>>>>>
>>>>>> We must take a ref on device (its really cheap these days, now we have a
>>>>>> percpu device refcnt)
>>>>>
>>>>> Why must you take the reference?  And if we must, why isn't the
>>>>> current code that assigns the prot_hook.dev without taking a
>>>>> reference OK?
>>>>>
>>>>
>>>> If we sleep, device can disappear under us.
>>>>
>>>> The only way to not take a reference is to hold rcu_read_lock(), but
>>>> you're not allowed to sleep under rcu_read_lock().
>>>
>>> You still have not addresses Ben's point.
>>>
>>> Why is it ok for the po->prot_hook.dev handling to not take a
>>> reference?  It's been doing this forever.  Ben is just borrowing this
>>> behavior for his uses.
>>>
>>> After some more research I think it happens to be OK because
>>> ->prot_hook.dev is used _only_ for pointer comparisons, it is never
>>> actually dereferenced or used in any other way.  Probably, we should
>>> just use ->ifindex for this.
>>
>> It's easy enough to add a dev_hold() when I assign the skb instead
>> of looking it up in my patch, but perhaps it would be cleaner over all to
>> just hold a ref on the prot_hook.dev when it is originally assigned?
>
>
> Problem is : if packet_notifier(NETDEV_DOWN|UNREGISTER) is run while we
> sleep, what happens then ?
>
> Normally, if we sleep a long time in tpacket_snd() after device ref
> increment, and before dev_queue_xmit(), the unregister process can enter
> the infamous msleep(250) loop in netdev_wait_allrefs(), but at least we
> dont crash.
>
> But if you dont take the reference, we can crash in dev_queue_xmit()
> when dereferencing the freed netdev structure.
>
> Please check commit 1a35ca80c1db7 (packet: dont call sleeping functions
> while holding rcu_read_lock()) for reference on possible problems.

I'll create a new patch to hold ref on the prot_hook.dev when it's assigned,
and then layer the 'existing netdev reference' patch on top of that.  Might
be a day or two...

Thanks,
Ben

>
> Thanks !
>
>
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html


-- 
Ben Greear <greearb@candelatech.com>
Candela Technologies Inc  http://www.candelatech.com

^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2011-05-28 17:01 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2011-05-26 23:55 [PATCH 1/2 v2] af-packet: Use existing netdev reference for bound sockets greearb
2011-05-26 23:55 ` [PATCH 2/2 v2] af-packet: Add flag to distinguish VID 0 from no-vlan greearb
2011-05-27  3:46   ` Eric Dumazet
2011-05-27  3:42 ` [PATCH 1/2 v2] af-packet: Use existing netdev reference for bound sockets Eric Dumazet
2011-05-27  4:11   ` Ben Greear
2011-05-27 20:08     ` Eric Dumazet
2011-05-27 20:15       ` David Miller
2011-05-27 20:18         ` Ben Greear
2011-05-28  6:20           ` Eric Dumazet
2011-05-28 17:01             ` Ben Greear

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.