* [PATCH 1/2 v2] af-packet: Use existing netdev reference for bound sockets.
@ 2011-05-26 23:55 greearb
2011-05-26 23:55 ` [PATCH 2/2 v2] af-packet: Add flag to distinguish VID 0 from no-vlan greearb
2011-05-27 3:42 ` [PATCH 1/2 v2] af-packet: Use existing netdev reference for bound sockets Eric Dumazet
0 siblings, 2 replies; 10+ messages in thread
From: greearb @ 2011-05-26 23:55 UTC (permalink / raw)
To: netdev; +Cc: Ben Greear
From: Ben Greear <greearb@candelatech.com>
This saves a network device lookup on each packet transmitted,
for sockets that are bound to a network device.
Signed-off-by: Ben Greear <greearb@candelatech.com>
---
v2: Remove un-used ifindex, lookup dev in else branch.
:100644 100644 4005b24... f9b807d... M net/packet/af_packet.c
net/packet/af_packet.c | 27 +++++++++++++++------------
1 files changed, 15 insertions(+), 12 deletions(-)
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 4005b24..f9b807d 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -989,7 +989,8 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
struct sk_buff *skb;
struct net_device *dev;
__be16 proto;
- int ifindex, err, reserve = 0;
+ bool need_rls_dev = false;
+ int err, reserve = 0;
void *ph;
struct sockaddr_ll *saddr = (struct sockaddr_ll *)msg->msg_name;
int tp_len, size_max;
@@ -1001,7 +1002,7 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
err = -EBUSY;
if (saddr == NULL) {
- ifindex = po->ifindex;
+ dev = po->prot_hook.dev;
proto = po->num;
addr = NULL;
} else {
@@ -1012,12 +1013,12 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
+ offsetof(struct sockaddr_ll,
sll_addr)))
goto out;
- ifindex = saddr->sll_ifindex;
proto = saddr->sll_protocol;
addr = saddr->sll_addr;
+ dev = dev_get_by_index(sock_net(&po->sk), saddr->sll_ifindex);
+ need_rls_dev = true;
}
- dev = dev_get_by_index(sock_net(&po->sk), ifindex);
err = -ENXIO;
if (unlikely(dev == NULL))
goto out;
@@ -1103,7 +1104,8 @@ out_status:
__packet_set_status(po, ph, status);
kfree_skb(skb);
out_put:
- dev_put(dev);
+ if (need_rls_dev)
+ dev_put(dev);
out:
mutex_unlock(&po->pg_vec_lock);
return err;
@@ -1141,8 +1143,9 @@ static int packet_snd(struct socket *sock,
struct sk_buff *skb;
struct net_device *dev;
__be16 proto;
+ bool need_rls_dev = false;
unsigned char *addr;
- int ifindex, err, reserve = 0;
+ int err, reserve = 0;
struct virtio_net_hdr vnet_hdr = { 0 };
int offset = 0;
int vnet_hdr_len;
@@ -1160,7 +1163,7 @@ static int packet_snd(struct socket *sock,
*/
if (saddr == NULL) {
- ifindex = po->ifindex;
+ dev = po->prot_hook.dev;
proto = po->num;
addr = NULL;
} else {
@@ -1169,13 +1172,12 @@ static int packet_snd(struct socket *sock,
goto out;
if (msg->msg_namelen < (saddr->sll_halen + offsetof(struct sockaddr_ll, sll_addr)))
goto out;
- ifindex = saddr->sll_ifindex;
proto = saddr->sll_protocol;
addr = saddr->sll_addr;
+ dev = dev_get_by_index(sock_net(sk), saddr->sll_ifindex);
+ need_rls_dev = true;
}
-
- dev = dev_get_by_index(sock_net(sk), ifindex);
err = -ENXIO;
if (dev == NULL)
goto out_unlock;
@@ -1315,14 +1317,15 @@ static int packet_snd(struct socket *sock,
if (err > 0 && (err = net_xmit_errno(err)) != 0)
goto out_unlock;
- dev_put(dev);
+ if (need_rls_dev)
+ dev_put(dev);
return len;
out_free:
kfree_skb(skb);
out_unlock:
- if (dev)
+ if (dev && need_rls_dev)
dev_put(dev);
out:
return err;
--
1.7.3.4
^ permalink raw reply related [flat|nested] 10+ messages in thread
* [PATCH 2/2 v2] af-packet: Add flag to distinguish VID 0 from no-vlan.
2011-05-26 23:55 [PATCH 1/2 v2] af-packet: Use existing netdev reference for bound sockets greearb
@ 2011-05-26 23:55 ` greearb
2011-05-27 3:46 ` Eric Dumazet
2011-05-27 3:42 ` [PATCH 1/2 v2] af-packet: Use existing netdev reference for bound sockets Eric Dumazet
1 sibling, 1 reply; 10+ messages in thread
From: greearb @ 2011-05-26 23:55 UTC (permalink / raw)
To: netdev; +Cc: Ben Greear
From: Ben Greear <greearb@candelatech.com>
Currently, user-space cannot determine if a 0 tcp_vlan_tci
means there is no VLAN tag or the VLAN ID was zero.
Add flag to make this explicit. User-space can check for
TP_STATUS_VLAN_VALID || tp_vlan_tci > 0, which will be backwards
compatible. Older could would have just checked for tp_vlan_tci,
so it will work no worse than before.
Signed-off-by: Ben Greear <greearb@candelatech.com>
---
v2: add logic to tpacket_rcv too.
:100644 100644 72bfa5a... 6d66ce1... M include/linux/if_packet.h
:100644 100644 f9b807d... 9c2d068... M net/packet/af_packet.c
include/linux/if_packet.h | 1 +
net/packet/af_packet.c | 14 ++++++++++++--
2 files changed, 13 insertions(+), 2 deletions(-)
diff --git a/include/linux/if_packet.h b/include/linux/if_packet.h
index 72bfa5a..6d66ce1 100644
--- a/include/linux/if_packet.h
+++ b/include/linux/if_packet.h
@@ -70,6 +70,7 @@ struct tpacket_auxdata {
#define TP_STATUS_COPY 0x2
#define TP_STATUS_LOSING 0x4
#define TP_STATUS_CSUMNOTREADY 0x8
+#define TP_STATUS_VLAN_VALID 0x10 /* auxdata has valid tp_vlan_tci */
/* Tx ring - header status */
#define TP_STATUS_AVAILABLE 0x0
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index f9b807d..9c2d068 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -818,7 +818,12 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
getnstimeofday(&ts);
h.h2->tp_sec = ts.tv_sec;
h.h2->tp_nsec = ts.tv_nsec;
- h.h2->tp_vlan_tci = vlan_tx_tag_get(skb);
+ if (vlan_tx_tag_present(skb)) {
+ h.h2->tp_vlan_tci = vlan_tx_tag_get(skb);
+ status |= TP_STATUS_VLAN_VALID;
+ }
+ else
+ h.h2->tp_vlan_tci = 0;
hdrlen = sizeof(*h.h2);
break;
default:
@@ -1763,7 +1768,12 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock,
aux.tp_snaplen = skb->len;
aux.tp_mac = 0;
aux.tp_net = skb_network_offset(skb);
- aux.tp_vlan_tci = vlan_tx_tag_get(skb);
+ if (vlan_tx_tag_present(skb)) {
+ aux.tp_vlan_tci = vlan_tx_tag_get(skb);
+ aux.tp_status |= TP_STATUS_VLAN_VALID;
+ }
+ else
+ aux.tp_vlan_tci = 0;
put_cmsg(msg, SOL_PACKET, PACKET_AUXDATA, sizeof(aux), &aux);
}
--
1.7.3.4
^ permalink raw reply related [flat|nested] 10+ messages in thread
* Re: [PATCH 2/2 v2] af-packet: Add flag to distinguish VID 0 from no-vlan.
2011-05-26 23:55 ` [PATCH 2/2 v2] af-packet: Add flag to distinguish VID 0 from no-vlan greearb
@ 2011-05-27 3:46 ` Eric Dumazet
0 siblings, 0 replies; 10+ messages in thread
From: Eric Dumazet @ 2011-05-27 3:46 UTC (permalink / raw)
To: greearb; +Cc: netdev
Le jeudi 26 mai 2011 à 16:55 -0700, greearb@candelatech.com a écrit :
> From: Ben Greear <greearb@candelatech.com>
>
> Currently, user-space cannot determine if a 0 tcp_vlan_tci
> means there is no VLAN tag or the VLAN ID was zero.
>
> Add flag to make this explicit. User-space can check for
> TP_STATUS_VLAN_VALID || tp_vlan_tci > 0, which will be backwards
> compatible. Older could would have just checked for tp_vlan_tci,
> so it will work no worse than before.
>
> Signed-off-by: Ben Greear <greearb@candelatech.com>
> ---
> v2: add logic to tpacket_rcv too.
>
> :100644 100644 72bfa5a... 6d66ce1... M include/linux/if_packet.h
> :100644 100644 f9b807d... 9c2d068... M net/packet/af_packet.c
> include/linux/if_packet.h | 1 +
> net/packet/af_packet.c | 14 ++++++++++++--
> 2 files changed, 13 insertions(+), 2 deletions(-)
>
> diff --git a/include/linux/if_packet.h b/include/linux/if_packet.h
> index 72bfa5a..6d66ce1 100644
> --- a/include/linux/if_packet.h
> +++ b/include/linux/if_packet.h
> @@ -70,6 +70,7 @@ struct tpacket_auxdata {
> #define TP_STATUS_COPY 0x2
> #define TP_STATUS_LOSING 0x4
> #define TP_STATUS_CSUMNOTREADY 0x8
> +#define TP_STATUS_VLAN_VALID 0x10 /* auxdata has valid tp_vlan_tci */
>
> /* Tx ring - header status */
> #define TP_STATUS_AVAILABLE 0x0
> diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
> index f9b807d..9c2d068 100644
> --- a/net/packet/af_packet.c
> +++ b/net/packet/af_packet.c
> @@ -818,7 +818,12 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
> getnstimeofday(&ts);
> h.h2->tp_sec = ts.tv_sec;
> h.h2->tp_nsec = ts.tv_nsec;
> - h.h2->tp_vlan_tci = vlan_tx_tag_get(skb);
> + if (vlan_tx_tag_present(skb)) {
> + h.h2->tp_vlan_tci = vlan_tx_tag_get(skb);
> + status |= TP_STATUS_VLAN_VALID;
> + }
> + else
> + h.h2->tp_vlan_tci = 0;
if (...) {
...
} else {
...
}
> hdrlen = sizeof(*h.h2);
> break;
> default:
> @@ -1763,7 +1768,12 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock,
> aux.tp_snaplen = skb->len;
> aux.tp_mac = 0;
> aux.tp_net = skb_network_offset(skb);
> - aux.tp_vlan_tci = vlan_tx_tag_get(skb);
> + if (vlan_tx_tag_present(skb)) {
> + aux.tp_vlan_tci = vlan_tx_tag_get(skb);
> + aux.tp_status |= TP_STATUS_VLAN_VALID;
> + }
> + else
> + aux.tp_vlan_tci = 0;
Same codingstyle comment here.
Reviewed-by: Eric Dumazet <eric.dumazet@gmail.com>
Thanks
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH 1/2 v2] af-packet: Use existing netdev reference for bound sockets.
2011-05-26 23:55 [PATCH 1/2 v2] af-packet: Use existing netdev reference for bound sockets greearb
2011-05-26 23:55 ` [PATCH 2/2 v2] af-packet: Add flag to distinguish VID 0 from no-vlan greearb
@ 2011-05-27 3:42 ` Eric Dumazet
2011-05-27 4:11 ` Ben Greear
1 sibling, 1 reply; 10+ messages in thread
From: Eric Dumazet @ 2011-05-27 3:42 UTC (permalink / raw)
To: greearb; +Cc: netdev
Le jeudi 26 mai 2011 à 16:55 -0700, greearb@candelatech.com a écrit :
> From: Ben Greear <greearb@candelatech.com>
>
> This saves a network device lookup on each packet transmitted,
> for sockets that are bound to a network device.
>
> Signed-off-by: Ben Greear <greearb@candelatech.com>
> ---
>
> v2: Remove un-used ifindex, lookup dev in else branch.
>
> :100644 100644 4005b24... f9b807d... M net/packet/af_packet.c
> net/packet/af_packet.c | 27 +++++++++++++++------------
> 1 files changed, 15 insertions(+), 12 deletions(-)
>
> diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
> index 4005b24..f9b807d 100644
> --- a/net/packet/af_packet.c
> +++ b/net/packet/af_packet.c
> @@ -989,7 +989,8 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
> struct sk_buff *skb;
> struct net_device *dev;
> __be16 proto;
> - int ifindex, err, reserve = 0;
> + bool need_rls_dev = false;
> + int err, reserve = 0;
> void *ph;
> struct sockaddr_ll *saddr = (struct sockaddr_ll *)msg->msg_name;
> int tp_len, size_max;
> @@ -1001,7 +1002,7 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
>
> err = -EBUSY;
> if (saddr == NULL) {
> - ifindex = po->ifindex;
> + dev = po->prot_hook.dev;
> proto = po->num;
> addr = NULL;
> } else {
> @@ -1012,12 +1013,12 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
> + offsetof(struct sockaddr_ll,
> sll_addr)))
> goto out;
> - ifindex = saddr->sll_ifindex;
> proto = saddr->sll_protocol;
> addr = saddr->sll_addr;
> + dev = dev_get_by_index(sock_net(&po->sk), saddr->sll_ifindex);
> + need_rls_dev = true;
> }
>
> - dev = dev_get_by_index(sock_net(&po->sk), ifindex);
> err = -ENXIO;
> if (unlikely(dev == NULL))
> goto out;
> @@ -1103,7 +1104,8 @@ out_status:
> __packet_set_status(po, ph, status);
> kfree_skb(skb);
> out_put:
> - dev_put(dev);
> + if (need_rls_dev)
> + dev_put(dev);
> out:
> mutex_unlock(&po->pg_vec_lock);
> return err;
> @@ -1141,8 +1143,9 @@ static int packet_snd(struct socket *sock,
> struct sk_buff *skb;
> struct net_device *dev;
> __be16 proto;
> + bool need_rls_dev = false;
> unsigned char *addr;
> - int ifindex, err, reserve = 0;
> + int err, reserve = 0;
> struct virtio_net_hdr vnet_hdr = { 0 };
> int offset = 0;
> int vnet_hdr_len;
> @@ -1160,7 +1163,7 @@ static int packet_snd(struct socket *sock,
> */
>
> if (saddr == NULL) {
> - ifindex = po->ifindex;
> + dev = po->prot_hook.dev;
> proto = po->num;
> addr = NULL;
> } else {
> @@ -1169,13 +1172,12 @@ static int packet_snd(struct socket *sock,
> goto out;
> if (msg->msg_namelen < (saddr->sll_halen + offsetof(struct sockaddr_ll, sll_addr)))
> goto out;
> - ifindex = saddr->sll_ifindex;
> proto = saddr->sll_protocol;
> addr = saddr->sll_addr;
> + dev = dev_get_by_index(sock_net(sk), saddr->sll_ifindex);
> + need_rls_dev = true;
> }
>
> -
> - dev = dev_get_by_index(sock_net(sk), ifindex);
> err = -ENXIO;
> if (dev == NULL)
> goto out_unlock;
> @@ -1315,14 +1317,15 @@ static int packet_snd(struct socket *sock,
> if (err > 0 && (err = net_xmit_errno(err)) != 0)
> goto out_unlock;
>
> - dev_put(dev);
> + if (need_rls_dev)
> + dev_put(dev);
>
> return len;
>
> out_free:
> kfree_skb(skb);
> out_unlock:
> - if (dev)
> + if (dev && need_rls_dev)
> dev_put(dev);
> out:
> return err;
Hmmm, I wonder why you want this Ben.
IMHO this is buggy, because we can sleep in this function.
We must take a ref on device (its really cheap these days, now we have a
percpu device refcnt)
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH 1/2 v2] af-packet: Use existing netdev reference for bound sockets.
2011-05-27 3:42 ` [PATCH 1/2 v2] af-packet: Use existing netdev reference for bound sockets Eric Dumazet
@ 2011-05-27 4:11 ` Ben Greear
2011-05-27 20:08 ` Eric Dumazet
0 siblings, 1 reply; 10+ messages in thread
From: Ben Greear @ 2011-05-27 4:11 UTC (permalink / raw)
To: Eric Dumazet; +Cc: netdev
On 05/26/2011 08:42 PM, Eric Dumazet wrote:
> Le jeudi 26 mai 2011 à 16:55 -0700, greearb@candelatech.com a écrit :
>> out_free:
>> kfree_skb(skb);
>> out_unlock:
>> - if (dev)
>> + if (dev&& need_rls_dev)
>> dev_put(dev);
>> out:
>> return err;
>
> Hmmm, I wonder why you want this Ben.
>
> IMHO this is buggy, because we can sleep in this function.
>
> We must take a ref on device (its really cheap these days, now we have a
> percpu device refcnt)
Why must you take the reference? And if we must, why isn't the
current code that assigns the prot_hook.dev without taking a
reference OK?
It seems a waste to do the lookup and free if we don't have to,
and with thousands of devices, the lookup might take a reasonable
amount of effort?
Thanks,
Ben
--
Ben Greear <greearb@candelatech.com>
Candela Technologies Inc http://www.candelatech.com
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH 1/2 v2] af-packet: Use existing netdev reference for bound sockets.
2011-05-27 4:11 ` Ben Greear
@ 2011-05-27 20:08 ` Eric Dumazet
2011-05-27 20:15 ` David Miller
0 siblings, 1 reply; 10+ messages in thread
From: Eric Dumazet @ 2011-05-27 20:08 UTC (permalink / raw)
To: Ben Greear; +Cc: netdev
Le jeudi 26 mai 2011 à 21:11 -0700, Ben Greear a écrit :
> On 05/26/2011 08:42 PM, Eric Dumazet wrote:
> > Le jeudi 26 mai 2011 à 16:55 -0700, greearb@candelatech.com a écrit :
>
> >> out_free:
> >> kfree_skb(skb);
> >> out_unlock:
> >> - if (dev)
> >> + if (dev&& need_rls_dev)
> >> dev_put(dev);
> >> out:
> >> return err;
> >
> > Hmmm, I wonder why you want this Ben.
> >
> > IMHO this is buggy, because we can sleep in this function.
> >
> > We must take a ref on device (its really cheap these days, now we have a
> > percpu device refcnt)
>
> Why must you take the reference? And if we must, why isn't the
> current code that assigns the prot_hook.dev without taking a
> reference OK?
>
If we sleep, device can disappear under us.
The only way to not take a reference is to hold rcu_read_lock(), but
you're not allowed to sleep under rcu_read_lock().
> It seems a waste to do the lookup and free if we don't have to,
> and with thousands of devices, the lookup might take a reasonable
> amount of effort?
I understand you want to avoid the lookup, this part is fine for me, but
you need to take a reference on the device before eventual sleep.
Nowadays its a single "inc" instruction on x86, without even a lock
prefix (this on a percpu integer)
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH 1/2 v2] af-packet: Use existing netdev reference for bound sockets.
2011-05-27 20:08 ` Eric Dumazet
@ 2011-05-27 20:15 ` David Miller
2011-05-27 20:18 ` Ben Greear
0 siblings, 1 reply; 10+ messages in thread
From: David Miller @ 2011-05-27 20:15 UTC (permalink / raw)
To: eric.dumazet; +Cc: greearb, netdev
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Fri, 27 May 2011 22:08:41 +0200
> Le jeudi 26 mai 2011 à 21:11 -0700, Ben Greear a écrit :
>> On 05/26/2011 08:42 PM, Eric Dumazet wrote:
>> > Le jeudi 26 mai 2011 à 16:55 -0700, greearb@candelatech.com a écrit :
>>
>> >> out_free:
>> >> kfree_skb(skb);
>> >> out_unlock:
>> >> - if (dev)
>> >> + if (dev&& need_rls_dev)
>> >> dev_put(dev);
>> >> out:
>> >> return err;
>> >
>> > Hmmm, I wonder why you want this Ben.
>> >
>> > IMHO this is buggy, because we can sleep in this function.
>> >
>> > We must take a ref on device (its really cheap these days, now we have a
>> > percpu device refcnt)
>>
>> Why must you take the reference? And if we must, why isn't the
>> current code that assigns the prot_hook.dev without taking a
>> reference OK?
>>
>
> If we sleep, device can disappear under us.
>
> The only way to not take a reference is to hold rcu_read_lock(), but
> you're not allowed to sleep under rcu_read_lock().
You still have not addresses Ben's point.
Why is it ok for the po->prot_hook.dev handling to not take a
reference? It's been doing this forever. Ben is just borrowing this
behavior for his uses.
After some more research I think it happens to be OK because
->prot_hook.dev is used _only_ for pointer comparisons, it is never
actually dereferenced or used in any other way. Probably, we should
just use ->ifindex for this.
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH 1/2 v2] af-packet: Use existing netdev reference for bound sockets.
2011-05-27 20:15 ` David Miller
@ 2011-05-27 20:18 ` Ben Greear
2011-05-28 6:20 ` Eric Dumazet
0 siblings, 1 reply; 10+ messages in thread
From: Ben Greear @ 2011-05-27 20:18 UTC (permalink / raw)
To: David Miller; +Cc: eric.dumazet, netdev
On 05/27/2011 01:15 PM, David Miller wrote:
> From: Eric Dumazet<eric.dumazet@gmail.com>
> Date: Fri, 27 May 2011 22:08:41 +0200
>
>> Le jeudi 26 mai 2011 à 21:11 -0700, Ben Greear a écrit :
>>> On 05/26/2011 08:42 PM, Eric Dumazet wrote:
>>>> Le jeudi 26 mai 2011 à 16:55 -0700, greearb@candelatech.com a écrit :
>>>
>>>>> out_free:
>>>>> kfree_skb(skb);
>>>>> out_unlock:
>>>>> - if (dev)
>>>>> + if (dev&& need_rls_dev)
>>>>> dev_put(dev);
>>>>> out:
>>>>> return err;
>>>>
>>>> Hmmm, I wonder why you want this Ben.
>>>>
>>>> IMHO this is buggy, because we can sleep in this function.
>>>>
>>>> We must take a ref on device (its really cheap these days, now we have a
>>>> percpu device refcnt)
>>>
>>> Why must you take the reference? And if we must, why isn't the
>>> current code that assigns the prot_hook.dev without taking a
>>> reference OK?
>>>
>>
>> If we sleep, device can disappear under us.
>>
>> The only way to not take a reference is to hold rcu_read_lock(), but
>> you're not allowed to sleep under rcu_read_lock().
>
> You still have not addresses Ben's point.
>
> Why is it ok for the po->prot_hook.dev handling to not take a
> reference? It's been doing this forever. Ben is just borrowing this
> behavior for his uses.
>
> After some more research I think it happens to be OK because
> ->prot_hook.dev is used _only_ for pointer comparisons, it is never
> actually dereferenced or used in any other way. Probably, we should
> just use ->ifindex for this.
It's easy enough to add a dev_hold() when I assign the skb instead
of looking it up in my patch, but perhaps it would be cleaner over all to
just hold a ref on the prot_hook.dev when it is originally assigned?
Thanks,
Ben
--
Ben Greear <greearb@candelatech.com>
Candela Technologies Inc http://www.candelatech.com
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH 1/2 v2] af-packet: Use existing netdev reference for bound sockets.
2011-05-27 20:18 ` Ben Greear
@ 2011-05-28 6:20 ` Eric Dumazet
2011-05-28 17:01 ` Ben Greear
0 siblings, 1 reply; 10+ messages in thread
From: Eric Dumazet @ 2011-05-28 6:20 UTC (permalink / raw)
To: Ben Greear; +Cc: David Miller, netdev
Le vendredi 27 mai 2011 à 13:18 -0700, Ben Greear a écrit :
> On 05/27/2011 01:15 PM, David Miller wrote:
> > From: Eric Dumazet<eric.dumazet@gmail.com>
> > Date: Fri, 27 May 2011 22:08:41 +0200
> >
> >> Le jeudi 26 mai 2011 à 21:11 -0700, Ben Greear a écrit :
> >>> On 05/26/2011 08:42 PM, Eric Dumazet wrote:
> >>>> Le jeudi 26 mai 2011 à 16:55 -0700, greearb@candelatech.com a écrit :
> >>>
> >>>>> out_free:
> >>>>> kfree_skb(skb);
> >>>>> out_unlock:
> >>>>> - if (dev)
> >>>>> + if (dev&& need_rls_dev)
> >>>>> dev_put(dev);
> >>>>> out:
> >>>>> return err;
> >>>>
> >>>> Hmmm, I wonder why you want this Ben.
> >>>>
> >>>> IMHO this is buggy, because we can sleep in this function.
> >>>>
> >>>> We must take a ref on device (its really cheap these days, now we have a
> >>>> percpu device refcnt)
> >>>
> >>> Why must you take the reference? And if we must, why isn't the
> >>> current code that assigns the prot_hook.dev without taking a
> >>> reference OK?
> >>>
> >>
> >> If we sleep, device can disappear under us.
> >>
> >> The only way to not take a reference is to hold rcu_read_lock(), but
> >> you're not allowed to sleep under rcu_read_lock().
> >
> > You still have not addresses Ben's point.
> >
> > Why is it ok for the po->prot_hook.dev handling to not take a
> > reference? It's been doing this forever. Ben is just borrowing this
> > behavior for his uses.
> >
> > After some more research I think it happens to be OK because
> > ->prot_hook.dev is used _only_ for pointer comparisons, it is never
> > actually dereferenced or used in any other way. Probably, we should
> > just use ->ifindex for this.
>
> It's easy enough to add a dev_hold() when I assign the skb instead
> of looking it up in my patch, but perhaps it would be cleaner over all to
> just hold a ref on the prot_hook.dev when it is originally assigned?
Problem is : if packet_notifier(NETDEV_DOWN|UNREGISTER) is run while we
sleep, what happens then ?
Normally, if we sleep a long time in tpacket_snd() after device ref
increment, and before dev_queue_xmit(), the unregister process can enter
the infamous msleep(250) loop in netdev_wait_allrefs(), but at least we
dont crash.
But if you dont take the reference, we can crash in dev_queue_xmit()
when dereferencing the freed netdev structure.
Please check commit 1a35ca80c1db7 (packet: dont call sleeping functions
while holding rcu_read_lock()) for reference on possible problems.
Thanks !
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH 1/2 v2] af-packet: Use existing netdev reference for bound sockets.
2011-05-28 6:20 ` Eric Dumazet
@ 2011-05-28 17:01 ` Ben Greear
0 siblings, 0 replies; 10+ messages in thread
From: Ben Greear @ 2011-05-28 17:01 UTC (permalink / raw)
To: Eric Dumazet; +Cc: David Miller, netdev
On 05/27/2011 11:20 PM, Eric Dumazet wrote:
> Le vendredi 27 mai 2011 à 13:18 -0700, Ben Greear a écrit :
>> On 05/27/2011 01:15 PM, David Miller wrote:
>>> From: Eric Dumazet<eric.dumazet@gmail.com>
>>> Date: Fri, 27 May 2011 22:08:41 +0200
>>>
>>>> Le jeudi 26 mai 2011 à 21:11 -0700, Ben Greear a écrit :
>>>>> On 05/26/2011 08:42 PM, Eric Dumazet wrote:
>>>>>> Le jeudi 26 mai 2011 à 16:55 -0700, greearb@candelatech.com a écrit :
>>>>>
>>>>>>> out_free:
>>>>>>> kfree_skb(skb);
>>>>>>> out_unlock:
>>>>>>> - if (dev)
>>>>>>> + if (dev&& need_rls_dev)
>>>>>>> dev_put(dev);
>>>>>>> out:
>>>>>>> return err;
>>>>>>
>>>>>> Hmmm, I wonder why you want this Ben.
>>>>>>
>>>>>> IMHO this is buggy, because we can sleep in this function.
>>>>>>
>>>>>> We must take a ref on device (its really cheap these days, now we have a
>>>>>> percpu device refcnt)
>>>>>
>>>>> Why must you take the reference? And if we must, why isn't the
>>>>> current code that assigns the prot_hook.dev without taking a
>>>>> reference OK?
>>>>>
>>>>
>>>> If we sleep, device can disappear under us.
>>>>
>>>> The only way to not take a reference is to hold rcu_read_lock(), but
>>>> you're not allowed to sleep under rcu_read_lock().
>>>
>>> You still have not addresses Ben's point.
>>>
>>> Why is it ok for the po->prot_hook.dev handling to not take a
>>> reference? It's been doing this forever. Ben is just borrowing this
>>> behavior for his uses.
>>>
>>> After some more research I think it happens to be OK because
>>> ->prot_hook.dev is used _only_ for pointer comparisons, it is never
>>> actually dereferenced or used in any other way. Probably, we should
>>> just use ->ifindex for this.
>>
>> It's easy enough to add a dev_hold() when I assign the skb instead
>> of looking it up in my patch, but perhaps it would be cleaner over all to
>> just hold a ref on the prot_hook.dev when it is originally assigned?
>
>
> Problem is : if packet_notifier(NETDEV_DOWN|UNREGISTER) is run while we
> sleep, what happens then ?
>
> Normally, if we sleep a long time in tpacket_snd() after device ref
> increment, and before dev_queue_xmit(), the unregister process can enter
> the infamous msleep(250) loop in netdev_wait_allrefs(), but at least we
> dont crash.
>
> But if you dont take the reference, we can crash in dev_queue_xmit()
> when dereferencing the freed netdev structure.
>
> Please check commit 1a35ca80c1db7 (packet: dont call sleeping functions
> while holding rcu_read_lock()) for reference on possible problems.
I'll create a new patch to hold ref on the prot_hook.dev when it's assigned,
and then layer the 'existing netdev reference' patch on top of that. Might
be a day or two...
Thanks,
Ben
>
> Thanks !
>
>
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
--
Ben Greear <greearb@candelatech.com>
Candela Technologies Inc http://www.candelatech.com
^ permalink raw reply [flat|nested] 10+ messages in thread
end of thread, other threads:[~2011-05-28 17:01 UTC | newest]
Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2011-05-26 23:55 [PATCH 1/2 v2] af-packet: Use existing netdev reference for bound sockets greearb
2011-05-26 23:55 ` [PATCH 2/2 v2] af-packet: Add flag to distinguish VID 0 from no-vlan greearb
2011-05-27 3:46 ` Eric Dumazet
2011-05-27 3:42 ` [PATCH 1/2 v2] af-packet: Use existing netdev reference for bound sockets Eric Dumazet
2011-05-27 4:11 ` Ben Greear
2011-05-27 20:08 ` Eric Dumazet
2011-05-27 20:15 ` David Miller
2011-05-27 20:18 ` Ben Greear
2011-05-28 6:20 ` Eric Dumazet
2011-05-28 17:01 ` Ben Greear
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.