All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH net] can: raw: fix receiver memory leak
@ 2023-07-05  9:25 Ziyang Xuan
  2023-07-06 11:55 ` Oliver Hartkopp
  0 siblings, 1 reply; 6+ messages in thread
From: Ziyang Xuan @ 2023-07-05  9:25 UTC (permalink / raw)
  To: socketcan, mkl, davem, edumazet, kuba, pabeni, linux-can, netdev,
	penguin-kernel

Got kmemleak errors with the following ltp can_filter testcase:

for ((i=1; i<=100; i++))
do
        ./can_filter &
        sleep 0.1
done

==============================================================
[<00000000db4a4943>] can_rx_register+0x147/0x360 [can]
[<00000000a289549d>] raw_setsockopt+0x5ef/0x853 [can_raw]
[<000000006d3d9ebd>] __sys_setsockopt+0x173/0x2c0
[<00000000407dbfec>] __x64_sys_setsockopt+0x61/0x70
[<00000000fd468496>] do_syscall_64+0x33/0x40
[<00000000b7e47d51>] entry_SYSCALL_64_after_hwframe+0x61/0xc6

It's a bug in the concurrent scenario of unregister_netdevice_many()
and raw_release() as following:

             cpu0                                        cpu1
unregister_netdevice_many(can_dev)
  unlist_netdevice(can_dev) // dev_get_by_index() return NULL after this
  net_set_todo(can_dev)
						raw_release(can_socket)
						  dev = dev_get_by_index(, ro->ifindex); // dev == NULL
						  if (dev) { // receivers in dev_rcv_lists not free because dev is NULL
						    raw_disable_allfilters(, dev, );
						    dev_put(dev);
						  }
						...
						ro->bound = 0;
						...

call_netdevice_notifiers(NETDEV_UNREGISTER, )
  raw_notify(, NETDEV_UNREGISTER, )
    if (ro->bound) // invalid because ro->bound has been set 0
      raw_disable_allfilters(, dev, ); // receivers in dev_rcv_lists will never be freed

Add a net_device pointer member in struct raw_sock to record bound can_dev,
and use rtnl_lock to serialize raw_socket members between raw_bind(), raw_release(),
raw_setsockopt() and raw_notify(). Use ro->dev to decide whether to free receivers in
dev_rcv_lists.

Fixes: 8d0caedb7596 ("can: bcm/raw/isotp: use per module netdevice notifier")
Signed-off-by: Ziyang Xuan <william.xuanziyang@huawei.com>
---
 net/can/raw.c | 63 ++++++++++++++++++++++-----------------------------
 1 file changed, 27 insertions(+), 36 deletions(-)

diff --git a/net/can/raw.c b/net/can/raw.c
index 15c79b079184..e767d356f695 100644
--- a/net/can/raw.c
+++ b/net/can/raw.c
@@ -84,6 +84,7 @@ struct raw_sock {
 	struct sock sk;
 	int bound;
 	int ifindex;
+	struct net_device *dev;
 	struct list_head notifier;
 	int loopback;
 	int recv_own_msgs;
@@ -277,7 +278,7 @@ static void raw_notify(struct raw_sock *ro, unsigned long msg,
 	if (!net_eq(dev_net(dev), sock_net(sk)))
 		return;
 
-	if (ro->ifindex != dev->ifindex)
+	if (ro->dev != dev)
 		return;
 
 	switch (msg) {
@@ -292,6 +293,7 @@ static void raw_notify(struct raw_sock *ro, unsigned long msg,
 
 		ro->ifindex = 0;
 		ro->bound = 0;
+		ro->dev = NULL;
 		ro->count = 0;
 		release_sock(sk);
 
@@ -337,6 +339,7 @@ static int raw_init(struct sock *sk)
 
 	ro->bound            = 0;
 	ro->ifindex          = 0;
+	ro->dev              = NULL;
 
 	/* set default filter to single entry dfilter */
 	ro->dfilter.can_id   = 0;
@@ -385,19 +388,13 @@ static int raw_release(struct socket *sock)
 
 	lock_sock(sk);
 
+	rtnl_lock();
 	/* remove current filters & unregister */
 	if (ro->bound) {
-		if (ro->ifindex) {
-			struct net_device *dev;
-
-			dev = dev_get_by_index(sock_net(sk), ro->ifindex);
-			if (dev) {
-				raw_disable_allfilters(dev_net(dev), dev, sk);
-				dev_put(dev);
-			}
-		} else {
+		if (ro->dev)
+			raw_disable_allfilters(dev_net(ro->dev), ro->dev, sk);
+		else
 			raw_disable_allfilters(sock_net(sk), NULL, sk);
-		}
 	}
 
 	if (ro->count > 1)
@@ -405,8 +402,10 @@ static int raw_release(struct socket *sock)
 
 	ro->ifindex = 0;
 	ro->bound = 0;
+	ro->dev = NULL;
 	ro->count = 0;
 	free_percpu(ro->uniq);
+	rtnl_unlock();
 
 	sock_orphan(sk);
 	sock->sk = NULL;
@@ -422,6 +421,7 @@ static int raw_bind(struct socket *sock, struct sockaddr *uaddr, int len)
 	struct sockaddr_can *addr = (struct sockaddr_can *)uaddr;
 	struct sock *sk = sock->sk;
 	struct raw_sock *ro = raw_sk(sk);
+	struct net_device *dev = NULL;
 	int ifindex;
 	int err = 0;
 	int notify_enetdown = 0;
@@ -431,14 +431,13 @@ static int raw_bind(struct socket *sock, struct sockaddr *uaddr, int len)
 	if (addr->can_family != AF_CAN)
 		return -EINVAL;
 
+	rtnl_lock();
 	lock_sock(sk);
 
-	if (ro->bound && addr->can_ifindex == ro->ifindex)
+	if (ro->bound && ro->dev && addr->can_ifindex == ro->dev->ifindex)
 		goto out;
 
 	if (addr->can_ifindex) {
-		struct net_device *dev;
-
 		dev = dev_get_by_index(sock_net(sk), addr->can_ifindex);
 		if (!dev) {
 			err = -ENODEV;
@@ -465,28 +464,23 @@ static int raw_bind(struct socket *sock, struct sockaddr *uaddr, int len)
 	}
 
 	if (!err) {
+		/* unregister old filters */
 		if (ro->bound) {
-			/* unregister old filters */
-			if (ro->ifindex) {
-				struct net_device *dev;
-
-				dev = dev_get_by_index(sock_net(sk),
-						       ro->ifindex);
-				if (dev) {
-					raw_disable_allfilters(dev_net(dev),
-							       dev, sk);
-					dev_put(dev);
-				}
-			} else {
+			if (ro->dev)
+				raw_disable_allfilters(dev_net(ro->dev),
+						       ro->dev, sk);
+			else
 				raw_disable_allfilters(sock_net(sk), NULL, sk);
-			}
 		}
 		ro->ifindex = ifindex;
+
 		ro->bound = 1;
+		ro->dev = dev;
 	}
 
  out:
 	release_sock(sk);
+	rtnl_unlock();
 
 	if (notify_enetdown) {
 		sk->sk_err = ENETDOWN;
@@ -553,9 +547,9 @@ static int raw_setsockopt(struct socket *sock, int level, int optname,
 		rtnl_lock();
 		lock_sock(sk);
 
-		if (ro->bound && ro->ifindex) {
-			dev = dev_get_by_index(sock_net(sk), ro->ifindex);
-			if (!dev) {
+		dev = ro->dev;
+		if (ro->bound && dev) {
+			if (dev->reg_state != NETREG_REGISTERED) {
 				if (count > 1)
 					kfree(filter);
 				err = -ENODEV;
@@ -596,7 +590,6 @@ static int raw_setsockopt(struct socket *sock, int level, int optname,
 		ro->count  = count;
 
  out_fil:
-		dev_put(dev);
 		release_sock(sk);
 		rtnl_unlock();
 
@@ -614,9 +607,9 @@ static int raw_setsockopt(struct socket *sock, int level, int optname,
 		rtnl_lock();
 		lock_sock(sk);
 
-		if (ro->bound && ro->ifindex) {
-			dev = dev_get_by_index(sock_net(sk), ro->ifindex);
-			if (!dev) {
+		dev = ro->dev;
+		if (ro->bound && dev) {
+			if (dev->reg_state != NETREG_REGISTERED) {
 				err = -ENODEV;
 				goto out_err;
 			}
@@ -627,7 +620,6 @@ static int raw_setsockopt(struct socket *sock, int level, int optname,
 			/* (try to) register the new err_mask */
 			err = raw_enable_errfilter(sock_net(sk), dev, sk,
 						   err_mask);
-
 			if (err)
 				goto out_err;
 
@@ -640,7 +632,6 @@ static int raw_setsockopt(struct socket *sock, int level, int optname,
 		ro->err_mask = err_mask;
 
  out_err:
-		dev_put(dev);
 		release_sock(sk);
 		rtnl_unlock();
 
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH net] can: raw: fix receiver memory leak
  2023-07-05  9:25 [PATCH net] can: raw: fix receiver memory leak Ziyang Xuan
@ 2023-07-06 11:55 ` Oliver Hartkopp
  2023-07-06 12:48   ` Ziyang Xuan (William)
  0 siblings, 1 reply; 6+ messages in thread
From: Oliver Hartkopp @ 2023-07-06 11:55 UTC (permalink / raw)
  To: Ziyang Xuan, mkl, davem, edumazet, kuba, pabeni, linux-can,
	netdev, penguin-kernel

Hello Ziyang Xuan,

thanks for your patch and the found inconsistency!

The ro->ifindex value might be zero even on a bound CAN_RAW socket which 
results in the use of a common filter for all CAN interfaces, see below ...

On 2023-07-05 11:25, Ziyang Xuan wrote:

(..)

> @@ -277,7 +278,7 @@ static void raw_notify(struct raw_sock *ro, unsigned long msg,
>   	if (!net_eq(dev_net(dev), sock_net(sk)))
>   		return;
>   
> -	if (ro->ifindex != dev->ifindex)
> +	if (ro->dev != dev)
>   		return;
>   
>   	switch (msg) {
> @@ -292,6 +293,7 @@ static void raw_notify(struct raw_sock *ro, unsigned long msg,
>   
>   		ro->ifindex = 0;
>   		ro->bound = 0;
> +		ro->dev = NULL;
>   		ro->count = 0;
>   		release_sock(sk);
>   

This would be ok for raw_notify().

> @@ -337,6 +339,7 @@ static int raw_init(struct sock *sk)
>   
>   	ro->bound            = 0;
>   	ro->ifindex          = 0;
> +	ro->dev              = NULL;
>   
>   	/* set default filter to single entry dfilter */
>   	ro->dfilter.can_id   = 0;
> @@ -385,19 +388,13 @@ static int raw_release(struct socket *sock)
>   
>   	lock_sock(sk);
>   
> +	rtnl_lock();
>   	/* remove current filters & unregister */
>   	if (ro->bound) {
> -		if (ro->ifindex) {
> -			struct net_device *dev;
> -
> -			dev = dev_get_by_index(sock_net(sk), ro->ifindex);
> -			if (dev) {
> -				raw_disable_allfilters(dev_net(dev), dev, sk);
> -				dev_put(dev);
> -			}
> -		} else {
> +		if (ro->dev)
> +			raw_disable_allfilters(dev_net(ro->dev), ro->dev, sk);
> +		else
>   			raw_disable_allfilters(sock_net(sk), NULL, sk);
> -		}
>   	}
>   
>   	if (ro->count > 1)
> @@ -405,8 +402,10 @@ static int raw_release(struct socket *sock)
>   
>   	ro->ifindex = 0;
>   	ro->bound = 0;
> +	ro->dev = NULL;
>   	ro->count = 0;
>   	free_percpu(ro->uniq);
> +	rtnl_unlock();
>   
>   	sock_orphan(sk);
>   	sock->sk = NULL;

This would be ok too.

> @@ -422,6 +421,7 @@ static int raw_bind(struct socket *sock, struct sockaddr *uaddr, int len)
>   	struct sockaddr_can *addr = (struct sockaddr_can *)uaddr;
>   	struct sock *sk = sock->sk;
>   	struct raw_sock *ro = raw_sk(sk);
> +	struct net_device *dev = NULL;
>   	int ifindex;
>   	int err = 0;
>   	int notify_enetdown = 0;
> @@ -431,14 +431,13 @@ static int raw_bind(struct socket *sock, struct sockaddr *uaddr, int len)
>   	if (addr->can_family != AF_CAN)
>   		return -EINVAL;
>   
> +	rtnl_lock();
>   	lock_sock(sk);
>   
> -	if (ro->bound && addr->can_ifindex == ro->ifindex)
> +	if (ro->bound && ro->dev && addr->can_ifindex == ro->dev->ifindex)

But this is wrong as the case for a bound socket for "all" CAN 
interfaces (ifindex == 0) is not considered.

>   		goto out;
>   
>   	if (addr->can_ifindex) {
> -		struct net_device *dev;
> -
>   		dev = dev_get_by_index(sock_net(sk), addr->can_ifindex);
>   		if (!dev) {
>   			err = -ENODEV;
> @@ -465,28 +464,23 @@ static int raw_bind(struct socket *sock, struct sockaddr *uaddr, int len)
>   	}
>   
>   	if (!err) {
> +		/* unregister old filters */
>   		if (ro->bound) {
> -			/* unregister old filters */
> -			if (ro->ifindex) {
> -				struct net_device *dev;
> -
> -				dev = dev_get_by_index(sock_net(sk),
> -						       ro->ifindex);
> -				if (dev) {
> -					raw_disable_allfilters(dev_net(dev),
> -							       dev, sk);
> -					dev_put(dev);
> -				}
> -			} else {
> +			if (ro->dev)
> +				raw_disable_allfilters(dev_net(ro->dev),
> +						       ro->dev, sk);
> +			else
>   				raw_disable_allfilters(sock_net(sk), NULL, sk);
> -			}
>   		}
>   		ro->ifindex = ifindex;
> +
>   		ro->bound = 1;
> +		ro->dev = dev;
>   	}
>   
>    out:
>   	release_sock(sk);
> +	rtnl_unlock();

Would it also fix the issue when just adding the rtnl_locks to 
raw_bind() and raw_release() as suggested by you?

Many thanks,
Oliver

>   
>   	if (notify_enetdown) {
>   		sk->sk_err = ENETDOWN;
> @@ -553,9 +547,9 @@ static int raw_setsockopt(struct socket *sock, int level, int optname,
>   		rtnl_lock();
>   		lock_sock(sk);
>   
> -		if (ro->bound && ro->ifindex) {
> -			dev = dev_get_by_index(sock_net(sk), ro->ifindex);
> -			if (!dev) {
> +		dev = ro->dev;
> +		if (ro->bound && dev) {
> +			if (dev->reg_state != NETREG_REGISTERED) {
>   				if (count > 1)
>   					kfree(filter);
>   				err = -ENODEV;
> @@ -596,7 +590,6 @@ static int raw_setsockopt(struct socket *sock, int level, int optname,
>   		ro->count  = count;
>   
>    out_fil:
> -		dev_put(dev);
>   		release_sock(sk);
>   		rtnl_unlock();
>   
> @@ -614,9 +607,9 @@ static int raw_setsockopt(struct socket *sock, int level, int optname,
>   		rtnl_lock();
>   		lock_sock(sk);
>   
> -		if (ro->bound && ro->ifindex) {
> -			dev = dev_get_by_index(sock_net(sk), ro->ifindex);
> -			if (!dev) {
> +		dev = ro->dev;
> +		if (ro->bound && dev) {
> +			if (dev->reg_state != NETREG_REGISTERED) {
>   				err = -ENODEV;
>   				goto out_err;
>   			}
> @@ -627,7 +620,6 @@ static int raw_setsockopt(struct socket *sock, int level, int optname,
>   			/* (try to) register the new err_mask */
>   			err = raw_enable_errfilter(sock_net(sk), dev, sk,
>   						   err_mask);
> -
>   			if (err)
>   				goto out_err;
>   
> @@ -640,7 +632,6 @@ static int raw_setsockopt(struct socket *sock, int level, int optname,
>   		ro->err_mask = err_mask;
>   
>    out_err:
> -		dev_put(dev);
>   		release_sock(sk);
>   		rtnl_unlock();
>   

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH net] can: raw: fix receiver memory leak
  2023-07-06 11:55 ` Oliver Hartkopp
@ 2023-07-06 12:48   ` Ziyang Xuan (William)
  2023-07-06 18:29     ` Oliver Hartkopp
  0 siblings, 1 reply; 6+ messages in thread
From: Ziyang Xuan (William) @ 2023-07-06 12:48 UTC (permalink / raw)
  To: Oliver Hartkopp, mkl, davem, edumazet, kuba, pabeni, linux-can,
	netdev, penguin-kernel

> Hello Ziyang Xuan,
> 
> thanks for your patch and the found inconsistency!
> 
> The ro->ifindex value might be zero even on a bound CAN_RAW socket which results in the use of a common filter for all CAN interfaces, see below ...
> 
> On 2023-07-05 11:25, Ziyang Xuan wrote:
> 
> (..)
> 
>> @@ -277,7 +278,7 @@ static void raw_notify(struct raw_sock *ro, unsigned long msg,
>>       if (!net_eq(dev_net(dev), sock_net(sk)))
>>           return;
>>   -    if (ro->ifindex != dev->ifindex)
>> +    if (ro->dev != dev)
>>           return;
>>         switch (msg) {
>> @@ -292,6 +293,7 @@ static void raw_notify(struct raw_sock *ro, unsigned long msg,
>>             ro->ifindex = 0;
>>           ro->bound = 0;
>> +        ro->dev = NULL;
>>           ro->count = 0;
>>           release_sock(sk);
>>   
> 
> This would be ok for raw_notify().
> 
>> @@ -337,6 +339,7 @@ static int raw_init(struct sock *sk)
>>         ro->bound            = 0;
>>       ro->ifindex          = 0;
>> +    ro->dev              = NULL;
>>         /* set default filter to single entry dfilter */
>>       ro->dfilter.can_id   = 0;
>> @@ -385,19 +388,13 @@ static int raw_release(struct socket *sock)
>>         lock_sock(sk);
>>   +    rtnl_lock();
>>       /* remove current filters & unregister */
>>       if (ro->bound) {
>> -        if (ro->ifindex) {
>> -            struct net_device *dev;
>> -
>> -            dev = dev_get_by_index(sock_net(sk), ro->ifindex);
>> -            if (dev) {
>> -                raw_disable_allfilters(dev_net(dev), dev, sk);
>> -                dev_put(dev);
>> -            }
>> -        } else {
>> +        if (ro->dev)
>> +            raw_disable_allfilters(dev_net(ro->dev), ro->dev, sk);
>> +        else
>>               raw_disable_allfilters(sock_net(sk), NULL, sk);
>> -        }
>>       }
>>         if (ro->count > 1)
>> @@ -405,8 +402,10 @@ static int raw_release(struct socket *sock)
>>         ro->ifindex = 0;
>>       ro->bound = 0;
>> +    ro->dev = NULL;
>>       ro->count = 0;
>>       free_percpu(ro->uniq);
>> +    rtnl_unlock();
>>         sock_orphan(sk);
>>       sock->sk = NULL;
> 
> This would be ok too.
> 
>> @@ -422,6 +421,7 @@ static int raw_bind(struct socket *sock, struct sockaddr *uaddr, int len)
>>       struct sockaddr_can *addr = (struct sockaddr_can *)uaddr;
>>       struct sock *sk = sock->sk;
>>       struct raw_sock *ro = raw_sk(sk);
>> +    struct net_device *dev = NULL;
>>       int ifindex;
>>       int err = 0;
>>       int notify_enetdown = 0;
>> @@ -431,14 +431,13 @@ static int raw_bind(struct socket *sock, struct sockaddr *uaddr, int len)
>>       if (addr->can_family != AF_CAN)
>>           return -EINVAL;
>>   +    rtnl_lock();
>>       lock_sock(sk);
>>   -    if (ro->bound && addr->can_ifindex == ro->ifindex)
>> +    if (ro->bound && ro->dev && addr->can_ifindex == ro->dev->ifindex)
> 
> But this is wrong as the case for a bound socket for "all" CAN interfaces (ifindex == 0) is not considered.

Yes, here need not modification. I will fix it in v2. Thanks!

> 
>>           goto out;
>>         if (addr->can_ifindex) {
>> -        struct net_device *dev;
>> -
>>           dev = dev_get_by_index(sock_net(sk), addr->can_ifindex);
>>           if (!dev) {
>>               err = -ENODEV;
>> @@ -465,28 +464,23 @@ static int raw_bind(struct socket *sock, struct sockaddr *uaddr, int len)
>>       }
>>         if (!err) {
>> +        /* unregister old filters */
>>           if (ro->bound) {
>> -            /* unregister old filters */
>> -            if (ro->ifindex) {
>> -                struct net_device *dev;
>> -
>> -                dev = dev_get_by_index(sock_net(sk),
>> -                               ro->ifindex);
>> -                if (dev) {
>> -                    raw_disable_allfilters(dev_net(dev),
>> -                                   dev, sk);
>> -                    dev_put(dev);
>> -                }
>> -            } else {
>> +            if (ro->dev)
>> +                raw_disable_allfilters(dev_net(ro->dev),
>> +                               ro->dev, sk);
>> +            else
>>                   raw_disable_allfilters(sock_net(sk), NULL, sk);
>> -            }
>>           }
>>           ro->ifindex = ifindex;
>> +
>>           ro->bound = 1;
>> +        ro->dev = dev;
>>       }
>>      out:
>>       release_sock(sk);
>> +    rtnl_unlock();
> 
> Would it also fix the issue when just adding the rtnl_locks to raw_bind() and raw_release() as suggested by you?

This patch just add rtnl_lock in raw_bind() and raw_release(). raw_setsockopt() has rtnl_lock before this. raw_notify()
is under rtnl_lock. My patch has been tested and solved the issue before send. I don't know if it answered your doubts.

Thanks.
William Xuan

> 
> Many thanks,
> Oliver
> 
>>         if (notify_enetdown) {
>>           sk->sk_err = ENETDOWN;
>> @@ -553,9 +547,9 @@ static int raw_setsockopt(struct socket *sock, int level, int optname,
>>           rtnl_lock();
>>           lock_sock(sk);
>>   -        if (ro->bound && ro->ifindex) {
>> -            dev = dev_get_by_index(sock_net(sk), ro->ifindex);
>> -            if (!dev) {
>> +        dev = ro->dev;
>> +        if (ro->bound && dev) {
>> +            if (dev->reg_state != NETREG_REGISTERED) {
>>                   if (count > 1)
>>                       kfree(filter);
>>                   err = -ENODEV;
>> @@ -596,7 +590,6 @@ static int raw_setsockopt(struct socket *sock, int level, int optname,
>>           ro->count  = count;
>>      out_fil:
>> -        dev_put(dev);
>>           release_sock(sk);
>>           rtnl_unlock();
>>   @@ -614,9 +607,9 @@ static int raw_setsockopt(struct socket *sock, int level, int optname,
>>           rtnl_lock();
>>           lock_sock(sk);
>>   -        if (ro->bound && ro->ifindex) {
>> -            dev = dev_get_by_index(sock_net(sk), ro->ifindex);
>> -            if (!dev) {
>> +        dev = ro->dev;
>> +        if (ro->bound && dev) {
>> +            if (dev->reg_state != NETREG_REGISTERED) {
>>                   err = -ENODEV;
>>                   goto out_err;
>>               }
>> @@ -627,7 +620,6 @@ static int raw_setsockopt(struct socket *sock, int level, int optname,
>>               /* (try to) register the new err_mask */
>>               err = raw_enable_errfilter(sock_net(sk), dev, sk,
>>                              err_mask);
>> -
>>               if (err)
>>                   goto out_err;
>>   @@ -640,7 +632,6 @@ static int raw_setsockopt(struct socket *sock, int level, int optname,
>>           ro->err_mask = err_mask;
>>      out_err:
>> -        dev_put(dev);
>>           release_sock(sk);
>>           rtnl_unlock();
>>   
> .

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH net] can: raw: fix receiver memory leak
  2023-07-06 12:48   ` Ziyang Xuan (William)
@ 2023-07-06 18:29     ` Oliver Hartkopp
  2023-07-07  1:59       ` Ziyang Xuan (William)
  0 siblings, 1 reply; 6+ messages in thread
From: Oliver Hartkopp @ 2023-07-06 18:29 UTC (permalink / raw)
  To: Ziyang Xuan (William),
	mkl, davem, edumazet, kuba, pabeni, linux-can, netdev,
	penguin-kernel

On 06.07.23 14:48, Ziyang Xuan (William) wrote:

(..)

>>>        }
>>>       out:
>>>        release_sock(sk);
>>> +    rtnl_unlock();
>>
>> Would it also fix the issue when just adding the rtnl_locks to raw_bind() and raw_release() as suggested by you?
> 
> This patch just add rtnl_lock in raw_bind() and raw_release(). raw_setsockopt() has rtnl_lock before this. raw_notify()
> is under rtnl_lock. My patch has been tested and solved the issue before send. I don't know if it answered your doubts.

My question was whether adding rtnl_locks to raw_bind() and 
raw_release() would be enough to fix the issue.

Without introducing the additional ro->dev element!?

Best regards,
Oliver

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH net] can: raw: fix receiver memory leak
  2023-07-06 18:29     ` Oliver Hartkopp
@ 2023-07-07  1:59       ` Ziyang Xuan (William)
  2023-07-07  6:22         ` Oliver Hartkopp
  0 siblings, 1 reply; 6+ messages in thread
From: Ziyang Xuan (William) @ 2023-07-07  1:59 UTC (permalink / raw)
  To: Oliver Hartkopp, mkl, davem, edumazet, kuba, pabeni, linux-can,
	netdev, penguin-kernel

> On 06.07.23 14:48, Ziyang Xuan (William) wrote:
> 
> (..)
> 
>>>>        }
>>>>       out:
>>>>        release_sock(sk);
>>>> +    rtnl_unlock();
>>>
>>> Would it also fix the issue when just adding the rtnl_locks to raw_bind() and raw_release() as suggested by you?
>>
>> This patch just add rtnl_lock in raw_bind() and raw_release(). raw_setsockopt() has rtnl_lock before this. raw_notify()
>> is under rtnl_lock. My patch has been tested and solved the issue before send. I don't know if it answered your doubts.
> 
> My question was whether adding rtnl_locks to raw_bind() and raw_release() would be enough to fix the issue.
> 
> Without introducing the additional ro->dev element!?

Understand. Just add rtnl_lock to raw_bind() and raw_release() can not fix the issue. I tested.

We should understand that unregister a net device is divided into two stages generally.
Fistly, call unregister_netdevice_many() to remove net_dev from device list and add
net_dev to net_todo_list. Secondly, free net_dev in netdev_run_todo().

In my issue. Firstly, unregister_netdevice_many() removed can_dev from device
list and added can_dev to net_todo_list. Then got NULL by dev_get_by_index()
and receivers in dev_rcv_lists would not be freed in raw_release().
After raw_release(), ro->bound would be set 0. When NETDEV_UNREGISTER event
arrived raw_notify(), receivers in dev_rcv_lists would not be freed too
because ro->bound was already 0. Thus receivers in dev_rcv_lists would be leaked.

             cpu0                                        cpu1
unregister_netdevice_many(can_dev)
  unlist_netdevice(can_dev) // dev_get_by_index() return NULL after this
  net_set_todo(can_dev)
						raw_release(can_socket)
						  dev = dev_get_by_index(, ro->ifindex); // dev == NULL
						  if (dev) { // receivers in dev_rcv_lists not free because dev is NULL
						    raw_disable_allfilters(, dev, );
						    dev_put(dev);
						  }
						...
						ro->bound = 0;
						...

netdev_wait_allrefs_any()
  call_netdevice_notifiers(NETDEV_UNREGISTER, )
    raw_notify(, NETDEV_UNREGISTER, )
      if (ro->bound) // invalid because ro->bound has been set 0
        raw_disable_allfilters(, dev, ); // receivers in dev_rcv_lists will never be freed


Thanks,
William Xuan

> 
> Best regards,
> Oliver
> .

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH net] can: raw: fix receiver memory leak
  2023-07-07  1:59       ` Ziyang Xuan (William)
@ 2023-07-07  6:22         ` Oliver Hartkopp
  0 siblings, 0 replies; 6+ messages in thread
From: Oliver Hartkopp @ 2023-07-07  6:22 UTC (permalink / raw)
  To: Ziyang Xuan (William),
	mkl, davem, edumazet, kuba, pabeni, linux-can, netdev,
	penguin-kernel



On 07.07.23 03:59, Ziyang Xuan (William) wrote:
>> On 06.07.23 14:48, Ziyang Xuan (William) wrote:
>>
>> (..)
>>
>>>>>         }
>>>>>        out:
>>>>>         release_sock(sk);
>>>>> +    rtnl_unlock();
>>>>
>>>> Would it also fix the issue when just adding the rtnl_locks to raw_bind() and raw_release() as suggested by you?
>>>
>>> This patch just add rtnl_lock in raw_bind() and raw_release(). raw_setsockopt() has rtnl_lock before this. raw_notify()
>>> is under rtnl_lock. My patch has been tested and solved the issue before send. I don't know if it answered your doubts.
>>
>> My question was whether adding rtnl_locks to raw_bind() and raw_release() would be enough to fix the issue.
>>
>> Without introducing the additional ro->dev element!?
> 
> Understand. Just add rtnl_lock to raw_bind() and raw_release() can not fix the issue. I tested.
> 
> We should understand that unregister a net device is divided into two stages generally.
> Fistly, call unregister_netdevice_many() to remove net_dev from device list and add
> net_dev to net_todo_list. Secondly, free net_dev in netdev_run_todo().
> 
> In my issue. Firstly, unregister_netdevice_many() removed can_dev from device
> list and added can_dev to net_todo_list. Then got NULL by dev_get_by_index()
> and receivers in dev_rcv_lists would not be freed in raw_release().
> After raw_release(), ro->bound would be set 0. When NETDEV_UNREGISTER event
> arrived raw_notify(), receivers in dev_rcv_lists would not be freed too
> because ro->bound was already 0. Thus receivers in dev_rcv_lists would be leaked.

Thanks for the clarification and the testing!

I really assumed rtnl_lock would do this job and also protect the entire 
sequence starting with unregister_netdevice_many() !?!

Looking forward to the V2 patch then.

Many thanks,
Oliver

> 
>               cpu0                                        cpu1
> unregister_netdevice_many(can_dev)
>    unlist_netdevice(can_dev) // dev_get_by_index() return NULL after this
>    net_set_todo(can_dev)
> 						raw_release(can_socket)
> 						  dev = dev_get_by_index(, ro->ifindex); // dev == NULL
> 						  if (dev) { // receivers in dev_rcv_lists not free because dev is NULL
> 						    raw_disable_allfilters(, dev, );
> 						    dev_put(dev);
> 						  }
> 						...
> 						ro->bound = 0;
> 						...
> 
> netdev_wait_allrefs_any()
>    call_netdevice_notifiers(NETDEV_UNREGISTER, )
>      raw_notify(, NETDEV_UNREGISTER, )
>        if (ro->bound) // invalid because ro->bound has been set 0
>          raw_disable_allfilters(, dev, ); // receivers in dev_rcv_lists will never be freed
> 
> 
> Thanks,
> William Xuan
> 
>>
>> Best regards,
>> Oliver
>> .

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2023-07-07  6:23 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-07-05  9:25 [PATCH net] can: raw: fix receiver memory leak Ziyang Xuan
2023-07-06 11:55 ` Oliver Hartkopp
2023-07-06 12:48   ` Ziyang Xuan (William)
2023-07-06 18:29     ` Oliver Hartkopp
2023-07-07  1:59       ` Ziyang Xuan (William)
2023-07-07  6:22         ` Oliver Hartkopp

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.