All of lore.kernel.org
 help / color / mirror / Atom feed
* [Patch net] netns: avoid allocating idr when dumping info
@ 2015-02-27  6:32 Cong Wang
  2015-02-27  8:01 ` Eric Dumazet
  0 siblings, 1 reply; 18+ messages in thread
From: Cong Wang @ 2015-02-27  6:32 UTC (permalink / raw)
  To: netdev; +Cc: Cong Wang, Nicolas Dichtel, Eric Dumazet

We can allocate the peer netns id when creating the link
instead of when dumping the link.

This fixes the following kernel warning:

 ===============================
 [ INFO: suspicious RCU usage. ]
 3.19.0+ #805 Tainted: G        W
 -------------------------------
 include/linux/rcupdate.h:538 Illegal context switch in RCU read-side critical section!

 other info that might help us debug this:

 rcu_scheduler_active = 1, debug_locks = 0
 2 locks held by ip/771:
  #0:  (rtnl_mutex){+.+.+.}, at: [<ffffffff8182b8f4>] netlink_dump+0x21/0x26c
  #1:  (rcu_read_lock){......}, at: [<ffffffff817d785b>] rcu_read_lock+0x0/0x6e

 stack backtrace:
 CPU: 3 PID: 771 Comm: ip Tainted: G        W       3.19.0+ #805
 Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011
  0000000000000001 ffff8800d51e7718 ffffffff81a27457 0000000029e729e6
  ffff8800d6108000 ffff8800d51e7748 ffffffff810b539b ffffffff820013dd
  00000000000001c8 0000000000000000 ffff8800d7448088 ffff8800d51e7758
 Call Trace:
  [<ffffffff81a27457>] dump_stack+0x4c/0x65
  [<ffffffff810b539b>] lockdep_rcu_suspicious+0x107/0x110
  [<ffffffff8109796f>] rcu_preempt_sleep_check+0x45/0x47
  [<ffffffff8109e457>] ___might_sleep+0x1d/0x1cb
  [<ffffffff8109e67d>] __might_sleep+0x78/0x80
  [<ffffffff814b9b1f>] idr_alloc+0x45/0xd1
  [<ffffffff810cb7ab>] ? rcu_read_lock_held+0x3b/0x3d
  [<ffffffff814b9f9d>] ? idr_for_each+0x53/0x101
  [<ffffffff817c1383>] alloc_netid+0x61/0x69
  [<ffffffff817c14c3>] __peernet2id+0x79/0x8d
  [<ffffffff817c1ab7>] peernet2id+0x13/0x1f
  [<ffffffff817d8673>] rtnl_fill_ifinfo+0xa8d/0xc20
  [<ffffffff810b17d9>] ? __lock_is_held+0x39/0x52
  [<ffffffff817d894f>] rtnl_dump_ifinfo+0x149/0x213
  [<ffffffff8182b9c2>] netlink_dump+0xef/0x26c
  [<ffffffff8182bcba>] netlink_recvmsg+0x17b/0x2c5
  [<ffffffff817b0adc>] __sock_recvmsg+0x4e/0x59
  [<ffffffff817b1b40>] sock_recvmsg+0x3f/0x51
  [<ffffffff817b1f9a>] ___sys_recvmsg+0xf6/0x1d9
  [<ffffffff8115dc67>] ? handle_pte_fault+0x6e1/0xd3d
  [<ffffffff8100a3a0>] ? native_sched_clock+0x35/0x37
  [<ffffffff8109f45b>] ? sched_clock_local+0x12/0x72
  [<ffffffff8109f6ac>] ? sched_clock_cpu+0x9e/0xb7
  [<ffffffff810cb7ab>] ? rcu_read_lock_held+0x3b/0x3d
  [<ffffffff811abde8>] ? __fcheck_files+0x4c/0x58
  [<ffffffff811ac556>] ? __fget_light+0x2d/0x52
  [<ffffffff817b376f>] __sys_recvmsg+0x42/0x60
  [<ffffffff817b379f>] SyS_recvmsg+0x12/0x1c

Fixes: commit d37512a277dfb2cef ("rtnl: add link netns id to interface messages")
Cc: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Cc: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
---
 include/net/net_namespace.h | 14 +++++++++++++-
 net/core/net_namespace.c    | 14 ++------------
 net/core/rtnetlink.c        | 11 +++++++++--
 3 files changed, 24 insertions(+), 15 deletions(-)

diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index 36faf49..25ac2e0 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -27,6 +27,7 @@
 #include <net/netns/nftables.h>
 #include <net/netns/xfrm.h>
 #include <linux/ns_common.h>
+#include <uapi/linux/net_namespace.h>
 
 struct user_namespace;
 struct proc_dir_entry;
@@ -291,7 +292,18 @@ static inline struct net *read_pnet(struct net * const *pnet)
 #define __net_initconst	__initconst
 #endif
 
-int peernet2id(struct net *net, struct net *peer);
+int __peernet2id(struct net *net, struct net *peer, bool alloc);
+
+/* This function returns the id of a peer netns. If no id is assigned, one will
+ * be allocated and returned.
+ */
+static inline int peernet2id(struct net *net, struct net *peer)
+{
+	int id = __peernet2id(net, peer, true);
+
+	return id >= 0 ? id : NETNSA_NSID_NOT_ASSIGNED;
+}
+
 struct net *get_net_ns_by_id(struct net *net, int id);
 
 struct pernet_operations {
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index cb5290b..9ff2164 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -175,7 +175,7 @@ static int net_eq_idr(int id, void *net, void *peer)
 	return 0;
 }
 
-static int __peernet2id(struct net *net, struct net *peer, bool alloc)
+int __peernet2id(struct net *net, struct net *peer, bool alloc)
 {
 	int id = idr_for_each(&net->netns_ids, net_eq_idr, peer);
 
@@ -192,17 +192,7 @@ static int __peernet2id(struct net *net, struct net *peer, bool alloc)
 
 	return -ENOENT;
 }
-
-/* This function returns the id of a peer netns. If no id is assigned, one will
- * be allocated and returned.
- */
-int peernet2id(struct net *net, struct net *peer)
-{
-	int id = __peernet2id(net, peer, true);
-
-	return id >= 0 ? id : NETNSA_NSID_NOT_ASSIGNED;
-}
-EXPORT_SYMBOL(peernet2id);
+EXPORT_SYMBOL(__peernet2id);
 
 struct net *get_net_ns_by_id(struct net *net, int id)
 {
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 1385de0..a4348ac 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -1175,8 +1175,10 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
 		struct net *link_net = dev->rtnl_link_ops->get_link_net(dev);
 
 		if (!net_eq(dev_net(dev), link_net)) {
-			int id = peernet2id(dev_net(dev), link_net);
+			int id = __peernet2id(dev_net(dev), link_net, false);
 
+			if (id < 0)
+				id = NETNSA_NSID_NOT_ASSIGNED;
 			if (nla_put_s32(skb, IFLA_LINK_NETNSID, id))
 				goto nla_put_failure;
 		}
@@ -2142,7 +2144,12 @@ replay:
 		dev->ifindex = ifm->ifi_index;
 
 		if (ops->newlink) {
-			err = ops->newlink(link_net ? : net, dev, tb, data);
+			struct net *src_net = link_net ?: net;
+
+			if (ops->get_link_net && !net_eq(src_net, dev_net(dev)))
+				(void)peernet2id(dev_net(dev), src_net);
+
+			err = ops->newlink(src_net, dev, tb, data);
 			/* Drivers should call free_netdev() in ->destructor
 			 * and unregister it on failure after registration
 			 * so that device could be finally freed in rtnl_unlock.
-- 
1.8.1.4

^ permalink raw reply related	[flat|nested] 18+ messages in thread

* Re: [Patch net] netns: avoid allocating idr when dumping info
  2015-02-27  6:32 [Patch net] netns: avoid allocating idr when dumping info Cong Wang
@ 2015-02-27  8:01 ` Eric Dumazet
  2015-02-27 16:25   ` Nicolas Dichtel
  0 siblings, 1 reply; 18+ messages in thread
From: Eric Dumazet @ 2015-02-27  8:01 UTC (permalink / raw)
  To: Cong Wang; +Cc: netdev, Nicolas Dichtel

On Thu, 2015-02-26 at 22:32 -0800, Cong Wang wrote:
> We can allocate the peer netns id when creating the link
> instead of when dumping the link.
> 
> This fixes the following kernel warning:
> 
>  ===============================
>  [ INFO: suspicious RCU usage. ]
>  3.19.0+ #805 Tainted: G        W
>  -------------------------------
>  include/linux/rcupdate.h:538 Illegal context switch in RCU read-side critical section!
> 
>  other info that might help us debug this:

This looks very complicated, why even bother ?

I gave an obvious patch fixing root cause, I have no idea why you prefer
over engineering this.

If you believe allocating peer netns id is required at link creation
time, you should explain why.

BTW, (void)peernet2id(dev_net(dev), src_net) can fail, and your fix is
not complete anyway.

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [Patch net] netns: avoid allocating idr when dumping info
  2015-02-27  8:01 ` Eric Dumazet
@ 2015-02-27 16:25   ` Nicolas Dichtel
  2015-02-27 16:55     ` Eric Dumazet
                       ` (2 more replies)
  0 siblings, 3 replies; 18+ messages in thread
From: Nicolas Dichtel @ 2015-02-27 16:25 UTC (permalink / raw)
  To: Eric Dumazet, Cong Wang; +Cc: netdev

Le 27/02/2015 09:01, Eric Dumazet a écrit :
> On Thu, 2015-02-26 at 22:32 -0800, Cong Wang wrote:
>> We can allocate the peer netns id when creating the link
>> instead of when dumping the link.
>>
>> This fixes the following kernel warning:
>>
>>   ===============================
>>   [ INFO: suspicious RCU usage. ]
>>   3.19.0+ #805 Tainted: G        W
>>   -------------------------------
>>   include/linux/rcupdate.h:538 Illegal context switch in RCU read-side critical section!
>>
>>   other info that might help us debug this:
>
> This looks very complicated, why even bother ?
>
> I gave an obvious patch fixing root cause, I have no idea why you prefer
> over engineering this.
>
> If you believe allocating peer netns id is required at link creation
> time, you should explain why.
>
> BTW, (void)peernet2id(dev_net(dev), src_net) can fail, and your fix is
> not complete anyway.
That's true. The patch does not cover the following case:
ip link add foo type bar
ip link set foo netns netns1

In this case, the second command will call dev_change_net_namespace() and the
id will not be allocated.

I think Eric's patch is simpler and less error-prone.

FWIW, I will be off for one week and probably without any internet access ;-)

Regards,
Nicolas

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [Patch net] netns: avoid allocating idr when dumping info
  2015-02-27 16:25   ` Nicolas Dichtel
@ 2015-02-27 16:55     ` Eric Dumazet
  2015-02-27 17:07     ` [PATCH] net: do not use rcu in rtnl_dump_ifinfo() Eric Dumazet
  2015-02-27 17:28     ` [Patch net] netns: avoid allocating idr when dumping info Cong Wang
  2 siblings, 0 replies; 18+ messages in thread
From: Eric Dumazet @ 2015-02-27 16:55 UTC (permalink / raw)
  To: nicolas.dichtel; +Cc: Cong Wang, netdev

On Fri, 2015-02-27 at 17:25 +0100, Nicolas Dichtel wrote:

> That's true. The patch does not cover the following case:
> ip link add foo type bar
> ip link set foo netns netns1
> 
> In this case, the second command will call dev_change_net_namespace() and the
> id will not be allocated.
> 
> I think Eric's patch is simpler and less error-prone.
> 
> FWIW, I will be off for one week and probably without any internet access ;-)

I am cooking an official patch right now, thanks !

^ permalink raw reply	[flat|nested] 18+ messages in thread

* [PATCH] net: do not use rcu in rtnl_dump_ifinfo()
  2015-02-27 16:25   ` Nicolas Dichtel
  2015-02-27 16:55     ` Eric Dumazet
@ 2015-02-27 17:07     ` Eric Dumazet
  2015-02-27 17:19       ` Daniel Borkmann
  2015-02-27 17:42       ` [PATCH v2 net] " Eric Dumazet
  2015-02-27 17:28     ` [Patch net] netns: avoid allocating idr when dumping info Cong Wang
  2 siblings, 2 replies; 18+ messages in thread
From: Eric Dumazet @ 2015-02-27 17:07 UTC (permalink / raw)
  To: nicolas.dichtel, David Miller; +Cc: Cong Wang, netdev

From: Eric Dumazet <edumazet@google.com>

We did a failed attempt in the past to only use rcu in rtnl dump
operations (commit e67f88dd12f6 "net: dont hold rtnl mutex during
netlink dump callbacks")

Now that dumps are holding RTNL anyway, there is no need to also
use rcu locking, as it forbids any scheduling ability, like
GFP_KERNEL allocations that controlling path should use instead
of GFP_ATOMIC whenever possible.

This should fix following splat Cong Wang reported :

===============================
 [ INFO: suspicious RCU usage. ]
 3.19.0+ #805 Tainted: G        W      
 -------------------------------
 include/linux/rcupdate.h:538 Illegal context switch in RCU read-side critical section!
 
 other info that might help us debug this:
 
 
 rcu_scheduler_active = 1, debug_locks = 0
 2 locks held by ip/771:
  #0:  (rtnl_mutex){+.+.+.}, at: [<ffffffff8182b8f4>] netlink_dump+0x21/0x26c
  #1:  (rcu_read_lock){......}, at: [<ffffffff817d785b>] rcu_read_lock+0x0/0x6e
 
 stack backtrace:
 CPU: 3 PID: 771 Comm: ip Tainted: G        W       3.19.0+ #805
 Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011
  0000000000000001 ffff8800d51e7718 ffffffff81a27457 0000000029e729e6
  ffff8800d6108000 ffff8800d51e7748 ffffffff810b539b ffffffff820013dd
  00000000000001c8 0000000000000000 ffff8800d7448088 ffff8800d51e7758
 Call Trace:
  [<ffffffff81a27457>] dump_stack+0x4c/0x65
  [<ffffffff810b539b>] lockdep_rcu_suspicious+0x107/0x110
  [<ffffffff8109796f>] rcu_preempt_sleep_check+0x45/0x47
  [<ffffffff8109e457>] ___might_sleep+0x1d/0x1cb
  [<ffffffff8109e67d>] __might_sleep+0x78/0x80
  [<ffffffff814b9b1f>] idr_alloc+0x45/0xd1
  [<ffffffff810cb7ab>] ? rcu_read_lock_held+0x3b/0x3d
  [<ffffffff814b9f9d>] ? idr_for_each+0x53/0x101
  [<ffffffff817c1383>] alloc_netid+0x61/0x69
  [<ffffffff817c14c3>] __peernet2id+0x79/0x8d
  [<ffffffff817c1ab7>] peernet2id+0x13/0x1f
  [<ffffffff817d8673>] rtnl_fill_ifinfo+0xa8d/0xc20
  [<ffffffff810b17d9>] ? __lock_is_held+0x39/0x52
  [<ffffffff817d894f>] rtnl_dump_ifinfo+0x149/0x213
  [<ffffffff8182b9c2>] netlink_dump+0xef/0x26c
  [<ffffffff8182bcba>] netlink_recvmsg+0x17b/0x2c5
  [<ffffffff817b0adc>] __sock_recvmsg+0x4e/0x59
  [<ffffffff817b1b40>] sock_recvmsg+0x3f/0x51
  [<ffffffff817b1f9a>] ___sys_recvmsg+0xf6/0x1d9
  [<ffffffff8115dc67>] ? handle_pte_fault+0x6e1/0xd3d
  [<ffffffff8100a3a0>] ? native_sched_clock+0x35/0x37
  [<ffffffff8109f45b>] ? sched_clock_local+0x12/0x72
  [<ffffffff8109f6ac>] ? sched_clock_cpu+0x9e/0xb7
  [<ffffffff810cb7ab>] ? rcu_read_lock_held+0x3b/0x3d
  [<ffffffff811abde8>] ? __fcheck_files+0x4c/0x58
  [<ffffffff811ac556>] ? __fget_light+0x2d/0x52
  [<ffffffff817b376f>] __sys_recvmsg+0x42/0x60
  [<ffffffff817b379f>] SyS_recvmsg+0x12/0x1c

Signed-off-by: Eric Dumazet <edumazet@google.com>
Fixes: commit 0c7aecd4bde4b7302 ("netns: add rtnl cmd to add and get peer netns ids")
Cc: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Reported-by: Cong Wang <xiyou.wangcong@gmail.com>
---
 net/core/rtnetlink.c |    4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 1385de0fa080..74ce25d2cded 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -1300,7 +1300,6 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
 	s_h = cb->args[0];
 	s_idx = cb->args[1];
 
-	rcu_read_lock();
 	cb->seq = net->dev_base_seq;
 
 	/* A hack to preserve kernel<->userspace interface.
@@ -1322,7 +1321,7 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
 	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
 		idx = 0;
 		head = &net->dev_index_head[h];
-		hlist_for_each_entry_rcu(dev, head, index_hlist) {
+		hlist_for_each_entry(dev, head, index_hlist) {
 			if (idx < s_idx)
 				goto cont;
 			err = rtnl_fill_ifinfo(skb, dev, RTM_NEWLINK,
@@ -1344,7 +1343,6 @@ cont:
 		}
 	}
 out:
-	rcu_read_unlock();
 	cb->args[1] = idx;
 	cb->args[0] = h;
 

^ permalink raw reply related	[flat|nested] 18+ messages in thread

* Re: [PATCH] net: do not use rcu in rtnl_dump_ifinfo()
  2015-02-27 17:07     ` [PATCH] net: do not use rcu in rtnl_dump_ifinfo() Eric Dumazet
@ 2015-02-27 17:19       ` Daniel Borkmann
  2015-02-27 17:40         ` Eric Dumazet
  2015-02-27 18:35         ` Cong Wang
  2015-02-27 17:42       ` [PATCH v2 net] " Eric Dumazet
  1 sibling, 2 replies; 18+ messages in thread
From: Daniel Borkmann @ 2015-02-27 17:19 UTC (permalink / raw)
  To: Eric Dumazet, nicolas.dichtel, David Miller; +Cc: Cong Wang, netdev

On 02/27/2015 06:07 PM, Eric Dumazet wrote:
> From: Eric Dumazet <edumazet@google.com>
>
> We did a failed attempt in the past to only use rcu in rtnl dump
> operations (commit e67f88dd12f6 "net: dont hold rtnl mutex during
> netlink dump callbacks")
>
> Now that dumps are holding RTNL anyway, there is no need to also
> use rcu locking, as it forbids any scheduling ability, like
> GFP_KERNEL allocations that controlling path should use instead
> of GFP_ATOMIC whenever possible.
>
> This should fix following splat Cong Wang reported :
>
> ===============================

[ Btw, I think patchwork has an issue with above line when fetching
   the mbox : it thinks commit description ends here. :(

   Prefixing with a space char should help to not get it lost. ]

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [Patch net] netns: avoid allocating idr when dumping info
  2015-02-27 16:25   ` Nicolas Dichtel
  2015-02-27 16:55     ` Eric Dumazet
  2015-02-27 17:07     ` [PATCH] net: do not use rcu in rtnl_dump_ifinfo() Eric Dumazet
@ 2015-02-27 17:28     ` Cong Wang
  2015-02-27 17:48       ` Nicolas Dichtel
  2015-02-27 18:04       ` Eric Dumazet
  2 siblings, 2 replies; 18+ messages in thread
From: Cong Wang @ 2015-02-27 17:28 UTC (permalink / raw)
  To: Nicolas Dichtel; +Cc: Eric Dumazet, Linux Kernel Network Developers

On Fri, Feb 27, 2015 at 8:25 AM, Nicolas Dichtel
<nicolas.dichtel@6wind.com> wrote:
> Le 27/02/2015 09:01, Eric Dumazet a écrit :
>
>> On Thu, 2015-02-26 at 22:32 -0800, Cong Wang wrote:
>>>
>>> We can allocate the peer netns id when creating the link
>>> instead of when dumping the link.
>>>
>>> This fixes the following kernel warning:
>>>
>>>   ===============================
>>>   [ INFO: suspicious RCU usage. ]
>>>   3.19.0+ #805 Tainted: G        W
>>>   -------------------------------
>>>   include/linux/rcupdate.h:538 Illegal context switch in RCU read-side
>>> critical section!
>>>
>>>   other info that might help us debug this:
>>
>>
>> This looks very complicated, why even bother ?
>>
>> I gave an obvious patch fixing root cause, I have no idea why you prefer
>> over engineering this.
>>
>> If you believe allocating peer netns id is required at link creation
>> time, you should explain why.


I don't understand why you think this is over engineering,
clearly it is normal to allocate resources in newlink rather than
dumping. Look at other resources we allocate in newlink.


>>
>> BTW, (void)peernet2id(dev_net(dev), src_net) can fail, and your fix is
>> not complete anyway.
>
> That's true. The patch does not cover the following case:
> ip link add foo type bar
> ip link set foo netns netns1
>
> In this case, the second command will call dev_change_net_namespace() and
> the
> id will not be allocated.

Then call it in do_setlink(). This is not a reason we _have to_
allocate it in dumping.

>
> I think Eric's patch is simpler and less error-prone.
>

Sure, he introduced this rcu read lock from the very beginning.

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] net: do not use rcu in rtnl_dump_ifinfo()
  2015-02-27 17:19       ` Daniel Borkmann
@ 2015-02-27 17:40         ` Eric Dumazet
  2015-02-27 18:35         ` Cong Wang
  1 sibling, 0 replies; 18+ messages in thread
From: Eric Dumazet @ 2015-02-27 17:40 UTC (permalink / raw)
  To: Daniel Borkmann; +Cc: nicolas.dichtel, David Miller, Cong Wang, netdev

On Fri, 2015-02-27 at 18:19 +0100, Daniel Borkmann wrote:
> On 02/27/2015 06:07 PM, Eric Dumazet wrote:
> > From: Eric Dumazet <edumazet@google.com>
> >
> > We did a failed attempt in the past to only use rcu in rtnl dump
> > operations (commit e67f88dd12f6 "net: dont hold rtnl mutex during
> > netlink dump callbacks")
> >
> > Now that dumps are holding RTNL anyway, there is no need to also
> > use rcu locking, as it forbids any scheduling ability, like
> > GFP_KERNEL allocations that controlling path should use instead
> > of GFP_ATOMIC whenever possible.
> >
> > This should fix following splat Cong Wang reported :
> >
> > ===============================
> 
> [ Btw, I think patchwork has an issue with above line when fetching
>    the mbox : it thinks commit description ends here. :(
> 
>    Prefixing with a space char should help to not get it lost. ]


Thanks Daniel for this hint, will send a v2.

^ permalink raw reply	[flat|nested] 18+ messages in thread

* [PATCH v2 net] net: do not use rcu in rtnl_dump_ifinfo()
  2015-02-27 17:07     ` [PATCH] net: do not use rcu in rtnl_dump_ifinfo() Eric Dumazet
  2015-02-27 17:19       ` Daniel Borkmann
@ 2015-02-27 17:42       ` Eric Dumazet
  2015-02-28  0:43         ` Jεan Sacren
  2015-03-01  5:29         ` David Miller
  1 sibling, 2 replies; 18+ messages in thread
From: Eric Dumazet @ 2015-02-27 17:42 UTC (permalink / raw)
  To: nicolas.dichtel, Daniel Borkmann; +Cc: David Miller, Cong Wang, netdev

From: Eric Dumazet <edumazet@google.com>

We did a failed attempt in the past to only use rcu in rtnl dump
operations (commit e67f88dd12f6 "net: dont hold rtnl mutex during
netlink dump callbacks")

Now that dumps are holding RTNL anyway, there is no need to also
use rcu locking, as it forbids any scheduling ability, like
GFP_KERNEL allocations that controlling path should use instead
of GFP_ATOMIC whenever possible.

This should fix following splat Cong Wang reported :

 [ INFO: suspicious RCU usage. ]
 3.19.0+ #805 Tainted: G        W      

 include/linux/rcupdate.h:538 Illegal context switch in RCU read-side critical section!
 
 other info that might help us debug this:
 
 
 rcu_scheduler_active = 1, debug_locks = 0
 2 locks held by ip/771:
  #0:  (rtnl_mutex){+.+.+.}, at: [<ffffffff8182b8f4>] netlink_dump+0x21/0x26c
  #1:  (rcu_read_lock){......}, at: [<ffffffff817d785b>] rcu_read_lock+0x0/0x6e
 
 stack backtrace:
 CPU: 3 PID: 771 Comm: ip Tainted: G        W       3.19.0+ #805
 Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011
  0000000000000001 ffff8800d51e7718 ffffffff81a27457 0000000029e729e6
  ffff8800d6108000 ffff8800d51e7748 ffffffff810b539b ffffffff820013dd
  00000000000001c8 0000000000000000 ffff8800d7448088 ffff8800d51e7758
 Call Trace:
  [<ffffffff81a27457>] dump_stack+0x4c/0x65
  [<ffffffff810b539b>] lockdep_rcu_suspicious+0x107/0x110
  [<ffffffff8109796f>] rcu_preempt_sleep_check+0x45/0x47
  [<ffffffff8109e457>] ___might_sleep+0x1d/0x1cb
  [<ffffffff8109e67d>] __might_sleep+0x78/0x80
  [<ffffffff814b9b1f>] idr_alloc+0x45/0xd1
  [<ffffffff810cb7ab>] ? rcu_read_lock_held+0x3b/0x3d
  [<ffffffff814b9f9d>] ? idr_for_each+0x53/0x101
  [<ffffffff817c1383>] alloc_netid+0x61/0x69
  [<ffffffff817c14c3>] __peernet2id+0x79/0x8d
  [<ffffffff817c1ab7>] peernet2id+0x13/0x1f
  [<ffffffff817d8673>] rtnl_fill_ifinfo+0xa8d/0xc20
  [<ffffffff810b17d9>] ? __lock_is_held+0x39/0x52
  [<ffffffff817d894f>] rtnl_dump_ifinfo+0x149/0x213
  [<ffffffff8182b9c2>] netlink_dump+0xef/0x26c
  [<ffffffff8182bcba>] netlink_recvmsg+0x17b/0x2c5
  [<ffffffff817b0adc>] __sock_recvmsg+0x4e/0x59
  [<ffffffff817b1b40>] sock_recvmsg+0x3f/0x51
  [<ffffffff817b1f9a>] ___sys_recvmsg+0xf6/0x1d9
  [<ffffffff8115dc67>] ? handle_pte_fault+0x6e1/0xd3d
  [<ffffffff8100a3a0>] ? native_sched_clock+0x35/0x37
  [<ffffffff8109f45b>] ? sched_clock_local+0x12/0x72
  [<ffffffff8109f6ac>] ? sched_clock_cpu+0x9e/0xb7
  [<ffffffff810cb7ab>] ? rcu_read_lock_held+0x3b/0x3d
  [<ffffffff811abde8>] ? __fcheck_files+0x4c/0x58
  [<ffffffff811ac556>] ? __fget_light+0x2d/0x52
  [<ffffffff817b376f>] __sys_recvmsg+0x42/0x60
  [<ffffffff817b379f>] SyS_recvmsg+0x12/0x1c

Signed-off-by: Eric Dumazet <edumazet@google.com>
Fixes: commit 0c7aecd4bde4b7302 ("netns: add rtnl cmd to add and get peer netns ids")
Cc: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Reported-by: Cong Wang <xiyou.wangcong@gmail.com>
---
 v2: As suggested by Daniel, tweak the changelog to be patchwork compliant ;)

 net/core/rtnetlink.c |    4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 1385de0fa080..74ce25d2cded 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -1300,7 +1300,6 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
 	s_h = cb->args[0];
 	s_idx = cb->args[1];
 
-	rcu_read_lock();
 	cb->seq = net->dev_base_seq;
 
 	/* A hack to preserve kernel<->userspace interface.
@@ -1322,7 +1321,7 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
 	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
 		idx = 0;
 		head = &net->dev_index_head[h];
-		hlist_for_each_entry_rcu(dev, head, index_hlist) {
+		hlist_for_each_entry(dev, head, index_hlist) {
 			if (idx < s_idx)
 				goto cont;
 			err = rtnl_fill_ifinfo(skb, dev, RTM_NEWLINK,
@@ -1344,7 +1343,6 @@ cont:
 		}
 	}
 out:
-	rcu_read_unlock();
 	cb->args[1] = idx;
 	cb->args[0] = h;
 

^ permalink raw reply related	[flat|nested] 18+ messages in thread

* Re: [Patch net] netns: avoid allocating idr when dumping info
  2015-02-27 17:28     ` [Patch net] netns: avoid allocating idr when dumping info Cong Wang
@ 2015-02-27 17:48       ` Nicolas Dichtel
  2015-02-28  0:56         ` Cong Wang
  2015-02-27 18:04       ` Eric Dumazet
  1 sibling, 1 reply; 18+ messages in thread
From: Nicolas Dichtel @ 2015-02-27 17:48 UTC (permalink / raw)
  To: Cong Wang; +Cc: Eric Dumazet, Linux Kernel Network Developers

Le 27/02/2015 18:28, Cong Wang a écrit :
> On Fri, Feb 27, 2015 at 8:25 AM, Nicolas Dichtel
> <nicolas.dichtel@6wind.com> wrote:
>> Le 27/02/2015 09:01, Eric Dumazet a écrit :
>>
>>> On Thu, 2015-02-26 at 22:32 -0800, Cong Wang wrote:
[snip]
>>>
>>> BTW, (void)peernet2id(dev_net(dev), src_net) can fail, and your fix is
>>> not complete anyway.
>>
>> That's true. The patch does not cover the following case:
>> ip link add foo type bar
>> ip link set foo netns netns1
>>
>> In this case, the second command will call dev_change_net_namespace() and
>> the
>> id will not be allocated.
>
> Then call it in do_setlink(). This is not a reason we _have to_
> allocate it in dumping.
Sure, but that just shows the 'over engineering' side ;-)
The reason to allocate it in dumping is that you need it in dumping, not before ;-)

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [Patch net] netns: avoid allocating idr when dumping info
  2015-02-27 17:28     ` [Patch net] netns: avoid allocating idr when dumping info Cong Wang
  2015-02-27 17:48       ` Nicolas Dichtel
@ 2015-02-27 18:04       ` Eric Dumazet
  2015-02-28  0:48         ` Cong Wang
  1 sibling, 1 reply; 18+ messages in thread
From: Eric Dumazet @ 2015-02-27 18:04 UTC (permalink / raw)
  To: Cong Wang; +Cc: Nicolas Dichtel, Linux Kernel Network Developers

On Fri, 2015-02-27 at 09:28 -0800, Cong Wang wrote:

> I don't understand why you think this is over engineering,
> clearly it is normal to allocate resources in newlink rather than
> dumping. Look at other resources we allocate in newlink.

This has nothing to do with the splat you gave.

If you believe there is a _different_ bug, then submit another patch.

But claiming your patch solves the rcu thing is misleading,
you gave already 2 wrong patches, trying to hide the real fix.

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] net: do not use rcu in rtnl_dump_ifinfo()
  2015-02-27 17:19       ` Daniel Borkmann
  2015-02-27 17:40         ` Eric Dumazet
@ 2015-02-27 18:35         ` Cong Wang
  1 sibling, 0 replies; 18+ messages in thread
From: Cong Wang @ 2015-02-27 18:35 UTC (permalink / raw)
  To: Daniel Borkmann
  Cc: Eric Dumazet, Nicolas Dichtel, David Miller, Cong Wang, netdev

On Fri, Feb 27, 2015 at 9:19 AM, Daniel Borkmann <daniel@iogearbox.net> wrote:
> On 02/27/2015 06:07 PM, Eric Dumazet wrote:
>>
>> From: Eric Dumazet <edumazet@google.com>
>>
>> We did a failed attempt in the past to only use rcu in rtnl dump
>> operations (commit e67f88dd12f6 "net: dont hold rtnl mutex during
>> netlink dump callbacks")
>>
>> Now that dumps are holding RTNL anyway, there is no need to also
>> use rcu locking, as it forbids any scheduling ability, like
>> GFP_KERNEL allocations that controlling path should use instead
>> of GFP_ATOMIC whenever possible.
>>
>> This should fix following splat Cong Wang reported :
>>
>> ===============================
>
>
> [ Btw, I think patchwork has an issue with above line when fetching
>   the mbox : it thinks commit description ends here. :(
>
>   Prefixing with a space char should help to not get it lost. ]

To save everyone's time, either fix patchwork or teach checkpatch.

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v2 net] net: do not use rcu in rtnl_dump_ifinfo()
  2015-02-27 17:42       ` [PATCH v2 net] " Eric Dumazet
@ 2015-02-28  0:43         ` Jεan Sacren
  2015-02-28  1:05           ` Eric Dumazet
  2015-03-01  5:29         ` David Miller
  1 sibling, 1 reply; 18+ messages in thread
From: Jεan Sacren @ 2015-02-28  0:43 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: nicolas.dichtel, Daniel Borkmann, David Miller, Cong Wang, netdev

From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Fri, 27 Feb 2015 09:42:50 -0800
>
> From: Eric Dumazet <edumazet@google.com>
> 
> We did a failed attempt in the past to only use rcu in rtnl dump
> operations (commit e67f88dd12f6 "net: dont hold rtnl mutex during
> netlink dump callbacks")
> 
> Now that dumps are holding RTNL anyway, there is no need to also
> use rcu locking, as it forbids any scheduling ability, like
> GFP_KERNEL allocations that controlling path should use instead
> of GFP_ATOMIC whenever possible.
> 
> This should fix following splat Cong Wang reported :
> 
>  [ INFO: suspicious RCU usage. ]
>  3.19.0+ #805 Tainted: G        W      
> 
>  include/linux/rcupdate.h:538 Illegal context switch in RCU read-side critical section!
>  
>  other info that might help us debug this:
>  
>  
>  rcu_scheduler_active = 1, debug_locks = 0
>  2 locks held by ip/771:
>   #0:  (rtnl_mutex){+.+.+.}, at: [<ffffffff8182b8f4>] netlink_dump+0x21/0x26c
>   #1:  (rcu_read_lock){......}, at: [<ffffffff817d785b>] rcu_read_lock+0x0/0x6e
>  
>  stack backtrace:
>  CPU: 3 PID: 771 Comm: ip Tainted: G        W       3.19.0+ #805
>  Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011
>   0000000000000001 ffff8800d51e7718 ffffffff81a27457 0000000029e729e6
>   ffff8800d6108000 ffff8800d51e7748 ffffffff810b539b ffffffff820013dd
>   00000000000001c8 0000000000000000 ffff8800d7448088 ffff8800d51e7758
>  Call Trace:
>   [<ffffffff81a27457>] dump_stack+0x4c/0x65
>   [<ffffffff810b539b>] lockdep_rcu_suspicious+0x107/0x110
>   [<ffffffff8109796f>] rcu_preempt_sleep_check+0x45/0x47
>   [<ffffffff8109e457>] ___might_sleep+0x1d/0x1cb
>   [<ffffffff8109e67d>] __might_sleep+0x78/0x80
>   [<ffffffff814b9b1f>] idr_alloc+0x45/0xd1
>   [<ffffffff810cb7ab>] ? rcu_read_lock_held+0x3b/0x3d
>   [<ffffffff814b9f9d>] ? idr_for_each+0x53/0x101
>   [<ffffffff817c1383>] alloc_netid+0x61/0x69
>   [<ffffffff817c14c3>] __peernet2id+0x79/0x8d
>   [<ffffffff817c1ab7>] peernet2id+0x13/0x1f
>   [<ffffffff817d8673>] rtnl_fill_ifinfo+0xa8d/0xc20
>   [<ffffffff810b17d9>] ? __lock_is_held+0x39/0x52
>   [<ffffffff817d894f>] rtnl_dump_ifinfo+0x149/0x213
>   [<ffffffff8182b9c2>] netlink_dump+0xef/0x26c
>   [<ffffffff8182bcba>] netlink_recvmsg+0x17b/0x2c5
>   [<ffffffff817b0adc>] __sock_recvmsg+0x4e/0x59
>   [<ffffffff817b1b40>] sock_recvmsg+0x3f/0x51
>   [<ffffffff817b1f9a>] ___sys_recvmsg+0xf6/0x1d9
>   [<ffffffff8115dc67>] ? handle_pte_fault+0x6e1/0xd3d
>   [<ffffffff8100a3a0>] ? native_sched_clock+0x35/0x37
>   [<ffffffff8109f45b>] ? sched_clock_local+0x12/0x72
>   [<ffffffff8109f6ac>] ? sched_clock_cpu+0x9e/0xb7
>   [<ffffffff810cb7ab>] ? rcu_read_lock_held+0x3b/0x3d
>   [<ffffffff811abde8>] ? __fcheck_files+0x4c/0x58
>   [<ffffffff811ac556>] ? __fget_light+0x2d/0x52
>   [<ffffffff817b376f>] __sys_recvmsg+0x42/0x60
>   [<ffffffff817b379f>] SyS_recvmsg+0x12/0x1c
> 
> Signed-off-by: Eric Dumazet <edumazet@google.com>
> Fixes: commit 0c7aecd4bde4b7302 ("netns: add rtnl cmd to add and get peer netns ids")

- The word 'commit' is not supposed to be there.

- ${SHA1} is supposed to be 12-digit-long.

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [Patch net] netns: avoid allocating idr when dumping info
  2015-02-27 18:04       ` Eric Dumazet
@ 2015-02-28  0:48         ` Cong Wang
  0 siblings, 0 replies; 18+ messages in thread
From: Cong Wang @ 2015-02-28  0:48 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: Nicolas Dichtel, Linux Kernel Network Developers

On Fri, Feb 27, 2015 at 10:04 AM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
> On Fri, 2015-02-27 at 09:28 -0800, Cong Wang wrote:
>
>> I don't understand why you think this is over engineering,
>> clearly it is normal to allocate resources in newlink rather than
>> dumping. Look at other resources we allocate in newlink.
>
> This has nothing to do with the splat you gave.


Why not? If we from the beginning can avoid allocate any
memory in dumping, such rcu warning would not even trigger.

>
> If you believe there is a _different_ bug, then submit another patch.

I am not 100% sure it is a bug yet, but it does signal some bad sign.

>
> But claiming your patch solves the rcu thing is misleading,
> you gave already 2 wrong patches, trying to hide the real fix.
>

If rcu read lock were the only problem, I wouldn't even want to
reply, too trivial to worthy a reply.

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [Patch net] netns: avoid allocating idr when dumping info
  2015-02-27 17:48       ` Nicolas Dichtel
@ 2015-02-28  0:56         ` Cong Wang
  2015-02-28  6:17           ` Cong Wang
  0 siblings, 1 reply; 18+ messages in thread
From: Cong Wang @ 2015-02-28  0:56 UTC (permalink / raw)
  To: Nicolas Dichtel; +Cc: Eric Dumazet, Linux Kernel Network Developers

On Fri, Feb 27, 2015 at 9:48 AM, Nicolas Dichtel
<nicolas.dichtel@6wind.com> wrote:
> Le 27/02/2015 18:28, Cong Wang a écrit :
>> Then call it in do_setlink(). This is not a reason we _have to_
>> allocate it in dumping.
>
> Sure, but that just shows the 'over engineering' side ;-)
> The reason to allocate it in dumping is that you need it in dumping, not
> before ;-)

Why? This relies on rtnetlink to dump the link after creation,
which is not obvious at all. IOW, imagine if rtnetlink didn't do
the dump after new link, what would happen?

You are hiding it too deep, without a valid reason. It sounds
like you want to defer the allocation as late as possible,
but again it makes no sense: 1) we don't allocate too much,
just one id in idr; 2) this loses the code readability and hides
potential bugs.

You are over engineering, not me who wants to make it sane.

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v2 net] net: do not use rcu in rtnl_dump_ifinfo()
  2015-02-28  0:43         ` Jεan Sacren
@ 2015-02-28  1:05           ` Eric Dumazet
  0 siblings, 0 replies; 18+ messages in thread
From: Eric Dumazet @ 2015-02-28  1:05 UTC (permalink / raw)
  To: Jεan Sacren
  Cc: nicolas.dichtel, Daniel Borkmann, David Miller, Cong Wang, netdev

On Fri, 2015-02-27 at 17:43 -0700, Jεan Sacren wrote:

> > Signed-off-by: Eric Dumazet <edumazet@google.com>
> > Fixes: commit 0c7aecd4bde4b7302 ("netns: add rtnl cmd to add and get peer netns ids")
> 
> - The word 'commit' is not supposed to be there.
> 
> - ${SHA1} is supposed to be 12-digit-long.

Right, please David fix this if you care.

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [Patch net] netns: avoid allocating idr when dumping info
  2015-02-28  0:56         ` Cong Wang
@ 2015-02-28  6:17           ` Cong Wang
  0 siblings, 0 replies; 18+ messages in thread
From: Cong Wang @ 2015-02-28  6:17 UTC (permalink / raw)
  To: Nicolas Dichtel; +Cc: Linux Kernel Network Developers

(Removing Eric from discussion since he expressed he doesn't
care this code at all in a private email.)


On Fri, Feb 27, 2015 at 4:56 PM, Cong Wang <xiyou.wangcong@gmail.com> wrote:
>
> Why? This relies on rtnetlink to dump the link after creation,
> which is not obvious at all. IOW, imagine if rtnetlink didn't do
> the dump after new link, what would happen?
>
> You are hiding it too deep, without a valid reason. It sounds
> like you want to defer the allocation as late as possible,
> but again it makes no sense: 1) we don't allocate too much,
> just one id in idr; 2) this loses the code readability and hides
> potential bugs.
>

Here we go:

This breaks veth, where veth peers are completely equal,
therefore they are links for each other. So we expect
link-netnsid shown on both side.

Your current impl. relies on rtnl_configure_link() to dump
the link to allocate the netns id after creating a link.
Unfortunately for veth, rtnl_configure_link() is called
before veth->peer is set, therefore the dump of the "peer"
does _not_ actually allocate the netns id, since its
->get_link_net() returns the same net.

I will send a patch together after I figure out another bug
I saw.

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v2 net] net: do not use rcu in rtnl_dump_ifinfo()
  2015-02-27 17:42       ` [PATCH v2 net] " Eric Dumazet
  2015-02-28  0:43         ` Jεan Sacren
@ 2015-03-01  5:29         ` David Miller
  1 sibling, 0 replies; 18+ messages in thread
From: David Miller @ 2015-03-01  5:29 UTC (permalink / raw)
  To: eric.dumazet; +Cc: nicolas.dichtel, daniel, xiyou.wangcong, netdev

From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Fri, 27 Feb 2015 09:42:50 -0800

> From: Eric Dumazet <edumazet@google.com>
> 
> We did a failed attempt in the past to only use rcu in rtnl dump
> operations (commit e67f88dd12f6 "net: dont hold rtnl mutex during
> netlink dump callbacks")
> 
> Now that dumps are holding RTNL anyway, there is no need to also
> use rcu locking, as it forbids any scheduling ability, like
> GFP_KERNEL allocations that controlling path should use instead
> of GFP_ATOMIC whenever possible.
> 
> This should fix following splat Cong Wang reported :
 ...
> Signed-off-by: Eric Dumazet <edumazet@google.com>
> Fixes: commit 0c7aecd4bde4b7302 ("netns: add rtnl cmd to add and get peer netns ids")
> Cc: Nicolas Dichtel <nicolas.dichtel@6wind.com>
> Reported-by: Cong Wang <xiyou.wangcong@gmail.com>
> ---
>  v2: As suggested by Daniel, tweak the changelog to be patchwork compliant ;)

Applied, thanks Eric.

^ permalink raw reply	[flat|nested] 18+ messages in thread

end of thread, other threads:[~2015-03-01  5:29 UTC | newest]

Thread overview: 18+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-02-27  6:32 [Patch net] netns: avoid allocating idr when dumping info Cong Wang
2015-02-27  8:01 ` Eric Dumazet
2015-02-27 16:25   ` Nicolas Dichtel
2015-02-27 16:55     ` Eric Dumazet
2015-02-27 17:07     ` [PATCH] net: do not use rcu in rtnl_dump_ifinfo() Eric Dumazet
2015-02-27 17:19       ` Daniel Borkmann
2015-02-27 17:40         ` Eric Dumazet
2015-02-27 18:35         ` Cong Wang
2015-02-27 17:42       ` [PATCH v2 net] " Eric Dumazet
2015-02-28  0:43         ` Jεan Sacren
2015-02-28  1:05           ` Eric Dumazet
2015-03-01  5:29         ` David Miller
2015-02-27 17:28     ` [Patch net] netns: avoid allocating idr when dumping info Cong Wang
2015-02-27 17:48       ` Nicolas Dichtel
2015-02-28  0:56         ` Cong Wang
2015-02-28  6:17           ` Cong Wang
2015-02-27 18:04       ` Eric Dumazet
2015-02-28  0:48         ` Cong Wang

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.