All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH net] ipv6: fix RTNL assert fail in DAD
@ 2014-03-17 23:18 Stephen Hemminger
  2014-03-18  0:29 ` Hannes Frederic Sowa
  2014-03-23  2:03 ` Hannes Frederic Sowa
  0 siblings, 2 replies; 17+ messages in thread
From: Stephen Hemminger @ 2014-03-17 23:18 UTC (permalink / raw)
  To: David Miller, Hannes Frederic Sowa; +Cc: netdev

IPv6 duplicate address detection is triggering the following assertion
failure when using macvlan + vif + multicast.
 RTNL: assertion failed at net/core/dev.c (4496)

This happens because the DAD timer is adding a multicast address without
acquiring the RTNL mutex. In order to acquire the RTNL mutex, it must be
done in process context; therefore it must be in a workqueue.

Full backtrace:
[  541.030090] RTNL: assertion failed at net/core/dev.c (4496)
[  541.031143] CPU: 0 PID: 0 Comm: swapper/0 Tainted: G           O 3.10.33-1-amd64-vyatta #1
[  541.031145] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2007
[  541.031146]  ffffffff8148a9f0 000000000000002f ffffffff813c98c1 ffff88007c4451f8
[  541.031148]  0000000000000000 0000000000000000 ffffffff813d3540 ffff88007fc03d18
[  541.031150]  0000880000000006 ffff88007c445000 ffffffffa0194160 0000000000000000
[  541.031152] Call Trace:
[  541.031153]  <IRQ>  [<ffffffff8148a9f0>] ? dump_stack+0xd/0x17
[  541.031180]  [<ffffffff813c98c1>] ? __dev_set_promiscuity+0x101/0x180
[  541.031183]  [<ffffffff813d3540>] ? __hw_addr_create_ex+0x60/0xc0
[  541.031185]  [<ffffffff813cfe1a>] ? __dev_set_rx_mode+0xaa/0xc0
[  541.031189]  [<ffffffff813d3a81>] ? __dev_mc_add+0x61/0x90
[  541.031198]  [<ffffffffa01dcf9c>] ? igmp6_group_added+0xfc/0x1a0 [ipv6]
[  541.031208]  [<ffffffff8111237b>] ? kmem_cache_alloc+0xcb/0xd0
[  541.031212]  [<ffffffffa01ddcd7>] ? ipv6_dev_mc_inc+0x267/0x300 [ipv6]
[  541.031216]  [<ffffffffa01c2fae>] ? addrconf_join_solict+0x2e/0x40 [ipv6]
[  541.031219]  [<ffffffffa01ba2e9>] ? ipv6_dev_ac_inc+0x159/0x1f0 [ipv6]
[  541.031223]  [<ffffffffa01c0772>] ? addrconf_join_anycast+0x92/0xa0 [ipv6]
[  541.031226]  [<ffffffffa01c311e>] ? __ipv6_ifa_notify+0x11e/0x1e0 [ipv6]
[  541.031229]  [<ffffffffa01c3213>] ? ipv6_ifa_notify+0x33/0x50 [ipv6]
[  541.031233]  [<ffffffffa01c36c8>] ? addrconf_dad_completed+0x28/0x100 [ipv6]
[  541.031241]  [<ffffffff81075c1d>] ? task_cputime+0x2d/0x50
[  541.031244]  [<ffffffffa01c38d6>] ? addrconf_dad_timer+0x136/0x150 [ipv6]
[  541.031247]  [<ffffffffa01c37a0>] ? addrconf_dad_completed+0x100/0x100 [ipv6]
[  541.031255]  [<ffffffff8105313a>] ? call_timer_fn.isra.22+0x2a/0x90
[  541.031258]  [<ffffffffa01c37a0>] ? addrconf_dad_completed+0x100/0x100 [ipv6]
[  541.031261]  [<ffffffff81053531>] ? run_timer_softirq+0x1a1/0x260
[  541.031267]  [<ffffffff810350cf>] ? kvm_clock_read+0x1f/0x30
[  541.031272]  [<ffffffff810132a5>] ? sched_clock+0x5/0x10
[  541.031274]  [<ffffffff81074bd5>] ? sched_clock_local+0x15/0x80
[  541.031276]  [<ffffffff8104d586>] ? __do_softirq+0xd6/0x1b0
[  541.031282]  [<ffffffff8149109c>] ? call_softirq+0x1c/0x30
[  541.031284]  [<ffffffff8100d835>] ? do_softirq+0x75/0xb0
[  541.031286]  [<ffffffff8104d7ed>] ? irq_exit+0xbd/0xc0
[  541.031290]  [<ffffffff8102e718>] ? smp_apic_timer_interrupt+0x68/0xa0
[  541.031292]  [<ffffffff814908dd>] ? apic_timer_interrupt+0x6d/0x80
[  541.031293]  <EOI>  [<ffffffff81013ee0>] ? hard_enable_TSC+0x20/0x20
[  541.031296]  [<ffffffff81035412>] ? native_safe_halt+0x2/0x10
[  541.031298]  [<ffffffff81014619>] ? arch_cpu_idle+0x9/0x30
[  541.031299]  [<ffffffff81013ee5>] ? default_idle+0x5/0x10
[  541.031302]  [<ffffffff8108263a>] ? cpu_startup_entry+0x8a/0x180
[  541.031306]  [<ffffffff818d3e3e>] ? start_kernel+0x3ac/0x3b7
[  541.031309]  [<ffffffff818d38a9>] ? repair_env_string+0x5b/0x5b
[  541.031311]  [<ffffffff818d36bc>] ? x86_64_start_kernel+0xf6/0x105


Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>

---
Patch is against -net (not net-next) because this is a bug fix.
Should be applied to -stable as well.

--- a/include/net/if_inet6.h	2014-03-17 10:53:06.386453341 -0700
+++ b/include/net/if_inet6.h	2014-03-17 11:01:42.383220298 -0700
@@ -59,6 +59,7 @@ struct inet6_ifaddr {
 	unsigned long		tstamp; /* updated timestamp */
 
 	struct timer_list	dad_timer;
+	struct work_struct	dad_work;
 
 	struct inet6_dev	*idev;
 	struct rt6_info		*rt;
--- a/net/ipv6/addrconf.c	2014-03-17 10:53:06.386453341 -0700
+++ b/net/ipv6/addrconf.c	2014-03-17 15:12:52.785628652 -0700
@@ -150,8 +150,10 @@ static struct rt6_info *addrconf_get_pre
 						  const struct net_device *dev,
 						  u32 flags, u32 noflags);
 
+static struct workqueue_struct *addrconf_wq;
 static void addrconf_dad_start(struct inet6_ifaddr *ifp);
 static void addrconf_dad_timer(unsigned long data);
+static void addrconf_dad_work(struct work_struct *work);
 static void addrconf_dad_completed(struct inet6_ifaddr *ifp);
 static void addrconf_dad_run(struct inet6_dev *idev);
 static void addrconf_rs_timer(unsigned long data);
@@ -851,6 +853,7 @@ ipv6_add_addr(struct inet6_dev *idev, co
 	spin_lock_init(&ifa->state_lock);
 	setup_timer(&ifa->dad_timer, addrconf_dad_timer,
 		    (unsigned long)ifa);
+	INIT_WORK(&ifa->dad_work, addrconf_dad_work);
 	INIT_HLIST_NODE(&ifa->addr_lst);
 	ifa->scope = scope;
 	ifa->prefix_len = pfxlen;
@@ -3203,6 +3206,18 @@ out:
 	read_unlock_bh(&idev->lock);
 }
 
+/* DAD completion is handled in work queue */
+static void addrconf_dad_work(struct work_struct *work)
+{
+	struct inet6_ifaddr *ifp
+		= container_of(work, struct inet6_ifaddr, dad_work);
+
+	rtnl_lock();
+	addrconf_dad_completed(ifp);
+	rtnl_unlock();
+	in6_ifa_put(ifp);
+}
+
 static void addrconf_dad_timer(unsigned long data)
 {
 	struct inet6_ifaddr *ifp = (struct inet6_ifaddr *) data;
@@ -3234,9 +3249,8 @@ static void addrconf_dad_timer(unsigned
 		spin_unlock(&ifp->lock);
 		write_unlock(&idev->lock);
 
-		addrconf_dad_completed(ifp);
-
-		goto out;
+		queue_work(addrconf_wq, &ifp->dad_work);
+		return;
 	}
 
 	ifp->dad_probes--;
@@ -5244,6 +5258,12 @@ int __init addrconf_init(void)
 	if (err < 0)
 		goto out_addrlabel;
 
+	addrconf_wq = create_workqueue("addrconf");
+	if (!addrconf_wq) {
+		err = -ENOMEM;
+		goto out_nowq;
+	}
+
 	/* The addrconf netdev notifier requires that loopback_dev
 	 * has it's ipv6 private information allocated and setup
 	 * before it can bring up and give link-local addresses
@@ -5302,6 +5322,8 @@ errout:
 	rtnl_af_unregister(&inet6_ops);
 	unregister_netdevice_notifier(&ipv6_dev_notf);
 errlo:
+	destroy_workqueue(addrconf_wq);
+out_nowq:
 	unregister_pernet_subsys(&addrconf_ops);
 out_addrlabel:
 	ipv6_addr_label_cleanup();
@@ -5340,4 +5362,5 @@ void addrconf_cleanup(void)
 
 	del_timer(&addr_chk_timer);
 	rtnl_unlock();
+	destroy_workqueue(addrconf_wq);
 }

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH net] ipv6: fix RTNL assert fail in DAD
  2014-03-17 23:18 [PATCH net] ipv6: fix RTNL assert fail in DAD Stephen Hemminger
@ 2014-03-18  0:29 ` Hannes Frederic Sowa
  2014-03-19  0:54   ` Stephen Hemminger
  2014-03-23  2:03 ` Hannes Frederic Sowa
  1 sibling, 1 reply; 17+ messages in thread
From: Hannes Frederic Sowa @ 2014-03-18  0:29 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: David Miller, netdev

Hi!

On Mon, Mar 17, 2014 at 04:18:53PM -0700, Stephen Hemminger wrote:
> IPv6 duplicate address detection is triggering the following assertion
> failure when using macvlan + vif + multicast.
>  RTNL: assertion failed at net/core/dev.c (4496)
> 
> This happens because the DAD timer is adding a multicast address without
> acquiring the RTNL mutex. In order to acquire the RTNL mutex, it must be
> done in process context; therefore it must be in a workqueue.
> 
> Full backtrace:
> [  541.030090] RTNL: assertion failed at net/core/dev.c (4496)
> [  541.031143] CPU: 0 PID: 0 Comm: swapper/0 Tainted: G           O 3.10.33-1-amd64-vyatta #1
> [  541.031145] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2007
> [  541.031146]  ffffffff8148a9f0 000000000000002f ffffffff813c98c1 ffff88007c4451f8
> [  541.031148]  0000000000000000 0000000000000000 ffffffff813d3540 ffff88007fc03d18
> [  541.031150]  0000880000000006 ffff88007c445000 ffffffffa0194160 0000000000000000
> [  541.031152] Call Trace:
> [  541.031153]  <IRQ>  [<ffffffff8148a9f0>] ? dump_stack+0xd/0x17
> [  541.031180]  [<ffffffff813c98c1>] ? __dev_set_promiscuity+0x101/0x180
> [  541.031183]  [<ffffffff813d3540>] ? __hw_addr_create_ex+0x60/0xc0
> [  541.031185]  [<ffffffff813cfe1a>] ? __dev_set_rx_mode+0xaa/0xc0
> [  541.031189]  [<ffffffff813d3a81>] ? __dev_mc_add+0x61/0x90
> [  541.031198]  [<ffffffffa01dcf9c>] ? igmp6_group_added+0xfc/0x1a0 [ipv6]
> [  541.031208]  [<ffffffff8111237b>] ? kmem_cache_alloc+0xcb/0xd0
> [  541.031212]  [<ffffffffa01ddcd7>] ? ipv6_dev_mc_inc+0x267/0x300 [ipv6]
> [  541.031216]  [<ffffffffa01c2fae>] ? addrconf_join_solict+0x2e/0x40 [ipv6]
> [  541.031219]  [<ffffffffa01ba2e9>] ? ipv6_dev_ac_inc+0x159/0x1f0 [ipv6]
> [  541.031223]  [<ffffffffa01c0772>] ? addrconf_join_anycast+0x92/0xa0 [ipv6]
> [  541.031226]  [<ffffffffa01c311e>] ? __ipv6_ifa_notify+0x11e/0x1e0 [ipv6]
> [  541.031229]  [<ffffffffa01c3213>] ? ipv6_ifa_notify+0x33/0x50 [ipv6]

This is the most often case but I fear there are more of them.

addrconf_verify seems unsafe, too, when removing the last ipv6 address. So
does addrconf_prefix_rcv if adding first address.

I wonder if we should put the whole ipv6_ifa_notify infrastructure in a
workqueue? I don't like that either and it could add subtile races.

Those races also seem possible if we only defer addrconf_join_solict,
addrconf_leave_solict, addrconf_join_anycast and addrconf_leave_anycast to
workqueues.

This change is certainly going into the right direction but I am not sure if
we could generalize it.

Greetings,

  Hannes

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH net] ipv6: fix RTNL assert fail in DAD
  2014-03-18  0:29 ` Hannes Frederic Sowa
@ 2014-03-19  0:54   ` Stephen Hemminger
  2014-03-19  4:17     ` David Miller
  0 siblings, 1 reply; 17+ messages in thread
From: Stephen Hemminger @ 2014-03-19  0:54 UTC (permalink / raw)
  To: Hannes Frederic Sowa; +Cc: David Miller, netdev

On Tue, 18 Mar 2014 01:29:08 +0100
Hannes Frederic Sowa <hannes@stressinduktion.org> wrote:

> Hi!
> 
> On Mon, Mar 17, 2014 at 04:18:53PM -0700, Stephen Hemminger wrote:
> > IPv6 duplicate address detection is triggering the following assertion
> > failure when using macvlan + vif + multicast.
> >  RTNL: assertion failed at net/core/dev.c (4496)
> > 
> > This happens because the DAD timer is adding a multicast address without
> > acquiring the RTNL mutex. In order to acquire the RTNL mutex, it must be
> > done in process context; therefore it must be in a workqueue.
> > 
> > Full backtrace:
> > [  541.030090] RTNL: assertion failed at net/core/dev.c (4496)
> > [  541.031143] CPU: 0 PID: 0 Comm: swapper/0 Tainted: G           O 3.10.33-1-amd64-vyatta #1
> > [  541.031145] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2007
> > [  541.031146]  ffffffff8148a9f0 000000000000002f ffffffff813c98c1 ffff88007c4451f8
> > [  541.031148]  0000000000000000 0000000000000000 ffffffff813d3540 ffff88007fc03d18
> > [  541.031150]  0000880000000006 ffff88007c445000 ffffffffa0194160 0000000000000000
> > [  541.031152] Call Trace:
> > [  541.031153]  <IRQ>  [<ffffffff8148a9f0>] ? dump_stack+0xd/0x17
> > [  541.031180]  [<ffffffff813c98c1>] ? __dev_set_promiscuity+0x101/0x180
> > [  541.031183]  [<ffffffff813d3540>] ? __hw_addr_create_ex+0x60/0xc0
> > [  541.031185]  [<ffffffff813cfe1a>] ? __dev_set_rx_mode+0xaa/0xc0
> > [  541.031189]  [<ffffffff813d3a81>] ? __dev_mc_add+0x61/0x90
> > [  541.031198]  [<ffffffffa01dcf9c>] ? igmp6_group_added+0xfc/0x1a0 [ipv6]
> > [  541.031208]  [<ffffffff8111237b>] ? kmem_cache_alloc+0xcb/0xd0
> > [  541.031212]  [<ffffffffa01ddcd7>] ? ipv6_dev_mc_inc+0x267/0x300 [ipv6]
> > [  541.031216]  [<ffffffffa01c2fae>] ? addrconf_join_solict+0x2e/0x40 [ipv6]
> > [  541.031219]  [<ffffffffa01ba2e9>] ? ipv6_dev_ac_inc+0x159/0x1f0 [ipv6]
> > [  541.031223]  [<ffffffffa01c0772>] ? addrconf_join_anycast+0x92/0xa0 [ipv6]
> > [  541.031226]  [<ffffffffa01c311e>] ? __ipv6_ifa_notify+0x11e/0x1e0 [ipv6]
> > [  541.031229]  [<ffffffffa01c3213>] ? ipv6_ifa_notify+0x33/0x50 [ipv6]
> 
> This is the most often case but I fear there are more of them.
> 
> addrconf_verify seems unsafe, too, when removing the last ipv6 address. So
> does addrconf_prefix_rcv if adding first address.
> 
> I wonder if we should put the whole ipv6_ifa_notify infrastructure in a
> workqueue? I don't like that either and it could add subtile races.

That is option, might be some call chains that already have rtnl_lock held.

> 
> Those races also seem possible if we only defer addrconf_join_solict,
> addrconf_leave_solict, addrconf_join_anycast and addrconf_leave_anycast to
> workqueues.
> 
> This change is certainly going into the right direction but I am not sure if
> we could generalize it.

There is a lot of bookkeeping that happens for cases where nothing
changes at the device layer. Want to avoid rtnl_lock unless there is
something that is going to happen.

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH net] ipv6: fix RTNL assert fail in DAD
  2014-03-19  0:54   ` Stephen Hemminger
@ 2014-03-19  4:17     ` David Miller
  2014-03-19  6:58       ` Stephen Hemminger
  0 siblings, 1 reply; 17+ messages in thread
From: David Miller @ 2014-03-19  4:17 UTC (permalink / raw)
  To: stephen; +Cc: hannes, netdev

From: Stephen Hemminger <stephen@networkplumber.org>
Date: Tue, 18 Mar 2014 17:54:06 -0700

> On Tue, 18 Mar 2014 01:29:08 +0100
> Hannes Frederic Sowa <hannes@stressinduktion.org> wrote:
> 
>> I wonder if we should put the whole ipv6_ifa_notify infrastructure in a
>> workqueue? I don't like that either and it could add subtile races.
> 
> That is option, might be some call chains that already have rtnl_lock held.

There are TAHI ipv6 conformance tests that expect state changes to be
precisely synchronous.

And frankly it's pretty reasonable to send two packets back to back,
one which causes the state change and one which tests if the state
change happened, and expect that to work.

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH net] ipv6: fix RTNL assert fail in DAD
  2014-03-19  4:17     ` David Miller
@ 2014-03-19  6:58       ` Stephen Hemminger
  2014-03-19 17:53         ` David Miller
  0 siblings, 1 reply; 17+ messages in thread
From: Stephen Hemminger @ 2014-03-19  6:58 UTC (permalink / raw)
  To: David Miller; +Cc: hannes, netdev

On Wed, 19 Mar 2014 00:17:36 -0400 (EDT)
David Miller <davem@davemloft.net> wrote:

> From: Stephen Hemminger <stephen@networkplumber.org>
> Date: Tue, 18 Mar 2014 17:54:06 -0700
> 
> > On Tue, 18 Mar 2014 01:29:08 +0100
> > Hannes Frederic Sowa <hannes@stressinduktion.org> wrote:
> > 
> >> I wonder if we should put the whole ipv6_ifa_notify infrastructure in a
> >> workqueue? I don't like that either and it could add subtile races.
> > 
> > That is option, might be some call chains that already have rtnl_lock held.
> 
> There are TAHI ipv6 conformance tests that expect state changes to be
> precisely synchronous.
> 
> And frankly it's pretty reasonable to send two packets back to back,
> one which causes the state change and one which tests if the state
> change happened, and expect that to work.

It is more the timer based state changes that are problematic because
they aren't acquire RTNL.

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH net] ipv6: fix RTNL assert fail in DAD
  2014-03-19  6:58       ` Stephen Hemminger
@ 2014-03-19 17:53         ` David Miller
  2014-03-19 22:44           ` Hannes Frederic Sowa
  0 siblings, 1 reply; 17+ messages in thread
From: David Miller @ 2014-03-19 17:53 UTC (permalink / raw)
  To: stephen; +Cc: hannes, netdev

From: Stephen Hemminger <stephen@networkplumber.org>
Date: Tue, 18 Mar 2014 23:58:11 -0700

> On Wed, 19 Mar 2014 00:17:36 -0400 (EDT)
> David Miller <davem@davemloft.net> wrote:
> 
>> From: Stephen Hemminger <stephen@networkplumber.org>
>> Date: Tue, 18 Mar 2014 17:54:06 -0700
>> 
>> > On Tue, 18 Mar 2014 01:29:08 +0100
>> > Hannes Frederic Sowa <hannes@stressinduktion.org> wrote:
>> > 
>> >> I wonder if we should put the whole ipv6_ifa_notify infrastructure in a
>> >> workqueue? I don't like that either and it could add subtile races.
>> > 
>> > That is option, might be some call chains that already have rtnl_lock held.
>> 
>> There are TAHI ipv6 conformance tests that expect state changes to be
>> precisely synchronous.
>> 
>> And frankly it's pretty reasonable to send two packets back to back,
>> one which causes the state change and one which tests if the state
>> change happened, and expect that to work.
> 
> It is more the timer based state changes that are problematic because
> they aren't acquire RTNL.

Ok, the timer stuff could run from a workqueue just fine.

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH net] ipv6: fix RTNL assert fail in DAD
  2014-03-19 17:53         ` David Miller
@ 2014-03-19 22:44           ` Hannes Frederic Sowa
  2014-03-20  3:52             ` David Miller
  0 siblings, 1 reply; 17+ messages in thread
From: Hannes Frederic Sowa @ 2014-03-19 22:44 UTC (permalink / raw)
  To: David Miller; +Cc: stephen, netdev

On Wed, Mar 19, 2014 at 01:53:19PM -0400, David Miller wrote:
> From: Stephen Hemminger <stephen@networkplumber.org>
> Date: Tue, 18 Mar 2014 23:58:11 -0700
> 
> > On Wed, 19 Mar 2014 00:17:36 -0400 (EDT)
> > David Miller <davem@davemloft.net> wrote:
> > 
> >> From: Stephen Hemminger <stephen@networkplumber.org>
> >> Date: Tue, 18 Mar 2014 17:54:06 -0700
> >> 
> >> > On Tue, 18 Mar 2014 01:29:08 +0100
> >> > Hannes Frederic Sowa <hannes@stressinduktion.org> wrote:
> >> > 
> >> >> I wonder if we should put the whole ipv6_ifa_notify infrastructure in a
> >> >> workqueue? I don't like that either and it could add subtile races.
> >> > 
> >> > That is option, might be some call chains that already have rtnl_lock held.
> >> 
> >> There are TAHI ipv6 conformance tests that expect state changes to be
> >> precisely synchronous.

Exactly, I was afraid of those things (not exactly of TAHI breakage but of
real packet loss in case of late workqueue scheduling).

> >> And frankly it's pretty reasonable to send two packets back to back,
> >> one which causes the state change and one which tests if the state
> >> change happened, and expect that to work.
> > 
> > It is more the timer based state changes that are problematic because
> > they aren't acquire RTNL.
> 
> Ok, the timer stuff could run from a workqueue just fine.

We have no-timer invocations, too, like addrconf_prefix_rcv. In that case the
whole handling of the router advertisment should get deferred into the
workqueue.

Bye,

  Hannes

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH net] ipv6: fix RTNL assert fail in DAD
  2014-03-19 22:44           ` Hannes Frederic Sowa
@ 2014-03-20  3:52             ` David Miller
  2014-03-20  6:38               ` Hannes Frederic Sowa
  0 siblings, 1 reply; 17+ messages in thread
From: David Miller @ 2014-03-20  3:52 UTC (permalink / raw)
  To: hannes; +Cc: stephen, netdev

From: Hannes Frederic Sowa <hannes@stressinduktion.org>
Date: Wed, 19 Mar 2014 23:44:42 +0100

> On Wed, Mar 19, 2014 at 01:53:19PM -0400, David Miller wrote:
>> Ok, the timer stuff could run from a workqueue just fine.
> 
> We have no-timer invocations, too, like addrconf_prefix_rcv. In that case the
> whole handling of the router advertisment should get deferred into the
> workqueue.

Just to be clear, you are saying that this doesn't need to be
synchronous?  Handling a prefix event seems like something that would
in fact need to be.

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH net] ipv6: fix RTNL assert fail in DAD
  2014-03-20  3:52             ` David Miller
@ 2014-03-20  6:38               ` Hannes Frederic Sowa
  0 siblings, 0 replies; 17+ messages in thread
From: Hannes Frederic Sowa @ 2014-03-20  6:38 UTC (permalink / raw)
  To: David Miller; +Cc: stephen, netdev

On Wed, Mar 19, 2014 at 11:52:17PM -0400, David Miller wrote:
> From: Hannes Frederic Sowa <hannes@stressinduktion.org>
> Date: Wed, 19 Mar 2014 23:44:42 +0100
> 
> > On Wed, Mar 19, 2014 at 01:53:19PM -0400, David Miller wrote:
> >> Ok, the timer stuff could run from a workqueue just fine.
> > 
> > We have no-timer invocations, too, like addrconf_prefix_rcv. In that case the
> > whole handling of the router advertisment should get deferred into the
> > workqueue.
> 
> Just to be clear, you are saying that this doesn't need to be
> synchronous?  Handling a prefix event seems like something that would
> in fact need to be.

Here is my current analysis and proposals:

Actually, I would say that a safe entry point for starting to push further
prefix event handling into a workqueue would be addrconf_dad_start.

>From there on, we need to make sure that addrconf_join_solict (which
is the first point we actually need RTNL locked) is called before we
do optimistic duplicate address detection processing (this seems to be
the only happens-before invariant we need to preserve here). Stephen already
allocated the work_struct in inet6_ifaddr, so my suggestion would be to
change Stephen's patch to use a delayed workqueue and just replace the
other timer operations to use the new work_struct in inet6_ifaddr
with delayed operations. Entry-point would be addrconf_dad_start which
simply adds the delayed operation with 0 delay and maybe a new flag so
that addrconf_dad_timer (which should be called addrconf_dad_work by then)
does the work which was prior in addrconf_dad_start.

The addrconf_dad_completed handling could be under RTNL, too, so the
original problem would be gone.

addrconf_verify would also need a delayed workqueue (split to
addrconf_verify_rtnl and addrconf_verify is just a invocation
to mod_delay_work(wq, addrconf_verify_work, 0) which calls
addrconf_verify_rtnl with rtnl locked, would be my approach by only
looking at the code).

That leaves us with one unsafe invocation of an rtnl-locked needed invocation
in pndisc_constructor for proxy_ndp handling. Don't know what to do about that
currently but didn't look to closely.

Also, to find problems like this sooner, should we propagate ASSERT_RTNL()
tests up from conditional callees to their callers (e.g. __dev_set_promiscuity
-> __dev_set_rx_mode -> maybe even further up the stack?).

Greetings,

  Hannes

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH net] ipv6: fix RTNL assert fail in DAD
  2014-03-17 23:18 [PATCH net] ipv6: fix RTNL assert fail in DAD Stephen Hemminger
  2014-03-18  0:29 ` Hannes Frederic Sowa
@ 2014-03-23  2:03 ` Hannes Frederic Sowa
  2014-03-25  8:03   ` [PATCH net] ipv6: move DAD and addrconf_verify processing to workqueue Hannes Frederic Sowa
  1 sibling, 1 reply; 17+ messages in thread
From: Hannes Frederic Sowa @ 2014-03-23  2:03 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: David Miller, netdev

Hi Stephen!

On Mon, Mar 17, 2014 at 04:18:53PM -0700, Stephen Hemminger wrote:
> IPv6 duplicate address detection is triggering the following assertion
> failure when using macvlan + vif + multicast.
>  RTNL: assertion failed at net/core/dev.c (4496)
> 
> This happens because the DAD timer is adding a multicast address without
> acquiring the RTNL mutex. In order to acquire the RTNL mutex, it must be
> done in process context; therefore it must be in a workqueue.

I finally had a bit free time and cooked up this patch. I stole some snippets
from yours. Of course, I'll attribute it correctly.

Just wanted to post early, so we don't waste time to work on that
concurrently:

diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h
index 9650a3f..1cdcf9c 100644
--- a/include/net/if_inet6.h
+++ b/include/net/if_inet6.h
@@ -31,6 +31,7 @@
 #define IF_PREFIX_AUTOCONF	0x02
 
 enum {
+	INET6_IFADDR_STATE_PREDAD,
 	INET6_IFADDR_STATE_DAD,
 	INET6_IFADDR_STATE_POSTDAD,
 	INET6_IFADDR_STATE_UP,
@@ -58,7 +59,7 @@ struct inet6_ifaddr {
 	unsigned long		cstamp;	/* created timestamp */
 	unsigned long		tstamp; /* updated timestamp */
 
-	struct timer_list	dad_timer;
+	struct delayed_work	dad_work;
 
 	struct inet6_dev	*idev;
 	struct rt6_info		*rt;
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 344e972..0cdd99c 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -133,9 +133,12 @@ static int ipv6_count_addresses(struct inet6_dev *idev);
 static struct hlist_head inet6_addr_lst[IN6_ADDR_HSIZE];
 static DEFINE_SPINLOCK(addrconf_hash_lock);
 
-static void addrconf_verify(unsigned long);
+static void addrconf_verify(void);
+static void addrconf_verify_rtnl(void);
+static void addrconf_verify_work(struct work_struct *);
 
-static DEFINE_TIMER(addr_chk_timer, addrconf_verify, 0, 0);
+static struct workqueue_struct *addrconf_wq;
+static DECLARE_DELAYED_WORK(addr_chk_work, addrconf_verify_work);
 static DEFINE_SPINLOCK(addrconf_verify_lock);
 
 static void addrconf_join_anycast(struct inet6_ifaddr *ifp);
@@ -151,7 +154,7 @@ static struct rt6_info *addrconf_get_prefix_route(const struct in6_addr *pfx,
 						  u32 flags, u32 noflags);
 
 static void addrconf_dad_start(struct inet6_ifaddr *ifp);
-static void addrconf_dad_timer(unsigned long data);
+static void addrconf_dad_work(struct work_struct *w);
 static void addrconf_dad_completed(struct inet6_ifaddr *ifp);
 static void addrconf_dad_run(struct inet6_dev *idev);
 static void addrconf_rs_timer(unsigned long data);
@@ -247,9 +250,9 @@ static void addrconf_del_rs_timer(struct inet6_dev *idev)
 		__in6_dev_put(idev);
 }
 
-static void addrconf_del_dad_timer(struct inet6_ifaddr *ifp)
+static void addrconf_del_dad_work(struct inet6_ifaddr *ifp)
 {
-	if (del_timer(&ifp->dad_timer))
+	if (cancel_delayed_work(&ifp->dad_work))
 		__in6_ifa_put(ifp);
 }
 
@@ -261,12 +264,12 @@ static void addrconf_mod_rs_timer(struct inet6_dev *idev,
 	mod_timer(&idev->rs_timer, jiffies + when);
 }
 
-static void addrconf_mod_dad_timer(struct inet6_ifaddr *ifp,
+static void addrconf_mod_dad_work(struct inet6_ifaddr *ifp,
 				   unsigned long when)
 {
-	if (!timer_pending(&ifp->dad_timer))
+	if (!delayed_work_pending(&ifp->dad_work))
 		in6_ifa_hold(ifp);
-	mod_timer(&ifp->dad_timer, jiffies + when);
+	mod_delayed_work(addrconf_wq, &ifp->dad_work, when);
 }
 
 static int snmp6_alloc_dev(struct inet6_dev *idev)
@@ -751,7 +754,7 @@ void inet6_ifa_finish_destroy(struct inet6_ifaddr *ifp)
 
 	in6_dev_put(ifp->idev);
 
-	if (del_timer(&ifp->dad_timer))
+	if (cancel_delayed_work(&ifp->dad_work))
 		pr_notice("Timer is still running, when freeing ifa=%p\n", ifp);
 
 	if (ifp->state != INET6_IFADDR_STATE_DEAD) {
@@ -849,8 +852,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr,
 
 	spin_lock_init(&ifa->lock);
 	spin_lock_init(&ifa->state_lock);
-	setup_timer(&ifa->dad_timer, addrconf_dad_timer,
-		    (unsigned long)ifa);
+	INIT_DELAYED_WORK(&ifa->dad_work, addrconf_dad_work);
 	INIT_HLIST_NODE(&ifa->addr_lst);
 	ifa->scope = scope;
 	ifa->prefix_len = pfxlen;
@@ -1021,7 +1023,7 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp)
 
 	write_unlock_bh(&ifp->idev->lock);
 
-	addrconf_del_dad_timer(ifp);
+	addrconf_del_dad_work(ifp);
 
 	ipv6_ifa_notify(RTM_DELADDR, ifp);
 
@@ -1604,7 +1606,7 @@ static void addrconf_dad_stop(struct inet6_ifaddr *ifp, int dad_failed)
 {
 	if (ifp->flags&IFA_F_PERMANENT) {
 		spin_lock_bh(&ifp->lock);
-		addrconf_del_dad_timer(ifp);
+		addrconf_del_dad_work(ifp);
 		ifp->flags |= IFA_F_TENTATIVE;
 		if (dad_failed)
 			ifp->flags |= IFA_F_DADFAILED;
@@ -1680,6 +1682,8 @@ void addrconf_join_solict(struct net_device *dev, const struct in6_addr *addr)
 {
 	struct in6_addr maddr;
 
+	ASSERT_RTNL();
+
 	if (dev->flags&(IFF_LOOPBACK|IFF_NOARP))
 		return;
 
@@ -1691,6 +1695,8 @@ void addrconf_leave_solict(struct inet6_dev *idev, const struct in6_addr *addr)
 {
 	struct in6_addr maddr;
 
+	ASSERT_RTNL();
+
 	if (idev->dev->flags&(IFF_LOOPBACK|IFF_NOARP))
 		return;
 
@@ -1701,6 +1707,9 @@ void addrconf_leave_solict(struct inet6_dev *idev, const struct in6_addr *addr)
 static void addrconf_join_anycast(struct inet6_ifaddr *ifp)
 {
 	struct in6_addr addr;
+
+	ASSERT_RTNL();
+
 	if (ifp->prefix_len >= 127) /* RFC 6164 */
 		return;
 	ipv6_addr_prefix(&addr, &ifp->addr, ifp->prefix_len);
@@ -1712,6 +1721,9 @@ static void addrconf_join_anycast(struct inet6_ifaddr *ifp)
 static void addrconf_leave_anycast(struct inet6_ifaddr *ifp)
 {
 	struct in6_addr addr;
+
+	ASSERT_RTNL();
+
 	if (ifp->prefix_len >= 127) /* RFC 6164 */
 		return;
 	ipv6_addr_prefix(&addr, &ifp->addr, ifp->prefix_len);
@@ -2271,11 +2283,13 @@ ok:
 				return;
 			}
 
-			ifp->flags |= IFA_F_MANAGETEMPADDR;
 			update_lft = 0;
 			create = 1;
+			spin_lock(&ifp->lock);
+			ifp->flags |= IFA_F_MANAGETEMPADDR;
 			ifp->cstamp = jiffies;
 			ifp->tokenized = tokenized;
+			spin_unlock(&ifp->lock);
 			addrconf_dad_start(ifp);
 		}
 
@@ -2326,7 +2340,7 @@ ok:
 					 create, now);
 
 			in6_ifa_put(ifp);
-			addrconf_verify(0);
+			addrconf_verify();
 		}
 	}
 	inet6_prefix_notify(RTM_NEWPREFIX, in6_dev, pinfo);
@@ -2475,7 +2489,7 @@ static int inet6_addr_add(struct net *net, int ifindex,
 			manage_tempaddrs(idev, ifp, valid_lft, prefered_lft,
 					 true, jiffies);
 		in6_ifa_put(ifp);
-		addrconf_verify(0);
+		addrconf_verify_rtnl();
 		return 0;
 	}
 
@@ -3011,7 +3025,7 @@ static int addrconf_ifdown(struct net_device *dev, int how)
 		hlist_for_each_entry_rcu(ifa, h, addr_lst) {
 			if (ifa->idev == idev) {
 				hlist_del_init_rcu(&ifa->addr_lst);
-				addrconf_del_dad_timer(ifa);
+				addrconf_del_dad_work(ifa);
 				goto restart;
 			}
 		}
@@ -3049,7 +3063,7 @@ static int addrconf_ifdown(struct net_device *dev, int how)
 	while (!list_empty(&idev->addr_list)) {
 		ifa = list_first_entry(&idev->addr_list,
 				       struct inet6_ifaddr, if_list);
-		addrconf_del_dad_timer(ifa);
+		addrconf_del_dad_work(ifa);
 
 		list_del(&ifa->if_list);
 
@@ -3148,15 +3162,17 @@ static void addrconf_dad_kick(struct inet6_ifaddr *ifp)
 		rand_num = prandom_u32() % (idev->cnf.rtr_solicit_delay ? : 1);
 
 	ifp->dad_probes = idev->cnf.dad_transmits;
-	addrconf_mod_dad_timer(ifp, rand_num);
+	addrconf_mod_dad_work(ifp, rand_num);
 }
 
-static void addrconf_dad_start(struct inet6_ifaddr *ifp)
+static void addrconf_dad_begin(struct inet6_ifaddr *ifp)
 {
 	struct inet6_dev *idev = ifp->idev;
 	struct net_device *dev = idev->dev;
 
+	rtnl_lock();
 	addrconf_join_solict(dev, &ifp->addr);
+	rtnl_unlock();
 
 	prandom_seed((__force u32) ifp->addr.s6_addr32[3]);
 
@@ -3203,11 +3219,40 @@ out:
 	read_unlock_bh(&idev->lock);
 }
 
-static void addrconf_dad_timer(unsigned long data)
+static void addrconf_dad_start(struct inet6_ifaddr *ifp)
+{
+	bool begin_dad = false;
+
+	spin_lock_bh(&ifp->state_lock);
+	if (ifp->state != INET6_IFADDR_STATE_DEAD) {
+		ifp->state = INET6_IFADDR_STATE_PREDAD;
+		begin_dad = true;
+	}
+	spin_unlock_bh(&ifp->state_lock);
+
+	if (begin_dad)
+		mod_delayed_work(addrconf_wq, &ifp->dad_work, 0);
+}
+
+static void addrconf_dad_work(struct work_struct *w)
 {
-	struct inet6_ifaddr *ifp = (struct inet6_ifaddr *) data;
+	struct inet6_ifaddr *ifp = container_of(to_delayed_work(w),
+						struct inet6_ifaddr,
+						dad_work);
 	struct inet6_dev *idev = ifp->idev;
 	struct in6_addr mcaddr;
+	bool begin_dad;
+
+	spin_lock_bh(&ifp->state_lock);
+	begin_dad = ifp->state == INET6_IFADDR_STATE_PREDAD;
+	if (begin_dad)
+		ifp->state = INET6_IFADDR_STATE_DAD;
+	spin_unlock_bh(&ifp->state_lock);
+
+	if (begin_dad) {
+		addrconf_dad_begin(ifp);
+		return;
+	}
 
 	if (!ifp->dad_probes && addrconf_dad_end(ifp))
 		goto out;
@@ -3240,8 +3285,8 @@ static void addrconf_dad_timer(unsigned long data)
 	}
 
 	ifp->dad_probes--;
-	addrconf_mod_dad_timer(ifp,
-			       NEIGH_VAR(ifp->idev->nd_parms, RETRANS_TIME));
+	addrconf_mod_dad_work(ifp,
+			      NEIGH_VAR(ifp->idev->nd_parms, RETRANS_TIME));
 	spin_unlock(&ifp->lock);
 	write_unlock(&idev->lock);
 
@@ -3276,13 +3321,15 @@ static void addrconf_dad_completed(struct inet6_ifaddr *ifp)
 	struct in6_addr lladdr;
 	bool send_rs, send_mld;
 
-	addrconf_del_dad_timer(ifp);
+	addrconf_del_dad_work(ifp);
 
 	/*
 	 *	Configure the address for reception. Now it is valid.
 	 */
 
+	rtnl_lock();
 	ipv6_ifa_notify(RTM_NEWADDR, ifp);
+	rtnl_unlock();
 
 	/* If added prefix is link local and we are prepared to process
 	   router advertisements, start sending router solicitations.
@@ -3517,18 +3564,20 @@ int ipv6_chk_home_addr(struct net *net, const struct in6_addr *addr)
  *	Periodic address status verification
  */
 
-static void addrconf_verify(unsigned long foo)
+static void addrconf_verify_rtnl(void)
 {
 	unsigned long now, next, next_sec, next_sched;
 	struct inet6_ifaddr *ifp;
 	int i;
 
+	ASSERT_RTNL();
+
 	rcu_read_lock_bh();
 	spin_lock(&addrconf_verify_lock);
 	now = jiffies;
 	next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);
 
-	del_timer(&addr_chk_timer);
+	cancel_delayed_work(&addr_chk_work);
 
 	for (i = 0; i < IN6_ADDR_HSIZE; i++) {
 restart:
@@ -3629,12 +3678,23 @@ restart:
 	ADBG(KERN_DEBUG "now = %lu, schedule = %lu, rounded schedule = %lu => %lu\n",
 	      now, next, next_sec, next_sched);
 
-	addr_chk_timer.expires = next_sched;
-	add_timer(&addr_chk_timer);
+	mod_delayed_work(addrconf_wq, &addr_chk_work, next_sched - jiffies);
 	spin_unlock(&addrconf_verify_lock);
 	rcu_read_unlock_bh();
 }
 
+static void addrconf_verify_work(struct work_struct *w)
+{
+	rtnl_lock();
+	addrconf_verify_rtnl();
+	rtnl_unlock();
+}
+
+static void addrconf_verify(void)
+{
+	mod_delayed_work(addrconf_wq, &addr_chk_work, 0);
+}
+
 static struct in6_addr *extract_addr(struct nlattr *addr, struct nlattr *local,
 				     struct in6_addr **peer_pfx)
 {
@@ -3691,6 +3751,8 @@ static int inet6_addr_modify(struct inet6_ifaddr *ifp, u32 ifa_flags,
 	bool was_managetempaddr;
 	bool had_prefixroute;
 
+	ASSERT_RTNL();
+
 	if (!valid_lft || (prefered_lft > valid_lft))
 		return -EINVAL;
 
@@ -3756,7 +3818,7 @@ static int inet6_addr_modify(struct inet6_ifaddr *ifp, u32 ifa_flags,
 				 !was_managetempaddr, jiffies);
 	}
 
-	addrconf_verify(0);
+	addrconf_verify();
 
 	return 0;
 }
@@ -4386,6 +4448,8 @@ static int inet6_set_iftoken(struct inet6_dev *idev, struct in6_addr *token)
 	bool update_rs = false;
 	struct in6_addr ll_addr;
 
+	ASSERT_RTNL();
+
 	if (token == NULL)
 		return -EINVAL;
 	if (ipv6_addr_any(token))
@@ -4434,7 +4498,7 @@ static int inet6_set_iftoken(struct inet6_dev *idev, struct in6_addr *token)
 	}
 
 	write_unlock_bh(&idev->lock);
-	addrconf_verify(0);
+	addrconf_verify();
 	return 0;
 }
 
@@ -4636,6 +4700,8 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
 {
 	struct net *net = dev_net(ifp->idev->dev);
 
+	ASSERT_RTNL();
+
 	inet6_ifa_notify(event ? : RTM_NEWADDR, ifp);
 
 	switch (event) {
@@ -4682,6 +4748,8 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
 
 static void ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
 {
+	ASSERT_RTNL();
+
 	rcu_read_lock_bh();
 	if (likely(ifp->idev->dead == 0))
 		__ipv6_ifa_notify(event, ifp);
@@ -5244,6 +5312,12 @@ int __init addrconf_init(void)
 	if (err < 0)
 		goto out_addrlabel;
 
+	addrconf_wq = create_workqueue("ipv6_addrconf");
+	if (!addrconf_wq) {
+		err = -ENOMEM;
+		goto out_nowq;
+	}
+
 	/* The addrconf netdev notifier requires that loopback_dev
 	 * has it's ipv6 private information allocated and setup
 	 * before it can bring up and give link-local addresses
@@ -5274,7 +5348,7 @@ int __init addrconf_init(void)
 
 	register_netdevice_notifier(&ipv6_dev_notf);
 
-	addrconf_verify(0);
+	addrconf_verify();
 
 	rtnl_af_register(&inet6_ops);
 
@@ -5302,6 +5376,8 @@ errout:
 	rtnl_af_unregister(&inet6_ops);
 	unregister_netdevice_notifier(&ipv6_dev_notf);
 errlo:
+	destroy_workqueue(addrconf_wq);
+out_nowq:
 	unregister_pernet_subsys(&addrconf_ops);
 out_addrlabel:
 	ipv6_addr_label_cleanup();
@@ -5337,7 +5413,8 @@ void addrconf_cleanup(void)
 	for (i = 0; i < IN6_ADDR_HSIZE; i++)
 		WARN_ON(!hlist_empty(&inet6_addr_lst[i]));
 	spin_unlock_bh(&addrconf_hash_lock);
-
-	del_timer(&addr_chk_timer);
 	rtnl_unlock();
+
+	cancel_delayed_work_sync(&addr_chk_work);
+	destroy_workqueue(addrconf_wq);
 }

^ permalink raw reply related	[flat|nested] 17+ messages in thread

* [PATCH net] ipv6: move DAD and addrconf_verify processing to workqueue
  2014-03-23  2:03 ` Hannes Frederic Sowa
@ 2014-03-25  8:03   ` Hannes Frederic Sowa
  2014-03-25 21:20     ` Stephen Hemminger
  0 siblings, 1 reply; 17+ messages in thread
From: Hannes Frederic Sowa @ 2014-03-25  8:03 UTC (permalink / raw)
  To: Stephen Hemminger, David Miller, netdev

addrconf_join_solict and addrconf_join_anycast may cause actions which
need rtnl locked, especially on first address creation.

To get rtnl lock we need to push the code paths which depend on those
calls up to workqueues, specifically addrconf_verify and the DAD
processing.

Relevant backtrace:
[  541.030090] RTNL: assertion failed at net/core/dev.c (4496)
[  541.031143] CPU: 0 PID: 0 Comm: swapper/0 Tainted: G           O 3.10.33-1-amd64-vyatta #1
[  541.031145] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2007
[  541.031146]  ffffffff8148a9f0 000000000000002f ffffffff813c98c1 ffff88007c4451f8
[  541.031148]  0000000000000000 0000000000000000 ffffffff813d3540 ffff88007fc03d18
[  541.031150]  0000880000000006 ffff88007c445000 ffffffffa0194160 0000000000000000
[  541.031152] Call Trace:
[  541.031153]  <IRQ>  [<ffffffff8148a9f0>] ? dump_stack+0xd/0x17
[  541.031180]  [<ffffffff813c98c1>] ? __dev_set_promiscuity+0x101/0x180
[  541.031183]  [<ffffffff813d3540>] ? __hw_addr_create_ex+0x60/0xc0
[  541.031185]  [<ffffffff813cfe1a>] ? __dev_set_rx_mode+0xaa/0xc0
[  541.031189]  [<ffffffff813d3a81>] ? __dev_mc_add+0x61/0x90
[  541.031198]  [<ffffffffa01dcf9c>] ? igmp6_group_added+0xfc/0x1a0 [ipv6]
[  541.031208]  [<ffffffff8111237b>] ? kmem_cache_alloc+0xcb/0xd0
[  541.031212]  [<ffffffffa01ddcd7>] ? ipv6_dev_mc_inc+0x267/0x300 [ipv6]
[  541.031216]  [<ffffffffa01c2fae>] ? addrconf_join_solict+0x2e/0x40 [ipv6]
[  541.031219]  [<ffffffffa01ba2e9>] ? ipv6_dev_ac_inc+0x159/0x1f0 [ipv6]
[  541.031223]  [<ffffffffa01c0772>] ? addrconf_join_anycast+0x92/0xa0 [ipv6]
[  541.031226]  [<ffffffffa01c311e>] ? __ipv6_ifa_notify+0x11e/0x1e0 [ipv6]
[  541.031229]  [<ffffffffa01c3213>] ? ipv6_ifa_notify+0x33/0x50 [ipv6]
[  541.031233]  [<ffffffffa01c36c8>] ? addrconf_dad_completed+0x28/0x100 [ipv6]
[  541.031241]  [<ffffffff81075c1d>] ? task_cputime+0x2d/0x50
[  541.031244]  [<ffffffffa01c38d6>] ? addrconf_dad_timer+0x136/0x150 [ipv6]
[  541.031247]  [<ffffffffa01c37a0>] ? addrconf_dad_completed+0x100/0x100 [ipv6]
[  541.031255]  [<ffffffff8105313a>] ? call_timer_fn.isra.22+0x2a/0x90
[  541.031258]  [<ffffffffa01c37a0>] ? addrconf_dad_completed+0x100/0x100 [ipv6]

Hunks and backtrace stolen from a patch by Stephen Hemminger.

Reported-by: Stephen Hemminger <stephen@networkplumber.org>
Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
---
 include/net/if_inet6.h |   3 +-
 net/ipv6/addrconf.c    | 146 +++++++++++++++++++++++++++++++++++++------------
 2 files changed, 114 insertions(+), 35 deletions(-)

diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h
index 9650a3f..1cdcf9c 100644
--- a/include/net/if_inet6.h
+++ b/include/net/if_inet6.h
@@ -31,6 +31,7 @@
 #define IF_PREFIX_AUTOCONF	0x02
 
 enum {
+	INET6_IFADDR_STATE_PREDAD,
 	INET6_IFADDR_STATE_DAD,
 	INET6_IFADDR_STATE_POSTDAD,
 	INET6_IFADDR_STATE_UP,
@@ -58,7 +59,7 @@ struct inet6_ifaddr {
 	unsigned long		cstamp;	/* created timestamp */
 	unsigned long		tstamp; /* updated timestamp */
 
-	struct timer_list	dad_timer;
+	struct delayed_work	dad_work;
 
 	struct inet6_dev	*idev;
 	struct rt6_info		*rt;
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 344e972..9d31dd1 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -133,9 +133,12 @@ static int ipv6_count_addresses(struct inet6_dev *idev);
 static struct hlist_head inet6_addr_lst[IN6_ADDR_HSIZE];
 static DEFINE_SPINLOCK(addrconf_hash_lock);
 
-static void addrconf_verify(unsigned long);
+static void addrconf_verify(void);
+static void addrconf_verify_rtnl(void);
+static void addrconf_verify_work(struct work_struct *);
 
-static DEFINE_TIMER(addr_chk_timer, addrconf_verify, 0, 0);
+static struct workqueue_struct *addrconf_wq;
+static DECLARE_DELAYED_WORK(addr_chk_work, addrconf_verify_work);
 static DEFINE_SPINLOCK(addrconf_verify_lock);
 
 static void addrconf_join_anycast(struct inet6_ifaddr *ifp);
@@ -151,7 +154,7 @@ static struct rt6_info *addrconf_get_prefix_route(const struct in6_addr *pfx,
 						  u32 flags, u32 noflags);
 
 static void addrconf_dad_start(struct inet6_ifaddr *ifp);
-static void addrconf_dad_timer(unsigned long data);
+static void addrconf_dad_work(struct work_struct *w);
 static void addrconf_dad_completed(struct inet6_ifaddr *ifp);
 static void addrconf_dad_run(struct inet6_dev *idev);
 static void addrconf_rs_timer(unsigned long data);
@@ -247,9 +250,9 @@ static void addrconf_del_rs_timer(struct inet6_dev *idev)
 		__in6_dev_put(idev);
 }
 
-static void addrconf_del_dad_timer(struct inet6_ifaddr *ifp)
+static void addrconf_del_dad_work(struct inet6_ifaddr *ifp)
 {
-	if (del_timer(&ifp->dad_timer))
+	if (cancel_delayed_work(&ifp->dad_work))
 		__in6_ifa_put(ifp);
 }
 
@@ -261,12 +264,12 @@ static void addrconf_mod_rs_timer(struct inet6_dev *idev,
 	mod_timer(&idev->rs_timer, jiffies + when);
 }
 
-static void addrconf_mod_dad_timer(struct inet6_ifaddr *ifp,
+static void addrconf_mod_dad_work(struct inet6_ifaddr *ifp,
 				   unsigned long when)
 {
-	if (!timer_pending(&ifp->dad_timer))
+	if (!delayed_work_pending(&ifp->dad_work))
 		in6_ifa_hold(ifp);
-	mod_timer(&ifp->dad_timer, jiffies + when);
+	mod_delayed_work(addrconf_wq, &ifp->dad_work, when);
 }
 
 static int snmp6_alloc_dev(struct inet6_dev *idev)
@@ -751,7 +754,7 @@ void inet6_ifa_finish_destroy(struct inet6_ifaddr *ifp)
 
 	in6_dev_put(ifp->idev);
 
-	if (del_timer(&ifp->dad_timer))
+	if (cancel_delayed_work(&ifp->dad_work))
 		pr_notice("Timer is still running, when freeing ifa=%p\n", ifp);
 
 	if (ifp->state != INET6_IFADDR_STATE_DEAD) {
@@ -849,8 +852,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr,
 
 	spin_lock_init(&ifa->lock);
 	spin_lock_init(&ifa->state_lock);
-	setup_timer(&ifa->dad_timer, addrconf_dad_timer,
-		    (unsigned long)ifa);
+	INIT_DELAYED_WORK(&ifa->dad_work, addrconf_dad_work);
 	INIT_HLIST_NODE(&ifa->addr_lst);
 	ifa->scope = scope;
 	ifa->prefix_len = pfxlen;
@@ -1021,7 +1023,7 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp)
 
 	write_unlock_bh(&ifp->idev->lock);
 
-	addrconf_del_dad_timer(ifp);
+	addrconf_del_dad_work(ifp);
 
 	ipv6_ifa_notify(RTM_DELADDR, ifp);
 
@@ -1604,7 +1606,7 @@ static void addrconf_dad_stop(struct inet6_ifaddr *ifp, int dad_failed)
 {
 	if (ifp->flags&IFA_F_PERMANENT) {
 		spin_lock_bh(&ifp->lock);
-		addrconf_del_dad_timer(ifp);
+		addrconf_del_dad_work(ifp);
 		ifp->flags |= IFA_F_TENTATIVE;
 		if (dad_failed)
 			ifp->flags |= IFA_F_DADFAILED;
@@ -1680,6 +1682,8 @@ void addrconf_join_solict(struct net_device *dev, const struct in6_addr *addr)
 {
 	struct in6_addr maddr;
 
+	ASSERT_RTNL();
+
 	if (dev->flags&(IFF_LOOPBACK|IFF_NOARP))
 		return;
 
@@ -1691,6 +1695,8 @@ void addrconf_leave_solict(struct inet6_dev *idev, const struct in6_addr *addr)
 {
 	struct in6_addr maddr;
 
+	ASSERT_RTNL();
+
 	if (idev->dev->flags&(IFF_LOOPBACK|IFF_NOARP))
 		return;
 
@@ -1701,6 +1707,9 @@ void addrconf_leave_solict(struct inet6_dev *idev, const struct in6_addr *addr)
 static void addrconf_join_anycast(struct inet6_ifaddr *ifp)
 {
 	struct in6_addr addr;
+
+	ASSERT_RTNL();
+
 	if (ifp->prefix_len >= 127) /* RFC 6164 */
 		return;
 	ipv6_addr_prefix(&addr, &ifp->addr, ifp->prefix_len);
@@ -1712,6 +1721,9 @@ static void addrconf_join_anycast(struct inet6_ifaddr *ifp)
 static void addrconf_leave_anycast(struct inet6_ifaddr *ifp)
 {
 	struct in6_addr addr;
+
+	ASSERT_RTNL();
+
 	if (ifp->prefix_len >= 127) /* RFC 6164 */
 		return;
 	ipv6_addr_prefix(&addr, &ifp->addr, ifp->prefix_len);
@@ -2271,11 +2283,13 @@ ok:
 				return;
 			}
 
-			ifp->flags |= IFA_F_MANAGETEMPADDR;
 			update_lft = 0;
 			create = 1;
+			spin_lock_bh(&ifp->lock);
+			ifp->flags |= IFA_F_MANAGETEMPADDR;
 			ifp->cstamp = jiffies;
 			ifp->tokenized = tokenized;
+			spin_unlock_bh(&ifp->lock);
 			addrconf_dad_start(ifp);
 		}
 
@@ -2326,7 +2340,7 @@ ok:
 					 create, now);
 
 			in6_ifa_put(ifp);
-			addrconf_verify(0);
+			addrconf_verify();
 		}
 	}
 	inet6_prefix_notify(RTM_NEWPREFIX, in6_dev, pinfo);
@@ -2475,7 +2489,7 @@ static int inet6_addr_add(struct net *net, int ifindex,
 			manage_tempaddrs(idev, ifp, valid_lft, prefered_lft,
 					 true, jiffies);
 		in6_ifa_put(ifp);
-		addrconf_verify(0);
+		addrconf_verify_rtnl();
 		return 0;
 	}
 
@@ -3011,7 +3025,7 @@ static int addrconf_ifdown(struct net_device *dev, int how)
 		hlist_for_each_entry_rcu(ifa, h, addr_lst) {
 			if (ifa->idev == idev) {
 				hlist_del_init_rcu(&ifa->addr_lst);
-				addrconf_del_dad_timer(ifa);
+				addrconf_del_dad_work(ifa);
 				goto restart;
 			}
 		}
@@ -3049,7 +3063,7 @@ static int addrconf_ifdown(struct net_device *dev, int how)
 	while (!list_empty(&idev->addr_list)) {
 		ifa = list_first_entry(&idev->addr_list,
 				       struct inet6_ifaddr, if_list);
-		addrconf_del_dad_timer(ifa);
+		addrconf_del_dad_work(ifa);
 
 		list_del(&ifa->if_list);
 
@@ -3148,15 +3162,17 @@ static void addrconf_dad_kick(struct inet6_ifaddr *ifp)
 		rand_num = prandom_u32() % (idev->cnf.rtr_solicit_delay ? : 1);
 
 	ifp->dad_probes = idev->cnf.dad_transmits;
-	addrconf_mod_dad_timer(ifp, rand_num);
+	addrconf_mod_dad_work(ifp, rand_num);
 }
 
-static void addrconf_dad_start(struct inet6_ifaddr *ifp)
+static void addrconf_dad_begin(struct inet6_ifaddr *ifp)
 {
 	struct inet6_dev *idev = ifp->idev;
 	struct net_device *dev = idev->dev;
 
+	rtnl_lock();
 	addrconf_join_solict(dev, &ifp->addr);
+	rtnl_unlock();
 
 	prandom_seed((__force u32) ifp->addr.s6_addr32[3]);
 
@@ -3203,11 +3219,42 @@ out:
 	read_unlock_bh(&idev->lock);
 }
 
-static void addrconf_dad_timer(unsigned long data)
+static void addrconf_dad_start(struct inet6_ifaddr *ifp)
+{
+	bool begin_dad = false;
+
+	spin_lock_bh(&ifp->state_lock);
+	if (ifp->state != INET6_IFADDR_STATE_DEAD) {
+		ifp->state = INET6_IFADDR_STATE_PREDAD;
+		begin_dad = true;
+	}
+	spin_unlock_bh(&ifp->state_lock);
+
+	if (begin_dad) {
+		in6_ifa_hold(ifp);
+		mod_delayed_work(addrconf_wq, &ifp->dad_work, 0);
+	}
+}
+
+static void addrconf_dad_work(struct work_struct *w)
 {
-	struct inet6_ifaddr *ifp = (struct inet6_ifaddr *) data;
+	struct inet6_ifaddr *ifp = container_of(to_delayed_work(w),
+						struct inet6_ifaddr,
+						dad_work);
 	struct inet6_dev *idev = ifp->idev;
 	struct in6_addr mcaddr;
+	bool begin_dad;
+
+	spin_lock_bh(&ifp->state_lock);
+	begin_dad = ifp->state == INET6_IFADDR_STATE_PREDAD;
+	if (begin_dad)
+		ifp->state = INET6_IFADDR_STATE_DAD;
+	spin_unlock_bh(&ifp->state_lock);
+
+	if (begin_dad) {
+		addrconf_dad_begin(ifp);
+		goto out;
+	}
 
 	if (!ifp->dad_probes && addrconf_dad_end(ifp))
 		goto out;
@@ -3240,8 +3287,8 @@ static void addrconf_dad_timer(unsigned long data)
 	}
 
 	ifp->dad_probes--;
-	addrconf_mod_dad_timer(ifp,
-			       NEIGH_VAR(ifp->idev->nd_parms, RETRANS_TIME));
+	addrconf_mod_dad_work(ifp,
+			      NEIGH_VAR(ifp->idev->nd_parms, RETRANS_TIME));
 	spin_unlock(&ifp->lock);
 	write_unlock(&idev->lock);
 
@@ -3276,13 +3323,15 @@ static void addrconf_dad_completed(struct inet6_ifaddr *ifp)
 	struct in6_addr lladdr;
 	bool send_rs, send_mld;
 
-	addrconf_del_dad_timer(ifp);
+	addrconf_del_dad_work(ifp);
 
 	/*
 	 *	Configure the address for reception. Now it is valid.
 	 */
 
+	rtnl_lock();
 	ipv6_ifa_notify(RTM_NEWADDR, ifp);
+	rtnl_unlock();
 
 	/* If added prefix is link local and we are prepared to process
 	   router advertisements, start sending router solicitations.
@@ -3517,18 +3566,20 @@ int ipv6_chk_home_addr(struct net *net, const struct in6_addr *addr)
  *	Periodic address status verification
  */
 
-static void addrconf_verify(unsigned long foo)
+static void addrconf_verify_rtnl(void)
 {
 	unsigned long now, next, next_sec, next_sched;
 	struct inet6_ifaddr *ifp;
 	int i;
 
+	ASSERT_RTNL();
+
 	rcu_read_lock_bh();
 	spin_lock(&addrconf_verify_lock);
 	now = jiffies;
 	next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);
 
-	del_timer(&addr_chk_timer);
+	cancel_delayed_work(&addr_chk_work);
 
 	for (i = 0; i < IN6_ADDR_HSIZE; i++) {
 restart:
@@ -3629,12 +3680,23 @@ restart:
 	ADBG(KERN_DEBUG "now = %lu, schedule = %lu, rounded schedule = %lu => %lu\n",
 	      now, next, next_sec, next_sched);
 
-	addr_chk_timer.expires = next_sched;
-	add_timer(&addr_chk_timer);
+	mod_delayed_work(addrconf_wq, &addr_chk_work, next_sched - now);
 	spin_unlock(&addrconf_verify_lock);
 	rcu_read_unlock_bh();
 }
 
+static void addrconf_verify_work(struct work_struct *w)
+{
+	rtnl_lock();
+	addrconf_verify_rtnl();
+	rtnl_unlock();
+}
+
+static void addrconf_verify(void)
+{
+	mod_delayed_work(addrconf_wq, &addr_chk_work, 0);
+}
+
 static struct in6_addr *extract_addr(struct nlattr *addr, struct nlattr *local,
 				     struct in6_addr **peer_pfx)
 {
@@ -3691,6 +3753,8 @@ static int inet6_addr_modify(struct inet6_ifaddr *ifp, u32 ifa_flags,
 	bool was_managetempaddr;
 	bool had_prefixroute;
 
+	ASSERT_RTNL();
+
 	if (!valid_lft || (prefered_lft > valid_lft))
 		return -EINVAL;
 
@@ -3756,7 +3820,7 @@ static int inet6_addr_modify(struct inet6_ifaddr *ifp, u32 ifa_flags,
 				 !was_managetempaddr, jiffies);
 	}
 
-	addrconf_verify(0);
+	addrconf_verify();
 
 	return 0;
 }
@@ -4386,6 +4450,8 @@ static int inet6_set_iftoken(struct inet6_dev *idev, struct in6_addr *token)
 	bool update_rs = false;
 	struct in6_addr ll_addr;
 
+	ASSERT_RTNL();
+
 	if (token == NULL)
 		return -EINVAL;
 	if (ipv6_addr_any(token))
@@ -4434,7 +4500,7 @@ static int inet6_set_iftoken(struct inet6_dev *idev, struct in6_addr *token)
 	}
 
 	write_unlock_bh(&idev->lock);
-	addrconf_verify(0);
+	addrconf_verify();
 	return 0;
 }
 
@@ -4636,6 +4702,9 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
 {
 	struct net *net = dev_net(ifp->idev->dev);
 
+	if (!event)
+		ASSERT_RTNL();
+
 	inet6_ifa_notify(event ? : RTM_NEWADDR, ifp);
 
 	switch (event) {
@@ -5244,6 +5313,12 @@ int __init addrconf_init(void)
 	if (err < 0)
 		goto out_addrlabel;
 
+	addrconf_wq = create_workqueue("ipv6_addrconf");
+	if (!addrconf_wq) {
+		err = -ENOMEM;
+		goto out_nowq;
+	}
+
 	/* The addrconf netdev notifier requires that loopback_dev
 	 * has it's ipv6 private information allocated and setup
 	 * before it can bring up and give link-local addresses
@@ -5274,7 +5349,7 @@ int __init addrconf_init(void)
 
 	register_netdevice_notifier(&ipv6_dev_notf);
 
-	addrconf_verify(0);
+	addrconf_verify();
 
 	rtnl_af_register(&inet6_ops);
 
@@ -5302,6 +5377,8 @@ errout:
 	rtnl_af_unregister(&inet6_ops);
 	unregister_netdevice_notifier(&ipv6_dev_notf);
 errlo:
+	destroy_workqueue(addrconf_wq);
+out_nowq:
 	unregister_pernet_subsys(&addrconf_ops);
 out_addrlabel:
 	ipv6_addr_label_cleanup();
@@ -5337,7 +5414,8 @@ void addrconf_cleanup(void)
 	for (i = 0; i < IN6_ADDR_HSIZE; i++)
 		WARN_ON(!hlist_empty(&inet6_addr_lst[i]));
 	spin_unlock_bh(&addrconf_hash_lock);
-
-	del_timer(&addr_chk_timer);
+	cancel_delayed_work(&addr_chk_work);
 	rtnl_unlock();
+
+	destroy_workqueue(addrconf_wq);
 }
-- 
1.8.5.3

^ permalink raw reply related	[flat|nested] 17+ messages in thread

* Re: [PATCH net] ipv6: move DAD and addrconf_verify processing to workqueue
  2014-03-25  8:03   ` [PATCH net] ipv6: move DAD and addrconf_verify processing to workqueue Hannes Frederic Sowa
@ 2014-03-25 21:20     ` Stephen Hemminger
  2014-03-26  0:00       ` Hannes Frederic Sowa
  0 siblings, 1 reply; 17+ messages in thread
From: Stephen Hemminger @ 2014-03-25 21:20 UTC (permalink / raw)
  To: Hannes Frederic Sowa; +Cc: David Miller, netdev

On Tue, 25 Mar 2014 09:03:01 +0100
Hannes Frederic Sowa <hannes@stressinduktion.org> wrote:

>  
> -static void addrconf_verify(unsigned long foo)
> +static void addrconf_verify_rtnl(void)
>  {
>  	unsigned long now, next, next_sec, next_sched;
>  	struct inet6_ifaddr *ifp;
>  	int i;
>  
> +	ASSERT_RTNL();
> +
>  	rcu_read_lock_bh();
>  	spin_lock(&addrconf_verify_lock);

Since RTNL is now held in addrconf_verify(), is addrconf_verify_lock is not needed.

Since RTNL is held, rcu is not needed either.

Also, I would skip the wrapper (addrconf_verify_rtnl) and just
put it all in addrconf_verify_work

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH net] ipv6: move DAD and addrconf_verify processing to workqueue
  2014-03-25 21:20     ` Stephen Hemminger
@ 2014-03-26  0:00       ` Hannes Frederic Sowa
  2014-03-26  6:17         ` [PATCH v2 " Hannes Frederic Sowa
  0 siblings, 1 reply; 17+ messages in thread
From: Hannes Frederic Sowa @ 2014-03-26  0:00 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: David Miller, netdev

On Tue, Mar 25, 2014 at 02:20:04PM -0700, Stephen Hemminger wrote:
> On Tue, 25 Mar 2014 09:03:01 +0100
> Hannes Frederic Sowa <hannes@stressinduktion.org> wrote:
> 
> >  
> > -static void addrconf_verify(unsigned long foo)
> > +static void addrconf_verify_rtnl(void)
> >  {
> >  	unsigned long now, next, next_sec, next_sched;
> >  	struct inet6_ifaddr *ifp;
> >  	int i;
> >  
> > +	ASSERT_RTNL();
> > +
> >  	rcu_read_lock_bh();
> >  	spin_lock(&addrconf_verify_lock);
> 
> Since RTNL is now held in addrconf_verify(), is addrconf_verify_lock is not needed.

Yes, I overlooked that, thanks!

> Since RTNL is held, rcu is not needed either.

It might be possible if we add other rtnl lock invocations to the dad
process, but because rcu is much more lightweight, I would leave it as is:

ipv6_del_addr removes elements from inet6_addr_lst without holding RTNL but
only synchronizes on addrconf_hash_lock. In this constellation RCU is still
needed.

> Also, I would skip the wrapper (addrconf_verify_rtnl) and just
> put it all in addrconf_verify_work

I preferred to have the addrconf_verify invocations inline and not deferred if
we e.g. do cleanups from user space to not have any races because of not yet
deleted addresses. I would also prefer to leave this as is, ok?

Will send v2, thanks!

^ permalink raw reply	[flat|nested] 17+ messages in thread

* [PATCH v2 net] ipv6: move DAD and addrconf_verify processing to workqueue
  2014-03-26  0:00       ` Hannes Frederic Sowa
@ 2014-03-26  6:17         ` Hannes Frederic Sowa
  2014-03-27 17:15           ` David Miller
  0 siblings, 1 reply; 17+ messages in thread
From: Hannes Frederic Sowa @ 2014-03-26  6:17 UTC (permalink / raw)
  To: Stephen Hemminger, David Miller, netdev

addrconf_join_solict and addrconf_join_anycast may cause actions which
need rtnl locked, especially on first address creation.

A new DAD state is introduced which deferres processing of the initial
DAD processing into a workqueue.

To get rtnl lock we need to push the code paths which depend on those
calls up to workqueues, specifically addrconf_verify and the DAD
processing.

(v2)
addrconf_dad_failure needs to be queued up to the workqueue, too. This
patch introduces a new DAD state and stop the DAD processing in the
workqueue (this is because of the possible ipv6_del_addr processing
which removes the solicited multicast address from the device).

addrconf_verify_lock is removed, too. After the transition it is not
needed any more.

As we are not processing in bottom half anymore we need to be a bit more
careful about disabling bottom half out when we lock spin_locks which are also
used in bh.

Relevant backtrace:
[  541.030090] RTNL: assertion failed at net/core/dev.c (4496)
[  541.031143] CPU: 0 PID: 0 Comm: swapper/0 Tainted: G           O 3.10.33-1-amd64-vyatta #1
[  541.031145] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2007
[  541.031146]  ffffffff8148a9f0 000000000000002f ffffffff813c98c1 ffff88007c4451f8
[  541.031148]  0000000000000000 0000000000000000 ffffffff813d3540 ffff88007fc03d18
[  541.031150]  0000880000000006 ffff88007c445000 ffffffffa0194160 0000000000000000
[  541.031152] Call Trace:
[  541.031153]  <IRQ>  [<ffffffff8148a9f0>] ? dump_stack+0xd/0x17
[  541.031180]  [<ffffffff813c98c1>] ? __dev_set_promiscuity+0x101/0x180
[  541.031183]  [<ffffffff813d3540>] ? __hw_addr_create_ex+0x60/0xc0
[  541.031185]  [<ffffffff813cfe1a>] ? __dev_set_rx_mode+0xaa/0xc0
[  541.031189]  [<ffffffff813d3a81>] ? __dev_mc_add+0x61/0x90
[  541.031198]  [<ffffffffa01dcf9c>] ? igmp6_group_added+0xfc/0x1a0 [ipv6]
[  541.031208]  [<ffffffff8111237b>] ? kmem_cache_alloc+0xcb/0xd0
[  541.031212]  [<ffffffffa01ddcd7>] ? ipv6_dev_mc_inc+0x267/0x300 [ipv6]
[  541.031216]  [<ffffffffa01c2fae>] ? addrconf_join_solict+0x2e/0x40 [ipv6]
[  541.031219]  [<ffffffffa01ba2e9>] ? ipv6_dev_ac_inc+0x159/0x1f0 [ipv6]
[  541.031223]  [<ffffffffa01c0772>] ? addrconf_join_anycast+0x92/0xa0 [ipv6]
[  541.031226]  [<ffffffffa01c311e>] ? __ipv6_ifa_notify+0x11e/0x1e0 [ipv6]
[  541.031229]  [<ffffffffa01c3213>] ? ipv6_ifa_notify+0x33/0x50 [ipv6]
[  541.031233]  [<ffffffffa01c36c8>] ? addrconf_dad_completed+0x28/0x100 [ipv6]
[  541.031241]  [<ffffffff81075c1d>] ? task_cputime+0x2d/0x50
[  541.031244]  [<ffffffffa01c38d6>] ? addrconf_dad_timer+0x136/0x150 [ipv6]
[  541.031247]  [<ffffffffa01c37a0>] ? addrconf_dad_completed+0x100/0x100 [ipv6]
[  541.031255]  [<ffffffff8105313a>] ? call_timer_fn.isra.22+0x2a/0x90
[  541.031258]  [<ffffffffa01c37a0>] ? addrconf_dad_completed+0x100/0x100 [ipv6]

Hunks and backtrace stolen from a patch by Stephen Hemminger.

Reported-by: Stephen Hemminger <stephen@networkplumber.org>
Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
---
I tried to address the changelog in the commit description.

Thanks to Stephen for the review, hope that happens again, because the
changes are quite a lot. ;)

I didn't remove the rcu_read_lock_bh from addrconf_verify because the
address list ist synchronized via addrconf_hash_lock and not RTNL.
ipv6_add_addr adds addresses there without taking RTNL and RCU lock is
not that expensive and code is not that very sensible for speed.

Thanks!

 include/net/if_inet6.h |   4 +-
 net/ipv6/addrconf.c    | 190 ++++++++++++++++++++++++++++++++++++-------------
 2 files changed, 143 insertions(+), 51 deletions(-)

diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h
index 9650a3f..b4956a5 100644
--- a/include/net/if_inet6.h
+++ b/include/net/if_inet6.h
@@ -31,8 +31,10 @@
 #define IF_PREFIX_AUTOCONF	0x02
 
 enum {
+	INET6_IFADDR_STATE_PREDAD,
 	INET6_IFADDR_STATE_DAD,
 	INET6_IFADDR_STATE_POSTDAD,
+	INET6_IFADDR_STATE_ERRDAD,
 	INET6_IFADDR_STATE_UP,
 	INET6_IFADDR_STATE_DEAD,
 };
@@ -58,7 +60,7 @@ struct inet6_ifaddr {
 	unsigned long		cstamp;	/* created timestamp */
 	unsigned long		tstamp; /* updated timestamp */
 
-	struct timer_list	dad_timer;
+	struct delayed_work	dad_work;
 
 	struct inet6_dev	*idev;
 	struct rt6_info		*rt;
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 344e972..35d9486 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -133,10 +133,12 @@ static int ipv6_count_addresses(struct inet6_dev *idev);
 static struct hlist_head inet6_addr_lst[IN6_ADDR_HSIZE];
 static DEFINE_SPINLOCK(addrconf_hash_lock);
 
-static void addrconf_verify(unsigned long);
+static void addrconf_verify(void);
+static void addrconf_verify_rtnl(void);
+static void addrconf_verify_work(struct work_struct *);
 
-static DEFINE_TIMER(addr_chk_timer, addrconf_verify, 0, 0);
-static DEFINE_SPINLOCK(addrconf_verify_lock);
+static struct workqueue_struct *addrconf_wq;
+static DECLARE_DELAYED_WORK(addr_chk_work, addrconf_verify_work);
 
 static void addrconf_join_anycast(struct inet6_ifaddr *ifp);
 static void addrconf_leave_anycast(struct inet6_ifaddr *ifp);
@@ -151,7 +153,7 @@ static struct rt6_info *addrconf_get_prefix_route(const struct in6_addr *pfx,
 						  u32 flags, u32 noflags);
 
 static void addrconf_dad_start(struct inet6_ifaddr *ifp);
-static void addrconf_dad_timer(unsigned long data);
+static void addrconf_dad_work(struct work_struct *w);
 static void addrconf_dad_completed(struct inet6_ifaddr *ifp);
 static void addrconf_dad_run(struct inet6_dev *idev);
 static void addrconf_rs_timer(unsigned long data);
@@ -247,9 +249,9 @@ static void addrconf_del_rs_timer(struct inet6_dev *idev)
 		__in6_dev_put(idev);
 }
 
-static void addrconf_del_dad_timer(struct inet6_ifaddr *ifp)
+static void addrconf_del_dad_work(struct inet6_ifaddr *ifp)
 {
-	if (del_timer(&ifp->dad_timer))
+	if (cancel_delayed_work(&ifp->dad_work))
 		__in6_ifa_put(ifp);
 }
 
@@ -261,12 +263,12 @@ static void addrconf_mod_rs_timer(struct inet6_dev *idev,
 	mod_timer(&idev->rs_timer, jiffies + when);
 }
 
-static void addrconf_mod_dad_timer(struct inet6_ifaddr *ifp,
-				   unsigned long when)
+static void addrconf_mod_dad_work(struct inet6_ifaddr *ifp,
+				   unsigned long delay)
 {
-	if (!timer_pending(&ifp->dad_timer))
+	if (!delayed_work_pending(&ifp->dad_work))
 		in6_ifa_hold(ifp);
-	mod_timer(&ifp->dad_timer, jiffies + when);
+	mod_delayed_work(addrconf_wq, &ifp->dad_work, delay);
 }
 
 static int snmp6_alloc_dev(struct inet6_dev *idev)
@@ -751,7 +753,7 @@ void inet6_ifa_finish_destroy(struct inet6_ifaddr *ifp)
 
 	in6_dev_put(ifp->idev);
 
-	if (del_timer(&ifp->dad_timer))
+	if (cancel_delayed_work(&ifp->dad_work))
 		pr_notice("Timer is still running, when freeing ifa=%p\n", ifp);
 
 	if (ifp->state != INET6_IFADDR_STATE_DEAD) {
@@ -849,8 +851,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr,
 
 	spin_lock_init(&ifa->lock);
 	spin_lock_init(&ifa->state_lock);
-	setup_timer(&ifa->dad_timer, addrconf_dad_timer,
-		    (unsigned long)ifa);
+	INIT_DELAYED_WORK(&ifa->dad_work, addrconf_dad_work);
 	INIT_HLIST_NODE(&ifa->addr_lst);
 	ifa->scope = scope;
 	ifa->prefix_len = pfxlen;
@@ -990,6 +991,8 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp)
 	enum cleanup_prefix_rt_t action = CLEANUP_PREFIX_RT_NOP;
 	unsigned long expires;
 
+	ASSERT_RTNL();
+
 	spin_lock_bh(&ifp->state_lock);
 	state = ifp->state;
 	ifp->state = INET6_IFADDR_STATE_DEAD;
@@ -1021,7 +1024,7 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp)
 
 	write_unlock_bh(&ifp->idev->lock);
 
-	addrconf_del_dad_timer(ifp);
+	addrconf_del_dad_work(ifp);
 
 	ipv6_ifa_notify(RTM_DELADDR, ifp);
 
@@ -1604,7 +1607,7 @@ static void addrconf_dad_stop(struct inet6_ifaddr *ifp, int dad_failed)
 {
 	if (ifp->flags&IFA_F_PERMANENT) {
 		spin_lock_bh(&ifp->lock);
-		addrconf_del_dad_timer(ifp);
+		addrconf_del_dad_work(ifp);
 		ifp->flags |= IFA_F_TENTATIVE;
 		if (dad_failed)
 			ifp->flags |= IFA_F_DADFAILED;
@@ -1625,20 +1628,21 @@ static void addrconf_dad_stop(struct inet6_ifaddr *ifp, int dad_failed)
 			spin_unlock_bh(&ifp->lock);
 		}
 		ipv6_del_addr(ifp);
-	} else
+	} else {
 		ipv6_del_addr(ifp);
+	}
 }
 
 static int addrconf_dad_end(struct inet6_ifaddr *ifp)
 {
 	int err = -ENOENT;
 
-	spin_lock(&ifp->state_lock);
+	spin_lock_bh(&ifp->state_lock);
 	if (ifp->state == INET6_IFADDR_STATE_DAD) {
 		ifp->state = INET6_IFADDR_STATE_POSTDAD;
 		err = 0;
 	}
-	spin_unlock(&ifp->state_lock);
+	spin_unlock_bh(&ifp->state_lock);
 
 	return err;
 }
@@ -1671,7 +1675,12 @@ void addrconf_dad_failure(struct inet6_ifaddr *ifp)
 		}
 	}
 
-	addrconf_dad_stop(ifp, 1);
+	spin_lock_bh(&ifp->state_lock);
+	/* transition from _POSTDAD to _ERRDAD */
+	ifp->state = INET6_IFADDR_STATE_ERRDAD;
+	spin_unlock_bh(&ifp->state_lock);
+
+	addrconf_mod_dad_work(ifp, 0);
 }
 
 /* Join to solicited addr multicast group. */
@@ -1680,6 +1689,8 @@ void addrconf_join_solict(struct net_device *dev, const struct in6_addr *addr)
 {
 	struct in6_addr maddr;
 
+	ASSERT_RTNL();
+
 	if (dev->flags&(IFF_LOOPBACK|IFF_NOARP))
 		return;
 
@@ -1691,6 +1702,8 @@ void addrconf_leave_solict(struct inet6_dev *idev, const struct in6_addr *addr)
 {
 	struct in6_addr maddr;
 
+	ASSERT_RTNL();
+
 	if (idev->dev->flags&(IFF_LOOPBACK|IFF_NOARP))
 		return;
 
@@ -1701,6 +1714,9 @@ void addrconf_leave_solict(struct inet6_dev *idev, const struct in6_addr *addr)
 static void addrconf_join_anycast(struct inet6_ifaddr *ifp)
 {
 	struct in6_addr addr;
+
+	ASSERT_RTNL();
+
 	if (ifp->prefix_len >= 127) /* RFC 6164 */
 		return;
 	ipv6_addr_prefix(&addr, &ifp->addr, ifp->prefix_len);
@@ -1712,6 +1728,9 @@ static void addrconf_join_anycast(struct inet6_ifaddr *ifp)
 static void addrconf_leave_anycast(struct inet6_ifaddr *ifp)
 {
 	struct in6_addr addr;
+
+	ASSERT_RTNL();
+
 	if (ifp->prefix_len >= 127) /* RFC 6164 */
 		return;
 	ipv6_addr_prefix(&addr, &ifp->addr, ifp->prefix_len);
@@ -2271,11 +2290,13 @@ ok:
 				return;
 			}
 
-			ifp->flags |= IFA_F_MANAGETEMPADDR;
 			update_lft = 0;
 			create = 1;
+			spin_lock_bh(&ifp->lock);
+			ifp->flags |= IFA_F_MANAGETEMPADDR;
 			ifp->cstamp = jiffies;
 			ifp->tokenized = tokenized;
+			spin_unlock_bh(&ifp->lock);
 			addrconf_dad_start(ifp);
 		}
 
@@ -2326,7 +2347,7 @@ ok:
 					 create, now);
 
 			in6_ifa_put(ifp);
-			addrconf_verify(0);
+			addrconf_verify();
 		}
 	}
 	inet6_prefix_notify(RTM_NEWPREFIX, in6_dev, pinfo);
@@ -2475,7 +2496,7 @@ static int inet6_addr_add(struct net *net, int ifindex,
 			manage_tempaddrs(idev, ifp, valid_lft, prefered_lft,
 					 true, jiffies);
 		in6_ifa_put(ifp);
-		addrconf_verify(0);
+		addrconf_verify_rtnl();
 		return 0;
 	}
 
@@ -3011,7 +3032,7 @@ static int addrconf_ifdown(struct net_device *dev, int how)
 		hlist_for_each_entry_rcu(ifa, h, addr_lst) {
 			if (ifa->idev == idev) {
 				hlist_del_init_rcu(&ifa->addr_lst);
-				addrconf_del_dad_timer(ifa);
+				addrconf_del_dad_work(ifa);
 				goto restart;
 			}
 		}
@@ -3049,7 +3070,7 @@ static int addrconf_ifdown(struct net_device *dev, int how)
 	while (!list_empty(&idev->addr_list)) {
 		ifa = list_first_entry(&idev->addr_list,
 				       struct inet6_ifaddr, if_list);
-		addrconf_del_dad_timer(ifa);
+		addrconf_del_dad_work(ifa);
 
 		list_del(&ifa->if_list);
 
@@ -3148,10 +3169,10 @@ static void addrconf_dad_kick(struct inet6_ifaddr *ifp)
 		rand_num = prandom_u32() % (idev->cnf.rtr_solicit_delay ? : 1);
 
 	ifp->dad_probes = idev->cnf.dad_transmits;
-	addrconf_mod_dad_timer(ifp, rand_num);
+	addrconf_mod_dad_work(ifp, rand_num);
 }
 
-static void addrconf_dad_start(struct inet6_ifaddr *ifp)
+static void addrconf_dad_begin(struct inet6_ifaddr *ifp)
 {
 	struct inet6_dev *idev = ifp->idev;
 	struct net_device *dev = idev->dev;
@@ -3203,25 +3224,68 @@ out:
 	read_unlock_bh(&idev->lock);
 }
 
-static void addrconf_dad_timer(unsigned long data)
+static void addrconf_dad_start(struct inet6_ifaddr *ifp)
 {
-	struct inet6_ifaddr *ifp = (struct inet6_ifaddr *) data;
+	bool begin_dad = false;
+
+	spin_lock_bh(&ifp->state_lock);
+	if (ifp->state != INET6_IFADDR_STATE_DEAD) {
+		ifp->state = INET6_IFADDR_STATE_PREDAD;
+		begin_dad = true;
+	}
+	spin_unlock_bh(&ifp->state_lock);
+
+	if (begin_dad)
+		addrconf_mod_dad_work(ifp, 0);
+}
+
+static void addrconf_dad_work(struct work_struct *w)
+{
+	struct inet6_ifaddr *ifp = container_of(to_delayed_work(w),
+						struct inet6_ifaddr,
+						dad_work);
 	struct inet6_dev *idev = ifp->idev;
 	struct in6_addr mcaddr;
 
+	enum {
+		DAD_PROCESS,
+		DAD_BEGIN,
+		DAD_ABORT,
+	} action = DAD_PROCESS;
+
+	rtnl_lock();
+
+	spin_lock_bh(&ifp->state_lock);
+	if (ifp->state == INET6_IFADDR_STATE_PREDAD) {
+		action = DAD_BEGIN;
+		ifp->state = INET6_IFADDR_STATE_DAD;
+	} else if (ifp->state == INET6_IFADDR_STATE_ERRDAD) {
+		action = DAD_ABORT;
+		ifp->state = INET6_IFADDR_STATE_POSTDAD;
+	}
+	spin_unlock_bh(&ifp->state_lock);
+
+	if (action == DAD_BEGIN) {
+		addrconf_dad_begin(ifp);
+		goto out;
+	} else if (action == DAD_ABORT) {
+		addrconf_dad_stop(ifp, 1);
+		goto out;
+	}
+
 	if (!ifp->dad_probes && addrconf_dad_end(ifp))
 		goto out;
 
-	write_lock(&idev->lock);
+	write_lock_bh(&idev->lock);
 	if (idev->dead || !(idev->if_flags & IF_READY)) {
-		write_unlock(&idev->lock);
+		write_unlock_bh(&idev->lock);
 		goto out;
 	}
 
 	spin_lock(&ifp->lock);
 	if (ifp->state == INET6_IFADDR_STATE_DEAD) {
 		spin_unlock(&ifp->lock);
-		write_unlock(&idev->lock);
+		write_unlock_bh(&idev->lock);
 		goto out;
 	}
 
@@ -3232,7 +3296,7 @@ static void addrconf_dad_timer(unsigned long data)
 
 		ifp->flags &= ~(IFA_F_TENTATIVE|IFA_F_OPTIMISTIC|IFA_F_DADFAILED);
 		spin_unlock(&ifp->lock);
-		write_unlock(&idev->lock);
+		write_unlock_bh(&idev->lock);
 
 		addrconf_dad_completed(ifp);
 
@@ -3240,16 +3304,17 @@ static void addrconf_dad_timer(unsigned long data)
 	}
 
 	ifp->dad_probes--;
-	addrconf_mod_dad_timer(ifp,
-			       NEIGH_VAR(ifp->idev->nd_parms, RETRANS_TIME));
+	addrconf_mod_dad_work(ifp,
+			      NEIGH_VAR(ifp->idev->nd_parms, RETRANS_TIME));
 	spin_unlock(&ifp->lock);
-	write_unlock(&idev->lock);
+	write_unlock_bh(&idev->lock);
 
 	/* send a neighbour solicitation for our addr */
 	addrconf_addr_solict_mult(&ifp->addr, &mcaddr);
 	ndisc_send_ns(ifp->idev->dev, NULL, &ifp->addr, &mcaddr, &in6addr_any);
 out:
 	in6_ifa_put(ifp);
+	rtnl_unlock();
 }
 
 /* ifp->idev must be at least read locked */
@@ -3276,7 +3341,7 @@ static void addrconf_dad_completed(struct inet6_ifaddr *ifp)
 	struct in6_addr lladdr;
 	bool send_rs, send_mld;
 
-	addrconf_del_dad_timer(ifp);
+	addrconf_del_dad_work(ifp);
 
 	/*
 	 *	Configure the address for reception. Now it is valid.
@@ -3517,23 +3582,23 @@ int ipv6_chk_home_addr(struct net *net, const struct in6_addr *addr)
  *	Periodic address status verification
  */
 
-static void addrconf_verify(unsigned long foo)
+static void addrconf_verify_rtnl(void)
 {
 	unsigned long now, next, next_sec, next_sched;
 	struct inet6_ifaddr *ifp;
 	int i;
 
+	ASSERT_RTNL();
+
 	rcu_read_lock_bh();
-	spin_lock(&addrconf_verify_lock);
 	now = jiffies;
 	next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);
 
-	del_timer(&addr_chk_timer);
+	cancel_delayed_work(&addr_chk_work);
 
 	for (i = 0; i < IN6_ADDR_HSIZE; i++) {
 restart:
-		hlist_for_each_entry_rcu_bh(ifp,
-					 &inet6_addr_lst[i], addr_lst) {
+		hlist_for_each_entry_rcu_bh(ifp, &inet6_addr_lst[i], addr_lst) {
 			unsigned long age;
 
 			/* When setting preferred_lft to a value not zero or
@@ -3628,13 +3693,22 @@ restart:
 
 	ADBG(KERN_DEBUG "now = %lu, schedule = %lu, rounded schedule = %lu => %lu\n",
 	      now, next, next_sec, next_sched);
-
-	addr_chk_timer.expires = next_sched;
-	add_timer(&addr_chk_timer);
-	spin_unlock(&addrconf_verify_lock);
+	mod_delayed_work(addrconf_wq, &addr_chk_work, next_sched - now);
 	rcu_read_unlock_bh();
 }
 
+static void addrconf_verify_work(struct work_struct *w)
+{
+	rtnl_lock();
+	addrconf_verify_rtnl();
+	rtnl_unlock();
+}
+
+static void addrconf_verify(void)
+{
+	mod_delayed_work(addrconf_wq, &addr_chk_work, 0);
+}
+
 static struct in6_addr *extract_addr(struct nlattr *addr, struct nlattr *local,
 				     struct in6_addr **peer_pfx)
 {
@@ -3691,6 +3765,8 @@ static int inet6_addr_modify(struct inet6_ifaddr *ifp, u32 ifa_flags,
 	bool was_managetempaddr;
 	bool had_prefixroute;
 
+	ASSERT_RTNL();
+
 	if (!valid_lft || (prefered_lft > valid_lft))
 		return -EINVAL;
 
@@ -3756,7 +3832,7 @@ static int inet6_addr_modify(struct inet6_ifaddr *ifp, u32 ifa_flags,
 				 !was_managetempaddr, jiffies);
 	}
 
-	addrconf_verify(0);
+	addrconf_verify_rtnl();
 
 	return 0;
 }
@@ -4386,6 +4462,8 @@ static int inet6_set_iftoken(struct inet6_dev *idev, struct in6_addr *token)
 	bool update_rs = false;
 	struct in6_addr ll_addr;
 
+	ASSERT_RTNL();
+
 	if (token == NULL)
 		return -EINVAL;
 	if (ipv6_addr_any(token))
@@ -4434,7 +4512,7 @@ static int inet6_set_iftoken(struct inet6_dev *idev, struct in6_addr *token)
 	}
 
 	write_unlock_bh(&idev->lock);
-	addrconf_verify(0);
+	addrconf_verify_rtnl();
 	return 0;
 }
 
@@ -4636,6 +4714,9 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
 {
 	struct net *net = dev_net(ifp->idev->dev);
 
+	if (event)
+		ASSERT_RTNL();
+
 	inet6_ifa_notify(event ? : RTM_NEWADDR, ifp);
 
 	switch (event) {
@@ -5244,6 +5325,12 @@ int __init addrconf_init(void)
 	if (err < 0)
 		goto out_addrlabel;
 
+	addrconf_wq = create_workqueue("ipv6_addrconf");
+	if (!addrconf_wq) {
+		err = -ENOMEM;
+		goto out_nowq;
+	}
+
 	/* The addrconf netdev notifier requires that loopback_dev
 	 * has it's ipv6 private information allocated and setup
 	 * before it can bring up and give link-local addresses
@@ -5274,7 +5361,7 @@ int __init addrconf_init(void)
 
 	register_netdevice_notifier(&ipv6_dev_notf);
 
-	addrconf_verify(0);
+	addrconf_verify();
 
 	rtnl_af_register(&inet6_ops);
 
@@ -5302,6 +5389,8 @@ errout:
 	rtnl_af_unregister(&inet6_ops);
 	unregister_netdevice_notifier(&ipv6_dev_notf);
 errlo:
+	destroy_workqueue(addrconf_wq);
+out_nowq:
 	unregister_pernet_subsys(&addrconf_ops);
 out_addrlabel:
 	ipv6_addr_label_cleanup();
@@ -5337,7 +5426,8 @@ void addrconf_cleanup(void)
 	for (i = 0; i < IN6_ADDR_HSIZE; i++)
 		WARN_ON(!hlist_empty(&inet6_addr_lst[i]));
 	spin_unlock_bh(&addrconf_hash_lock);
-
-	del_timer(&addr_chk_timer);
+	cancel_delayed_work(&addr_chk_work);
 	rtnl_unlock();
+
+	destroy_workqueue(addrconf_wq);
 }
-- 
1.8.5.3

^ permalink raw reply related	[flat|nested] 17+ messages in thread

* Re: [PATCH v2 net] ipv6: move DAD and addrconf_verify processing to workqueue
  2014-03-26  6:17         ` [PATCH v2 " Hannes Frederic Sowa
@ 2014-03-27 17:15           ` David Miller
  2014-03-27 17:28             ` [PATCH v3 " Hannes Frederic Sowa
  0 siblings, 1 reply; 17+ messages in thread
From: David Miller @ 2014-03-27 17:15 UTC (permalink / raw)
  To: hannes; +Cc: stephen, netdev

From: Hannes Frederic Sowa <hannes@stressinduktion.org>
Date: Wed, 26 Mar 2014 07:17:45 +0100

> @@ -751,7 +753,7 @@ void inet6_ifa_finish_destroy(struct inet6_ifaddr *ifp)
>  
>  	in6_dev_put(ifp->idev);
>  
> -	if (del_timer(&ifp->dad_timer))
> +	if (cancel_delayed_work(&ifp->dad_work))
>  		pr_notice("Timer is still running, when freeing ifa=%p\n", ifp);
>  
>  	if (ifp->state != INET6_IFADDR_STATE_DEAD) {

This pr_notice() needs to be adjusted, we are no longer using a timer.

^ permalink raw reply	[flat|nested] 17+ messages in thread

* [PATCH v3 net] ipv6: move DAD and addrconf_verify processing to workqueue
  2014-03-27 17:15           ` David Miller
@ 2014-03-27 17:28             ` Hannes Frederic Sowa
  2014-03-28 20:57               ` David Miller
  0 siblings, 1 reply; 17+ messages in thread
From: Hannes Frederic Sowa @ 2014-03-27 17:28 UTC (permalink / raw)
  To: David Miller; +Cc: stephen, netdev

addrconf_join_solict and addrconf_join_anycast may cause actions which
need rtnl locked, especially on first address creation.

A new DAD state is introduced which deferres processing of the initial
DAD processing into a workqueue.

To get rtnl lock we need to push the code paths which depend on those
calls up to workqueues, specifically addrconf_verify and the DAD
processing.

(v2)
addrconf_dad_failure needs to be queued up to the workqueue, too. This
patch introduces a new DAD state and stop the DAD processing in the
workqueue (this is because of the possible ipv6_del_addr processing
which removes the solicited multicast address from the device).

addrconf_verify_lock is removed, too. After the transition it is not
needed any more.

As we are not processing in bottom half anymore we need to be a bit more
careful about disabling bottom half out when we lock spin_locks which are also
used in bh.

Relevant backtrace:
[  541.030090] RTNL: assertion failed at net/core/dev.c (4496)
[  541.031143] CPU: 0 PID: 0 Comm: swapper/0 Tainted: G           O 3.10.33-1-amd64-vyatta #1
[  541.031145] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2007
[  541.031146]  ffffffff8148a9f0 000000000000002f ffffffff813c98c1 ffff88007c4451f8
[  541.031148]  0000000000000000 0000000000000000 ffffffff813d3540 ffff88007fc03d18
[  541.031150]  0000880000000006 ffff88007c445000 ffffffffa0194160 0000000000000000
[  541.031152] Call Trace:
[  541.031153]  <IRQ>  [<ffffffff8148a9f0>] ? dump_stack+0xd/0x17
[  541.031180]  [<ffffffff813c98c1>] ? __dev_set_promiscuity+0x101/0x180
[  541.031183]  [<ffffffff813d3540>] ? __hw_addr_create_ex+0x60/0xc0
[  541.031185]  [<ffffffff813cfe1a>] ? __dev_set_rx_mode+0xaa/0xc0
[  541.031189]  [<ffffffff813d3a81>] ? __dev_mc_add+0x61/0x90
[  541.031198]  [<ffffffffa01dcf9c>] ? igmp6_group_added+0xfc/0x1a0 [ipv6]
[  541.031208]  [<ffffffff8111237b>] ? kmem_cache_alloc+0xcb/0xd0
[  541.031212]  [<ffffffffa01ddcd7>] ? ipv6_dev_mc_inc+0x267/0x300 [ipv6]
[  541.031216]  [<ffffffffa01c2fae>] ? addrconf_join_solict+0x2e/0x40 [ipv6]
[  541.031219]  [<ffffffffa01ba2e9>] ? ipv6_dev_ac_inc+0x159/0x1f0 [ipv6]
[  541.031223]  [<ffffffffa01c0772>] ? addrconf_join_anycast+0x92/0xa0 [ipv6]
[  541.031226]  [<ffffffffa01c311e>] ? __ipv6_ifa_notify+0x11e/0x1e0 [ipv6]
[  541.031229]  [<ffffffffa01c3213>] ? ipv6_ifa_notify+0x33/0x50 [ipv6]
[  541.031233]  [<ffffffffa01c36c8>] ? addrconf_dad_completed+0x28/0x100 [ipv6]
[  541.031241]  [<ffffffff81075c1d>] ? task_cputime+0x2d/0x50
[  541.031244]  [<ffffffffa01c38d6>] ? addrconf_dad_timer+0x136/0x150 [ipv6]
[  541.031247]  [<ffffffffa01c37a0>] ? addrconf_dad_completed+0x100/0x100 [ipv6]
[  541.031255]  [<ffffffff8105313a>] ? call_timer_fn.isra.22+0x2a/0x90
[  541.031258]  [<ffffffffa01c37a0>] ? addrconf_dad_completed+0x100/0x100 [ipv6]

Hunks and backtrace stolen from a patch by Stephen Hemminger.

Reported-by: Stephen Hemminger <stephen@networkplumber.org>
Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
---
v3 (only minor change):
* reword pr_notice in inet6_ifa_finish_destroy (thanks, David!)

 include/net/if_inet6.h |   4 +-
 net/ipv6/addrconf.c    | 193 ++++++++++++++++++++++++++++++++++++-------------
 2 files changed, 145 insertions(+), 52 deletions(-)

diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h
index 9650a3f..b4956a5 100644
--- a/include/net/if_inet6.h
+++ b/include/net/if_inet6.h
@@ -31,8 +31,10 @@
 #define IF_PREFIX_AUTOCONF	0x02
 
 enum {
+	INET6_IFADDR_STATE_PREDAD,
 	INET6_IFADDR_STATE_DAD,
 	INET6_IFADDR_STATE_POSTDAD,
+	INET6_IFADDR_STATE_ERRDAD,
 	INET6_IFADDR_STATE_UP,
 	INET6_IFADDR_STATE_DEAD,
 };
@@ -58,7 +60,7 @@ struct inet6_ifaddr {
 	unsigned long		cstamp;	/* created timestamp */
 	unsigned long		tstamp; /* updated timestamp */
 
-	struct timer_list	dad_timer;
+	struct delayed_work	dad_work;
 
 	struct inet6_dev	*idev;
 	struct rt6_info		*rt;
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 344e972..6c7fa08 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -133,10 +133,12 @@ static int ipv6_count_addresses(struct inet6_dev *idev);
 static struct hlist_head inet6_addr_lst[IN6_ADDR_HSIZE];
 static DEFINE_SPINLOCK(addrconf_hash_lock);
 
-static void addrconf_verify(unsigned long);
+static void addrconf_verify(void);
+static void addrconf_verify_rtnl(void);
+static void addrconf_verify_work(struct work_struct *);
 
-static DEFINE_TIMER(addr_chk_timer, addrconf_verify, 0, 0);
-static DEFINE_SPINLOCK(addrconf_verify_lock);
+static struct workqueue_struct *addrconf_wq;
+static DECLARE_DELAYED_WORK(addr_chk_work, addrconf_verify_work);
 
 static void addrconf_join_anycast(struct inet6_ifaddr *ifp);
 static void addrconf_leave_anycast(struct inet6_ifaddr *ifp);
@@ -151,7 +153,7 @@ static struct rt6_info *addrconf_get_prefix_route(const struct in6_addr *pfx,
 						  u32 flags, u32 noflags);
 
 static void addrconf_dad_start(struct inet6_ifaddr *ifp);
-static void addrconf_dad_timer(unsigned long data);
+static void addrconf_dad_work(struct work_struct *w);
 static void addrconf_dad_completed(struct inet6_ifaddr *ifp);
 static void addrconf_dad_run(struct inet6_dev *idev);
 static void addrconf_rs_timer(unsigned long data);
@@ -247,9 +249,9 @@ static void addrconf_del_rs_timer(struct inet6_dev *idev)
 		__in6_dev_put(idev);
 }
 
-static void addrconf_del_dad_timer(struct inet6_ifaddr *ifp)
+static void addrconf_del_dad_work(struct inet6_ifaddr *ifp)
 {
-	if (del_timer(&ifp->dad_timer))
+	if (cancel_delayed_work(&ifp->dad_work))
 		__in6_ifa_put(ifp);
 }
 
@@ -261,12 +263,12 @@ static void addrconf_mod_rs_timer(struct inet6_dev *idev,
 	mod_timer(&idev->rs_timer, jiffies + when);
 }
 
-static void addrconf_mod_dad_timer(struct inet6_ifaddr *ifp,
-				   unsigned long when)
+static void addrconf_mod_dad_work(struct inet6_ifaddr *ifp,
+				   unsigned long delay)
 {
-	if (!timer_pending(&ifp->dad_timer))
+	if (!delayed_work_pending(&ifp->dad_work))
 		in6_ifa_hold(ifp);
-	mod_timer(&ifp->dad_timer, jiffies + when);
+	mod_delayed_work(addrconf_wq, &ifp->dad_work, delay);
 }
 
 static int snmp6_alloc_dev(struct inet6_dev *idev)
@@ -751,8 +753,9 @@ void inet6_ifa_finish_destroy(struct inet6_ifaddr *ifp)
 
 	in6_dev_put(ifp->idev);
 
-	if (del_timer(&ifp->dad_timer))
-		pr_notice("Timer is still running, when freeing ifa=%p\n", ifp);
+	if (cancel_delayed_work(&ifp->dad_work))
+		pr_notice("delayed DAD work was pending while freeing ifa=%p\n",
+			  ifp);
 
 	if (ifp->state != INET6_IFADDR_STATE_DEAD) {
 		pr_warn("Freeing alive inet6 address %p\n", ifp);
@@ -849,8 +852,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr,
 
 	spin_lock_init(&ifa->lock);
 	spin_lock_init(&ifa->state_lock);
-	setup_timer(&ifa->dad_timer, addrconf_dad_timer,
-		    (unsigned long)ifa);
+	INIT_DELAYED_WORK(&ifa->dad_work, addrconf_dad_work);
 	INIT_HLIST_NODE(&ifa->addr_lst);
 	ifa->scope = scope;
 	ifa->prefix_len = pfxlen;
@@ -990,6 +992,8 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp)
 	enum cleanup_prefix_rt_t action = CLEANUP_PREFIX_RT_NOP;
 	unsigned long expires;
 
+	ASSERT_RTNL();
+
 	spin_lock_bh(&ifp->state_lock);
 	state = ifp->state;
 	ifp->state = INET6_IFADDR_STATE_DEAD;
@@ -1021,7 +1025,7 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp)
 
 	write_unlock_bh(&ifp->idev->lock);
 
-	addrconf_del_dad_timer(ifp);
+	addrconf_del_dad_work(ifp);
 
 	ipv6_ifa_notify(RTM_DELADDR, ifp);
 
@@ -1604,7 +1608,7 @@ static void addrconf_dad_stop(struct inet6_ifaddr *ifp, int dad_failed)
 {
 	if (ifp->flags&IFA_F_PERMANENT) {
 		spin_lock_bh(&ifp->lock);
-		addrconf_del_dad_timer(ifp);
+		addrconf_del_dad_work(ifp);
 		ifp->flags |= IFA_F_TENTATIVE;
 		if (dad_failed)
 			ifp->flags |= IFA_F_DADFAILED;
@@ -1625,20 +1629,21 @@ static void addrconf_dad_stop(struct inet6_ifaddr *ifp, int dad_failed)
 			spin_unlock_bh(&ifp->lock);
 		}
 		ipv6_del_addr(ifp);
-	} else
+	} else {
 		ipv6_del_addr(ifp);
+	}
 }
 
 static int addrconf_dad_end(struct inet6_ifaddr *ifp)
 {
 	int err = -ENOENT;
 
-	spin_lock(&ifp->state_lock);
+	spin_lock_bh(&ifp->state_lock);
 	if (ifp->state == INET6_IFADDR_STATE_DAD) {
 		ifp->state = INET6_IFADDR_STATE_POSTDAD;
 		err = 0;
 	}
-	spin_unlock(&ifp->state_lock);
+	spin_unlock_bh(&ifp->state_lock);
 
 	return err;
 }
@@ -1671,7 +1676,12 @@ void addrconf_dad_failure(struct inet6_ifaddr *ifp)
 		}
 	}
 
-	addrconf_dad_stop(ifp, 1);
+	spin_lock_bh(&ifp->state_lock);
+	/* transition from _POSTDAD to _ERRDAD */
+	ifp->state = INET6_IFADDR_STATE_ERRDAD;
+	spin_unlock_bh(&ifp->state_lock);
+
+	addrconf_mod_dad_work(ifp, 0);
 }
 
 /* Join to solicited addr multicast group. */
@@ -1680,6 +1690,8 @@ void addrconf_join_solict(struct net_device *dev, const struct in6_addr *addr)
 {
 	struct in6_addr maddr;
 
+	ASSERT_RTNL();
+
 	if (dev->flags&(IFF_LOOPBACK|IFF_NOARP))
 		return;
 
@@ -1691,6 +1703,8 @@ void addrconf_leave_solict(struct inet6_dev *idev, const struct in6_addr *addr)
 {
 	struct in6_addr maddr;
 
+	ASSERT_RTNL();
+
 	if (idev->dev->flags&(IFF_LOOPBACK|IFF_NOARP))
 		return;
 
@@ -1701,6 +1715,9 @@ void addrconf_leave_solict(struct inet6_dev *idev, const struct in6_addr *addr)
 static void addrconf_join_anycast(struct inet6_ifaddr *ifp)
 {
 	struct in6_addr addr;
+
+	ASSERT_RTNL();
+
 	if (ifp->prefix_len >= 127) /* RFC 6164 */
 		return;
 	ipv6_addr_prefix(&addr, &ifp->addr, ifp->prefix_len);
@@ -1712,6 +1729,9 @@ static void addrconf_join_anycast(struct inet6_ifaddr *ifp)
 static void addrconf_leave_anycast(struct inet6_ifaddr *ifp)
 {
 	struct in6_addr addr;
+
+	ASSERT_RTNL();
+
 	if (ifp->prefix_len >= 127) /* RFC 6164 */
 		return;
 	ipv6_addr_prefix(&addr, &ifp->addr, ifp->prefix_len);
@@ -2271,11 +2291,13 @@ ok:
 				return;
 			}
 
-			ifp->flags |= IFA_F_MANAGETEMPADDR;
 			update_lft = 0;
 			create = 1;
+			spin_lock_bh(&ifp->lock);
+			ifp->flags |= IFA_F_MANAGETEMPADDR;
 			ifp->cstamp = jiffies;
 			ifp->tokenized = tokenized;
+			spin_unlock_bh(&ifp->lock);
 			addrconf_dad_start(ifp);
 		}
 
@@ -2326,7 +2348,7 @@ ok:
 					 create, now);
 
 			in6_ifa_put(ifp);
-			addrconf_verify(0);
+			addrconf_verify();
 		}
 	}
 	inet6_prefix_notify(RTM_NEWPREFIX, in6_dev, pinfo);
@@ -2475,7 +2497,7 @@ static int inet6_addr_add(struct net *net, int ifindex,
 			manage_tempaddrs(idev, ifp, valid_lft, prefered_lft,
 					 true, jiffies);
 		in6_ifa_put(ifp);
-		addrconf_verify(0);
+		addrconf_verify_rtnl();
 		return 0;
 	}
 
@@ -3011,7 +3033,7 @@ static int addrconf_ifdown(struct net_device *dev, int how)
 		hlist_for_each_entry_rcu(ifa, h, addr_lst) {
 			if (ifa->idev == idev) {
 				hlist_del_init_rcu(&ifa->addr_lst);
-				addrconf_del_dad_timer(ifa);
+				addrconf_del_dad_work(ifa);
 				goto restart;
 			}
 		}
@@ -3049,7 +3071,7 @@ static int addrconf_ifdown(struct net_device *dev, int how)
 	while (!list_empty(&idev->addr_list)) {
 		ifa = list_first_entry(&idev->addr_list,
 				       struct inet6_ifaddr, if_list);
-		addrconf_del_dad_timer(ifa);
+		addrconf_del_dad_work(ifa);
 
 		list_del(&ifa->if_list);
 
@@ -3148,10 +3170,10 @@ static void addrconf_dad_kick(struct inet6_ifaddr *ifp)
 		rand_num = prandom_u32() % (idev->cnf.rtr_solicit_delay ? : 1);
 
 	ifp->dad_probes = idev->cnf.dad_transmits;
-	addrconf_mod_dad_timer(ifp, rand_num);
+	addrconf_mod_dad_work(ifp, rand_num);
 }
 
-static void addrconf_dad_start(struct inet6_ifaddr *ifp)
+static void addrconf_dad_begin(struct inet6_ifaddr *ifp)
 {
 	struct inet6_dev *idev = ifp->idev;
 	struct net_device *dev = idev->dev;
@@ -3203,25 +3225,68 @@ out:
 	read_unlock_bh(&idev->lock);
 }
 
-static void addrconf_dad_timer(unsigned long data)
+static void addrconf_dad_start(struct inet6_ifaddr *ifp)
 {
-	struct inet6_ifaddr *ifp = (struct inet6_ifaddr *) data;
+	bool begin_dad = false;
+
+	spin_lock_bh(&ifp->state_lock);
+	if (ifp->state != INET6_IFADDR_STATE_DEAD) {
+		ifp->state = INET6_IFADDR_STATE_PREDAD;
+		begin_dad = true;
+	}
+	spin_unlock_bh(&ifp->state_lock);
+
+	if (begin_dad)
+		addrconf_mod_dad_work(ifp, 0);
+}
+
+static void addrconf_dad_work(struct work_struct *w)
+{
+	struct inet6_ifaddr *ifp = container_of(to_delayed_work(w),
+						struct inet6_ifaddr,
+						dad_work);
 	struct inet6_dev *idev = ifp->idev;
 	struct in6_addr mcaddr;
 
+	enum {
+		DAD_PROCESS,
+		DAD_BEGIN,
+		DAD_ABORT,
+	} action = DAD_PROCESS;
+
+	rtnl_lock();
+
+	spin_lock_bh(&ifp->state_lock);
+	if (ifp->state == INET6_IFADDR_STATE_PREDAD) {
+		action = DAD_BEGIN;
+		ifp->state = INET6_IFADDR_STATE_DAD;
+	} else if (ifp->state == INET6_IFADDR_STATE_ERRDAD) {
+		action = DAD_ABORT;
+		ifp->state = INET6_IFADDR_STATE_POSTDAD;
+	}
+	spin_unlock_bh(&ifp->state_lock);
+
+	if (action == DAD_BEGIN) {
+		addrconf_dad_begin(ifp);
+		goto out;
+	} else if (action == DAD_ABORT) {
+		addrconf_dad_stop(ifp, 1);
+		goto out;
+	}
+
 	if (!ifp->dad_probes && addrconf_dad_end(ifp))
 		goto out;
 
-	write_lock(&idev->lock);
+	write_lock_bh(&idev->lock);
 	if (idev->dead || !(idev->if_flags & IF_READY)) {
-		write_unlock(&idev->lock);
+		write_unlock_bh(&idev->lock);
 		goto out;
 	}
 
 	spin_lock(&ifp->lock);
 	if (ifp->state == INET6_IFADDR_STATE_DEAD) {
 		spin_unlock(&ifp->lock);
-		write_unlock(&idev->lock);
+		write_unlock_bh(&idev->lock);
 		goto out;
 	}
 
@@ -3232,7 +3297,7 @@ static void addrconf_dad_timer(unsigned long data)
 
 		ifp->flags &= ~(IFA_F_TENTATIVE|IFA_F_OPTIMISTIC|IFA_F_DADFAILED);
 		spin_unlock(&ifp->lock);
-		write_unlock(&idev->lock);
+		write_unlock_bh(&idev->lock);
 
 		addrconf_dad_completed(ifp);
 
@@ -3240,16 +3305,17 @@ static void addrconf_dad_timer(unsigned long data)
 	}
 
 	ifp->dad_probes--;
-	addrconf_mod_dad_timer(ifp,
-			       NEIGH_VAR(ifp->idev->nd_parms, RETRANS_TIME));
+	addrconf_mod_dad_work(ifp,
+			      NEIGH_VAR(ifp->idev->nd_parms, RETRANS_TIME));
 	spin_unlock(&ifp->lock);
-	write_unlock(&idev->lock);
+	write_unlock_bh(&idev->lock);
 
 	/* send a neighbour solicitation for our addr */
 	addrconf_addr_solict_mult(&ifp->addr, &mcaddr);
 	ndisc_send_ns(ifp->idev->dev, NULL, &ifp->addr, &mcaddr, &in6addr_any);
 out:
 	in6_ifa_put(ifp);
+	rtnl_unlock();
 }
 
 /* ifp->idev must be at least read locked */
@@ -3276,7 +3342,7 @@ static void addrconf_dad_completed(struct inet6_ifaddr *ifp)
 	struct in6_addr lladdr;
 	bool send_rs, send_mld;
 
-	addrconf_del_dad_timer(ifp);
+	addrconf_del_dad_work(ifp);
 
 	/*
 	 *	Configure the address for reception. Now it is valid.
@@ -3517,23 +3583,23 @@ int ipv6_chk_home_addr(struct net *net, const struct in6_addr *addr)
  *	Periodic address status verification
  */
 
-static void addrconf_verify(unsigned long foo)
+static void addrconf_verify_rtnl(void)
 {
 	unsigned long now, next, next_sec, next_sched;
 	struct inet6_ifaddr *ifp;
 	int i;
 
+	ASSERT_RTNL();
+
 	rcu_read_lock_bh();
-	spin_lock(&addrconf_verify_lock);
 	now = jiffies;
 	next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);
 
-	del_timer(&addr_chk_timer);
+	cancel_delayed_work(&addr_chk_work);
 
 	for (i = 0; i < IN6_ADDR_HSIZE; i++) {
 restart:
-		hlist_for_each_entry_rcu_bh(ifp,
-					 &inet6_addr_lst[i], addr_lst) {
+		hlist_for_each_entry_rcu_bh(ifp, &inet6_addr_lst[i], addr_lst) {
 			unsigned long age;
 
 			/* When setting preferred_lft to a value not zero or
@@ -3628,13 +3694,22 @@ restart:
 
 	ADBG(KERN_DEBUG "now = %lu, schedule = %lu, rounded schedule = %lu => %lu\n",
 	      now, next, next_sec, next_sched);
-
-	addr_chk_timer.expires = next_sched;
-	add_timer(&addr_chk_timer);
-	spin_unlock(&addrconf_verify_lock);
+	mod_delayed_work(addrconf_wq, &addr_chk_work, next_sched - now);
 	rcu_read_unlock_bh();
 }
 
+static void addrconf_verify_work(struct work_struct *w)
+{
+	rtnl_lock();
+	addrconf_verify_rtnl();
+	rtnl_unlock();
+}
+
+static void addrconf_verify(void)
+{
+	mod_delayed_work(addrconf_wq, &addr_chk_work, 0);
+}
+
 static struct in6_addr *extract_addr(struct nlattr *addr, struct nlattr *local,
 				     struct in6_addr **peer_pfx)
 {
@@ -3691,6 +3766,8 @@ static int inet6_addr_modify(struct inet6_ifaddr *ifp, u32 ifa_flags,
 	bool was_managetempaddr;
 	bool had_prefixroute;
 
+	ASSERT_RTNL();
+
 	if (!valid_lft || (prefered_lft > valid_lft))
 		return -EINVAL;
 
@@ -3756,7 +3833,7 @@ static int inet6_addr_modify(struct inet6_ifaddr *ifp, u32 ifa_flags,
 				 !was_managetempaddr, jiffies);
 	}
 
-	addrconf_verify(0);
+	addrconf_verify_rtnl();
 
 	return 0;
 }
@@ -4386,6 +4463,8 @@ static int inet6_set_iftoken(struct inet6_dev *idev, struct in6_addr *token)
 	bool update_rs = false;
 	struct in6_addr ll_addr;
 
+	ASSERT_RTNL();
+
 	if (token == NULL)
 		return -EINVAL;
 	if (ipv6_addr_any(token))
@@ -4434,7 +4513,7 @@ static int inet6_set_iftoken(struct inet6_dev *idev, struct in6_addr *token)
 	}
 
 	write_unlock_bh(&idev->lock);
-	addrconf_verify(0);
+	addrconf_verify_rtnl();
 	return 0;
 }
 
@@ -4636,6 +4715,9 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
 {
 	struct net *net = dev_net(ifp->idev->dev);
 
+	if (event)
+		ASSERT_RTNL();
+
 	inet6_ifa_notify(event ? : RTM_NEWADDR, ifp);
 
 	switch (event) {
@@ -5244,6 +5326,12 @@ int __init addrconf_init(void)
 	if (err < 0)
 		goto out_addrlabel;
 
+	addrconf_wq = create_workqueue("ipv6_addrconf");
+	if (!addrconf_wq) {
+		err = -ENOMEM;
+		goto out_nowq;
+	}
+
 	/* The addrconf netdev notifier requires that loopback_dev
 	 * has it's ipv6 private information allocated and setup
 	 * before it can bring up and give link-local addresses
@@ -5274,7 +5362,7 @@ int __init addrconf_init(void)
 
 	register_netdevice_notifier(&ipv6_dev_notf);
 
-	addrconf_verify(0);
+	addrconf_verify();
 
 	rtnl_af_register(&inet6_ops);
 
@@ -5302,6 +5390,8 @@ errout:
 	rtnl_af_unregister(&inet6_ops);
 	unregister_netdevice_notifier(&ipv6_dev_notf);
 errlo:
+	destroy_workqueue(addrconf_wq);
+out_nowq:
 	unregister_pernet_subsys(&addrconf_ops);
 out_addrlabel:
 	ipv6_addr_label_cleanup();
@@ -5337,7 +5427,8 @@ void addrconf_cleanup(void)
 	for (i = 0; i < IN6_ADDR_HSIZE; i++)
 		WARN_ON(!hlist_empty(&inet6_addr_lst[i]));
 	spin_unlock_bh(&addrconf_hash_lock);
-
-	del_timer(&addr_chk_timer);
+	cancel_delayed_work(&addr_chk_work);
 	rtnl_unlock();
+
+	destroy_workqueue(addrconf_wq);
 }
-- 
1.8.5.3

^ permalink raw reply related	[flat|nested] 17+ messages in thread

* Re: [PATCH v3 net] ipv6: move DAD and addrconf_verify processing to workqueue
  2014-03-27 17:28             ` [PATCH v3 " Hannes Frederic Sowa
@ 2014-03-28 20:57               ` David Miller
  0 siblings, 0 replies; 17+ messages in thread
From: David Miller @ 2014-03-28 20:57 UTC (permalink / raw)
  To: hannes; +Cc: stephen, netdev

From: Hannes Frederic Sowa <hannes@stressinduktion.org>
Date: Thu, 27 Mar 2014 18:28:07 +0100

> addrconf_join_solict and addrconf_join_anycast may cause actions which
> need rtnl locked, especially on first address creation.
> 
> A new DAD state is introduced which deferres processing of the initial
> DAD processing into a workqueue.
> 
> To get rtnl lock we need to push the code paths which depend on those
> calls up to workqueues, specifically addrconf_verify and the DAD
> processing.
> 
> (v2)
> addrconf_dad_failure needs to be queued up to the workqueue, too. This
> patch introduces a new DAD state and stop the DAD processing in the
> workqueue (this is because of the possible ipv6_del_addr processing
> which removes the solicited multicast address from the device).
> 
> addrconf_verify_lock is removed, too. After the transition it is not
> needed any more.
> 
> As we are not processing in bottom half anymore we need to be a bit more
> careful about disabling bottom half out when we lock spin_locks which are also
> used in bh.
> 
> Relevant backtrace:
 ...
> Hunks and backtrace stolen from a patch by Stephen Hemminger.
> 
> Reported-by: Stephen Hemminger <stephen@networkplumber.org>
> Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
> Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
> ---
> v3 (only minor change):
> * reword pr_notice in inet6_ifa_finish_destroy (thanks, David!)

Applied, thanks a lot Hannes.

^ permalink raw reply	[flat|nested] 17+ messages in thread

end of thread, other threads:[~2014-03-28 20:57 UTC | newest]

Thread overview: 17+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-03-17 23:18 [PATCH net] ipv6: fix RTNL assert fail in DAD Stephen Hemminger
2014-03-18  0:29 ` Hannes Frederic Sowa
2014-03-19  0:54   ` Stephen Hemminger
2014-03-19  4:17     ` David Miller
2014-03-19  6:58       ` Stephen Hemminger
2014-03-19 17:53         ` David Miller
2014-03-19 22:44           ` Hannes Frederic Sowa
2014-03-20  3:52             ` David Miller
2014-03-20  6:38               ` Hannes Frederic Sowa
2014-03-23  2:03 ` Hannes Frederic Sowa
2014-03-25  8:03   ` [PATCH net] ipv6: move DAD and addrconf_verify processing to workqueue Hannes Frederic Sowa
2014-03-25 21:20     ` Stephen Hemminger
2014-03-26  0:00       ` Hannes Frederic Sowa
2014-03-26  6:17         ` [PATCH v2 " Hannes Frederic Sowa
2014-03-27 17:15           ` David Miller
2014-03-27 17:28             ` [PATCH v3 " Hannes Frederic Sowa
2014-03-28 20:57               ` David Miller

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.