All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH net] vxlan: synchronously and race-free destruction of vxlan sockets
@ 2016-04-07 14:57 Hannes Frederic Sowa
  2016-04-08 18:51 ` Marcelo Ricardo Leitner
  0 siblings, 1 reply; 4+ messages in thread
From: Hannes Frederic Sowa @ 2016-04-07 14:57 UTC (permalink / raw)
  To: netdev; +Cc: Jiri Benc

Due to the fact that the udp socket is destructed asynchronously in a
work queue, we have some nondeterministic behavior during shutdown of
vxlan tunnels and creating new ones. Fix this by keeping the destruction
process synchronous in regards to the user space process so IFF_UP can
be reliably set.

udp_tunnel_sock_release destroys vs->sock->sk if reference counter
indicates so. We expect to have the same lifetime of vxlan_sock and
vxlan_sock->sock->sk even in fast paths with only rcu locks held. So
only destruct the whole socket after we can be sure it cannot be found
by searching vxlan_net->sock_list.

Cc: Jiri Benc <jbenc@redhat.com>
Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
---
 drivers/net/vxlan.c | 20 +++-----------------
 include/net/vxlan.h |  2 --
 2 files changed, 3 insertions(+), 19 deletions(-)

diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index 1c0fa364323e28..487e48b7a53090 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -98,7 +98,6 @@ struct vxlan_fdb {
 
 /* salt for hash table */
 static u32 vxlan_salt __read_mostly;
-static struct workqueue_struct *vxlan_wq;
 
 static inline bool vxlan_collect_metadata(struct vxlan_sock *vs)
 {
@@ -1065,7 +1064,9 @@ static void __vxlan_sock_release(struct vxlan_sock *vs)
 	vxlan_notify_del_rx_port(vs);
 	spin_unlock(&vn->sock_lock);
 
-	queue_work(vxlan_wq, &vs->del_work);
+	synchronize_rcu();
+	udp_tunnel_sock_release(vs->sock);
+	kfree(vs);
 }
 
 static void vxlan_sock_release(struct vxlan_dev *vxlan)
@@ -2574,13 +2575,6 @@ static const struct ethtool_ops vxlan_ethtool_ops = {
 	.get_link	= ethtool_op_get_link,
 };
 
-static void vxlan_del_work(struct work_struct *work)
-{
-	struct vxlan_sock *vs = container_of(work, struct vxlan_sock, del_work);
-	udp_tunnel_sock_release(vs->sock);
-	kfree_rcu(vs, rcu);
-}
-
 static struct socket *vxlan_create_sock(struct net *net, bool ipv6,
 					__be16 port, u32 flags)
 {
@@ -2626,8 +2620,6 @@ static struct vxlan_sock *vxlan_socket_create(struct net *net, bool ipv6,
 	for (h = 0; h < VNI_HASH_SIZE; ++h)
 		INIT_HLIST_HEAD(&vs->vni_list[h]);
 
-	INIT_WORK(&vs->del_work, vxlan_del_work);
-
 	sock = vxlan_create_sock(net, ipv6, port, flags);
 	if (IS_ERR(sock)) {
 		pr_info("Cannot bind port %d, err=%ld\n", ntohs(port),
@@ -3218,10 +3210,6 @@ static int __init vxlan_init_module(void)
 {
 	int rc;
 
-	vxlan_wq = alloc_workqueue("vxlan", 0, 0);
-	if (!vxlan_wq)
-		return -ENOMEM;
-
 	get_random_bytes(&vxlan_salt, sizeof(vxlan_salt));
 
 	rc = register_pernet_subsys(&vxlan_net_ops);
@@ -3242,7 +3230,6 @@ out3:
 out2:
 	unregister_pernet_subsys(&vxlan_net_ops);
 out1:
-	destroy_workqueue(vxlan_wq);
 	return rc;
 }
 late_initcall(vxlan_init_module);
@@ -3251,7 +3238,6 @@ static void __exit vxlan_cleanup_module(void)
 {
 	rtnl_link_unregister(&vxlan_link_ops);
 	unregister_netdevice_notifier(&vxlan_notifier_block);
-	destroy_workqueue(vxlan_wq);
 	unregister_pernet_subsys(&vxlan_net_ops);
 	/* rcu_barrier() is called by netns */
 }
diff --git a/include/net/vxlan.h b/include/net/vxlan.h
index 73ed2e951c020d..2113f808e905a4 100644
--- a/include/net/vxlan.h
+++ b/include/net/vxlan.h
@@ -126,9 +126,7 @@ struct vxlan_metadata {
 /* per UDP socket information */
 struct vxlan_sock {
 	struct hlist_node hlist;
-	struct work_struct del_work;
 	struct socket	 *sock;
-	struct rcu_head	  rcu;
 	struct hlist_head vni_list[VNI_HASH_SIZE];
 	atomic_t	  refcnt;
 	struct udp_offload udp_offloads;
-- 
2.5.5

^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH net] vxlan: synchronously and race-free destruction of vxlan sockets
  2016-04-07 14:57 [PATCH net] vxlan: synchronously and race-free destruction of vxlan sockets Hannes Frederic Sowa
@ 2016-04-08 18:51 ` Marcelo Ricardo Leitner
  2016-04-08 20:30   ` Hannes Frederic Sowa
  0 siblings, 1 reply; 4+ messages in thread
From: Marcelo Ricardo Leitner @ 2016-04-08 18:51 UTC (permalink / raw)
  To: Hannes Frederic Sowa; +Cc: netdev, Jiri Benc

Hi Hannes,

On Thu, Apr 07, 2016 at 04:57:40PM +0200, Hannes Frederic Sowa wrote:
> Due to the fact that the udp socket is destructed asynchronously in a
> work queue, we have some nondeterministic behavior during shutdown of
> vxlan tunnels and creating new ones. Fix this by keeping the destruction
> process synchronous in regards to the user space process so IFF_UP can
> be reliably set.
> 
> udp_tunnel_sock_release destroys vs->sock->sk if reference counter
> indicates so. We expect to have the same lifetime of vxlan_sock and
> vxlan_sock->sock->sk even in fast paths with only rcu locks held. So
> only destruct the whole socket after we can be sure it cannot be found
> by searching vxlan_net->sock_list.
> 
> Cc: Jiri Benc <jbenc@redhat.com>
> Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
> ---
>  drivers/net/vxlan.c | 20 +++-----------------
>  include/net/vxlan.h |  2 --
>  2 files changed, 3 insertions(+), 19 deletions(-)
> 
> diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
> index 1c0fa364323e28..487e48b7a53090 100644
> --- a/drivers/net/vxlan.c
> +++ b/drivers/net/vxlan.c
> @@ -98,7 +98,6 @@ struct vxlan_fdb {
>  
>  /* salt for hash table */
>  static u32 vxlan_salt __read_mostly;
> -static struct workqueue_struct *vxlan_wq;
>  
>  static inline bool vxlan_collect_metadata(struct vxlan_sock *vs)
>  {
> @@ -1065,7 +1064,9 @@ static void __vxlan_sock_release(struct vxlan_sock *vs)
>  	vxlan_notify_del_rx_port(vs);
>  	spin_unlock(&vn->sock_lock);
>  
> -	queue_work(vxlan_wq, &vs->del_work);
> +	synchronize_rcu();

__vxlan_sock_release is called by vxlan_sock_release which is called by
vxlan_open/stop. Do we really want to have synchronize_rcu() while
holding rtnl?

> +	udp_tunnel_sock_release(vs->sock);
> +	kfree(vs);
>  }
>  
>  static void vxlan_sock_release(struct vxlan_dev *vxlan)
> @@ -2574,13 +2575,6 @@ static const struct ethtool_ops vxlan_ethtool_ops = {
>  	.get_link	= ethtool_op_get_link,
>  };
>  
> -static void vxlan_del_work(struct work_struct *work)
> -{
> -	struct vxlan_sock *vs = container_of(work, struct vxlan_sock, del_work);
> -	udp_tunnel_sock_release(vs->sock);
> -	kfree_rcu(vs, rcu);
> -}
> -
>  static struct socket *vxlan_create_sock(struct net *net, bool ipv6,
>  					__be16 port, u32 flags)
>  {
> @@ -2626,8 +2620,6 @@ static struct vxlan_sock *vxlan_socket_create(struct net *net, bool ipv6,
>  	for (h = 0; h < VNI_HASH_SIZE; ++h)
>  		INIT_HLIST_HEAD(&vs->vni_list[h]);
>  
> -	INIT_WORK(&vs->del_work, vxlan_del_work);
> -
>  	sock = vxlan_create_sock(net, ipv6, port, flags);
>  	if (IS_ERR(sock)) {
>  		pr_info("Cannot bind port %d, err=%ld\n", ntohs(port),
> @@ -3218,10 +3210,6 @@ static int __init vxlan_init_module(void)
>  {
>  	int rc;
>  
> -	vxlan_wq = alloc_workqueue("vxlan", 0, 0);
> -	if (!vxlan_wq)
> -		return -ENOMEM;
> -
>  	get_random_bytes(&vxlan_salt, sizeof(vxlan_salt));
>  
>  	rc = register_pernet_subsys(&vxlan_net_ops);
> @@ -3242,7 +3230,6 @@ out3:
>  out2:
>  	unregister_pernet_subsys(&vxlan_net_ops);
>  out1:
> -	destroy_workqueue(vxlan_wq);
>  	return rc;
>  }
>  late_initcall(vxlan_init_module);
> @@ -3251,7 +3238,6 @@ static void __exit vxlan_cleanup_module(void)
>  {
>  	rtnl_link_unregister(&vxlan_link_ops);
>  	unregister_netdevice_notifier(&vxlan_notifier_block);
> -	destroy_workqueue(vxlan_wq);
>  	unregister_pernet_subsys(&vxlan_net_ops);
>  	/* rcu_barrier() is called by netns */
>  }
> diff --git a/include/net/vxlan.h b/include/net/vxlan.h
> index 73ed2e951c020d..2113f808e905a4 100644
> --- a/include/net/vxlan.h
> +++ b/include/net/vxlan.h
> @@ -126,9 +126,7 @@ struct vxlan_metadata {
>  /* per UDP socket information */
>  struct vxlan_sock {
>  	struct hlist_node hlist;
> -	struct work_struct del_work;
>  	struct socket	 *sock;
> -	struct rcu_head	  rcu;
>  	struct hlist_head vni_list[VNI_HASH_SIZE];
>  	atomic_t	  refcnt;
>  	struct udp_offload udp_offloads;
> -- 
> 2.5.5
> 

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH net] vxlan: synchronously and race-free destruction of vxlan sockets
  2016-04-08 18:51 ` Marcelo Ricardo Leitner
@ 2016-04-08 20:30   ` Hannes Frederic Sowa
  2016-04-08 20:40     ` Eric Dumazet
  0 siblings, 1 reply; 4+ messages in thread
From: Hannes Frederic Sowa @ 2016-04-08 20:30 UTC (permalink / raw)
  To: Marcelo Ricardo Leitner; +Cc: netdev, Jiri Benc

Hi Marcelo,


On 08.04.2016 20:51, Marcelo Ricardo Leitner wrote:
> On Thu, Apr 07, 2016 at 04:57:40PM +0200, Hannes Frederic Sowa wrote:
>> Due to the fact that the udp socket is destructed asynchronously in a
>> work queue, we have some nondeterministic behavior during shutdown of
>> vxlan tunnels and creating new ones. Fix this by keeping the destruction
>> process synchronous in regards to the user space process so IFF_UP can
>> be reliably set.
>>
>> udp_tunnel_sock_release destroys vs->sock->sk if reference counter
>> indicates so. We expect to have the same lifetime of vxlan_sock and
>> vxlan_sock->sock->sk even in fast paths with only rcu locks held. So
>> only destruct the whole socket after we can be sure it cannot be found
>> by searching vxlan_net->sock_list.
>>
>> Cc: Jiri Benc <jbenc@redhat.com>
>> Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
>> ---
>>   drivers/net/vxlan.c | 20 +++-----------------
>>   include/net/vxlan.h |  2 --
>>   2 files changed, 3 insertions(+), 19 deletions(-)
>>
>> diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
>> index 1c0fa364323e28..487e48b7a53090 100644
>> --- a/drivers/net/vxlan.c
>> +++ b/drivers/net/vxlan.c
>> @@ -98,7 +98,6 @@ struct vxlan_fdb {
>>
>>   /* salt for hash table */
>>   static u32 vxlan_salt __read_mostly;
>> -static struct workqueue_struct *vxlan_wq;
>>
>>   static inline bool vxlan_collect_metadata(struct vxlan_sock *vs)
>>   {
>> @@ -1065,7 +1064,9 @@ static void __vxlan_sock_release(struct vxlan_sock *vs)
>>   	vxlan_notify_del_rx_port(vs);
>>   	spin_unlock(&vn->sock_lock);
>>
>> -	queue_work(vxlan_wq, &vs->del_work);
>> +	synchronize_rcu();
>
> __vxlan_sock_release is called by vxlan_sock_release which is called by
> vxlan_open/stop. Do we really want to have synchronize_rcu() while
> holding rtnl?

I thought about that and try not to use synchronize_rcu, but I don't see 
any other way. Anyway, ndo_stop isn't really fast path and is used to 
shut the interface down. Also since we have lwtunnels we don't really 
need a lot of interfaces created and torn down.

But I could switch to synchronize_rcu_expedited here.

Also we have another synchronize_rcu during device dismantling, maybe we 
can split ndo_stop into two callbacks, one preparing for stopping and 
the other one after the synchronize_rcu when we safely can free resources.

I will investigate this but for the mean time I think this patch is 
already improving things as user space can bind the socket again when 
the dellink command returned.

Thanks,
Hannes

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH net] vxlan: synchronously and race-free destruction of vxlan sockets
  2016-04-08 20:30   ` Hannes Frederic Sowa
@ 2016-04-08 20:40     ` Eric Dumazet
  0 siblings, 0 replies; 4+ messages in thread
From: Eric Dumazet @ 2016-04-08 20:40 UTC (permalink / raw)
  To: Hannes Frederic Sowa; +Cc: Marcelo Ricardo Leitner, netdev, Jiri Benc

On Fri, 2016-04-08 at 22:30 +0200, Hannes Frederic Sowa wrote:
> Hi Marcelo,
> ng rtnl?
> 
> I thought about that and try not to use synchronize_rcu, but I don't see 
> any other way. Anyway, ndo_stop isn't really fast path and is used to 
> shut the interface down. Also since we have lwtunnels we don't really 
> need a lot of interfaces created and torn down.
> 
> But I could switch to synchronize_rcu_expedited here.
> 
> Also we have another synchronize_rcu during device dismantling, maybe we 
> can split ndo_stop into two callbacks, one preparing for stopping and 
> the other one after the synchronize_rcu when we safely can free resources.
> 
> I will investigate this but for the mean time I think this patch is 
> already improving things as user space can bind the socket again when 
> the dellink command returned.

Of course, we have synchronize_net() which specifically put in a single
point the knowledge (rtnl being held or not)

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2016-04-08 20:40 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-04-07 14:57 [PATCH net] vxlan: synchronously and race-free destruction of vxlan sockets Hannes Frederic Sowa
2016-04-08 18:51 ` Marcelo Ricardo Leitner
2016-04-08 20:30   ` Hannes Frederic Sowa
2016-04-08 20:40     ` Eric Dumazet

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.