Netdev Archive on lore.kernel.org
 help / color / Atom feed
* [PATCH net-next] xfrm: add missing rcu verbs to fix data-race
@ 2019-11-08  3:47 Eric Dumazet
  2019-11-08 21:57 ` David Miller
  2019-11-09  1:33 ` Herbert Xu
  0 siblings, 2 replies; 4+ messages in thread
From: Eric Dumazet @ 2019-11-08  3:47 UTC (permalink / raw)
  To: David S . Miller
  Cc: netdev, Eric Dumazet, Eric Dumazet, syzbot, Steffen Klassert, Herbert Xu

KCSAN reported a data-race in xfrm_lookup_with_ifid() and
xfrm_sk_free_policy() [1]

This should be solved using rcu_access_pointer() and rcu_assign_pointer()
to avoid potential load/store-tearing.

[1]
BUG: KCSAN: data-race in sk_common_release / xfrm_lookup_with_ifid

write to 0xffff888121172668 of 8 bytes by task 22196 on cpu 0:
 xfrm_sk_free_policy include/net/xfrm.h:1193 [inline]
 sk_common_release+0x18c/0x1d0 net/core/sock.c:3198
 udp_lib_close+0x1f/0x30 include/net/udp.h:202
 inet_release+0x86/0x100 net/ipv4/af_inet.c:427
 inet6_release+0x4a/0x70 net/ipv6/af_inet6.c:470
 __sock_release+0x85/0x160 net/socket.c:590
 sock_close+0x24/0x30 net/socket.c:1268
 __fput+0x1e1/0x520 fs/file_table.c:280
 ____fput+0x1f/0x30 fs/file_table.c:313
 task_work_run+0xf6/0x130 kernel/task_work.c:113
 tracehook_notify_resume include/linux/tracehook.h:188 [inline]
 exit_to_usermode_loop+0x2b4/0x2c0 arch/x86/entry/common.c:163
 prepare_exit_to_usermode arch/x86/entry/common.c:194 [inline]
 syscall_return_slowpath arch/x86/entry/common.c:274 [inline]
 do_syscall_64+0x353/0x370 arch/x86/entry/common.c:300
 entry_SYSCALL_64_after_hwframe+0x44/0xa9

read to 0xffff888121172668 of 8 bytes by task 22201 on cpu 1:
 xfrm_lookup_with_ifid+0xc0/0x1310 net/xfrm/xfrm_policy.c:3035
 xfrm_lookup net/xfrm/xfrm_policy.c:3174 [inline]
 xfrm_lookup_route+0x44/0x100 net/xfrm/xfrm_policy.c:3185
 ip6_dst_lookup_flow+0xde/0x120 net/ipv6/ip6_output.c:1159
 inet6_csk_route_socket+0x2f7/0x420 net/ipv6/inet6_connection_sock.c:106
 inet6_csk_xmit+0x91/0x1f0 net/ipv6/inet6_connection_sock.c:121
 l2tp_xmit_core net/l2tp/l2tp_core.c:1030 [inline]
 l2tp_xmit_skb+0x8c9/0x8e0 net/l2tp/l2tp_core.c:1132
 pppol2tp_sendmsg+0x2fc/0x3c0 net/l2tp/l2tp_ppp.c:325
 sock_sendmsg_nosec net/socket.c:637 [inline]
 sock_sendmsg+0x9f/0xc0 net/socket.c:657
 ___sys_sendmsg+0x2b7/0x5d0 net/socket.c:2311
 __sys_sendmmsg+0x123/0x350 net/socket.c:2413
 __do_sys_sendmmsg net/socket.c:2442 [inline]
 __se_sys_sendmmsg net/socket.c:2439 [inline]
 __x64_sys_sendmmsg+0x64/0x80 net/socket.c:2439
 do_syscall_64+0xcc/0x370 arch/x86/entry/common.c:290
 entry_SYSCALL_64_after_hwframe+0x44/0xa9

Reported by Kernel Concurrency Sanitizer on:
CPU: 1 PID: 22201 Comm: syz-executor.5 Not tainted 5.4.0-rc6+ #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: syzbot <syzkaller@googlegroups.com>
Cc: Steffen Klassert <steffen.klassert@secunet.com>
Cc: Herbert Xu <herbert@gondor.apana.org.au>
---
 include/net/xfrm.h     | 9 +++++----
 net/smc/smc.h          | 4 ++--
 net/xfrm/xfrm_policy.c | 4 ++--
 3 files changed, 9 insertions(+), 8 deletions(-)

diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index aa08a7a5f6ac5836524dd34115cd57e2675e574d..8884575ae2135b739a2c316bf8a92b56d6cc807c 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -1093,7 +1093,7 @@ static inline int __xfrm_policy_check2(struct sock *sk, int dir,
 	struct net *net = dev_net(skb->dev);
 	int ndir = dir | (reverse ? XFRM_POLICY_MASK + 1 : 0);
 
-	if (sk && sk->sk_policy[XFRM_POLICY_IN])
+	if (sk && rcu_access_pointer(sk->sk_policy[XFRM_POLICY_IN]))
 		return __xfrm_policy_check(sk, ndir, skb, family);
 
 	return	(!net->xfrm.policy_count[dir] && !secpath_exists(skb)) ||
@@ -1171,7 +1171,8 @@ static inline int xfrm_sk_clone_policy(struct sock *sk, const struct sock *osk)
 {
 	sk->sk_policy[0] = NULL;
 	sk->sk_policy[1] = NULL;
-	if (unlikely(osk->sk_policy[0] || osk->sk_policy[1]))
+	if (unlikely(rcu_access_pointer(osk->sk_policy[0]) ||
+		     rcu_access_pointer(osk->sk_policy[1])))
 		return __xfrm_sk_clone_policy(sk, osk);
 	return 0;
 }
@@ -1185,12 +1186,12 @@ static inline void xfrm_sk_free_policy(struct sock *sk)
 	pol = rcu_dereference_protected(sk->sk_policy[0], 1);
 	if (unlikely(pol != NULL)) {
 		xfrm_policy_delete(pol, XFRM_POLICY_MAX);
-		sk->sk_policy[0] = NULL;
+		rcu_assign_pointer(sk->sk_policy[0], NULL);
 	}
 	pol = rcu_dereference_protected(sk->sk_policy[1], 1);
 	if (unlikely(pol != NULL)) {
 		xfrm_policy_delete(pol, XFRM_POLICY_MAX+1);
-		sk->sk_policy[1] = NULL;
+		rcu_assign_pointer(sk->sk_policy[1], NULL);
 	}
 }
 
diff --git a/net/smc/smc.h b/net/smc/smc.h
index be11ba41190fb58be3ce9e8ab1a9ea4f8aa6a05b..4324dd39de99ba5967e1325746a2f5eff4baf2e7 100644
--- a/net/smc/smc.h
+++ b/net/smc/smc.h
@@ -253,8 +253,8 @@ static inline u32 ntoh24(u8 *net)
 #ifdef CONFIG_XFRM
 static inline bool using_ipsec(struct smc_sock *smc)
 {
-	return (smc->clcsock->sk->sk_policy[0] ||
-		smc->clcsock->sk->sk_policy[1]) ? true : false;
+	return (rcu_access_pointer(smc->clcsock->sk->sk_policy[0]) ||
+		rcu_access_pointer(smc->clcsock->sk->sk_policy[1])) ? true : false;
 }
 #else
 static inline bool using_ipsec(struct smc_sock *smc)
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index f2d1e573ea55154eb2ee4fc3dbdd47313d969b98..d76ca0dfbefb31ae8a883b63d755e29ac749569b 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -3032,7 +3032,7 @@ struct dst_entry *xfrm_lookup_with_ifid(struct net *net,
 	route = NULL;
 
 	sk = sk_const_to_full_sk(sk);
-	if (sk && sk->sk_policy[XFRM_POLICY_OUT]) {
+	if (sk && rcu_access_pointer(sk->sk_policy[XFRM_POLICY_OUT])) {
 		num_pols = 1;
 		pols[0] = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl, family,
 						if_id);
@@ -3557,7 +3557,7 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
 
 	pol = NULL;
 	sk = sk_to_full_sk(sk);
-	if (sk && sk->sk_policy[dir]) {
+	if (sk && rcu_access_pointer(sk->sk_policy[dir])) {
 		pol = xfrm_sk_policy_lookup(sk, dir, &fl, family, if_id);
 		if (IS_ERR(pol)) {
 			XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR);
-- 
2.24.0.432.g9d3f5f5b63-goog


^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH net-next] xfrm: add missing rcu verbs to fix data-race
  2019-11-08  3:47 [PATCH net-next] xfrm: add missing rcu verbs to fix data-race Eric Dumazet
@ 2019-11-08 21:57 ` David Miller
  2019-11-09  1:33 ` Herbert Xu
  1 sibling, 0 replies; 4+ messages in thread
From: David Miller @ 2019-11-08 21:57 UTC (permalink / raw)
  To: edumazet; +Cc: netdev, eric.dumazet, syzkaller, steffen.klassert, herbert

From: Eric Dumazet <edumazet@google.com>
Date: Thu,  7 Nov 2019 19:47:01 -0800

> KCSAN reported a data-race in xfrm_lookup_with_ifid() and
> xfrm_sk_free_policy() [1]

Steffen please pick this up, thank you.

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH net-next] xfrm: add missing rcu verbs to fix data-race
  2019-11-08  3:47 [PATCH net-next] xfrm: add missing rcu verbs to fix data-race Eric Dumazet
  2019-11-08 21:57 ` David Miller
@ 2019-11-09  1:33 ` Herbert Xu
  2019-11-09  2:51   ` Eric Dumazet
  1 sibling, 1 reply; 4+ messages in thread
From: Herbert Xu @ 2019-11-09  1:33 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: David S . Miller, netdev, Eric Dumazet, syzbot, Steffen Klassert,
	Dmitry Vyukov, Linus Torvalds, Paul E. McKenney, Will Deacon

On Thu, Nov 07, 2019 at 07:47:01PM -0800, Eric Dumazet wrote:
> KCSAN reported a data-race in xfrm_lookup_with_ifid() and
> xfrm_sk_free_policy() [1]

I'm very uncomfortable with these warnings being enabled in KASAN
unless there is a way to opt out of them without adding unnecessary
READ_ONCE/WRITE_ONCE tags.

All they do is create patches such as this one that simply adds
these tags without resolving the underlying issues (if there are
any).

> diff --git a/include/net/xfrm.h b/include/net/xfrm.h
> index aa08a7a5f6ac5836524dd34115cd57e2675e574d..8884575ae2135b739a2c316bf8a92b56d6cc807c 100644
> --- a/include/net/xfrm.h
> +++ b/include/net/xfrm.h
> @@ -1093,7 +1093,7 @@ static inline int __xfrm_policy_check2(struct sock *sk, int dir,
>  	struct net *net = dev_net(skb->dev);
>  	int ndir = dir | (reverse ? XFRM_POLICY_MASK + 1 : 0);
>  
> -	if (sk && sk->sk_policy[XFRM_POLICY_IN])
> +	if (sk && rcu_access_pointer(sk->sk_policy[XFRM_POLICY_IN]))
>  		return __xfrm_policy_check(sk, ndir, skb, family);

This is simply an optimisation and we don't care if we get it
wrong due to the lack of READ_ONCE/WRITE_ONCE.  Even with the
READ_ONCE tag, there is nothing stopping a policy from being
added after the test returns false, or all policies from being
deleted after the test returns true.

IOW this is simply unnecessary.

> @@ -1171,7 +1171,8 @@ static inline int xfrm_sk_clone_policy(struct sock *sk, const struct sock *osk)
>  {
>  	sk->sk_policy[0] = NULL;
>  	sk->sk_policy[1] = NULL;
> -	if (unlikely(osk->sk_policy[0] || osk->sk_policy[1]))
> +	if (unlikely(rcu_access_pointer(osk->sk_policy[0]) ||
> +		     rcu_access_pointer(osk->sk_policy[1])))
>  		return __xfrm_sk_clone_policy(sk, osk);
>  	return 0;

These on the other hand are done under socket lock.  IOW they
are completely synchronised with respect to the write side which
is also under socket lock so no tagging is necessary.

Incidentally rcu_access_pointer is now practically the same as
rcu_dereference because the smp_read_barrier_depends has been
moved over to READ_ONCE.  In fact there is no performance difference
between rcu_dereference and rcu_dereference_raw either.

I think we should either remove the smp_barrier_depends from
rcu_access_pointer, or just get rid of rcu_access_pointer completely.

> @@ -1185,12 +1186,12 @@ static inline void xfrm_sk_free_policy(struct sock *sk)
>  	pol = rcu_dereference_protected(sk->sk_policy[0], 1);
>  	if (unlikely(pol != NULL)) {
>  		xfrm_policy_delete(pol, XFRM_POLICY_MAX);
> -		sk->sk_policy[0] = NULL;
> +		rcu_assign_pointer(sk->sk_policy[0], NULL);
>  	}
>  	pol = rcu_dereference_protected(sk->sk_policy[1], 1);
>  	if (unlikely(pol != NULL)) {
>  		xfrm_policy_delete(pol, XFRM_POLICY_MAX+1);
> -		sk->sk_policy[1] = NULL;
> +		rcu_assign_pointer(sk->sk_policy[1], NULL);

These should use RCU_INIT_POINTER.

> diff --git a/net/smc/smc.h b/net/smc/smc.h
> index be11ba41190fb58be3ce9e8ab1a9ea4f8aa6a05b..4324dd39de99ba5967e1325746a2f5eff4baf2e7 100644
> --- a/net/smc/smc.h
> +++ b/net/smc/smc.h
> @@ -253,8 +253,8 @@ static inline u32 ntoh24(u8 *net)
>  #ifdef CONFIG_XFRM
>  static inline bool using_ipsec(struct smc_sock *smc)
>  {
> -	return (smc->clcsock->sk->sk_policy[0] ||
> -		smc->clcsock->sk->sk_policy[1]) ? true : false;
> +	return (rcu_access_pointer(smc->clcsock->sk->sk_policy[0]) ||
> +		rcu_access_pointer(smc->clcsock->sk->sk_policy[1])) ? true : false;
>  }
>  #else
>  static inline bool using_ipsec(struct smc_sock *smc)

Now this could actually be a real bug because it doesn't appear
to be an optimisation and it doesn't seem to be holding the socket
lock either.

Again this shows that we shouldn't just add READ_ONCE/WRITE_ONCE
tags because they might be papering over real bugs and help them
hide better because people will automatically assume that using
READ_ONCE/WRITE_ONCE means that the code is *safe*.

Cheers,
-- 
Email: Herbert Xu <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH net-next] xfrm: add missing rcu verbs to fix data-race
  2019-11-09  1:33 ` Herbert Xu
@ 2019-11-09  2:51   ` Eric Dumazet
  0 siblings, 0 replies; 4+ messages in thread
From: Eric Dumazet @ 2019-11-09  2:51 UTC (permalink / raw)
  To: Herbert Xu
  Cc: David S . Miller, netdev, Eric Dumazet, syzbot, Steffen Klassert,
	Dmitry Vyukov, Linus Torvalds, Paul E. McKenney, Will Deacon

On Fri, Nov 8, 2019 at 5:33 PM Herbert Xu <herbert@gondor.apana.org.au> wrote:
>
> On Thu, Nov 07, 2019 at 07:47:01PM -0800, Eric Dumazet wrote:
> > KCSAN reported a data-race in xfrm_lookup_with_ifid() and
> > xfrm_sk_free_policy() [1]
>
> I'm very uncomfortable with these warnings being enabled in KASAN
> unless there is a way to opt out of them without adding unnecessary
> READ_ONCE/WRITE_ONCE tags.
>
> All they do is create patches such as this one that simply adds
> these tags without resolving the underlying issues (if there are
> any).
>
> > diff --git a/include/net/xfrm.h b/include/net/xfrm.h
> > index aa08a7a5f6ac5836524dd34115cd57e2675e574d..8884575ae2135b739a2c316bf8a92b56d6cc807c 100644
> > --- a/include/net/xfrm.h
> > +++ b/include/net/xfrm.h
> > @@ -1093,7 +1093,7 @@ static inline int __xfrm_policy_check2(struct sock *sk, int dir,
> >       struct net *net = dev_net(skb->dev);
> >       int ndir = dir | (reverse ? XFRM_POLICY_MASK + 1 : 0);
> >
> > -     if (sk && sk->sk_policy[XFRM_POLICY_IN])
> > +     if (sk && rcu_access_pointer(sk->sk_policy[XFRM_POLICY_IN]))
> >               return __xfrm_policy_check(sk, ndir, skb, family);
>
> This is simply an optimisation and we don't care if we get it
> wrong due to the lack of READ_ONCE/WRITE_ONCE.  Even with the
> READ_ONCE tag, there is nothing stopping a policy from being
> added after the test returns false, or all policies from being
> deleted after the test returns true.
>
> IOW this is simply unnecessary.

How do you suggest to silence the KCSAN warnings then ?

>
> > @@ -1171,7 +1171,8 @@ static inline int xfrm_sk_clone_policy(struct sock *sk, const struct sock *osk)
> >  {
> >       sk->sk_policy[0] = NULL;
> >       sk->sk_policy[1] = NULL;
> > -     if (unlikely(osk->sk_policy[0] || osk->sk_policy[1]))
> > +     if (unlikely(rcu_access_pointer(osk->sk_policy[0]) ||
> > +                  rcu_access_pointer(osk->sk_policy[1])))
> >               return __xfrm_sk_clone_policy(sk, osk);
> >       return 0;
>
> These on the other hand are done under socket lock.  IOW they
> are completely synchronised with respect to the write side which
> is also under socket lock so no tagging is necessary.
>
> Incidentally rcu_access_pointer is now practically the same as
> rcu_dereference because the smp_read_barrier_depends has been
> moved over to READ_ONCE.  In fact there is no performance difference
> between rcu_dereference and rcu_dereference_raw either.
>
> I think we should either remove the smp_barrier_depends from
> rcu_access_pointer, or just get rid of rcu_access_pointer completely.
>
> > @@ -1185,12 +1186,12 @@ static inline void xfrm_sk_free_policy(struct sock *sk)
> >       pol = rcu_dereference_protected(sk->sk_policy[0], 1);
> >       if (unlikely(pol != NULL)) {
> >               xfrm_policy_delete(pol, XFRM_POLICY_MAX);
> > -             sk->sk_policy[0] = NULL;
> > +             rcu_assign_pointer(sk->sk_policy[0], NULL);
> >       }
> >       pol = rcu_dereference_protected(sk->sk_policy[1], 1);
> >       if (unlikely(pol != NULL)) {
> >               xfrm_policy_delete(pol, XFRM_POLICY_MAX+1);
> > -             sk->sk_policy[1] = NULL;
> > +             rcu_assign_pointer(sk->sk_policy[1], NULL);
>
> These should use RCU_INIT_POINTER.

This does not matter anymore.
Please double check rcu_assign_pointer(), it handles the NULL case.

commit 3a37f7275cda5ad25c1fe9be8f20c76c60d175fa
Author: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Date:   Sun May 1 18:46:54 2016 -0700

    rcu: No ordering for rcu_assign_pointer() of NULL

    This commit does a compile-time check for rcu_assign_pointer() of NULL,
    and uses WRITE_ONCE() rather than smp_store_release() in that case.

    Reported-by: Christoph Hellwig <hch@infradead.org>
    Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>


>
> > diff --git a/net/smc/smc.h b/net/smc/smc.h
> > index be11ba41190fb58be3ce9e8ab1a9ea4f8aa6a05b..4324dd39de99ba5967e1325746a2f5eff4baf2e7 100644
> > --- a/net/smc/smc.h
> > +++ b/net/smc/smc.h
> > @@ -253,8 +253,8 @@ static inline u32 ntoh24(u8 *net)
> >  #ifdef CONFIG_XFRM
> >  static inline bool using_ipsec(struct smc_sock *smc)
> >  {
> > -     return (smc->clcsock->sk->sk_policy[0] ||
> > -             smc->clcsock->sk->sk_policy[1]) ? true : false;
> > +     return (rcu_access_pointer(smc->clcsock->sk->sk_policy[0]) ||
> > +             rcu_access_pointer(smc->clcsock->sk->sk_policy[1])) ? true : false;
> >  }
> >  #else
> >  static inline bool using_ipsec(struct smc_sock *smc)
>
> Now this could actually be a real bug because it doesn't appear
> to be an optimisation and it doesn't seem to be holding the socket
> lock either.
>
> Again this shows that we shouldn't just add READ_ONCE/WRITE_ONCE
> tags because they might be papering over real bugs and help them
> hide better because people will automatically assume that using
> READ_ONCE/WRITE_ONCE means that the code is *safe*.
>

Hmm, I will leave to you the resolution of this bug.

Thanks.

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, back to index

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-11-08  3:47 [PATCH net-next] xfrm: add missing rcu verbs to fix data-race Eric Dumazet
2019-11-08 21:57 ` David Miller
2019-11-09  1:33 ` Herbert Xu
2019-11-09  2:51   ` Eric Dumazet

Netdev Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/netdev/0 netdev/git/0.git
	git clone --mirror https://lore.kernel.org/netdev/1 netdev/git/1.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 netdev netdev/ https://lore.kernel.org/netdev \
		netdev@vger.kernel.org
	public-inbox-index netdev

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.netdev


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git