linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] xfrm: Fix RCU vs hash_resize_mutex lock inversion
@ 2021-06-28 13:34 Frederic Weisbecker
  2021-06-30  6:57 ` Steffen Klassert
  0 siblings, 1 reply; 6+ messages in thread
From: Frederic Weisbecker @ 2021-06-28 13:34 UTC (permalink / raw)
  To: Steffen Klassert
  Cc: LKML, Frederic Weisbecker, Peter Zijlstra, David S . Miller,
	Ahmed S . Darwish, stable, Varad Gautam, Herbert Xu, netdev

xfrm_bydst_resize() calls synchronize_rcu() while holding
hash_resize_mutex. But then on PREEMPT_RT configurations,
xfrm_policy_lookup_bytype() may acquire that mutex while running in an
RCU read side critical section. This results in a deadlock.

In fact the scope of hash_resize_mutex is way beyond the purpose of
xfrm_policy_lookup_bytype() to just fetch a coherent and stable policy
for a given destination/direction, along with other details.

The lower level net->xfrm.xfrm_policy_lock, which among other things
protects per destination/direction references to policy entries, is
enough to serialize and benefit from priority inheritance against the
write side. As a bonus, it makes it officially a per network namespace
synchronization business where a policy table resize on namespace A
shouldn't block a policy lookup on namespace B.

Fixes: 77cc278f7b20 (xfrm: policy: Use sequence counters with associated lock)
Cc: stable@vger.kernel.org
Cc: Ahmed S. Darwish <a.darwish@linutronix.de>
Cc: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Varad Gautam <varad.gautam@suse.com>
Cc: Steffen Klassert <steffen.klassert@secunet.com>
Cc: Herbert Xu <herbert@gondor.apana.org.au>
Cc: David S. Miller <davem@davemloft.net>
Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
---
 include/net/netns/xfrm.h |  1 +
 net/xfrm/xfrm_policy.c   | 17 ++++++++---------
 2 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h
index e816b6a3ef2b..9b376b87bd54 100644
--- a/include/net/netns/xfrm.h
+++ b/include/net/netns/xfrm.h
@@ -74,6 +74,7 @@ struct netns_xfrm {
 #endif
 	spinlock_t		xfrm_state_lock;
 	seqcount_spinlock_t	xfrm_state_hash_generation;
+	seqcount_spinlock_t	xfrm_policy_hash_generation;
 
 	spinlock_t xfrm_policy_lock;
 	struct mutex xfrm_cfg_mutex;
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index ce500f847b99..46a6d15b66d6 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -155,7 +155,6 @@ static struct xfrm_policy_afinfo const __rcu *xfrm_policy_afinfo[AF_INET6 + 1]
 						__read_mostly;
 
 static struct kmem_cache *xfrm_dst_cache __ro_after_init;
-static __read_mostly seqcount_mutex_t xfrm_policy_hash_generation;
 
 static struct rhashtable xfrm_policy_inexact_table;
 static const struct rhashtable_params xfrm_pol_inexact_params;
@@ -585,7 +584,7 @@ static void xfrm_bydst_resize(struct net *net, int dir)
 		return;
 
 	spin_lock_bh(&net->xfrm.xfrm_policy_lock);
-	write_seqcount_begin(&xfrm_policy_hash_generation);
+	write_seqcount_begin(&net->xfrm.xfrm_policy_hash_generation);
 
 	odst = rcu_dereference_protected(net->xfrm.policy_bydst[dir].table,
 				lockdep_is_held(&net->xfrm.xfrm_policy_lock));
@@ -596,7 +595,7 @@ static void xfrm_bydst_resize(struct net *net, int dir)
 	rcu_assign_pointer(net->xfrm.policy_bydst[dir].table, ndst);
 	net->xfrm.policy_bydst[dir].hmask = nhashmask;
 
-	write_seqcount_end(&xfrm_policy_hash_generation);
+	write_seqcount_end(&net->xfrm.xfrm_policy_hash_generation);
 	spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
 
 	synchronize_rcu();
@@ -1245,7 +1244,7 @@ static void xfrm_hash_rebuild(struct work_struct *work)
 	} while (read_seqretry(&net->xfrm.policy_hthresh.lock, seq));
 
 	spin_lock_bh(&net->xfrm.xfrm_policy_lock);
-	write_seqcount_begin(&xfrm_policy_hash_generation);
+	write_seqcount_begin(&net->xfrm.xfrm_policy_hash_generation);
 
 	/* make sure that we can insert the indirect policies again before
 	 * we start with destructive action.
@@ -1354,7 +1353,7 @@ static void xfrm_hash_rebuild(struct work_struct *work)
 
 out_unlock:
 	__xfrm_policy_inexact_flush(net);
-	write_seqcount_end(&xfrm_policy_hash_generation);
+	write_seqcount_end(&net->xfrm.xfrm_policy_hash_generation);
 	spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
 
 	mutex_unlock(&hash_resize_mutex);
@@ -2095,9 +2094,9 @@ static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type,
 	rcu_read_lock();
  retry:
 	do {
-		sequence = read_seqcount_begin(&xfrm_policy_hash_generation);
+		sequence = read_seqcount_begin(&net->xfrm.xfrm_policy_hash_generation);
 		chain = policy_hash_direct(net, daddr, saddr, family, dir);
-	} while (read_seqcount_retry(&xfrm_policy_hash_generation, sequence));
+	} while (read_seqcount_retry(&net->xfrm.xfrm_policy_hash_generation, sequence));
 
 	ret = NULL;
 	hlist_for_each_entry_rcu(pol, chain, bydst) {
@@ -2128,7 +2127,7 @@ static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type,
 	}
 
 skip_inexact:
-	if (read_seqcount_retry(&xfrm_policy_hash_generation, sequence))
+	if (read_seqcount_retry(&net->xfrm.xfrm_policy_hash_generation, sequence))
 		goto retry;
 
 	if (ret && !xfrm_pol_hold_rcu(ret))
@@ -4084,6 +4083,7 @@ static int __net_init xfrm_net_init(struct net *net)
 	/* Initialize the per-net locks here */
 	spin_lock_init(&net->xfrm.xfrm_state_lock);
 	spin_lock_init(&net->xfrm.xfrm_policy_lock);
+	seqcount_spinlock_init(&net->xfrm.xfrm_policy_hash_generation, &net->xfrm.xfrm_policy_lock);
 	mutex_init(&net->xfrm.xfrm_cfg_mutex);
 
 	rv = xfrm_statistics_init(net);
@@ -4128,7 +4128,6 @@ void __init xfrm_init(void)
 {
 	register_pernet_subsys(&xfrm_net_ops);
 	xfrm_dev_init();
-	seqcount_mutex_init(&xfrm_policy_hash_generation, &hash_resize_mutex);
 	xfrm_input_init();
 
 #ifdef CONFIG_XFRM_ESPINTCP
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH] xfrm: Fix RCU vs hash_resize_mutex lock inversion
  2021-06-28 13:34 [PATCH] xfrm: Fix RCU vs hash_resize_mutex lock inversion Frederic Weisbecker
@ 2021-06-30  6:57 ` Steffen Klassert
  2021-06-30 12:11   ` Varad Gautam
  2021-07-05 11:58   ` Steffen Klassert
  0 siblings, 2 replies; 6+ messages in thread
From: Steffen Klassert @ 2021-06-30  6:57 UTC (permalink / raw)
  To: Frederic Weisbecker
  Cc: LKML, Peter Zijlstra, David S . Miller, Ahmed S . Darwish,
	stable, Varad Gautam, Herbert Xu, netdev

On Mon, Jun 28, 2021 at 03:34:28PM +0200, Frederic Weisbecker wrote:
> xfrm_bydst_resize() calls synchronize_rcu() while holding
> hash_resize_mutex. But then on PREEMPT_RT configurations,
> xfrm_policy_lookup_bytype() may acquire that mutex while running in an
> RCU read side critical section. This results in a deadlock.
> 
> In fact the scope of hash_resize_mutex is way beyond the purpose of
> xfrm_policy_lookup_bytype() to just fetch a coherent and stable policy
> for a given destination/direction, along with other details.
> 
> The lower level net->xfrm.xfrm_policy_lock, which among other things
> protects per destination/direction references to policy entries, is
> enough to serialize and benefit from priority inheritance against the
> write side. As a bonus, it makes it officially a per network namespace
> synchronization business where a policy table resize on namespace A
> shouldn't block a policy lookup on namespace B.
> 
> Fixes: 77cc278f7b20 (xfrm: policy: Use sequence counters with associated lock)
> Cc: stable@vger.kernel.org
> Cc: Ahmed S. Darwish <a.darwish@linutronix.de>
> Cc: Peter Zijlstra (Intel) <peterz@infradead.org>
> Cc: Varad Gautam <varad.gautam@suse.com>
> Cc: Steffen Klassert <steffen.klassert@secunet.com>
> Cc: Herbert Xu <herbert@gondor.apana.org.au>
> Cc: David S. Miller <davem@davemloft.net>
> Signed-off-by: Frederic Weisbecker <frederic@kernel.org>

Your patch has a conflicht with ("commit d7b0408934c7 xfrm: policy: Read
seqcount outside of rcu-read side in xfrm_policy_lookup_bytype")
from Varad. Can you please rebase onto the ipsec tree?

Btw. Varad, your above mentioned patch tried to fix the same issue.
Do we still need it, or is it obsolete with the fix from Frederic?

Thanks!

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] xfrm: Fix RCU vs hash_resize_mutex lock inversion
  2021-06-30  6:57 ` Steffen Klassert
@ 2021-06-30 12:11   ` Varad Gautam
  2021-06-30 12:21     ` Steffen Klassert
  2021-07-05 11:58   ` Steffen Klassert
  1 sibling, 1 reply; 6+ messages in thread
From: Varad Gautam @ 2021-06-30 12:11 UTC (permalink / raw)
  To: Steffen Klassert, Frederic Weisbecker
  Cc: LKML, Peter Zijlstra, David S . Miller, Ahmed S . Darwish,
	stable, Herbert Xu, netdev

On 6/30/21 8:57 AM, Steffen Klassert wrote:
> On Mon, Jun 28, 2021 at 03:34:28PM +0200, Frederic Weisbecker wrote:
>> xfrm_bydst_resize() calls synchronize_rcu() while holding
>> hash_resize_mutex. But then on PREEMPT_RT configurations,
>> xfrm_policy_lookup_bytype() may acquire that mutex while running in an
>> RCU read side critical section. This results in a deadlock.
>>
>> In fact the scope of hash_resize_mutex is way beyond the purpose of
>> xfrm_policy_lookup_bytype() to just fetch a coherent and stable policy
>> for a given destination/direction, along with other details.
>>
>> The lower level net->xfrm.xfrm_policy_lock, which among other things
>> protects per destination/direction references to policy entries, is
>> enough to serialize and benefit from priority inheritance against the
>> write side. As a bonus, it makes it officially a per network namespace
>> synchronization business where a policy table resize on namespace A
>> shouldn't block a policy lookup on namespace B.
>>
>> Fixes: 77cc278f7b20 (xfrm: policy: Use sequence counters with associated lock)
>> Cc: stable@vger.kernel.org
>> Cc: Ahmed S. Darwish <a.darwish@linutronix.de>
>> Cc: Peter Zijlstra (Intel) <peterz@infradead.org>
>> Cc: Varad Gautam <varad.gautam@suse.com>
>> Cc: Steffen Klassert <steffen.klassert@secunet.com>
>> Cc: Herbert Xu <herbert@gondor.apana.org.au>
>> Cc: David S. Miller <davem@davemloft.net>
>> Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
> 
> Your patch has a conflicht with ("commit d7b0408934c7 xfrm: policy: Read
> seqcount outside of rcu-read side in xfrm_policy_lookup_bytype")
> from Varad. Can you please rebase onto the ipsec tree?
> 
> Btw. Varad, your above mentioned patch tried to fix the same issue.
> Do we still need it, or is it obsolete with the fix from Frederic?
> 

The patch "xfrm: policy: Read seqcount outside of rcu-read side in
xfrm_policy_lookup_bytype" shouldn't be needed after Frederic's fix since
the offending mutex is now gone. It can be dropped.

Regards,
Varad

> Thanks!
> 

-- 
SUSE Software Solutions Germany GmbH
Maxfeldstr. 5
90409 Nürnberg
Germany

HRB 36809, AG Nürnberg
Geschäftsführer: Felix Imendörffer


^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] xfrm: Fix RCU vs hash_resize_mutex lock inversion
  2021-06-30 12:11   ` Varad Gautam
@ 2021-06-30 12:21     ` Steffen Klassert
  0 siblings, 0 replies; 6+ messages in thread
From: Steffen Klassert @ 2021-06-30 12:21 UTC (permalink / raw)
  To: Varad Gautam
  Cc: Frederic Weisbecker, LKML, Peter Zijlstra, David S . Miller,
	Ahmed S . Darwish, stable, Herbert Xu, netdev

On Wed, Jun 30, 2021 at 02:11:24PM +0200, Varad Gautam wrote:
> On 6/30/21 8:57 AM, Steffen Klassert wrote:
> > On Mon, Jun 28, 2021 at 03:34:28PM +0200, Frederic Weisbecker wrote:
> >> xfrm_bydst_resize() calls synchronize_rcu() while holding
> >> hash_resize_mutex. But then on PREEMPT_RT configurations,
> >> xfrm_policy_lookup_bytype() may acquire that mutex while running in an
> >> RCU read side critical section. This results in a deadlock.
> >>
> >> In fact the scope of hash_resize_mutex is way beyond the purpose of
> >> xfrm_policy_lookup_bytype() to just fetch a coherent and stable policy
> >> for a given destination/direction, along with other details.
> >>
> >> The lower level net->xfrm.xfrm_policy_lock, which among other things
> >> protects per destination/direction references to policy entries, is
> >> enough to serialize and benefit from priority inheritance against the
> >> write side. As a bonus, it makes it officially a per network namespace
> >> synchronization business where a policy table resize on namespace A
> >> shouldn't block a policy lookup on namespace B.
> >>
> >> Fixes: 77cc278f7b20 (xfrm: policy: Use sequence counters with associated lock)
> >> Cc: stable@vger.kernel.org
> >> Cc: Ahmed S. Darwish <a.darwish@linutronix.de>
> >> Cc: Peter Zijlstra (Intel) <peterz@infradead.org>
> >> Cc: Varad Gautam <varad.gautam@suse.com>
> >> Cc: Steffen Klassert <steffen.klassert@secunet.com>
> >> Cc: Herbert Xu <herbert@gondor.apana.org.au>
> >> Cc: David S. Miller <davem@davemloft.net>
> >> Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
> > 
> > Your patch has a conflicht with ("commit d7b0408934c7 xfrm: policy: Read
> > seqcount outside of rcu-read side in xfrm_policy_lookup_bytype")
> > from Varad. Can you please rebase onto the ipsec tree?
> > 
> > Btw. Varad, your above mentioned patch tried to fix the same issue.
> > Do we still need it, or is it obsolete with the fix from Frederic?
> > 
> 
> The patch "xfrm: policy: Read seqcount outside of rcu-read side in
> xfrm_policy_lookup_bytype" shouldn't be needed after Frederic's fix since
> the offending mutex is now gone. It can be dropped.

Ok, so I'll revert your patch and apply Frederic's patch on
top of that revert.

Thanks a lot everyone!

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] xfrm: Fix RCU vs hash_resize_mutex lock inversion
  2021-06-30  6:57 ` Steffen Klassert
  2021-06-30 12:11   ` Varad Gautam
@ 2021-07-05 11:58   ` Steffen Klassert
  2021-07-06 10:49     ` Frederic Weisbecker
  1 sibling, 1 reply; 6+ messages in thread
From: Steffen Klassert @ 2021-07-05 11:58 UTC (permalink / raw)
  To: Frederic Weisbecker
  Cc: LKML, Peter Zijlstra, David S . Miller, Ahmed S . Darwish,
	stable, Varad Gautam, Herbert Xu, netdev

On Wed, Jun 30, 2021 at 08:57:53AM +0200, Steffen Klassert wrote:
> On Mon, Jun 28, 2021 at 03:34:28PM +0200, Frederic Weisbecker wrote:
> > xfrm_bydst_resize() calls synchronize_rcu() while holding
> > hash_resize_mutex. But then on PREEMPT_RT configurations,
> > xfrm_policy_lookup_bytype() may acquire that mutex while running in an
> > RCU read side critical section. This results in a deadlock.
> > 
> > In fact the scope of hash_resize_mutex is way beyond the purpose of
> > xfrm_policy_lookup_bytype() to just fetch a coherent and stable policy
> > for a given destination/direction, along with other details.
> > 
> > The lower level net->xfrm.xfrm_policy_lock, which among other things
> > protects per destination/direction references to policy entries, is
> > enough to serialize and benefit from priority inheritance against the
> > write side. As a bonus, it makes it officially a per network namespace
> > synchronization business where a policy table resize on namespace A
> > shouldn't block a policy lookup on namespace B.
> > 
> > Fixes: 77cc278f7b20 (xfrm: policy: Use sequence counters with associated lock)
> > Cc: stable@vger.kernel.org
> > Cc: Ahmed S. Darwish <a.darwish@linutronix.de>
> > Cc: Peter Zijlstra (Intel) <peterz@infradead.org>
> > Cc: Varad Gautam <varad.gautam@suse.com>
> > Cc: Steffen Klassert <steffen.klassert@secunet.com>
> > Cc: Herbert Xu <herbert@gondor.apana.org.au>
> > Cc: David S. Miller <davem@davemloft.net>
> > Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
> 
> Your patch has a conflicht with ("commit d7b0408934c7 xfrm: policy: Read
> seqcount outside of rcu-read side in xfrm_policy_lookup_bytype")
> from Varad. Can you please rebase onto the ipsec tree?

This patch is now applied to the ipsec tree (on top of the
revert of commit d7b0408934c7).

Thanks everyone!

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] xfrm: Fix RCU vs hash_resize_mutex lock inversion
  2021-07-05 11:58   ` Steffen Klassert
@ 2021-07-06 10:49     ` Frederic Weisbecker
  0 siblings, 0 replies; 6+ messages in thread
From: Frederic Weisbecker @ 2021-07-06 10:49 UTC (permalink / raw)
  To: Steffen Klassert
  Cc: LKML, Peter Zijlstra, David S . Miller, Ahmed S . Darwish,
	stable, Varad Gautam, Herbert Xu, netdev

On Mon, Jul 05, 2021 at 01:58:50PM +0200, Steffen Klassert wrote:
> On Wed, Jun 30, 2021 at 08:57:53AM +0200, Steffen Klassert wrote:
> > On Mon, Jun 28, 2021 at 03:34:28PM +0200, Frederic Weisbecker wrote:
> > > xfrm_bydst_resize() calls synchronize_rcu() while holding
> > > hash_resize_mutex. But then on PREEMPT_RT configurations,
> > > xfrm_policy_lookup_bytype() may acquire that mutex while running in an
> > > RCU read side critical section. This results in a deadlock.
> > > 
> > > In fact the scope of hash_resize_mutex is way beyond the purpose of
> > > xfrm_policy_lookup_bytype() to just fetch a coherent and stable policy
> > > for a given destination/direction, along with other details.
> > > 
> > > The lower level net->xfrm.xfrm_policy_lock, which among other things
> > > protects per destination/direction references to policy entries, is
> > > enough to serialize and benefit from priority inheritance against the
> > > write side. As a bonus, it makes it officially a per network namespace
> > > synchronization business where a policy table resize on namespace A
> > > shouldn't block a policy lookup on namespace B.
> > > 
> > > Fixes: 77cc278f7b20 (xfrm: policy: Use sequence counters with associated lock)
> > > Cc: stable@vger.kernel.org
> > > Cc: Ahmed S. Darwish <a.darwish@linutronix.de>
> > > Cc: Peter Zijlstra (Intel) <peterz@infradead.org>
> > > Cc: Varad Gautam <varad.gautam@suse.com>
> > > Cc: Steffen Klassert <steffen.klassert@secunet.com>
> > > Cc: Herbert Xu <herbert@gondor.apana.org.au>
> > > Cc: David S. Miller <davem@davemloft.net>
> > > Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
> > 
> > Your patch has a conflicht with ("commit d7b0408934c7 xfrm: policy: Read
> > seqcount outside of rcu-read side in xfrm_policy_lookup_bytype")
> > from Varad. Can you please rebase onto the ipsec tree?
> 
> This patch is now applied to the ipsec tree (on top of the
> revert of commit d7b0408934c7).
> 
> Thanks everyone!

Thanks a lot!

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2021-07-06 10:49 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-06-28 13:34 [PATCH] xfrm: Fix RCU vs hash_resize_mutex lock inversion Frederic Weisbecker
2021-06-30  6:57 ` Steffen Klassert
2021-06-30 12:11   ` Varad Gautam
2021-06-30 12:21     ` Steffen Klassert
2021-07-05 11:58   ` Steffen Klassert
2021-07-06 10:49     ` Frederic Weisbecker

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).