All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH net-next 0/6] xfrm: convert xfrm_state_find to rcu
@ 2016-08-09 10:16 Florian Westphal
  2016-08-09 10:16 ` [PATCH net-next 1/6] xfrm: state: use hlist_for_each_entry_rcu helper Florian Westphal
                   ` (6 more replies)
  0 siblings, 7 replies; 8+ messages in thread
From: Florian Westphal @ 2016-08-09 10:16 UTC (permalink / raw)
  To: netdev

This series converts state_by{dst,src,spi} to allow lookups without
holding xfrm_state_lock.

Only xfrm_state_find() is converted here for the (more common) case where
we do not query key manager.

Once more flows are created/destroyed (or the flow cache is overloaded and
often cleans out other entries) this function (and the state lock)
start to show up in perf.

Florian Westphal (6):
      xfrm: state: use hlist_for_each_entry_rcu helper
      xfrm: state: use atomic_inc_not_zero to increment refcount
      xfrm: state: delay freeing until rcu grace period has elapsed
      xfrm: state: add sequence count to detect hash resizes
      xfrm: state: use rcu_deref and assign_pointer helpers
      xfrm: state: don't use lock anymore unless acquire operation is needed

 include/net/netns/xfrm.h |    6 +-
 net/xfrm/xfrm_state.c    |  103 +++++++++++++++++++++++++++++++----------------
 2 files changed, 72 insertions(+), 37 deletions(-)

NB: I have a similar patch series for the policy rwlock, I will send
it once/if this series is applied.

^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH net-next 1/6] xfrm: state: use hlist_for_each_entry_rcu helper
  2016-08-09 10:16 [PATCH net-next 0/6] xfrm: convert xfrm_state_find to rcu Florian Westphal
@ 2016-08-09 10:16 ` Florian Westphal
  2016-08-09 10:16 ` [PATCH net-next 2/6] xfrm: state: use atomic_inc_not_zero to increment refcount Florian Westphal
                   ` (5 subsequent siblings)
  6 siblings, 0 replies; 8+ messages in thread
From: Florian Westphal @ 2016-08-09 10:16 UTC (permalink / raw)
  To: netdev; +Cc: Florian Westphal

This is required once we allow lockless access of bydst/bysrc hash tables.

Signed-off-by: Florian Westphal <fw@strlen.de>
---
 net/xfrm/xfrm_state.c | 38 +++++++++++++++++++-------------------
 1 file changed, 19 insertions(+), 19 deletions(-)

diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 9895a8c..904ab4d 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -76,18 +76,18 @@ static void xfrm_hash_transfer(struct hlist_head *list,
 		h = __xfrm_dst_hash(&x->id.daddr, &x->props.saddr,
 				    x->props.reqid, x->props.family,
 				    nhashmask);
-		hlist_add_head(&x->bydst, ndsttable+h);
+		hlist_add_head_rcu(&x->bydst, ndsttable + h);
 
 		h = __xfrm_src_hash(&x->id.daddr, &x->props.saddr,
 				    x->props.family,
 				    nhashmask);
-		hlist_add_head(&x->bysrc, nsrctable+h);
+		hlist_add_head_rcu(&x->bysrc, nsrctable + h);
 
 		if (x->id.spi) {
 			h = __xfrm_spi_hash(&x->id.daddr, x->id.spi,
 					    x->id.proto, x->props.family,
 					    nhashmask);
-			hlist_add_head(&x->byspi, nspitable+h);
+			hlist_add_head_rcu(&x->byspi, nspitable + h);
 		}
 	}
 }
@@ -520,10 +520,10 @@ int __xfrm_state_delete(struct xfrm_state *x)
 		x->km.state = XFRM_STATE_DEAD;
 		spin_lock(&net->xfrm.xfrm_state_lock);
 		list_del(&x->km.all);
-		hlist_del(&x->bydst);
-		hlist_del(&x->bysrc);
+		hlist_del_rcu(&x->bydst);
+		hlist_del_rcu(&x->bysrc);
 		if (x->id.spi)
-			hlist_del(&x->byspi);
+			hlist_del_rcu(&x->byspi);
 		net->xfrm.state_num--;
 		spin_unlock(&net->xfrm.xfrm_state_lock);
 
@@ -659,7 +659,7 @@ static struct xfrm_state *__xfrm_state_lookup(struct net *net, u32 mark,
 	unsigned int h = xfrm_spi_hash(net, daddr, spi, proto, family);
 	struct xfrm_state *x;
 
-	hlist_for_each_entry(x, net->xfrm.state_byspi+h, byspi) {
+	hlist_for_each_entry_rcu(x, net->xfrm.state_byspi + h, byspi) {
 		if (x->props.family != family ||
 		    x->id.spi       != spi ||
 		    x->id.proto     != proto ||
@@ -683,7 +683,7 @@ static struct xfrm_state *__xfrm_state_lookup_byaddr(struct net *net, u32 mark,
 	unsigned int h = xfrm_src_hash(net, daddr, saddr, family);
 	struct xfrm_state *x;
 
-	hlist_for_each_entry(x, net->xfrm.state_bysrc+h, bysrc) {
+	hlist_for_each_entry_rcu(x, net->xfrm.state_bysrc + h, bysrc) {
 		if (x->props.family != family ||
 		    x->id.proto     != proto ||
 		    !xfrm_addr_equal(&x->id.daddr, daddr, family) ||
@@ -781,7 +781,7 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr,
 
 	spin_lock_bh(&net->xfrm.xfrm_state_lock);
 	h = xfrm_dst_hash(net, daddr, saddr, tmpl->reqid, encap_family);
-	hlist_for_each_entry(x, net->xfrm.state_bydst+h, bydst) {
+	hlist_for_each_entry_rcu(x, net->xfrm.state_bydst + h, bydst) {
 		if (x->props.family == encap_family &&
 		    x->props.reqid == tmpl->reqid &&
 		    (mark & x->mark.m) == x->mark.v &&
@@ -797,7 +797,7 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr,
 		goto found;
 
 	h_wildcard = xfrm_dst_hash(net, daddr, &saddr_wildcard, tmpl->reqid, encap_family);
-	hlist_for_each_entry(x, net->xfrm.state_bydst+h_wildcard, bydst) {
+	hlist_for_each_entry_rcu(x, net->xfrm.state_bydst + h_wildcard, bydst) {
 		if (x->props.family == encap_family &&
 		    x->props.reqid == tmpl->reqid &&
 		    (mark & x->mark.m) == x->mark.v &&
@@ -852,12 +852,12 @@ found:
 		if (km_query(x, tmpl, pol) == 0) {
 			x->km.state = XFRM_STATE_ACQ;
 			list_add(&x->km.all, &net->xfrm.state_all);
-			hlist_add_head(&x->bydst, net->xfrm.state_bydst+h);
+			hlist_add_head_rcu(&x->bydst, net->xfrm.state_bydst + h);
 			h = xfrm_src_hash(net, daddr, saddr, encap_family);
-			hlist_add_head(&x->bysrc, net->xfrm.state_bysrc+h);
+			hlist_add_head_rcu(&x->bysrc, net->xfrm.state_bysrc + h);
 			if (x->id.spi) {
 				h = xfrm_spi_hash(net, &x->id.daddr, x->id.spi, x->id.proto, encap_family);
-				hlist_add_head(&x->byspi, net->xfrm.state_byspi+h);
+				hlist_add_head_rcu(&x->byspi, net->xfrm.state_byspi + h);
 			}
 			x->lft.hard_add_expires_seconds = net->xfrm.sysctl_acq_expires;
 			tasklet_hrtimer_start(&x->mtimer, ktime_set(net->xfrm.sysctl_acq_expires, 0), HRTIMER_MODE_REL);
@@ -945,16 +945,16 @@ static void __xfrm_state_insert(struct xfrm_state *x)
 
 	h = xfrm_dst_hash(net, &x->id.daddr, &x->props.saddr,
 			  x->props.reqid, x->props.family);
-	hlist_add_head(&x->bydst, net->xfrm.state_bydst+h);
+	hlist_add_head_rcu(&x->bydst, net->xfrm.state_bydst + h);
 
 	h = xfrm_src_hash(net, &x->id.daddr, &x->props.saddr, x->props.family);
-	hlist_add_head(&x->bysrc, net->xfrm.state_bysrc+h);
+	hlist_add_head_rcu(&x->bysrc, net->xfrm.state_bysrc + h);
 
 	if (x->id.spi) {
 		h = xfrm_spi_hash(net, &x->id.daddr, x->id.spi, x->id.proto,
 				  x->props.family);
 
-		hlist_add_head(&x->byspi, net->xfrm.state_byspi+h);
+		hlist_add_head_rcu(&x->byspi, net->xfrm.state_byspi + h);
 	}
 
 	tasklet_hrtimer_start(&x->mtimer, ktime_set(1, 0), HRTIMER_MODE_REL);
@@ -1063,9 +1063,9 @@ static struct xfrm_state *__find_acq_core(struct net *net,
 		xfrm_state_hold(x);
 		tasklet_hrtimer_start(&x->mtimer, ktime_set(net->xfrm.sysctl_acq_expires, 0), HRTIMER_MODE_REL);
 		list_add(&x->km.all, &net->xfrm.state_all);
-		hlist_add_head(&x->bydst, net->xfrm.state_bydst+h);
+		hlist_add_head_rcu(&x->bydst, net->xfrm.state_bydst + h);
 		h = xfrm_src_hash(net, daddr, saddr, family);
-		hlist_add_head(&x->bysrc, net->xfrm.state_bysrc+h);
+		hlist_add_head_rcu(&x->bysrc, net->xfrm.state_bysrc + h);
 
 		net->xfrm.state_num++;
 
@@ -1581,7 +1581,7 @@ int xfrm_alloc_spi(struct xfrm_state *x, u32 low, u32 high)
 	if (x->id.spi) {
 		spin_lock_bh(&net->xfrm.xfrm_state_lock);
 		h = xfrm_spi_hash(net, &x->id.daddr, x->id.spi, x->id.proto, x->props.family);
-		hlist_add_head(&x->byspi, net->xfrm.state_byspi+h);
+		hlist_add_head_rcu(&x->byspi, net->xfrm.state_byspi + h);
 		spin_unlock_bh(&net->xfrm.xfrm_state_lock);
 
 		err = 0;
-- 
2.7.3

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH net-next 2/6] xfrm: state: use atomic_inc_not_zero to increment refcount
  2016-08-09 10:16 [PATCH net-next 0/6] xfrm: convert xfrm_state_find to rcu Florian Westphal
  2016-08-09 10:16 ` [PATCH net-next 1/6] xfrm: state: use hlist_for_each_entry_rcu helper Florian Westphal
@ 2016-08-09 10:16 ` Florian Westphal
  2016-08-09 10:16 ` [PATCH net-next 3/6] xfrm: state: delay freeing until rcu grace period has elapsed Florian Westphal
                   ` (4 subsequent siblings)
  6 siblings, 0 replies; 8+ messages in thread
From: Florian Westphal @ 2016-08-09 10:16 UTC (permalink / raw)
  To: netdev; +Cc: Florian Westphal

Once xfrm_state_lookup_byaddr no longer acquires the state lock another
cpu might be freeing the state entry at the same time.

To detect this we use atomic_inc_not_zero; we then signal -EAGAIN to the
caller in case our result was stale.

Signed-off-by: Florian Westphal <fw@strlen.de>
---
 net/xfrm/xfrm_state.c | 21 ++++++++++++++++-----
 1 file changed, 16 insertions(+), 5 deletions(-)

diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 904ab4d..84c1db6 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -37,6 +37,11 @@
 
 static unsigned int xfrm_state_hashmax __read_mostly = 1 * 1024 * 1024;
 
+static inline bool xfrm_state_hold_rcu(struct xfrm_state __rcu *x)
+{
+	return atomic_inc_not_zero(&x->refcnt);
+}
+
 static inline unsigned int xfrm_dst_hash(struct net *net,
 					 const xfrm_address_t *daddr,
 					 const xfrm_address_t *saddr,
@@ -668,7 +673,8 @@ static struct xfrm_state *__xfrm_state_lookup(struct net *net, u32 mark,
 
 		if ((mark & x->mark.m) != x->mark.v)
 			continue;
-		xfrm_state_hold(x);
+		if (!xfrm_state_hold_rcu(x))
+			continue;
 		return x;
 	}
 
@@ -692,7 +698,8 @@ static struct xfrm_state *__xfrm_state_lookup_byaddr(struct net *net, u32 mark,
 
 		if ((mark & x->mark.m) != x->mark.v)
 			continue;
-		xfrm_state_hold(x);
+		if (!xfrm_state_hold_rcu(x))
+			continue;
 		return x;
 	}
 
@@ -871,10 +878,14 @@ found:
 		}
 	}
 out:
-	if (x)
-		xfrm_state_hold(x);
-	else
+	if (x) {
+		if (!xfrm_state_hold_rcu(x)) {
+			*err = -EAGAIN;
+			x = NULL;
+		}
+	} else {
 		*err = acquire_in_progress ? -EAGAIN : error;
+	}
 	spin_unlock_bh(&net->xfrm.xfrm_state_lock);
 	if (to_put)
 		xfrm_state_put(to_put);
-- 
2.7.3

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH net-next 3/6] xfrm: state: delay freeing until rcu grace period has elapsed
  2016-08-09 10:16 [PATCH net-next 0/6] xfrm: convert xfrm_state_find to rcu Florian Westphal
  2016-08-09 10:16 ` [PATCH net-next 1/6] xfrm: state: use hlist_for_each_entry_rcu helper Florian Westphal
  2016-08-09 10:16 ` [PATCH net-next 2/6] xfrm: state: use atomic_inc_not_zero to increment refcount Florian Westphal
@ 2016-08-09 10:16 ` Florian Westphal
  2016-08-09 10:16 ` [PATCH net-next 4/6] xfrm: state: add sequence count to detect hash resizes Florian Westphal
                   ` (3 subsequent siblings)
  6 siblings, 0 replies; 8+ messages in thread
From: Florian Westphal @ 2016-08-09 10:16 UTC (permalink / raw)
  To: netdev; +Cc: Florian Westphal

The hash table backend memory and the state structs are free'd via
kfree/vfree.

Once we only rely on rcu during lookups we have to make sure no other cpu
is currently accessing this before doing the free.

Free operations already happen from worker so we can use synchronize_rcu
to wait until concurrent readers are done.

Signed-off-by: Florian Westphal <fw@strlen.de>
---
 net/xfrm/xfrm_state.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 84c1db6..8e37387 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -146,6 +146,9 @@ static void xfrm_hash_resize(struct work_struct *work)
 	spin_unlock_bh(&net->xfrm.xfrm_state_lock);
 
 	osize = (ohashmask + 1) * sizeof(struct hlist_head);
+
+	synchronize_rcu();
+
 	xfrm_hash_free(odst, osize);
 	xfrm_hash_free(osrc, osize);
 	xfrm_hash_free(ospi, osize);
@@ -369,6 +372,8 @@ static void xfrm_state_gc_task(struct work_struct *work)
 	hlist_move_list(&net->xfrm.state_gc_list, &gc_list);
 	spin_unlock_bh(&xfrm_state_gc_lock);
 
+	synchronize_rcu();
+
 	hlist_for_each_entry_safe(x, tmp, &gc_list, gclist)
 		xfrm_state_gc_destroy(x);
 }
-- 
2.7.3

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH net-next 4/6] xfrm: state: add sequence count to detect hash resizes
  2016-08-09 10:16 [PATCH net-next 0/6] xfrm: convert xfrm_state_find to rcu Florian Westphal
                   ` (2 preceding siblings ...)
  2016-08-09 10:16 ` [PATCH net-next 3/6] xfrm: state: delay freeing until rcu grace period has elapsed Florian Westphal
@ 2016-08-09 10:16 ` Florian Westphal
  2016-08-09 10:16 ` [PATCH net-next 5/6] xfrm: state: use rcu_deref and assign_pointer helpers Florian Westphal
                   ` (2 subsequent siblings)
  6 siblings, 0 replies; 8+ messages in thread
From: Florian Westphal @ 2016-08-09 10:16 UTC (permalink / raw)
  To: netdev; +Cc: Florian Westphal

Once xfrm_state_find is lockless we have to cope with a concurrent
resize operation.

We use a sequence counter to block in case a resize is in progress
and to detect if we might have missed a state that got moved to
a new hash table.

Signed-off-by: Florian Westphal <fw@strlen.de>
---
 net/xfrm/xfrm_state.c | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 8e37387..ac4037c 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -36,6 +36,7 @@
  */
 
 static unsigned int xfrm_state_hashmax __read_mostly = 1 * 1024 * 1024;
+static __read_mostly seqcount_t xfrm_state_hash_generation = SEQCNT_ZERO(xfrm_state_hash_generation);
 
 static inline bool xfrm_state_hold_rcu(struct xfrm_state __rcu *x)
 {
@@ -127,6 +128,7 @@ static void xfrm_hash_resize(struct work_struct *work)
 	}
 
 	spin_lock_bh(&net->xfrm.xfrm_state_lock);
+	write_seqcount_begin(&xfrm_state_hash_generation);
 
 	nhashmask = (nsize / sizeof(struct hlist_head)) - 1U;
 	for (i = net->xfrm.state_hmask; i >= 0; i--)
@@ -143,6 +145,7 @@ static void xfrm_hash_resize(struct work_struct *work)
 	net->xfrm.state_byspi = nspi;
 	net->xfrm.state_hmask = nhashmask;
 
+	write_seqcount_end(&xfrm_state_hash_generation);
 	spin_unlock_bh(&net->xfrm.xfrm_state_lock);
 
 	osize = (ohashmask + 1) * sizeof(struct hlist_head);
@@ -787,10 +790,13 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr,
 	struct xfrm_state *best = NULL;
 	u32 mark = pol->mark.v & pol->mark.m;
 	unsigned short encap_family = tmpl->encap_family;
+	unsigned int sequence;
 	struct km_event c;
 
 	to_put = NULL;
 
+	sequence = read_seqcount_begin(&xfrm_state_hash_generation);
+
 	spin_lock_bh(&net->xfrm.xfrm_state_lock);
 	h = xfrm_dst_hash(net, daddr, saddr, tmpl->reqid, encap_family);
 	hlist_for_each_entry_rcu(x, net->xfrm.state_bydst + h, bydst) {
@@ -894,6 +900,15 @@ out:
 	spin_unlock_bh(&net->xfrm.xfrm_state_lock);
 	if (to_put)
 		xfrm_state_put(to_put);
+
+	if (read_seqcount_retry(&xfrm_state_hash_generation, sequence)) {
+		*err = -EAGAIN;
+		if (x) {
+			xfrm_state_put(x);
+			x = NULL;
+		}
+	}
+
 	return x;
 }
 
-- 
2.7.3

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH net-next 5/6] xfrm: state: use rcu_deref and assign_pointer helpers
  2016-08-09 10:16 [PATCH net-next 0/6] xfrm: convert xfrm_state_find to rcu Florian Westphal
                   ` (3 preceding siblings ...)
  2016-08-09 10:16 ` [PATCH net-next 4/6] xfrm: state: add sequence count to detect hash resizes Florian Westphal
@ 2016-08-09 10:16 ` Florian Westphal
  2016-08-09 10:16 ` [PATCH net-next 6/6] xfrm: state: don't use lock anymore unless acquire operation is needed Florian Westphal
  2016-08-11 11:13 ` [PATCH net-next 0/6] xfrm: convert xfrm_state_find to rcu Steffen Klassert
  6 siblings, 0 replies; 8+ messages in thread
From: Florian Westphal @ 2016-08-09 10:16 UTC (permalink / raw)
  To: netdev; +Cc: Florian Westphal

Before xfrm_state_find() can use rcu_read_lock instead of xfrm_state_lock
we need to switch users of the hash table to assign/obtain the pointers
with the appropriate rcu helpers.

Signed-off-by: Florian Westphal <fw@strlen.de>
---
 net/xfrm/xfrm_state.c | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index ac4037c..53e7867 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -28,6 +28,9 @@
 
 #include "xfrm_hash.h"
 
+#define xfrm_state_deref_prot(table, net) \
+	rcu_dereference_protected((table), lockdep_is_held(&(net)->xfrm.xfrm_state_lock))
+
 /* Each xfrm_state may be linked to two tables:
 
    1. Hash table by (spi,daddr,ah/esp) to find SA by SPI. (input,ctl)
@@ -131,18 +134,17 @@ static void xfrm_hash_resize(struct work_struct *work)
 	write_seqcount_begin(&xfrm_state_hash_generation);
 
 	nhashmask = (nsize / sizeof(struct hlist_head)) - 1U;
+	odst = xfrm_state_deref_prot(net->xfrm.state_bydst, net);
 	for (i = net->xfrm.state_hmask; i >= 0; i--)
-		xfrm_hash_transfer(net->xfrm.state_bydst+i, ndst, nsrc, nspi,
-				   nhashmask);
+		xfrm_hash_transfer(odst + i, ndst, nsrc, nspi, nhashmask);
 
-	odst = net->xfrm.state_bydst;
-	osrc = net->xfrm.state_bysrc;
-	ospi = net->xfrm.state_byspi;
+	osrc = xfrm_state_deref_prot(net->xfrm.state_bysrc, net);
+	ospi = xfrm_state_deref_prot(net->xfrm.state_byspi, net);
 	ohashmask = net->xfrm.state_hmask;
 
-	net->xfrm.state_bydst = ndst;
-	net->xfrm.state_bysrc = nsrc;
-	net->xfrm.state_byspi = nspi;
+	rcu_assign_pointer(net->xfrm.state_bydst, ndst);
+	rcu_assign_pointer(net->xfrm.state_bysrc, nsrc);
+	rcu_assign_pointer(net->xfrm.state_byspi, nspi);
 	net->xfrm.state_hmask = nhashmask;
 
 	write_seqcount_end(&xfrm_state_hash_generation);
-- 
2.7.3

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH net-next 6/6] xfrm: state: don't use lock anymore unless acquire operation is needed
  2016-08-09 10:16 [PATCH net-next 0/6] xfrm: convert xfrm_state_find to rcu Florian Westphal
                   ` (4 preceding siblings ...)
  2016-08-09 10:16 ` [PATCH net-next 5/6] xfrm: state: use rcu_deref and assign_pointer helpers Florian Westphal
@ 2016-08-09 10:16 ` Florian Westphal
  2016-08-11 11:13 ` [PATCH net-next 0/6] xfrm: convert xfrm_state_find to rcu Steffen Klassert
  6 siblings, 0 replies; 8+ messages in thread
From: Florian Westphal @ 2016-08-09 10:16 UTC (permalink / raw)
  To: netdev; +Cc: Florian Westphal

Push the lock down: after the earlier patches we can rely on rcu to
make sure the state struct won't go away.

Signed-off-by: Florian Westphal <fw@strlen.de>
---
 include/net/netns/xfrm.h | 6 +++---
 net/xfrm/xfrm_state.c    | 6 ++++--
 2 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h
index 24cd394..1ab51d1 100644
--- a/include/net/netns/xfrm.h
+++ b/include/net/netns/xfrm.h
@@ -38,9 +38,9 @@ struct netns_xfrm {
 	 * mode. Also, it can be used by ah/esp icmp error handler to find
 	 * offending SA.
 	 */
-	struct hlist_head	*state_bydst;
-	struct hlist_head	*state_bysrc;
-	struct hlist_head	*state_byspi;
+	struct hlist_head	__rcu *state_bydst;
+	struct hlist_head	__rcu *state_bysrc;
+	struct hlist_head	__rcu *state_byspi;
 	unsigned int		state_hmask;
 	unsigned int		state_num;
 	struct work_struct	state_hash_work;
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 53e7867..1a15b65 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -799,7 +799,7 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr,
 
 	sequence = read_seqcount_begin(&xfrm_state_hash_generation);
 
-	spin_lock_bh(&net->xfrm.xfrm_state_lock);
+	rcu_read_lock();
 	h = xfrm_dst_hash(net, daddr, saddr, tmpl->reqid, encap_family);
 	hlist_for_each_entry_rcu(x, net->xfrm.state_bydst + h, bydst) {
 		if (x->props.family == encap_family &&
@@ -870,6 +870,7 @@ found:
 		}
 
 		if (km_query(x, tmpl, pol) == 0) {
+			spin_lock_bh(&net->xfrm.xfrm_state_lock);
 			x->km.state = XFRM_STATE_ACQ;
 			list_add(&x->km.all, &net->xfrm.state_all);
 			hlist_add_head_rcu(&x->bydst, net->xfrm.state_bydst + h);
@@ -883,6 +884,7 @@ found:
 			tasklet_hrtimer_start(&x->mtimer, ktime_set(net->xfrm.sysctl_acq_expires, 0), HRTIMER_MODE_REL);
 			net->xfrm.state_num++;
 			xfrm_hash_grow_check(net, x->bydst.next != NULL);
+			spin_unlock_bh(&net->xfrm.xfrm_state_lock);
 		} else {
 			x->km.state = XFRM_STATE_DEAD;
 			to_put = x;
@@ -899,7 +901,7 @@ out:
 	} else {
 		*err = acquire_in_progress ? -EAGAIN : error;
 	}
-	spin_unlock_bh(&net->xfrm.xfrm_state_lock);
+	rcu_read_unlock();
 	if (to_put)
 		xfrm_state_put(to_put);
 
-- 
2.7.3

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [PATCH net-next 0/6] xfrm: convert xfrm_state_find to rcu
  2016-08-09 10:16 [PATCH net-next 0/6] xfrm: convert xfrm_state_find to rcu Florian Westphal
                   ` (5 preceding siblings ...)
  2016-08-09 10:16 ` [PATCH net-next 6/6] xfrm: state: don't use lock anymore unless acquire operation is needed Florian Westphal
@ 2016-08-11 11:13 ` Steffen Klassert
  6 siblings, 0 replies; 8+ messages in thread
From: Steffen Klassert @ 2016-08-11 11:13 UTC (permalink / raw)
  To: Florian Westphal; +Cc: netdev

On Tue, Aug 09, 2016 at 12:16:03PM +0200, Florian Westphal wrote:
> This series converts state_by{dst,src,spi} to allow lookups without
> holding xfrm_state_lock.
> 
> Only xfrm_state_find() is converted here for the (more common) case where
> we do not query key manager.
> 
> Once more flows are created/destroyed (or the flow cache is overloaded and
> often cleans out other entries) this function (and the state lock)
> start to show up in perf.
> 
> Florian Westphal (6):
>       xfrm: state: use hlist_for_each_entry_rcu helper
>       xfrm: state: use atomic_inc_not_zero to increment refcount
>       xfrm: state: delay freeing until rcu grace period has elapsed
>       xfrm: state: add sequence count to detect hash resizes
>       xfrm: state: use rcu_deref and assign_pointer helpers
>       xfrm: state: don't use lock anymore unless acquire operation is needed
> 
>  include/net/netns/xfrm.h |    6 +-
>  net/xfrm/xfrm_state.c    |  103 +++++++++++++++++++++++++++++++----------------
>  2 files changed, 72 insertions(+), 37 deletions(-)

All applied to the ipsec-next tree, thanks Florian!

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2016-08-11 11:13 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-08-09 10:16 [PATCH net-next 0/6] xfrm: convert xfrm_state_find to rcu Florian Westphal
2016-08-09 10:16 ` [PATCH net-next 1/6] xfrm: state: use hlist_for_each_entry_rcu helper Florian Westphal
2016-08-09 10:16 ` [PATCH net-next 2/6] xfrm: state: use atomic_inc_not_zero to increment refcount Florian Westphal
2016-08-09 10:16 ` [PATCH net-next 3/6] xfrm: state: delay freeing until rcu grace period has elapsed Florian Westphal
2016-08-09 10:16 ` [PATCH net-next 4/6] xfrm: state: add sequence count to detect hash resizes Florian Westphal
2016-08-09 10:16 ` [PATCH net-next 5/6] xfrm: state: use rcu_deref and assign_pointer helpers Florian Westphal
2016-08-09 10:16 ` [PATCH net-next 6/6] xfrm: state: don't use lock anymore unless acquire operation is needed Florian Westphal
2016-08-11 11:13 ` [PATCH net-next 0/6] xfrm: convert xfrm_state_find to rcu Steffen Klassert

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.