All of lore.kernel.org
 help / color / mirror / Atom feed
* [RFC ipsec-next] flow cache removal
@ 2017-06-28 13:26 Florian Westphal
  2017-06-28 13:26 ` [RFC net-next 1/9] vti: revert flush x-netns xfrm cache when vti interface is removed Florian Westphal
                   ` (8 more replies)
  0 siblings, 9 replies; 13+ messages in thread
From: Florian Westphal @ 2017-06-28 13:26 UTC (permalink / raw)
  To: netdev

Here is an updated version of the flow cache removal
set.

Changes since last iteration:
 - rebase
 - split removal into multiple gradual chunks to ease review
 - add a small pcpu xdst cache to reduce alloc/free overhead
   when subsequent packet can re-use previous xdst

I did some sanity testing and ran a few netperf tests.
The most severe hit is with pure UDP_RR workload.
TCP_STREAM is ok-ish, UDP_STREAM is marginally faster with
the simpler pcpu cache (we only instantiate one xfrm_dst and then
reuse it).

We can discuss fine print and possible further work (avoid this_cpu_xchg,
xfrm_genid removal, etc) at NFWS if needed.

Thanks,
Florian

 Documentation/networking/ip-sysctl.txt |    6 
 include/net/flow.h                     |   34 --
 include/net/netns/xfrm.h               |   11 
 include/net/xfrm.h                     |    9 
 net/core/Makefile                      |    1 
 net/ipv4/ip_vti.c                      |   31 -
 net/ipv4/xfrm4_policy.c                |   11 
 net/ipv6/ip6_vti.c                     |   31 -
 net/ipv6/xfrm6_policy.c                |   11 
 net/key/af_key.c                       |    4 
 net/xfrm/xfrm_device.c                 |    3 
 net/xfrm/xfrm_policy.c                 |  334 ++++----------------
 net/xfrm/xfrm_user.c                   |    2 
 security/selinux/include/xfrm.h        |    4 
 include/net/flowcache.h                |   25 -
 net/core/flow.c                        |  516 -------------------------------
 16 files changed, 82 insertions(+), 951 deletions(-)

^ permalink raw reply	[flat|nested] 13+ messages in thread

* [RFC net-next 1/9] vti: revert flush x-netns xfrm cache when vti interface is removed
  2017-06-28 13:26 [RFC ipsec-next] flow cache removal Florian Westphal
@ 2017-06-28 13:26 ` Florian Westphal
  2017-06-28 13:26 ` [RFC net-next 2/9] net: xfrm: revert to lower xfrm dst gc limit Florian Westphal
                   ` (7 subsequent siblings)
  8 siblings, 0 replies; 13+ messages in thread
From: Florian Westphal @ 2017-06-28 13:26 UTC (permalink / raw)
  To: netdev; +Cc: Florian Westphal

The flow cache is removed in the next commit.

Signed-off-by: Florian Westphal <fw@strlen.de>
---
 net/ipv4/ip_vti.c  | 31 -------------------------------
 net/ipv6/ip6_vti.c | 31 -------------------------------
 2 files changed, 62 deletions(-)

diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c
index 0192c255e508..5ed63d250950 100644
--- a/net/ipv4/ip_vti.c
+++ b/net/ipv4/ip_vti.c
@@ -584,33 +584,6 @@ static struct rtnl_link_ops vti_link_ops __read_mostly = {
 	.get_link_net	= ip_tunnel_get_link_net,
 };
 
-static bool is_vti_tunnel(const struct net_device *dev)
-{
-	return dev->netdev_ops == &vti_netdev_ops;
-}
-
-static int vti_device_event(struct notifier_block *unused,
-			    unsigned long event, void *ptr)
-{
-	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
-	struct ip_tunnel *tunnel = netdev_priv(dev);
-
-	if (!is_vti_tunnel(dev))
-		return NOTIFY_DONE;
-
-	switch (event) {
-	case NETDEV_DOWN:
-		if (!net_eq(tunnel->net, dev_net(dev)))
-			xfrm_garbage_collect(tunnel->net);
-		break;
-	}
-	return NOTIFY_DONE;
-}
-
-static struct notifier_block vti_notifier_block __read_mostly = {
-	.notifier_call = vti_device_event,
-};
-
 static int __init vti_init(void)
 {
 	const char *msg;
@@ -618,8 +591,6 @@ static int __init vti_init(void)
 
 	pr_info("IPv4 over IPsec tunneling driver\n");
 
-	register_netdevice_notifier(&vti_notifier_block);
-
 	msg = "tunnel device";
 	err = register_pernet_device(&vti_net_ops);
 	if (err < 0)
@@ -652,7 +623,6 @@ static int __init vti_init(void)
 xfrm_proto_esp_failed:
 	unregister_pernet_device(&vti_net_ops);
 pernet_dev_failed:
-	unregister_netdevice_notifier(&vti_notifier_block);
 	pr_err("vti init: failed to register %s\n", msg);
 	return err;
 }
@@ -664,7 +634,6 @@ static void __exit vti_fini(void)
 	xfrm4_protocol_deregister(&vti_ah4_protocol, IPPROTO_AH);
 	xfrm4_protocol_deregister(&vti_esp4_protocol, IPPROTO_ESP);
 	unregister_pernet_device(&vti_net_ops);
-	unregister_netdevice_notifier(&vti_notifier_block);
 }
 
 module_init(vti_init);
diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c
index 486c2305f53c..79444a4bfd6d 100644
--- a/net/ipv6/ip6_vti.c
+++ b/net/ipv6/ip6_vti.c
@@ -1145,33 +1145,6 @@ static struct xfrm6_protocol vti_ipcomp6_protocol __read_mostly = {
 	.priority	=	100,
 };
 
-static bool is_vti6_tunnel(const struct net_device *dev)
-{
-	return dev->netdev_ops == &vti6_netdev_ops;
-}
-
-static int vti6_device_event(struct notifier_block *unused,
-			     unsigned long event, void *ptr)
-{
-	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
-	struct ip6_tnl *t = netdev_priv(dev);
-
-	if (!is_vti6_tunnel(dev))
-		return NOTIFY_DONE;
-
-	switch (event) {
-	case NETDEV_DOWN:
-		if (!net_eq(t->net, dev_net(dev)))
-			xfrm_garbage_collect(t->net);
-		break;
-	}
-	return NOTIFY_DONE;
-}
-
-static struct notifier_block vti6_notifier_block __read_mostly = {
-	.notifier_call = vti6_device_event,
-};
-
 /**
  * vti6_tunnel_init - register protocol and reserve needed resources
  *
@@ -1182,8 +1155,6 @@ static int __init vti6_tunnel_init(void)
 	const char *msg;
 	int err;
 
-	register_netdevice_notifier(&vti6_notifier_block);
-
 	msg = "tunnel device";
 	err = register_pernet_device(&vti6_net_ops);
 	if (err < 0)
@@ -1216,7 +1187,6 @@ static int __init vti6_tunnel_init(void)
 xfrm_proto_esp_failed:
 	unregister_pernet_device(&vti6_net_ops);
 pernet_dev_failed:
-	unregister_netdevice_notifier(&vti6_notifier_block);
 	pr_err("vti6 init: failed to register %s\n", msg);
 	return err;
 }
@@ -1231,7 +1201,6 @@ static void __exit vti6_tunnel_cleanup(void)
 	xfrm6_protocol_deregister(&vti_ah6_protocol, IPPROTO_AH);
 	xfrm6_protocol_deregister(&vti_esp6_protocol, IPPROTO_ESP);
 	unregister_pernet_device(&vti6_net_ops);
-	unregister_netdevice_notifier(&vti6_notifier_block);
 }
 
 module_init(vti6_tunnel_init);
-- 
2.13.0

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [RFC net-next 2/9] net: xfrm: revert to lower xfrm dst gc limit
  2017-06-28 13:26 [RFC ipsec-next] flow cache removal Florian Westphal
  2017-06-28 13:26 ` [RFC net-next 1/9] vti: revert flush x-netns xfrm cache when vti interface is removed Florian Westphal
@ 2017-06-28 13:26 ` Florian Westphal
  2017-06-28 13:26 ` [RFC net-next 3/9] xfrm_policy: bypass flow_cache_lookup Florian Westphal
                   ` (6 subsequent siblings)
  8 siblings, 0 replies; 13+ messages in thread
From: Florian Westphal @ 2017-06-28 13:26 UTC (permalink / raw)
  To: netdev; +Cc: Florian Westphal

revert c386578f1cdb4dac230395 ("xfrm: Let the flowcache handle its size by default.").

Once we remove flow cache, we don't have a flow cache limit anymore.
We must not allow (virtually) unlimited allocations of xfrm dst entries.
Revert back to the old xfrm dst gc limits.

Signed-off-by: Florian Westphal <fw@strlen.de>
---
 Documentation/networking/ip-sysctl.txt | 6 ++----
 net/ipv4/xfrm4_policy.c                | 2 +-
 net/ipv6/xfrm6_policy.c                | 2 +-
 3 files changed, 4 insertions(+), 6 deletions(-)

diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index 974ab47ae53a..f485d553e65c 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -1291,8 +1291,7 @@ tag - INTEGER
 xfrm4_gc_thresh - INTEGER
 	The threshold at which we will start garbage collecting for IPv4
 	destination cache entries.  At twice this value the system will
-	refuse new allocations. The value must be set below the flowcache
-	limit (4096 * number of online cpus) to take effect.
+	refuse new allocations.
 
 igmp_link_local_mcast_reports - BOOLEAN
 	Enable IGMP reports for link local multicast groups in the
@@ -1778,8 +1777,7 @@ ratelimit - INTEGER
 xfrm6_gc_thresh - INTEGER
 	The threshold at which we will start garbage collecting for IPv6
 	destination cache entries.  At twice this value the system will
-	refuse new allocations. The value must be set below the flowcache
-	limit (4096 * number of online cpus) to take effect.
+	refuse new allocations.
 
 
 IPv6 Update by:
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index 71b4ecc195c7..19455a5fc328 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -266,7 +266,7 @@ static struct dst_ops xfrm4_dst_ops_template = {
 	.destroy =		xfrm4_dst_destroy,
 	.ifdown =		xfrm4_dst_ifdown,
 	.local_out =		__ip_local_out,
-	.gc_thresh =		INT_MAX,
+	.gc_thresh =		32768,
 };
 
 static const struct xfrm_policy_afinfo xfrm4_policy_afinfo = {
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index 79651bc71bf0..ae30dc4973e8 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -286,7 +286,7 @@ static struct dst_ops xfrm6_dst_ops_template = {
 	.destroy =		xfrm6_dst_destroy,
 	.ifdown =		xfrm6_dst_ifdown,
 	.local_out =		__ip6_local_out,
-	.gc_thresh =		INT_MAX,
+	.gc_thresh =		32768,
 };
 
 static const struct xfrm_policy_afinfo xfrm6_policy_afinfo = {
-- 
2.13.0

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [RFC net-next 3/9] xfrm_policy: bypass flow_cache_lookup
  2017-06-28 13:26 [RFC ipsec-next] flow cache removal Florian Westphal
  2017-06-28 13:26 ` [RFC net-next 1/9] vti: revert flush x-netns xfrm cache when vti interface is removed Florian Westphal
  2017-06-28 13:26 ` [RFC net-next 2/9] net: xfrm: revert to lower xfrm dst gc limit Florian Westphal
@ 2017-06-28 13:26 ` Florian Westphal
  2017-06-28 13:26 ` [RFC net-next 4/9] xfrm_policy: remove always true/false branches Florian Westphal
                   ` (5 subsequent siblings)
  8 siblings, 0 replies; 13+ messages in thread
From: Florian Westphal @ 2017-06-28 13:26 UTC (permalink / raw)
  To: netdev; +Cc: Florian Westphal

Instead of consulting flow cache, call the xfrm bundle/policy lookup
functions directly.  This pretends the flow cache had no entry.

This helps to gradually remove flow cache integration,
followup commit will remove the dead code that this change adds.

Signed-off-by: Florian Westphal <fw@strlen.de>
---
 net/xfrm/xfrm_policy.c | 14 +++++---------
 1 file changed, 5 insertions(+), 9 deletions(-)

diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index a3dc7ab0b7ed..084736ff2681 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -2056,13 +2056,12 @@ static struct xfrm_dst *xfrm_create_dummy_bundle(struct net *net,
 }
 
 static struct flow_cache_object *
-xfrm_bundle_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir,
-		   struct flow_cache_object *oldflo, void *ctx)
+xfrm_bundle_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir, struct xfrm_flo *xflo)
 {
-	struct xfrm_flo *xflo = (struct xfrm_flo *)ctx;
 	struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
 	struct xfrm_dst *xdst, *new_xdst;
 	int num_pols = 0, num_xfrms = 0, i, err, pol_dead;
+	struct flow_cache_object *oldflo = NULL;
 
 	/* Check if the policies from old bundle are usable */
 	xdst = NULL;
@@ -2132,8 +2131,6 @@ xfrm_bundle_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir,
 		dst_release_immediate(&xdst->u.dst);
 	}
 
-	/* We do need to return one reference for original caller */
-	dst_hold(&new_xdst->u.dst);
 	return &new_xdst->flo;
 
 make_dummy_bundle:
@@ -2246,8 +2243,7 @@ struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig,
 		    !net->xfrm.policy_count[XFRM_POLICY_OUT])
 			goto nopol;
 
-		flo = flow_cache_lookup(net, fl, family, dir,
-					xfrm_bundle_lookup, &xflo);
+		flo = xfrm_bundle_lookup(net, fl, family, dir, &xflo);
 		if (flo == NULL)
 			goto nopol;
 		if (IS_ERR(flo)) {
@@ -2493,8 +2489,8 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
 	if (!pol) {
 		struct flow_cache_object *flo;
 
-		flo = flow_cache_lookup(net, &fl, family, fl_dir,
-					xfrm_policy_lookup, NULL);
+		flo = xfrm_policy_lookup(net, &fl, family, dir, NULL, NULL);
+
 		if (IS_ERR_OR_NULL(flo))
 			pol = ERR_CAST(flo);
 		else
-- 
2.13.0

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [RFC net-next 4/9] xfrm_policy: remove always true/false branches
  2017-06-28 13:26 [RFC ipsec-next] flow cache removal Florian Westphal
                   ` (2 preceding siblings ...)
  2017-06-28 13:26 ` [RFC net-next 3/9] xfrm_policy: bypass flow_cache_lookup Florian Westphal
@ 2017-06-28 13:26 ` Florian Westphal
  2017-06-28 13:26 ` [RFC net-next 5/9] xfrm_policy: kill flow to policy dir conversion Florian Westphal
                   ` (4 subsequent siblings)
  8 siblings, 0 replies; 13+ messages in thread
From: Florian Westphal @ 2017-06-28 13:26 UTC (permalink / raw)
  To: netdev; +Cc: Florian Westphal

After the previous change, oldflo and xdst are always NULL.
These branches were already removed by gcc, so this doesn't change the generated code.

Signed-off-by: Florian Westphal <fw@strlen.de>
---
 net/xfrm/xfrm_policy.c | 74 ++++++++++----------------------------------------
 1 file changed, 14 insertions(+), 60 deletions(-)

diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 084736ff2681..64cef0e601b8 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -2060,48 +2060,23 @@ xfrm_bundle_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir,
 {
 	struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
 	struct xfrm_dst *xdst, *new_xdst;
-	int num_pols = 0, num_xfrms = 0, i, err, pol_dead;
-	struct flow_cache_object *oldflo = NULL;
+	int num_pols = 0, num_xfrms = 0, err;
 
 	/* Check if the policies from old bundle are usable */
 	xdst = NULL;
-	if (oldflo) {
-		xdst = container_of(oldflo, struct xfrm_dst, flo);
-		num_pols = xdst->num_pols;
-		num_xfrms = xdst->num_xfrms;
-		pol_dead = 0;
-		for (i = 0; i < num_pols; i++) {
-			pols[i] = xdst->pols[i];
-			pol_dead |= pols[i]->walk.dead;
-		}
-		if (pol_dead) {
-			/* Mark DST_OBSOLETE_DEAD to fail the next
-			 * xfrm_dst_check()
-			 */
-			xdst->u.dst.obsolete = DST_OBSOLETE_DEAD;
-			dst_release_immediate(&xdst->u.dst);
-			xdst = NULL;
-			num_pols = 0;
-			num_xfrms = 0;
-			oldflo = NULL;
-		}
-	}
-
 	/* Resolve policies to use if we couldn't get them from
 	 * previous cache entry */
-	if (xdst == NULL) {
-		num_pols = 1;
-		pols[0] = __xfrm_policy_lookup(net, fl, family,
-					       flow_to_policy_dir(dir));
-		err = xfrm_expand_policies(fl, family, pols,
+	num_pols = 1;
+	pols[0] = __xfrm_policy_lookup(net, fl, family,
+				       flow_to_policy_dir(dir));
+	err = xfrm_expand_policies(fl, family, pols,
 					   &num_pols, &num_xfrms);
-		if (err < 0)
-			goto inc_error;
-		if (num_pols == 0)
-			return NULL;
-		if (num_xfrms <= 0)
-			goto make_dummy_bundle;
-	}
+	if (err < 0)
+		goto inc_error;
+	if (num_pols == 0)
+		return NULL;
+	if (num_xfrms <= 0)
+		goto make_dummy_bundle;
 
 	new_xdst = xfrm_resolve_and_create_bundle(pols, num_pols, fl, family,
 						  xflo->dst_orig);
@@ -2109,26 +2084,10 @@ xfrm_bundle_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir,
 		err = PTR_ERR(new_xdst);
 		if (err != -EAGAIN)
 			goto error;
-		if (oldflo == NULL)
-			goto make_dummy_bundle;
-		dst_hold(&xdst->u.dst);
-		return oldflo;
+		goto make_dummy_bundle;
 	} else if (new_xdst == NULL) {
 		num_xfrms = 0;
-		if (oldflo == NULL)
-			goto make_dummy_bundle;
-		xdst->num_xfrms = 0;
-		dst_hold(&xdst->u.dst);
-		return oldflo;
-	}
-
-	/* Kill the previous bundle */
-	if (xdst) {
-		/* The policies were stolen for newly generated bundle */
-		xdst->num_pols = 0;
-		/* Mark DST_OBSOLETE_DEAD to fail the next xfrm_dst_check() */
-		xdst->u.dst.obsolete = DST_OBSOLETE_DEAD;
-		dst_release_immediate(&xdst->u.dst);
+		goto make_dummy_bundle;
 	}
 
 	return &new_xdst->flo;
@@ -2152,12 +2111,7 @@ xfrm_bundle_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir,
 inc_error:
 	XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR);
 error:
-	if (xdst != NULL) {
-		/* Mark DST_OBSOLETE_DEAD to fail the next xfrm_dst_check() */
-		xdst->u.dst.obsolete = DST_OBSOLETE_DEAD;
-		dst_release_immediate(&xdst->u.dst);
-	} else
-		xfrm_pols_put(pols, num_pols);
+	xfrm_pols_put(pols, num_pols);
 	return ERR_PTR(err);
 }
 
-- 
2.13.0

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [RFC net-next 5/9] xfrm_policy: kill flow to policy dir conversion
  2017-06-28 13:26 [RFC ipsec-next] flow cache removal Florian Westphal
                   ` (3 preceding siblings ...)
  2017-06-28 13:26 ` [RFC net-next 4/9] xfrm_policy: remove always true/false branches Florian Westphal
@ 2017-06-28 13:26 ` Florian Westphal
  2017-06-28 13:26 ` [RFC net-next 6/9] xfrm_policy: remove xfrm_policy_lookup Florian Westphal
                   ` (3 subsequent siblings)
  8 siblings, 0 replies; 13+ messages in thread
From: Florian Westphal @ 2017-06-28 13:26 UTC (permalink / raw)
  To: netdev; +Cc: Florian Westphal

XFRM_POLICY_IN/OUT/FWD are identical to FLOW_DIR_*, so gcc already
removed this function as it just returns the argument.  Again, no
code change.

Signed-off-by: Florian Westphal <fw@strlen.de>
---
 net/xfrm/xfrm_policy.c | 46 ++++------------------------------------------
 1 file changed, 4 insertions(+), 42 deletions(-)

diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 64cef0e601b8..626351915a97 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -1191,24 +1191,6 @@ __xfrm_policy_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir
 	return xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_MAIN, fl, family, dir);
 }
 
-static int flow_to_policy_dir(int dir)
-{
-	if (XFRM_POLICY_IN == FLOW_DIR_IN &&
-	    XFRM_POLICY_OUT == FLOW_DIR_OUT &&
-	    XFRM_POLICY_FWD == FLOW_DIR_FWD)
-		return dir;
-
-	switch (dir) {
-	default:
-	case FLOW_DIR_IN:
-		return XFRM_POLICY_IN;
-	case FLOW_DIR_OUT:
-		return XFRM_POLICY_OUT;
-	case FLOW_DIR_FWD:
-		return XFRM_POLICY_FWD;
-	}
-}
-
 static struct flow_cache_object *
 xfrm_policy_lookup(struct net *net, const struct flowi *fl, u16 family,
 		   u8 dir, struct flow_cache_object *old_obj, void *ctx)
@@ -1218,7 +1200,7 @@ xfrm_policy_lookup(struct net *net, const struct flowi *fl, u16 family,
 	if (old_obj)
 		xfrm_pol_put(container_of(old_obj, struct xfrm_policy, flo));
 
-	pol = __xfrm_policy_lookup(net, fl, family, flow_to_policy_dir(dir));
+	pol = __xfrm_policy_lookup(net, fl, family, dir);
 	if (IS_ERR_OR_NULL(pol))
 		return ERR_CAST(pol);
 
@@ -1229,23 +1211,6 @@ xfrm_policy_lookup(struct net *net, const struct flowi *fl, u16 family,
 	return &pol->flo;
 }
 
-static inline int policy_to_flow_dir(int dir)
-{
-	if (XFRM_POLICY_IN == FLOW_DIR_IN &&
-	    XFRM_POLICY_OUT == FLOW_DIR_OUT &&
-	    XFRM_POLICY_FWD == FLOW_DIR_FWD)
-		return dir;
-	switch (dir) {
-	default:
-	case XFRM_POLICY_IN:
-		return FLOW_DIR_IN;
-	case XFRM_POLICY_OUT:
-		return FLOW_DIR_OUT;
-	case XFRM_POLICY_FWD:
-		return FLOW_DIR_FWD;
-	}
-}
-
 static struct xfrm_policy *xfrm_sk_policy_lookup(const struct sock *sk, int dir,
 						 const struct flowi *fl, u16 family)
 {
@@ -1265,7 +1230,7 @@ static struct xfrm_policy *xfrm_sk_policy_lookup(const struct sock *sk, int dir,
 			}
 			err = security_xfrm_policy_lookup(pol->security,
 						      fl->flowi_secid,
-						      policy_to_flow_dir(dir));
+						      dir);
 			if (!err) {
 				if (!xfrm_pol_hold_rcu(pol))
 					goto again;
@@ -2067,8 +2032,7 @@ xfrm_bundle_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir,
 	/* Resolve policies to use if we couldn't get them from
 	 * previous cache entry */
 	num_pols = 1;
-	pols[0] = __xfrm_policy_lookup(net, fl, family,
-				       flow_to_policy_dir(dir));
+	pols[0] = __xfrm_policy_lookup(net, fl, family, dir);
 	err = xfrm_expand_policies(fl, family, pols,
 					   &num_pols, &num_xfrms);
 	if (err < 0)
@@ -2146,7 +2110,7 @@ struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig,
 	struct xfrm_dst *xdst;
 	struct dst_entry *dst, *route;
 	u16 family = dst_orig->ops->family;
-	u8 dir = policy_to_flow_dir(XFRM_POLICY_OUT);
+	u8 dir = XFRM_POLICY_OUT;
 	int i, err, num_pols, num_xfrms = 0, drop_pols = 0;
 
 	dst = NULL;
@@ -2403,12 +2367,10 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
 	int pi;
 	int reverse;
 	struct flowi fl;
-	u8 fl_dir;
 	int xerr_idx = -1;
 
 	reverse = dir & ~XFRM_POLICY_MASK;
 	dir &= XFRM_POLICY_MASK;
-	fl_dir = policy_to_flow_dir(dir);
 
 	if (__xfrm_decode_session(skb, &fl, family, reverse) < 0) {
 		XFRM_INC_STATS(net, LINUX_MIB_XFRMINHDRERROR);
-- 
2.13.0

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [RFC net-next 6/9] xfrm_policy: remove xfrm_policy_lookup
  2017-06-28 13:26 [RFC ipsec-next] flow cache removal Florian Westphal
                   ` (4 preceding siblings ...)
  2017-06-28 13:26 ` [RFC net-next 5/9] xfrm_policy: kill flow to policy dir conversion Florian Westphal
@ 2017-06-28 13:26 ` Florian Westphal
  2017-06-28 13:26 ` [RFC net-next 7/9] xfrm_policy: make xfrm_bundle_lookup return xfrm dst object Florian Westphal
                   ` (2 subsequent siblings)
  8 siblings, 0 replies; 13+ messages in thread
From: Florian Westphal @ 2017-06-28 13:26 UTC (permalink / raw)
  To: netdev; +Cc: Florian Westphal

This removes the wrapper and renames the __xfrm_policy_lookup variant
to get rid of another place that used flow cache objects.

Signed-off-by: Florian Westphal <fw@strlen.de>
---
 net/xfrm/xfrm_policy.c | 36 ++++--------------------------------
 1 file changed, 4 insertions(+), 32 deletions(-)

diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 626351915a97..86907731f161 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -1179,7 +1179,7 @@ static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type,
 }
 
 static struct xfrm_policy *
-__xfrm_policy_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir)
+xfrm_policy_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir)
 {
 #ifdef CONFIG_XFRM_SUB_POLICY
 	struct xfrm_policy *pol;
@@ -1191,26 +1191,6 @@ __xfrm_policy_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir
 	return xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_MAIN, fl, family, dir);
 }
 
-static struct flow_cache_object *
-xfrm_policy_lookup(struct net *net, const struct flowi *fl, u16 family,
-		   u8 dir, struct flow_cache_object *old_obj, void *ctx)
-{
-	struct xfrm_policy *pol;
-
-	if (old_obj)
-		xfrm_pol_put(container_of(old_obj, struct xfrm_policy, flo));
-
-	pol = __xfrm_policy_lookup(net, fl, family, dir);
-	if (IS_ERR_OR_NULL(pol))
-		return ERR_CAST(pol);
-
-	/* Resolver returns two references:
-	 * one for cache and one for caller of flow_cache_lookup() */
-	xfrm_pol_hold(pol);
-
-	return &pol->flo;
-}
-
 static struct xfrm_policy *xfrm_sk_policy_lookup(const struct sock *sk, int dir,
 						 const struct flowi *fl, u16 family)
 {
@@ -2032,7 +2012,7 @@ xfrm_bundle_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir,
 	/* Resolve policies to use if we couldn't get them from
 	 * previous cache entry */
 	num_pols = 1;
-	pols[0] = __xfrm_policy_lookup(net, fl, family, dir);
+	pols[0] = xfrm_policy_lookup(net, fl, family, dir);
 	err = xfrm_expand_policies(fl, family, pols,
 					   &num_pols, &num_xfrms);
 	if (err < 0)
@@ -2402,16 +2382,8 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
 		}
 	}
 
-	if (!pol) {
-		struct flow_cache_object *flo;
-
-		flo = xfrm_policy_lookup(net, &fl, family, dir, NULL, NULL);
-
-		if (IS_ERR_OR_NULL(flo))
-			pol = ERR_CAST(flo);
-		else
-			pol = container_of(flo, struct xfrm_policy, flo);
-	}
+	if (!pol)
+		pol = xfrm_policy_lookup(net, &fl, family, dir);
 
 	if (IS_ERR(pol)) {
 		XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR);
-- 
2.13.0

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [RFC net-next 7/9] xfrm_policy: make xfrm_bundle_lookup return xfrm dst object
  2017-06-28 13:26 [RFC ipsec-next] flow cache removal Florian Westphal
                   ` (5 preceding siblings ...)
  2017-06-28 13:26 ` [RFC net-next 6/9] xfrm_policy: remove xfrm_policy_lookup Florian Westphal
@ 2017-06-28 13:26 ` Florian Westphal
  2017-06-28 13:26 ` [RFC net-next 8/9] xfrm: remove flow cache Florian Westphal
  2017-06-28 13:26 ` [RFC net-next 9/9] xfrm: add a small xdst pcpu cache Florian Westphal
  8 siblings, 0 replies; 13+ messages in thread
From: Florian Westphal @ 2017-06-28 13:26 UTC (permalink / raw)
  To: netdev; +Cc: Florian Westphal

This allows to remove flow cache object embedded in struct xfrm_dst.

Signed-off-by: Florian Westphal <fw@strlen.de>
---
 net/xfrm/xfrm_policy.c | 28 ++++++++++++----------------
 1 file changed, 12 insertions(+), 16 deletions(-)

diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 86907731f161..5bb049d8e8d5 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -2000,15 +2000,13 @@ static struct xfrm_dst *xfrm_create_dummy_bundle(struct net *net,
 	goto out;
 }
 
-static struct flow_cache_object *
+static struct xfrm_dst *
 xfrm_bundle_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir, struct xfrm_flo *xflo)
 {
 	struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
-	struct xfrm_dst *xdst, *new_xdst;
 	int num_pols = 0, num_xfrms = 0, err;
+	struct xfrm_dst *xdst;
 
-	/* Check if the policies from old bundle are usable */
-	xdst = NULL;
 	/* Resolve policies to use if we couldn't get them from
 	 * previous cache entry */
 	num_pols = 1;
@@ -2022,19 +2020,19 @@ xfrm_bundle_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir,
 	if (num_xfrms <= 0)
 		goto make_dummy_bundle;
 
-	new_xdst = xfrm_resolve_and_create_bundle(pols, num_pols, fl, family,
+	xdst = xfrm_resolve_and_create_bundle(pols, num_pols, fl, family,
 						  xflo->dst_orig);
-	if (IS_ERR(new_xdst)) {
-		err = PTR_ERR(new_xdst);
+	if (IS_ERR(xdst)) {
+		err = PTR_ERR(xdst);
 		if (err != -EAGAIN)
 			goto error;
 		goto make_dummy_bundle;
-	} else if (new_xdst == NULL) {
+	} else if (xdst == NULL) {
 		num_xfrms = 0;
 		goto make_dummy_bundle;
 	}
 
-	return &new_xdst->flo;
+	return xdst;
 
 make_dummy_bundle:
 	/* We found policies, but there's no bundles to instantiate:
@@ -2050,7 +2048,7 @@ xfrm_bundle_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir,
 	memcpy(xdst->pols, pols, sizeof(struct xfrm_policy *) * num_pols);
 
 	dst_hold(&xdst->u.dst);
-	return &xdst->flo;
+	return xdst;
 
 inc_error:
 	XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR);
@@ -2086,7 +2084,6 @@ struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig,
 			      const struct sock *sk, int flags)
 {
 	struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
-	struct flow_cache_object *flo;
 	struct xfrm_dst *xdst;
 	struct dst_entry *dst, *route;
 	u16 family = dst_orig->ops->family;
@@ -2141,14 +2138,13 @@ struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig,
 		    !net->xfrm.policy_count[XFRM_POLICY_OUT])
 			goto nopol;
 
-		flo = xfrm_bundle_lookup(net, fl, family, dir, &xflo);
-		if (flo == NULL)
+		xdst = xfrm_bundle_lookup(net, fl, family, dir, &xflo);
+		if (xdst == NULL)
 			goto nopol;
-		if (IS_ERR(flo)) {
-			err = PTR_ERR(flo);
+		if (IS_ERR(xdst)) {
+			err = PTR_ERR(xdst);
 			goto dropdst;
 		}
-		xdst = container_of(flo, struct xfrm_dst, flo);
 
 		num_pols = xdst->num_pols;
 		num_xfrms = xdst->num_xfrms;
-- 
2.13.0

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [RFC net-next 8/9] xfrm: remove flow cache
  2017-06-28 13:26 [RFC ipsec-next] flow cache removal Florian Westphal
                   ` (6 preceding siblings ...)
  2017-06-28 13:26 ` [RFC net-next 7/9] xfrm_policy: make xfrm_bundle_lookup return xfrm dst object Florian Westphal
@ 2017-06-28 13:26 ` Florian Westphal
  2017-06-28 13:26 ` [RFC net-next 9/9] xfrm: add a small xdst pcpu cache Florian Westphal
  8 siblings, 0 replies; 13+ messages in thread
From: Florian Westphal @ 2017-06-28 13:26 UTC (permalink / raw)
  To: netdev; +Cc: Florian Westphal

After the rcu conversions, the performance degradation in forward tests
isn't that noticeable anymore.

See next patch for some numbers.

Signed-off-by: Florian Westphal <fw@strlen.de>
---
 include/net/flow.h              |  34 ---
 include/net/flowcache.h         |  25 --
 include/net/netns/xfrm.h        |  11 -
 include/net/xfrm.h              |   8 -
 net/core/Makefile               |   1 -
 net/core/flow.c                 | 516 ----------------------------------------
 net/ipv4/xfrm4_policy.c         |   9 -
 net/ipv6/xfrm6_policy.c         |   9 -
 net/key/af_key.c                |   4 -
 net/xfrm/xfrm_device.c          |   2 -
 net/xfrm/xfrm_policy.c          | 108 ---------
 net/xfrm/xfrm_user.c            |   2 -
 security/selinux/include/xfrm.h |   4 +-
 13 files changed, 1 insertion(+), 732 deletions(-)
 delete mode 100644 include/net/flowcache.h
 delete mode 100644 net/core/flow.c

diff --git a/include/net/flow.h b/include/net/flow.h
index bae198b3039e..f3dc61b29bb5 100644
--- a/include/net/flow.h
+++ b/include/net/flow.h
@@ -218,40 +218,6 @@ static inline unsigned int flow_key_size(u16 family)
 	return 0;
 }
 
-#define FLOW_DIR_IN	0
-#define FLOW_DIR_OUT	1
-#define FLOW_DIR_FWD	2
-
-struct net;
-struct sock;
-struct flow_cache_ops;
-
-struct flow_cache_object {
-	const struct flow_cache_ops *ops;
-};
-
-struct flow_cache_ops {
-	struct flow_cache_object *(*get)(struct flow_cache_object *);
-	int (*check)(struct flow_cache_object *);
-	void (*delete)(struct flow_cache_object *);
-};
-
-typedef struct flow_cache_object *(*flow_resolve_t)(
-		struct net *net, const struct flowi *key, u16 family,
-		u8 dir, struct flow_cache_object *oldobj, void *ctx);
-
-struct flow_cache_object *flow_cache_lookup(struct net *net,
-					    const struct flowi *key, u16 family,
-					    u8 dir, flow_resolve_t resolver,
-					    void *ctx);
-int flow_cache_init(struct net *net);
-void flow_cache_fini(struct net *net);
-void flow_cache_hp_init(void);
-
-void flow_cache_flush(struct net *net);
-void flow_cache_flush_deferred(struct net *net);
-extern atomic_t flow_cache_genid;
-
 __u32 __get_hash_from_flowi6(const struct flowi6 *fl6, struct flow_keys *keys);
 
 static inline __u32 get_hash_from_flowi6(const struct flowi6 *fl6)
diff --git a/include/net/flowcache.h b/include/net/flowcache.h
deleted file mode 100644
index 51eb971e8973..000000000000
--- a/include/net/flowcache.h
+++ /dev/null
@@ -1,25 +0,0 @@
-#ifndef _NET_FLOWCACHE_H
-#define _NET_FLOWCACHE_H
-
-#include <linux/interrupt.h>
-#include <linux/types.h>
-#include <linux/timer.h>
-#include <linux/notifier.h>
-
-struct flow_cache_percpu {
-	struct hlist_head		*hash_table;
-	unsigned int			hash_count;
-	u32				hash_rnd;
-	int				hash_rnd_recalc;
-	struct tasklet_struct		flush_tasklet;
-};
-
-struct flow_cache {
-	u32				hash_shift;
-	struct flow_cache_percpu __percpu *percpu;
-	struct hlist_node		node;
-	unsigned int			low_watermark;
-	unsigned int			high_watermark;
-	struct timer_list		rnd_timer;
-};
-#endif	/* _NET_FLOWCACHE_H */
diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h
index 27bb9633c69d..611521646dd4 100644
--- a/include/net/netns/xfrm.h
+++ b/include/net/netns/xfrm.h
@@ -6,7 +6,6 @@
 #include <linux/workqueue.h>
 #include <linux/xfrm.h>
 #include <net/dst_ops.h>
-#include <net/flowcache.h>
 
 struct ctl_table_header;
 
@@ -73,16 +72,6 @@ struct netns_xfrm {
 	spinlock_t xfrm_state_lock;
 	spinlock_t xfrm_policy_lock;
 	struct mutex xfrm_cfg_mutex;
-
-	/* flow cache part */
-	struct flow_cache	flow_cache_global;
-	atomic_t		flow_cache_genid;
-	struct list_head	flow_cache_gc_list;
-	atomic_t		flow_cache_gc_count;
-	spinlock_t		flow_cache_gc_lock;
-	struct work_struct	flow_cache_gc_work;
-	struct work_struct	flow_cache_flush_work;
-	struct mutex		flow_flush_sem;
 };
 
 #endif
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 01f5bc144ee5..9b85367529a4 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -562,7 +562,6 @@ struct xfrm_policy {
 	atomic_t		refcnt;
 	struct timer_list	timer;
 
-	struct flow_cache_object flo;
 	atomic_t		genid;
 	u32			priority;
 	u32			index;
@@ -977,7 +976,6 @@ struct xfrm_dst {
 		struct rt6_info		rt6;
 	} u;
 	struct dst_entry *route;
-	struct flow_cache_object flo;
 	struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
 	int num_pols, num_xfrms;
 	u32 xfrm_genid;
@@ -1225,9 +1223,6 @@ static inline void xfrm_sk_free_policy(struct sock *sk)
 	}
 }
 
-void xfrm_garbage_collect(struct net *net);
-void xfrm_garbage_collect_deferred(struct net *net);
-
 #else
 
 static inline void xfrm_sk_free_policy(struct sock *sk) {}
@@ -1262,9 +1257,6 @@ static inline int xfrm6_policy_check_reverse(struct sock *sk, int dir,
 {
 	return 1;
 }
-static inline void xfrm_garbage_collect(struct net *net)
-{
-}
 #endif
 
 static __inline__
diff --git a/net/core/Makefile b/net/core/Makefile
index 79f9479e9658..d501c4278015 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -11,7 +11,6 @@ obj-y		     += dev.o ethtool.o dev_addr_lists.o dst.o netevent.o \
 			neighbour.o rtnetlink.o utils.o link_watch.o filter.o \
 			sock_diag.o dev_ioctl.o tso.o sock_reuseport.o
 
-obj-$(CONFIG_XFRM) += flow.o
 obj-y += net-sysfs.o
 obj-$(CONFIG_PROC_FS) += net-procfs.o
 obj-$(CONFIG_NET_PKTGEN) += pktgen.o
diff --git a/net/core/flow.c b/net/core/flow.c
deleted file mode 100644
index f7f5d1932a27..000000000000
--- a/net/core/flow.c
+++ /dev/null
@@ -1,516 +0,0 @@
-/* flow.c: Generic flow cache.
- *
- * Copyright (C) 2003 Alexey N. Kuznetsov (kuznet@ms2.inr.ac.ru)
- * Copyright (C) 2003 David S. Miller (davem@redhat.com)
- */
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/list.h>
-#include <linux/jhash.h>
-#include <linux/interrupt.h>
-#include <linux/mm.h>
-#include <linux/random.h>
-#include <linux/init.h>
-#include <linux/slab.h>
-#include <linux/smp.h>
-#include <linux/completion.h>
-#include <linux/percpu.h>
-#include <linux/bitops.h>
-#include <linux/notifier.h>
-#include <linux/cpu.h>
-#include <linux/cpumask.h>
-#include <linux/mutex.h>
-#include <net/flow.h>
-#include <linux/atomic.h>
-#include <linux/security.h>
-#include <net/net_namespace.h>
-
-struct flow_cache_entry {
-	union {
-		struct hlist_node	hlist;
-		struct list_head	gc_list;
-	} u;
-	struct net			*net;
-	u16				family;
-	u8				dir;
-	u32				genid;
-	struct flowi			key;
-	struct flow_cache_object	*object;
-};
-
-struct flow_flush_info {
-	struct flow_cache		*cache;
-	atomic_t			cpuleft;
-	struct completion		completion;
-};
-
-static struct kmem_cache *flow_cachep __read_mostly;
-
-#define flow_cache_hash_size(cache)	(1U << (cache)->hash_shift)
-#define FLOW_HASH_RND_PERIOD		(10 * 60 * HZ)
-
-static void flow_cache_new_hashrnd(unsigned long arg)
-{
-	struct flow_cache *fc = (void *) arg;
-	int i;
-
-	for_each_possible_cpu(i)
-		per_cpu_ptr(fc->percpu, i)->hash_rnd_recalc = 1;
-
-	fc->rnd_timer.expires = jiffies + FLOW_HASH_RND_PERIOD;
-	add_timer(&fc->rnd_timer);
-}
-
-static int flow_entry_valid(struct flow_cache_entry *fle,
-				struct netns_xfrm *xfrm)
-{
-	if (atomic_read(&xfrm->flow_cache_genid) != fle->genid)
-		return 0;
-	if (fle->object && !fle->object->ops->check(fle->object))
-		return 0;
-	return 1;
-}
-
-static void flow_entry_kill(struct flow_cache_entry *fle,
-				struct netns_xfrm *xfrm)
-{
-	if (fle->object)
-		fle->object->ops->delete(fle->object);
-	kmem_cache_free(flow_cachep, fle);
-}
-
-static void flow_cache_gc_task(struct work_struct *work)
-{
-	struct list_head gc_list;
-	struct flow_cache_entry *fce, *n;
-	struct netns_xfrm *xfrm = container_of(work, struct netns_xfrm,
-						flow_cache_gc_work);
-
-	INIT_LIST_HEAD(&gc_list);
-	spin_lock_bh(&xfrm->flow_cache_gc_lock);
-	list_splice_tail_init(&xfrm->flow_cache_gc_list, &gc_list);
-	spin_unlock_bh(&xfrm->flow_cache_gc_lock);
-
-	list_for_each_entry_safe(fce, n, &gc_list, u.gc_list) {
-		flow_entry_kill(fce, xfrm);
-		atomic_dec(&xfrm->flow_cache_gc_count);
-	}
-}
-
-static void flow_cache_queue_garbage(struct flow_cache_percpu *fcp,
-				     unsigned int deleted,
-				     struct list_head *gc_list,
-				     struct netns_xfrm *xfrm)
-{
-	if (deleted) {
-		atomic_add(deleted, &xfrm->flow_cache_gc_count);
-		fcp->hash_count -= deleted;
-		spin_lock_bh(&xfrm->flow_cache_gc_lock);
-		list_splice_tail(gc_list, &xfrm->flow_cache_gc_list);
-		spin_unlock_bh(&xfrm->flow_cache_gc_lock);
-		schedule_work(&xfrm->flow_cache_gc_work);
-	}
-}
-
-static void __flow_cache_shrink(struct flow_cache *fc,
-				struct flow_cache_percpu *fcp,
-				unsigned int shrink_to)
-{
-	struct flow_cache_entry *fle;
-	struct hlist_node *tmp;
-	LIST_HEAD(gc_list);
-	unsigned int deleted = 0;
-	struct netns_xfrm *xfrm = container_of(fc, struct netns_xfrm,
-						flow_cache_global);
-	unsigned int i;
-
-	for (i = 0; i < flow_cache_hash_size(fc); i++) {
-		unsigned int saved = 0;
-
-		hlist_for_each_entry_safe(fle, tmp,
-					  &fcp->hash_table[i], u.hlist) {
-			if (saved < shrink_to &&
-			    flow_entry_valid(fle, xfrm)) {
-				saved++;
-			} else {
-				deleted++;
-				hlist_del(&fle->u.hlist);
-				list_add_tail(&fle->u.gc_list, &gc_list);
-			}
-		}
-	}
-
-	flow_cache_queue_garbage(fcp, deleted, &gc_list, xfrm);
-}
-
-static void flow_cache_shrink(struct flow_cache *fc,
-			      struct flow_cache_percpu *fcp)
-{
-	unsigned int shrink_to = fc->low_watermark / flow_cache_hash_size(fc);
-
-	__flow_cache_shrink(fc, fcp, shrink_to);
-}
-
-static void flow_new_hash_rnd(struct flow_cache *fc,
-			      struct flow_cache_percpu *fcp)
-{
-	get_random_bytes(&fcp->hash_rnd, sizeof(u32));
-	fcp->hash_rnd_recalc = 0;
-	__flow_cache_shrink(fc, fcp, 0);
-}
-
-static u32 flow_hash_code(struct flow_cache *fc,
-			  struct flow_cache_percpu *fcp,
-			  const struct flowi *key,
-			  unsigned int keysize)
-{
-	const u32 *k = (const u32 *) key;
-	const u32 length = keysize * sizeof(flow_compare_t) / sizeof(u32);
-
-	return jhash2(k, length, fcp->hash_rnd)
-		& (flow_cache_hash_size(fc) - 1);
-}
-
-/* I hear what you're saying, use memcmp.  But memcmp cannot make
- * important assumptions that we can here, such as alignment.
- */
-static int flow_key_compare(const struct flowi *key1, const struct flowi *key2,
-			    unsigned int keysize)
-{
-	const flow_compare_t *k1, *k1_lim, *k2;
-
-	k1 = (const flow_compare_t *) key1;
-	k1_lim = k1 + keysize;
-
-	k2 = (const flow_compare_t *) key2;
-
-	do {
-		if (*k1++ != *k2++)
-			return 1;
-	} while (k1 < k1_lim);
-
-	return 0;
-}
-
-struct flow_cache_object *
-flow_cache_lookup(struct net *net, const struct flowi *key, u16 family, u8 dir,
-		  flow_resolve_t resolver, void *ctx)
-{
-	struct flow_cache *fc = &net->xfrm.flow_cache_global;
-	struct flow_cache_percpu *fcp;
-	struct flow_cache_entry *fle, *tfle;
-	struct flow_cache_object *flo;
-	unsigned int keysize;
-	unsigned int hash;
-
-	local_bh_disable();
-	fcp = this_cpu_ptr(fc->percpu);
-
-	fle = NULL;
-	flo = NULL;
-
-	keysize = flow_key_size(family);
-	if (!keysize)
-		goto nocache;
-
-	/* Packet really early in init?  Making flow_cache_init a
-	 * pre-smp initcall would solve this.  --RR */
-	if (!fcp->hash_table)
-		goto nocache;
-
-	if (fcp->hash_rnd_recalc)
-		flow_new_hash_rnd(fc, fcp);
-
-	hash = flow_hash_code(fc, fcp, key, keysize);
-	hlist_for_each_entry(tfle, &fcp->hash_table[hash], u.hlist) {
-		if (tfle->net == net &&
-		    tfle->family == family &&
-		    tfle->dir == dir &&
-		    flow_key_compare(key, &tfle->key, keysize) == 0) {
-			fle = tfle;
-			break;
-		}
-	}
-
-	if (unlikely(!fle)) {
-		if (fcp->hash_count > fc->high_watermark)
-			flow_cache_shrink(fc, fcp);
-
-		if (atomic_read(&net->xfrm.flow_cache_gc_count) >
-		    2 * num_online_cpus() * fc->high_watermark) {
-			flo = ERR_PTR(-ENOBUFS);
-			goto ret_object;
-		}
-
-		fle = kmem_cache_alloc(flow_cachep, GFP_ATOMIC);
-		if (fle) {
-			fle->net = net;
-			fle->family = family;
-			fle->dir = dir;
-			memcpy(&fle->key, key, keysize * sizeof(flow_compare_t));
-			fle->object = NULL;
-			hlist_add_head(&fle->u.hlist, &fcp->hash_table[hash]);
-			fcp->hash_count++;
-		}
-	} else if (likely(fle->genid == atomic_read(&net->xfrm.flow_cache_genid))) {
-		flo = fle->object;
-		if (!flo)
-			goto ret_object;
-		flo = flo->ops->get(flo);
-		if (flo)
-			goto ret_object;
-	} else if (fle->object) {
-	        flo = fle->object;
-	        flo->ops->delete(flo);
-	        fle->object = NULL;
-	}
-
-nocache:
-	flo = NULL;
-	if (fle) {
-		flo = fle->object;
-		fle->object = NULL;
-	}
-	flo = resolver(net, key, family, dir, flo, ctx);
-	if (fle) {
-		fle->genid = atomic_read(&net->xfrm.flow_cache_genid);
-		if (!IS_ERR(flo))
-			fle->object = flo;
-		else
-			fle->genid--;
-	} else {
-		if (!IS_ERR_OR_NULL(flo))
-			flo->ops->delete(flo);
-	}
-ret_object:
-	local_bh_enable();
-	return flo;
-}
-EXPORT_SYMBOL(flow_cache_lookup);
-
-static void flow_cache_flush_tasklet(unsigned long data)
-{
-	struct flow_flush_info *info = (void *)data;
-	struct flow_cache *fc = info->cache;
-	struct flow_cache_percpu *fcp;
-	struct flow_cache_entry *fle;
-	struct hlist_node *tmp;
-	LIST_HEAD(gc_list);
-	unsigned int deleted = 0;
-	struct netns_xfrm *xfrm = container_of(fc, struct netns_xfrm,
-						flow_cache_global);
-	unsigned int i;
-
-	fcp = this_cpu_ptr(fc->percpu);
-	for (i = 0; i < flow_cache_hash_size(fc); i++) {
-		hlist_for_each_entry_safe(fle, tmp,
-					  &fcp->hash_table[i], u.hlist) {
-			if (flow_entry_valid(fle, xfrm))
-				continue;
-
-			deleted++;
-			hlist_del(&fle->u.hlist);
-			list_add_tail(&fle->u.gc_list, &gc_list);
-		}
-	}
-
-	flow_cache_queue_garbage(fcp, deleted, &gc_list, xfrm);
-
-	if (atomic_dec_and_test(&info->cpuleft))
-		complete(&info->completion);
-}
-
-/*
- * Return whether a cpu needs flushing.  Conservatively, we assume
- * the presence of any entries means the core may require flushing,
- * since the flow_cache_ops.check() function may assume it's running
- * on the same core as the per-cpu cache component.
- */
-static int flow_cache_percpu_empty(struct flow_cache *fc, int cpu)
-{
-	struct flow_cache_percpu *fcp;
-	unsigned int i;
-
-	fcp = per_cpu_ptr(fc->percpu, cpu);
-	for (i = 0; i < flow_cache_hash_size(fc); i++)
-		if (!hlist_empty(&fcp->hash_table[i]))
-			return 0;
-	return 1;
-}
-
-static void flow_cache_flush_per_cpu(void *data)
-{
-	struct flow_flush_info *info = data;
-	struct tasklet_struct *tasklet;
-
-	tasklet = &this_cpu_ptr(info->cache->percpu)->flush_tasklet;
-	tasklet->data = (unsigned long)info;
-	tasklet_schedule(tasklet);
-}
-
-void flow_cache_flush(struct net *net)
-{
-	struct flow_flush_info info;
-	cpumask_var_t mask;
-	int i, self;
-
-	/* Track which cpus need flushing to avoid disturbing all cores. */
-	if (!alloc_cpumask_var(&mask, GFP_KERNEL))
-		return;
-	cpumask_clear(mask);
-
-	/* Don't want cpus going down or up during this. */
-	get_online_cpus();
-	mutex_lock(&net->xfrm.flow_flush_sem);
-	info.cache = &net->xfrm.flow_cache_global;
-	for_each_online_cpu(i)
-		if (!flow_cache_percpu_empty(info.cache, i))
-			cpumask_set_cpu(i, mask);
-	atomic_set(&info.cpuleft, cpumask_weight(mask));
-	if (atomic_read(&info.cpuleft) == 0)
-		goto done;
-
-	init_completion(&info.completion);
-
-	local_bh_disable();
-	self = cpumask_test_and_clear_cpu(smp_processor_id(), mask);
-	on_each_cpu_mask(mask, flow_cache_flush_per_cpu, &info, 0);
-	if (self)
-		flow_cache_flush_tasklet((unsigned long)&info);
-	local_bh_enable();
-
-	wait_for_completion(&info.completion);
-
-done:
-	mutex_unlock(&net->xfrm.flow_flush_sem);
-	put_online_cpus();
-	free_cpumask_var(mask);
-}
-
-static void flow_cache_flush_task(struct work_struct *work)
-{
-	struct netns_xfrm *xfrm = container_of(work, struct netns_xfrm,
-						flow_cache_flush_work);
-	struct net *net = container_of(xfrm, struct net, xfrm);
-
-	flow_cache_flush(net);
-}
-
-void flow_cache_flush_deferred(struct net *net)
-{
-	schedule_work(&net->xfrm.flow_cache_flush_work);
-}
-
-static int flow_cache_cpu_prepare(struct flow_cache *fc, int cpu)
-{
-	struct flow_cache_percpu *fcp = per_cpu_ptr(fc->percpu, cpu);
-	unsigned int sz = sizeof(struct hlist_head) * flow_cache_hash_size(fc);
-
-	if (!fcp->hash_table) {
-		fcp->hash_table = kzalloc_node(sz, GFP_KERNEL, cpu_to_node(cpu));
-		if (!fcp->hash_table) {
-			pr_err("NET: failed to allocate flow cache sz %u\n", sz);
-			return -ENOMEM;
-		}
-		fcp->hash_rnd_recalc = 1;
-		fcp->hash_count = 0;
-		tasklet_init(&fcp->flush_tasklet, flow_cache_flush_tasklet, 0);
-	}
-	return 0;
-}
-
-static int flow_cache_cpu_up_prep(unsigned int cpu, struct hlist_node *node)
-{
-	struct flow_cache *fc = hlist_entry_safe(node, struct flow_cache, node);
-
-	return flow_cache_cpu_prepare(fc, cpu);
-}
-
-static int flow_cache_cpu_dead(unsigned int cpu, struct hlist_node *node)
-{
-	struct flow_cache *fc = hlist_entry_safe(node, struct flow_cache, node);
-	struct flow_cache_percpu *fcp = per_cpu_ptr(fc->percpu, cpu);
-
-	__flow_cache_shrink(fc, fcp, 0);
-	return 0;
-}
-
-int flow_cache_init(struct net *net)
-{
-	int i;
-	struct flow_cache *fc = &net->xfrm.flow_cache_global;
-
-	if (!flow_cachep)
-		flow_cachep = kmem_cache_create("flow_cache",
-						sizeof(struct flow_cache_entry),
-						0, SLAB_PANIC, NULL);
-	spin_lock_init(&net->xfrm.flow_cache_gc_lock);
-	INIT_LIST_HEAD(&net->xfrm.flow_cache_gc_list);
-	INIT_WORK(&net->xfrm.flow_cache_gc_work, flow_cache_gc_task);
-	INIT_WORK(&net->xfrm.flow_cache_flush_work, flow_cache_flush_task);
-	mutex_init(&net->xfrm.flow_flush_sem);
-	atomic_set(&net->xfrm.flow_cache_gc_count, 0);
-
-	fc->hash_shift = 10;
-	fc->low_watermark = 2 * flow_cache_hash_size(fc);
-	fc->high_watermark = 4 * flow_cache_hash_size(fc);
-
-	fc->percpu = alloc_percpu(struct flow_cache_percpu);
-	if (!fc->percpu)
-		return -ENOMEM;
-
-	if (cpuhp_state_add_instance(CPUHP_NET_FLOW_PREPARE, &fc->node))
-		goto err;
-
-	setup_timer(&fc->rnd_timer, flow_cache_new_hashrnd,
-		    (unsigned long) fc);
-	fc->rnd_timer.expires = jiffies + FLOW_HASH_RND_PERIOD;
-	add_timer(&fc->rnd_timer);
-
-	return 0;
-
-err:
-	for_each_possible_cpu(i) {
-		struct flow_cache_percpu *fcp = per_cpu_ptr(fc->percpu, i);
-		kfree(fcp->hash_table);
-		fcp->hash_table = NULL;
-	}
-
-	free_percpu(fc->percpu);
-	fc->percpu = NULL;
-
-	return -ENOMEM;
-}
-EXPORT_SYMBOL(flow_cache_init);
-
-void flow_cache_fini(struct net *net)
-{
-	int i;
-	struct flow_cache *fc = &net->xfrm.flow_cache_global;
-
-	del_timer_sync(&fc->rnd_timer);
-
-	cpuhp_state_remove_instance_nocalls(CPUHP_NET_FLOW_PREPARE, &fc->node);
-
-	for_each_possible_cpu(i) {
-		struct flow_cache_percpu *fcp = per_cpu_ptr(fc->percpu, i);
-		kfree(fcp->hash_table);
-		fcp->hash_table = NULL;
-	}
-
-	free_percpu(fc->percpu);
-	fc->percpu = NULL;
-}
-EXPORT_SYMBOL(flow_cache_fini);
-
-void __init flow_cache_hp_init(void)
-{
-	int ret;
-
-	ret = cpuhp_setup_state_multi(CPUHP_NET_FLOW_PREPARE,
-				      "net/flow:prepare",
-				      flow_cache_cpu_up_prep,
-				      flow_cache_cpu_dead);
-	WARN_ON(ret < 0);
-}
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index 19455a5fc328..4aefb149fe0a 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -213,14 +213,6 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse)
 	fl4->flowi4_tos = iph->tos;
 }
 
-static inline int xfrm4_garbage_collect(struct dst_ops *ops)
-{
-	struct net *net = container_of(ops, struct net, xfrm.xfrm4_dst_ops);
-
-	xfrm_garbage_collect_deferred(net);
-	return (dst_entries_get_slow(ops) > ops->gc_thresh * 2);
-}
-
 static void xfrm4_update_pmtu(struct dst_entry *dst, struct sock *sk,
 			      struct sk_buff *skb, u32 mtu)
 {
@@ -259,7 +251,6 @@ static void xfrm4_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
 
 static struct dst_ops xfrm4_dst_ops_template = {
 	.family =		AF_INET,
-	.gc =			xfrm4_garbage_collect,
 	.update_pmtu =		xfrm4_update_pmtu,
 	.redirect =		xfrm4_redirect,
 	.cow_metrics =		dst_cow_metrics_generic,
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index ae30dc4973e8..f44b25a48478 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -214,14 +214,6 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse)
 	}
 }
 
-static inline int xfrm6_garbage_collect(struct dst_ops *ops)
-{
-	struct net *net = container_of(ops, struct net, xfrm.xfrm6_dst_ops);
-
-	xfrm_garbage_collect_deferred(net);
-	return dst_entries_get_fast(ops) > ops->gc_thresh * 2;
-}
-
 static void xfrm6_update_pmtu(struct dst_entry *dst, struct sock *sk,
 			      struct sk_buff *skb, u32 mtu)
 {
@@ -279,7 +271,6 @@ static void xfrm6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
 
 static struct dst_ops xfrm6_dst_ops_template = {
 	.family =		AF_INET6,
-	.gc =			xfrm6_garbage_collect,
 	.update_pmtu =		xfrm6_update_pmtu,
 	.redirect =		xfrm6_redirect,
 	.cow_metrics =		dst_cow_metrics_generic,
diff --git a/net/key/af_key.c b/net/key/af_key.c
index ce9b8565d825..53d5416bf5fc 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -2389,8 +2389,6 @@ static int pfkey_spddelete(struct sock *sk, struct sk_buff *skb, const struct sa
 
 out:
 	xfrm_pol_put(xp);
-	if (err == 0)
-		xfrm_garbage_collect(net);
 	return err;
 }
 
@@ -2641,8 +2639,6 @@ static int pfkey_spdget(struct sock *sk, struct sk_buff *skb, const struct sadb_
 
 out:
 	xfrm_pol_put(xp);
-	if (delete && err == 0)
-		xfrm_garbage_collect(net);
 	return err;
 }
 
diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c
index 6d4a60d1bf19..d01cb256e89c 100644
--- a/net/xfrm/xfrm_device.c
+++ b/net/xfrm/xfrm_device.c
@@ -173,8 +173,6 @@ static int xfrm_dev_down(struct net_device *dev)
 	if (dev->features & NETIF_F_HW_ESP)
 		xfrm_dev_state_flush(dev_net(dev), dev, true);
 
-	xfrm_garbage_collect(dev_net(dev));
-
 	return NOTIFY_DONE;
 }
 
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 5bb049d8e8d5..f4419d1b9f38 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -246,36 +246,6 @@ static void xfrm_policy_timer(unsigned long data)
 	xfrm_pol_put(xp);
 }
 
-static struct flow_cache_object *xfrm_policy_flo_get(struct flow_cache_object *flo)
-{
-	struct xfrm_policy *pol = container_of(flo, struct xfrm_policy, flo);
-
-	if (unlikely(pol->walk.dead))
-		flo = NULL;
-	else
-		xfrm_pol_hold(pol);
-
-	return flo;
-}
-
-static int xfrm_policy_flo_check(struct flow_cache_object *flo)
-{
-	struct xfrm_policy *pol = container_of(flo, struct xfrm_policy, flo);
-
-	return !pol->walk.dead;
-}
-
-static void xfrm_policy_flo_delete(struct flow_cache_object *flo)
-{
-	xfrm_pol_put(container_of(flo, struct xfrm_policy, flo));
-}
-
-static const struct flow_cache_ops xfrm_policy_fc_ops = {
-	.get = xfrm_policy_flo_get,
-	.check = xfrm_policy_flo_check,
-	.delete = xfrm_policy_flo_delete,
-};
-
 /* Allocate xfrm_policy. Not used here, it is supposed to be used by pfkeyv2
  * SPD calls.
  */
@@ -298,7 +268,6 @@ struct xfrm_policy *xfrm_policy_alloc(struct net *net, gfp_t gfp)
 				(unsigned long)policy);
 		setup_timer(&policy->polq.hold_timer, xfrm_policy_queue_process,
 			    (unsigned long)policy);
-		policy->flo.ops = &xfrm_policy_fc_ops;
 	}
 	return policy;
 }
@@ -798,7 +767,6 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
 	else
 		hlist_add_head(&policy->bydst, chain);
 	__xfrm_policy_link(policy, dir);
-	atomic_inc(&net->xfrm.flow_cache_genid);
 
 	/* After previous checking, family can either be AF_INET or AF_INET6 */
 	if (policy->family == AF_INET)
@@ -1006,10 +974,6 @@ int xfrm_policy_flush(struct net *net, u8 type, bool task_valid)
 		err = -ESRCH;
 out:
 	spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
-
-	if (cnt)
-		xfrm_garbage_collect(net);
-
 	return err;
 }
 EXPORT_SYMBOL(xfrm_policy_flush);
@@ -1494,58 +1458,6 @@ static int xfrm_get_tos(const struct flowi *fl, int family)
 	return tos;
 }
 
-static struct flow_cache_object *xfrm_bundle_flo_get(struct flow_cache_object *flo)
-{
-	struct xfrm_dst *xdst = container_of(flo, struct xfrm_dst, flo);
-	struct dst_entry *dst = &xdst->u.dst;
-
-	if (xdst->route == NULL) {
-		/* Dummy bundle - if it has xfrms we were not
-		 * able to build bundle as template resolution failed.
-		 * It means we need to try again resolving. */
-		if (xdst->num_xfrms > 0)
-			return NULL;
-	} else if (dst->flags & DST_XFRM_QUEUE) {
-		return NULL;
-	} else {
-		/* Real bundle */
-		if (stale_bundle(dst))
-			return NULL;
-	}
-
-	dst_hold(dst);
-	return flo;
-}
-
-static int xfrm_bundle_flo_check(struct flow_cache_object *flo)
-{
-	struct xfrm_dst *xdst = container_of(flo, struct xfrm_dst, flo);
-	struct dst_entry *dst = &xdst->u.dst;
-
-	if (!xdst->route)
-		return 0;
-	if (stale_bundle(dst))
-		return 0;
-
-	return 1;
-}
-
-static void xfrm_bundle_flo_delete(struct flow_cache_object *flo)
-{
-	struct xfrm_dst *xdst = container_of(flo, struct xfrm_dst, flo);
-	struct dst_entry *dst = &xdst->u.dst;
-
-	/* Mark DST_OBSOLETE_DEAD to fail the next xfrm_dst_check() */
-	dst->obsolete = DST_OBSOLETE_DEAD;
-	dst_release_immediate(dst);
-}
-
-static const struct flow_cache_ops xfrm_bundle_fc_ops = {
-	.get = xfrm_bundle_flo_get,
-	.check = xfrm_bundle_flo_check,
-	.delete = xfrm_bundle_flo_delete,
-};
-
 static inline struct xfrm_dst *xfrm_alloc_dst(struct net *net, int family)
 {
 	const struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
@@ -1573,7 +1485,6 @@ static inline struct xfrm_dst *xfrm_alloc_dst(struct net *net, int family)
 		struct dst_entry *dst = &xdst->u.dst;
 
 		memset(dst + 1, 0, sizeof(*xdst) - sizeof(*dst));
-		xdst->flo.ops = &xfrm_bundle_fc_ops;
 	} else
 		xdst = ERR_PTR(-ENOBUFS);
 
@@ -2569,18 +2480,6 @@ static struct dst_entry *xfrm_negative_advice(struct dst_entry *dst)
 	return dst;
 }
 
-void xfrm_garbage_collect(struct net *net)
-{
-	flow_cache_flush(net);
-}
-EXPORT_SYMBOL(xfrm_garbage_collect);
-
-void xfrm_garbage_collect_deferred(struct net *net)
-{
-	flow_cache_flush_deferred(net);
-}
-EXPORT_SYMBOL(xfrm_garbage_collect_deferred);
-
 static void xfrm_init_pmtu(struct dst_entry *dst)
 {
 	do {
@@ -2918,14 +2817,9 @@ static int __net_init xfrm_net_init(struct net *net)
 	rv = xfrm_sysctl_init(net);
 	if (rv < 0)
 		goto out_sysctl;
-	rv = flow_cache_init(net);
-	if (rv < 0)
-		goto out;
 
 	return 0;
 
-out:
-	xfrm_sysctl_fini(net);
 out_sysctl:
 	xfrm_policy_fini(net);
 out_policy:
@@ -2938,7 +2832,6 @@ static int __net_init xfrm_net_init(struct net *net)
 
 static void __net_exit xfrm_net_exit(struct net *net)
 {
-	flow_cache_fini(net);
 	xfrm_sysctl_fini(net);
 	xfrm_policy_fini(net);
 	xfrm_state_fini(net);
@@ -2952,7 +2845,6 @@ static struct pernet_operations __net_initdata xfrm_net_ops = {
 
 void __init xfrm_init(void)
 {
-	flow_cache_hp_init();
 	register_pernet_subsys(&xfrm_net_ops);
 	seqcount_init(&xfrm_policy_hash_generation);
 	xfrm_input_init();
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 6197c7231bc7..1b539b7dcfab 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -1815,8 +1815,6 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh,
 
 out:
 	xfrm_pol_put(xp);
-	if (delete && err == 0)
-		xfrm_garbage_collect(net);
 	return err;
 }
 
diff --git a/security/selinux/include/xfrm.h b/security/selinux/include/xfrm.h
index 1450f85b946d..36a7ce9e11ff 100644
--- a/security/selinux/include/xfrm.h
+++ b/security/selinux/include/xfrm.h
@@ -47,10 +47,8 @@ static inline void selinux_xfrm_notify_policyload(void)
 	struct net *net;
 
 	rtnl_lock();
-	for_each_net(net) {
-		atomic_inc(&net->xfrm.flow_cache_genid);
+	for_each_net(net)
 		rt_genid_bump_all(net);
-	}
 	rtnl_unlock();
 }
 #else
-- 
2.13.0

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [RFC net-next 9/9] xfrm: add a small xdst pcpu cache
  2017-06-28 13:26 [RFC ipsec-next] flow cache removal Florian Westphal
                   ` (7 preceding siblings ...)
  2017-06-28 13:26 ` [RFC net-next 8/9] xfrm: remove flow cache Florian Westphal
@ 2017-06-28 13:26 ` Florian Westphal
  2017-06-29 13:06   ` Ilan Tayari
  8 siblings, 1 reply; 13+ messages in thread
From: Florian Westphal @ 2017-06-28 13:26 UTC (permalink / raw)
  To: netdev; +Cc: Florian Westphal

retain last used xfrm_dst in a pcpu cache.
On next request, reuse this dst if the policies are the same.

The cache doesn't help at all with strictly-RR workloads as
we never have a hit.

Also, the cache adds cost of this_cpu_xchg() in packet path.
It would be better to use plain this_cpu_read/write, however,
a netdev notifier can run in parallel on another cpu and write the
same pcpu value, so the xchg is needed to avoid a race.

The notifier is needed so we do not add long hangs when a device
is dismantled but some pcpu xdst still holds a reference.

Test results using 4 network namespaces and null encryption:

ns1           ns2          -> ns3           -> ns4
netperf -> xfrm/null enc   -> xfrm/null dec -> netserver

what                    TCP_STREAM      UDP_STREAM      UDP_RR
Flow cache:		14804.4		279.738		326213.0
No flow cache:		14158.3		257.458		228486.8
Pcpu cache:		14766.4		286.958		239433.5

UDP tests used 64byte packets, tests ran for one minute each,
value is average over ten iterations.

'Flow cache' is 'net-next', 'No flow cache' is net-next plus this
series but without this one.

Signed-off-by: Florian Westphal <fw@strlen.de>
---
 include/net/xfrm.h     |  1 +
 net/xfrm/xfrm_device.c |  1 +
 net/xfrm/xfrm_policy.c | 44 ++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 46 insertions(+)

diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 9b85367529a4..8bde1d569790 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -316,6 +316,7 @@ int xfrm_policy_register_afinfo(const struct xfrm_policy_afinfo *afinfo, int fam
 void xfrm_policy_unregister_afinfo(const struct xfrm_policy_afinfo *afinfo);
 void km_policy_notify(struct xfrm_policy *xp, int dir,
 		      const struct km_event *c);
+void xfrm_policy_dev_unreg(void);
 void km_state_notify(struct xfrm_state *x, const struct km_event *c);
 
 struct xfrm_tmpl;
diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c
index d01cb256e89c..8221d05d43d1 100644
--- a/net/xfrm/xfrm_device.c
+++ b/net/xfrm/xfrm_device.c
@@ -151,6 +151,7 @@ static int xfrm_dev_register(struct net_device *dev)
 
 static int xfrm_dev_unregister(struct net_device *dev)
 {
+	xfrm_policy_dev_unreg();
 	return NOTIFY_DONE;
 }
 
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index f4419d1b9f38..ac83b39850ce 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -44,6 +44,7 @@ struct xfrm_flo {
 	u8 flags;
 };
 
+static DEFINE_PER_CPU(struct xfrm_dst *, xfrm_last_dst);
 static DEFINE_SPINLOCK(xfrm_policy_afinfo_lock);
 static struct xfrm_policy_afinfo const __rcu *xfrm_policy_afinfo[AF_INET6 + 1]
 						__read_mostly;
@@ -1700,6 +1701,34 @@ static int xfrm_expand_policies(const struct flowi *fl, u16 family,
 
 }
 
+void xfrm_policy_dev_unreg(void)
+{
+	int cpu;
+
+	local_bh_disable();
+	rcu_read_lock();
+	for_each_possible_cpu(cpu) {
+		struct xfrm_dst *tmp, *old;
+
+		old = per_cpu(xfrm_last_dst, cpu);
+		if (!old || xfrm_bundle_ok(old))
+			continue;
+
+		tmp = cmpxchg(&(per_cpu(xfrm_last_dst, cpu)), old, NULL);
+		if (tmp == old)
+			dst_release(&old->u.dst);
+	}
+	rcu_read_unlock();
+	local_bh_enable();
+}
+
+static void xfrm_last_dst_update(struct xfrm_dst *xdst)
+{
+	struct xfrm_dst *old = this_cpu_xchg(xfrm_last_dst, xdst);
+	if (old)
+		dst_release(&old->u.dst);
+}
+
 static struct xfrm_dst *
 xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols,
 			       const struct flowi *fl, u16 family,
@@ -1711,17 +1740,29 @@ xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols,
 	struct xfrm_dst *xdst;
 	int err;
 
+	xdst = this_cpu_read(xfrm_last_dst);
+	if (xdst &&
+	    xdst->u.dst.dev == dst_orig->dev &&
+	    xdst->num_pols == num_pols &&
+	    memcmp(xdst->pols, pols,
+		   sizeof(struct xfrm_policy *) * num_pols) == 0 &&
+	    xfrm_bundle_ok(xdst) &&
+	    dst_hold_safe(&xdst->u.dst))
+		return xdst;
+
 	/* Try to instantiate a bundle */
 	err = xfrm_tmpl_resolve(pols, num_pols, fl, xfrm, family);
 	if (err <= 0) {
 		if (err != 0 && err != -EAGAIN)
 			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR);
+		xfrm_last_dst_update(NULL);
 		return ERR_PTR(err);
 	}
 
 	dst = xfrm_bundle_create(pols[0], xfrm, err, fl, dst_orig);
 	if (IS_ERR(dst)) {
 		XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLEGENERROR);
+		xfrm_last_dst_update(NULL);
 		return ERR_CAST(dst);
 	}
 
@@ -1731,6 +1772,9 @@ xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols,
 	memcpy(xdst->pols, pols, sizeof(struct xfrm_policy *) * num_pols);
 	xdst->policy_genid = atomic_read(&pols[0]->genid);
 
+	atomic_set(&xdst->u.dst.__refcnt, 2);
+	xfrm_last_dst_update(xdst);
+
 	return xdst;
 }
 
-- 
2.13.0

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* RE: [RFC net-next 9/9] xfrm: add a small xdst pcpu cache
  2017-06-28 13:26 ` [RFC net-next 9/9] xfrm: add a small xdst pcpu cache Florian Westphal
@ 2017-06-29 13:06   ` Ilan Tayari
  2017-06-29 13:17     ` Florian Westphal
  2017-07-05  9:01     ` Ilan Tayari
  0 siblings, 2 replies; 13+ messages in thread
From: Ilan Tayari @ 2017-06-29 13:06 UTC (permalink / raw)
  To: Florian Westphal, netdev; +Cc: Yossi Kuperman, Steffen Klassert

> -----Original Message-----
> From: netdev-owner@vger.kernel.org [mailto:netdev-owner@vger.kernel.org]
> Subject: [RFC net-next 9/9] xfrm: add a small xdst pcpu cache
> 
> retain last used xfrm_dst in a pcpu cache.
> On next request, reuse this dst if the policies are the same.
> 
> The cache doesn't help at all with strictly-RR workloads as
> we never have a hit.
> 
> Also, the cache adds cost of this_cpu_xchg() in packet path.
> It would be better to use plain this_cpu_read/write, however,
> a netdev notifier can run in parallel on another cpu and write the
> same pcpu value, so the xchg is needed to avoid a race.
> 
> The notifier is needed so we do not add long hangs when a device
> is dismantled but some pcpu xdst still holds a reference.
> 
> Test results using 4 network namespaces and null encryption:
> 
> ns1           ns2          -> ns3           -> ns4
> netperf -> xfrm/null enc   -> xfrm/null dec -> netserver
> 
> what                    TCP_STREAM      UDP_STREAM      UDP_RR
> Flow cache:		14804.4		279.738		326213.0
> No flow cache:		14158.3		257.458		228486.8
> Pcpu cache:		14766.4		286.958		239433.5
> 
> UDP tests used 64byte packets, tests ran for one minute each,
> value is average over ten iterations.

Hi Florian,

I want to give this a go with hw-offload and see the impact on performance.
It may take us a few days to do that.

See one comment below.

> 
> 'Flow cache' is 'net-next', 'No flow cache' is net-next plus this
> series but without this one.
> 
> Signed-off-by: Florian Westphal <fw@strlen.de>
> ---
>  include/net/xfrm.h     |  1 +
>  net/xfrm/xfrm_device.c |  1 +
>  net/xfrm/xfrm_policy.c | 44 ++++++++++++++++++++++++++++++++++++++++++++
>  3 files changed, 46 insertions(+)
> 
> diff --git a/include/net/xfrm.h b/include/net/xfrm.h
> index 9b85367529a4..8bde1d569790 100644
> --- a/include/net/xfrm.h
> +++ b/include/net/xfrm.h
> @@ -316,6 +316,7 @@ int xfrm_policy_register_afinfo(const struct
> xfrm_policy_afinfo *afinfo, int fam
>  void xfrm_policy_unregister_afinfo(const struct xfrm_policy_afinfo
> *afinfo);
>  void km_policy_notify(struct xfrm_policy *xp, int dir,
>  		      const struct km_event *c);
> +void xfrm_policy_dev_unreg(void);
>  void km_state_notify(struct xfrm_state *x, const struct km_event *c);
> 
>  struct xfrm_tmpl;
> diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c
> index d01cb256e89c..8221d05d43d1 100644
> --- a/net/xfrm/xfrm_device.c
> +++ b/net/xfrm/xfrm_device.c
> @@ -151,6 +151,7 @@ static int xfrm_dev_register(struct net_device *dev)
> 
>  static int xfrm_dev_unregister(struct net_device *dev)
>  {
> +	xfrm_policy_dev_unreg();
>  	return NOTIFY_DONE;
>  }
> 
> diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
> index f4419d1b9f38..ac83b39850ce 100644
> --- a/net/xfrm/xfrm_policy.c
> +++ b/net/xfrm/xfrm_policy.c
> @@ -44,6 +44,7 @@ struct xfrm_flo {
>  	u8 flags;
>  };
> 
> +static DEFINE_PER_CPU(struct xfrm_dst *, xfrm_last_dst);
>  static DEFINE_SPINLOCK(xfrm_policy_afinfo_lock);
>  static struct xfrm_policy_afinfo const __rcu *xfrm_policy_afinfo[AF_INET6
> + 1]
>  						__read_mostly;
> @@ -1700,6 +1701,34 @@ static int xfrm_expand_policies(const struct flowi
> *fl, u16 family,
> 
>  }
> 
> +void xfrm_policy_dev_unreg(void)

Maybe name it xfrm_policy_cache_flush() or something similar, and call it from some places where xfrm_garbage_collect() used to be called?

Such as from xfrm_policy_flush()
And maybe even from xfrm_flush_sa() as well

This would allow to unload esp4 and/or esp4_offload (or other algo module) after 'ip x s f' (or the swan equivalent)

> +{
> +	int cpu;
> +
> +	local_bh_disable();
> +	rcu_read_lock();
> +	for_each_possible_cpu(cpu) {
> +		struct xfrm_dst *tmp, *old;
> +
> +		old = per_cpu(xfrm_last_dst, cpu);
> +		if (!old || xfrm_bundle_ok(old))
> +			continue;
> +
> +		tmp = cmpxchg(&(per_cpu(xfrm_last_dst, cpu)), old, NULL);
> +		if (tmp == old)
> +			dst_release(&old->u.dst);
> +	}
> +	rcu_read_unlock();
> +	local_bh_enable();
> +}
> +
> +static void xfrm_last_dst_update(struct xfrm_dst *xdst)
> +{
> +	struct xfrm_dst *old = this_cpu_xchg(xfrm_last_dst, xdst);
> +	if (old)
> +		dst_release(&old->u.dst);
> +}
> +
>  static struct xfrm_dst *
>  xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols,
>  			       const struct flowi *fl, u16 family,
> @@ -1711,17 +1740,29 @@ xfrm_resolve_and_create_bundle(struct xfrm_policy
> **pols, int num_pols,
>  	struct xfrm_dst *xdst;
>  	int err;
> 
> +	xdst = this_cpu_read(xfrm_last_dst);
> +	if (xdst &&
> +	    xdst->u.dst.dev == dst_orig->dev &&
> +	    xdst->num_pols == num_pols &&
> +	    memcmp(xdst->pols, pols,
> +		   sizeof(struct xfrm_policy *) * num_pols) == 0 &&
> +	    xfrm_bundle_ok(xdst) &&
> +	    dst_hold_safe(&xdst->u.dst))
> +		return xdst;
> +
>  	/* Try to instantiate a bundle */
>  	err = xfrm_tmpl_resolve(pols, num_pols, fl, xfrm, family);
>  	if (err <= 0) {
>  		if (err != 0 && err != -EAGAIN)
>  			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR);
> +		xfrm_last_dst_update(NULL);
>  		return ERR_PTR(err);
>  	}
> 
>  	dst = xfrm_bundle_create(pols[0], xfrm, err, fl, dst_orig);
>  	if (IS_ERR(dst)) {
>  		XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLEGENERROR);
> +		xfrm_last_dst_update(NULL);
>  		return ERR_CAST(dst);
>  	}
> 
> @@ -1731,6 +1772,9 @@ xfrm_resolve_and_create_bundle(struct xfrm_policy
> **pols, int num_pols,
>  	memcpy(xdst->pols, pols, sizeof(struct xfrm_policy *) * num_pols);
>  	xdst->policy_genid = atomic_read(&pols[0]->genid);
> 
> +	atomic_set(&xdst->u.dst.__refcnt, 2);
> +	xfrm_last_dst_update(xdst);
> +
>  	return xdst;
>  }
> 
> --
> 2.13.0


^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [RFC net-next 9/9] xfrm: add a small xdst pcpu cache
  2017-06-29 13:06   ` Ilan Tayari
@ 2017-06-29 13:17     ` Florian Westphal
  2017-07-05  9:01     ` Ilan Tayari
  1 sibling, 0 replies; 13+ messages in thread
From: Florian Westphal @ 2017-06-29 13:17 UTC (permalink / raw)
  To: Ilan Tayari; +Cc: Florian Westphal, netdev, Yossi Kuperman, Steffen Klassert

Ilan Tayari <ilant@mellanox.com> wrote:
> > -----Original Message-----
> > From: netdev-owner@vger.kernel.org [mailto:netdev-owner@vger.kernel.org]
> > Subject: [RFC net-next 9/9] xfrm: add a small xdst pcpu cache
> > 
> > retain last used xfrm_dst in a pcpu cache.
> > On next request, reuse this dst if the policies are the same.
> > 
> > UDP tests used 64byte packets, tests ran for one minute each,
> > value is average over ten iterations.
> 
> Hi Florian,
> 
> I want to give this a go with hw-offload and see the impact on performance.
> It may take us a few days to do that.

Sure, take your time, thanks for testing!

> > +void xfrm_policy_dev_unreg(void)
> 
> Maybe name it xfrm_policy_cache_flush() or something similar, and call it from some places where xfrm_garbage_collect() used to be called?
> 
> Such as from xfrm_policy_flush()
> And maybe even from xfrm_flush_sa() as well
> 
> This would allow to unload esp4 and/or esp4_offload (or other algo module) after 'ip x s f' (or the swan equivalent)

Good point.  I did not consider module unload just device removal.

^ permalink raw reply	[flat|nested] 13+ messages in thread

* RE: [RFC net-next 9/9] xfrm: add a small xdst pcpu cache
  2017-06-29 13:06   ` Ilan Tayari
  2017-06-29 13:17     ` Florian Westphal
@ 2017-07-05  9:01     ` Ilan Tayari
  1 sibling, 0 replies; 13+ messages in thread
From: Ilan Tayari @ 2017-07-05  9:01 UTC (permalink / raw)
  To: Ilan Tayari, Florian Westphal; +Cc: Yossi Kuperman, Steffen Klassert, netdev

> -----Original Message-----
> From: netdev-owner@vger.kernel.org [mailto:netdev-owner@vger.kernel.org]
> Subject: RE: [RFC net-next 9/9] xfrm: add a small xdst pcpu cache
> 
> > -----Original Message-----
> > From: netdev-owner@vger.kernel.org [mailto:netdev-owner@vger.kernel.org]
> > Subject: [RFC net-next 9/9] xfrm: add a small xdst pcpu cache
> >
> > retain last used xfrm_dst in a pcpu cache.
> > On next request, reuse this dst if the policies are the same.
> >
> > The cache doesn't help at all with strictly-RR workloads as
> > we never have a hit.
> >
> > Also, the cache adds cost of this_cpu_xchg() in packet path.
> > It would be better to use plain this_cpu_read/write, however,
> > a netdev notifier can run in parallel on another cpu and write the
> > same pcpu value, so the xchg is needed to avoid a race.
> >
> > The notifier is needed so we do not add long hangs when a device
> > is dismantled but some pcpu xdst still holds a reference.
> >
> > Test results using 4 network namespaces and null encryption:
> >
> > ns1           ns2          -> ns3           -> ns4
> > netperf -> xfrm/null enc   -> xfrm/null dec -> netserver
> >
> > what                    TCP_STREAM      UDP_STREAM      UDP_RR
> > Flow cache:		14804.4		279.738		326213.0
> > No flow cache:		14158.3		257.458		228486.8
> > Pcpu cache:		14766.4		286.958		239433.5
> >
> > UDP tests used 64byte packets, tests ran for one minute each,
> > value is average over ten iterations.
> 
> Hi Florian,
> 
> I want to give this a go with hw-offload and see the impact on
> performance.
> It may take us a few days to do that.

Hi Florian,

We tested with and without your patchset, using single SA with hw-crypto
offload (RFC4106) IPv4 ESP tunnel mode, and a single netperf TCP_STREAM
with a few different messages Sizes.

We didn't separate the pcpu cache patch from the rest of the patchset.

Here are the findings:

What         64-byte    512-byte  1024-byte  1500-byte
Flow cache   1602.89    11004.97   14634.46   14577.60
Pcpu cache   1513.38    10862.55   14246.94   14231.07

The overall degradation seems a bit more than what you measured with
null-crypto.
We used two machines and no namespaces.

Ilan.


^ permalink raw reply	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2017-07-05  9:01 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-06-28 13:26 [RFC ipsec-next] flow cache removal Florian Westphal
2017-06-28 13:26 ` [RFC net-next 1/9] vti: revert flush x-netns xfrm cache when vti interface is removed Florian Westphal
2017-06-28 13:26 ` [RFC net-next 2/9] net: xfrm: revert to lower xfrm dst gc limit Florian Westphal
2017-06-28 13:26 ` [RFC net-next 3/9] xfrm_policy: bypass flow_cache_lookup Florian Westphal
2017-06-28 13:26 ` [RFC net-next 4/9] xfrm_policy: remove always true/false branches Florian Westphal
2017-06-28 13:26 ` [RFC net-next 5/9] xfrm_policy: kill flow to policy dir conversion Florian Westphal
2017-06-28 13:26 ` [RFC net-next 6/9] xfrm_policy: remove xfrm_policy_lookup Florian Westphal
2017-06-28 13:26 ` [RFC net-next 7/9] xfrm_policy: make xfrm_bundle_lookup return xfrm dst object Florian Westphal
2017-06-28 13:26 ` [RFC net-next 8/9] xfrm: remove flow cache Florian Westphal
2017-06-28 13:26 ` [RFC net-next 9/9] xfrm: add a small xdst pcpu cache Florian Westphal
2017-06-29 13:06   ` Ilan Tayari
2017-06-29 13:17     ` Florian Westphal
2017-07-05  9:01     ` Ilan Tayari

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.