linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
To: linux-kernel@vger.kernel.org
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>,
	stable@vger.kernel.org,
	Kristian Evensen <kristian.evensen@gmail.com>,
	Florian Westphal <fw@strlen.de>,
	Steffen Klassert <steffen.klassert@secunet.com>
Subject: [PATCH 4.14 71/71] xfrm: policy: remove pcpu policy cache
Date: Thu, 22 Aug 2019 10:19:46 -0700	[thread overview]
Message-ID: <20190822171730.781081830@linuxfoundation.org> (raw)
In-Reply-To: <20190822171726.131957995@linuxfoundation.org>

From: Florian Westphal <fw@strlen.de>

commit e4db5b61c572475bbbcf63e3c8a2606bfccf2c9d upstream.

Kristian Evensen says:
  In a project I am involved in, we are running ipsec (Strongswan) on
  different mt7621-based routers. Each router is configured as an
  initiator and has around ~30 tunnels to different responders (running
  on misc. devices). Before the flow cache was removed (kernel 4.9), we
  got a combined throughput of around 70Mbit/s for all tunnels on one
  router. However, we recently switched to kernel 4.14 (4.14.48), and
  the total throughput is somewhere around 57Mbit/s (best-case). I.e., a
  drop of around 20%. Reverting the flow cache removal restores, as
  expected, performance levels to that of kernel 4.9.

When pcpu xdst exists, it has to be validated first before it can be
used.

A negative hit thus increases cost vs. no-cache.

As number of tunnels increases, hit rate decreases so this pcpu caching
isn't a viable strategy.

Furthermore, the xdst cache also needs to run with BH off, so when
removing this the bh disable/enable pairs can be removed too.

Kristian tested a 4.14.y backport of this change and reported
increased performance:

  In our tests, the throughput reduction has been reduced from around -20%
  to -5%. We also see that the overall throughput is independent of the
  number of tunnels, while before the throughput was reduced as the number
  of tunnels increased.

Reported-by: Kristian Evensen <kristian.evensen@gmail.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>


---
 include/net/xfrm.h     |    1 
 net/xfrm/xfrm_device.c |   10 ---
 net/xfrm/xfrm_policy.c |  138 -------------------------------------------------
 net/xfrm/xfrm_state.c  |    5 -
 4 files changed, 3 insertions(+), 151 deletions(-)

--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -323,7 +323,6 @@ int xfrm_policy_register_afinfo(const st
 void xfrm_policy_unregister_afinfo(const struct xfrm_policy_afinfo *afinfo);
 void km_policy_notify(struct xfrm_policy *xp, int dir,
 		      const struct km_event *c);
-void xfrm_policy_cache_flush(void);
 void km_state_notify(struct xfrm_state *x, const struct km_event *c);
 
 struct xfrm_tmpl;
--- a/net/xfrm/xfrm_device.c
+++ b/net/xfrm/xfrm_device.c
@@ -153,12 +153,6 @@ static int xfrm_dev_register(struct net_
 	return NOTIFY_DONE;
 }
 
-static int xfrm_dev_unregister(struct net_device *dev)
-{
-	xfrm_policy_cache_flush();
-	return NOTIFY_DONE;
-}
-
 static int xfrm_dev_feat_change(struct net_device *dev)
 {
 	if ((dev->features & NETIF_F_HW_ESP) && !dev->xfrmdev_ops)
@@ -178,7 +172,6 @@ static int xfrm_dev_down(struct net_devi
 	if (dev->features & NETIF_F_HW_ESP)
 		xfrm_dev_state_flush(dev_net(dev), dev, true);
 
-	xfrm_policy_cache_flush();
 	return NOTIFY_DONE;
 }
 
@@ -190,9 +183,6 @@ static int xfrm_dev_event(struct notifie
 	case NETDEV_REGISTER:
 		return xfrm_dev_register(dev);
 
-	case NETDEV_UNREGISTER:
-		return xfrm_dev_unregister(dev);
-
 	case NETDEV_FEAT_CHANGE:
 		return xfrm_dev_feat_change(dev);
 
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -45,8 +45,6 @@ struct xfrm_flo {
 	u8 flags;
 };
 
-static DEFINE_PER_CPU(struct xfrm_dst *, xfrm_last_dst);
-static struct work_struct *xfrm_pcpu_work __read_mostly;
 static DEFINE_SPINLOCK(xfrm_policy_afinfo_lock);
 static struct xfrm_policy_afinfo const __rcu *xfrm_policy_afinfo[AF_INET6 + 1]
 						__read_mostly;
@@ -1715,108 +1713,6 @@ static int xfrm_expand_policies(const st
 
 }
 
-static void xfrm_last_dst_update(struct xfrm_dst *xdst, struct xfrm_dst *old)
-{
-	this_cpu_write(xfrm_last_dst, xdst);
-	if (old)
-		dst_release(&old->u.dst);
-}
-
-static void __xfrm_pcpu_work_fn(void)
-{
-	struct xfrm_dst *old;
-
-	old = this_cpu_read(xfrm_last_dst);
-	if (old && !xfrm_bundle_ok(old))
-		xfrm_last_dst_update(NULL, old);
-}
-
-static void xfrm_pcpu_work_fn(struct work_struct *work)
-{
-	local_bh_disable();
-	rcu_read_lock();
-	__xfrm_pcpu_work_fn();
-	rcu_read_unlock();
-	local_bh_enable();
-}
-
-void xfrm_policy_cache_flush(void)
-{
-	struct xfrm_dst *old;
-	bool found = 0;
-	int cpu;
-
-	might_sleep();
-
-	local_bh_disable();
-	rcu_read_lock();
-	for_each_possible_cpu(cpu) {
-		old = per_cpu(xfrm_last_dst, cpu);
-		if (old && !xfrm_bundle_ok(old)) {
-			if (smp_processor_id() == cpu) {
-				__xfrm_pcpu_work_fn();
-				continue;
-			}
-			found = true;
-			break;
-		}
-	}
-
-	rcu_read_unlock();
-	local_bh_enable();
-
-	if (!found)
-		return;
-
-	get_online_cpus();
-
-	for_each_possible_cpu(cpu) {
-		bool bundle_release;
-
-		rcu_read_lock();
-		old = per_cpu(xfrm_last_dst, cpu);
-		bundle_release = old && !xfrm_bundle_ok(old);
-		rcu_read_unlock();
-
-		if (!bundle_release)
-			continue;
-
-		if (cpu_online(cpu)) {
-			schedule_work_on(cpu, &xfrm_pcpu_work[cpu]);
-			continue;
-		}
-
-		rcu_read_lock();
-		old = per_cpu(xfrm_last_dst, cpu);
-		if (old && !xfrm_bundle_ok(old)) {
-			per_cpu(xfrm_last_dst, cpu) = NULL;
-			dst_release(&old->u.dst);
-		}
-		rcu_read_unlock();
-	}
-
-	put_online_cpus();
-}
-
-static bool xfrm_xdst_can_reuse(struct xfrm_dst *xdst,
-				struct xfrm_state * const xfrm[],
-				int num)
-{
-	const struct dst_entry *dst = &xdst->u.dst;
-	int i;
-
-	if (xdst->num_xfrms != num)
-		return false;
-
-	for (i = 0; i < num; i++) {
-		if (!dst || dst->xfrm != xfrm[i])
-			return false;
-		dst = dst->child;
-	}
-
-	return xfrm_bundle_ok(xdst);
-}
-
 static struct xfrm_dst *
 xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols,
 			       const struct flowi *fl, u16 family,
@@ -1824,7 +1720,7 @@ xfrm_resolve_and_create_bundle(struct xf
 {
 	struct net *net = xp_net(pols[0]);
 	struct xfrm_state *xfrm[XFRM_MAX_DEPTH];
-	struct xfrm_dst *xdst, *old;
+	struct xfrm_dst *xdst;
 	struct dst_entry *dst;
 	int err;
 
@@ -1839,21 +1735,6 @@ xfrm_resolve_and_create_bundle(struct xf
 		return ERR_PTR(err);
 	}
 
-	xdst = this_cpu_read(xfrm_last_dst);
-	if (xdst &&
-	    xdst->u.dst.dev == dst_orig->dev &&
-	    xdst->num_pols == num_pols &&
-	    memcmp(xdst->pols, pols,
-		   sizeof(struct xfrm_policy *) * num_pols) == 0 &&
-	    xfrm_xdst_can_reuse(xdst, xfrm, err)) {
-		dst_hold(&xdst->u.dst);
-		while (err > 0)
-			xfrm_state_put(xfrm[--err]);
-		return xdst;
-	}
-
-	old = xdst;
-
 	dst = xfrm_bundle_create(pols[0], xfrm, err, fl, dst_orig);
 	if (IS_ERR(dst)) {
 		XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLEGENERROR);
@@ -1866,9 +1747,6 @@ xfrm_resolve_and_create_bundle(struct xf
 	memcpy(xdst->pols, pols, sizeof(struct xfrm_policy *) * num_pols);
 	xdst->policy_genid = atomic_read(&pols[0]->genid);
 
-	atomic_set(&xdst->u.dst.__refcnt, 2);
-	xfrm_last_dst_update(xdst, old);
-
 	return xdst;
 }
 
@@ -2069,11 +1947,8 @@ xfrm_bundle_lookup(struct net *net, cons
 	if (num_xfrms <= 0)
 		goto make_dummy_bundle;
 
-	local_bh_disable();
 	xdst = xfrm_resolve_and_create_bundle(pols, num_pols, fl, family,
 					      xflo->dst_orig);
-	local_bh_enable();
-
 	if (IS_ERR(xdst)) {
 		err = PTR_ERR(xdst);
 		if (err != -EAGAIN)
@@ -2160,11 +2035,9 @@ struct dst_entry *xfrm_lookup(struct net
 				goto no_transform;
 			}
 
-			local_bh_disable();
 			xdst = xfrm_resolve_and_create_bundle(
 					pols, num_pols, fl,
 					family, dst_orig);
-			local_bh_enable();
 
 			if (IS_ERR(xdst)) {
 				xfrm_pols_put(pols, num_pols);
@@ -2992,15 +2865,6 @@ static struct pernet_operations __net_in
 
 void __init xfrm_init(void)
 {
-	int i;
-
-	xfrm_pcpu_work = kmalloc_array(NR_CPUS, sizeof(*xfrm_pcpu_work),
-				       GFP_KERNEL);
-	BUG_ON(!xfrm_pcpu_work);
-
-	for (i = 0; i < NR_CPUS; i++)
-		INIT_WORK(&xfrm_pcpu_work[i], xfrm_pcpu_work_fn);
-
 	register_pernet_subsys(&xfrm_net_ops);
 	seqcount_init(&xfrm_policy_hash_generation);
 	xfrm_input_init();
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -735,10 +735,9 @@ restart:
 	}
 out:
 	spin_unlock_bh(&net->xfrm.xfrm_state_lock);
-	if (cnt) {
+	if (cnt)
 		err = 0;
-		xfrm_policy_cache_flush();
-	}
+
 	return err;
 }
 EXPORT_SYMBOL(xfrm_state_flush);



  parent reply	other threads:[~2019-08-22 17:34 UTC|newest]

Thread overview: 79+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-08-22 17:18 [PATCH 4.14 00/71] 4.14.140-stable review Greg Kroah-Hartman
2019-08-22 17:18 ` [PATCH 4.14 01/71] scsi: mpt3sas: Use 63-bit DMA addressing on SAS35 HBA Greg Kroah-Hartman
2019-08-22 17:18 ` [PATCH 4.14 02/71] sh: kernel: hw_breakpoint: Fix missing break in switch statement Greg Kroah-Hartman
2019-08-22 17:18 ` [PATCH 4.14 03/71] mm/usercopy: use memory range to be accessed for wraparound check Greg Kroah-Hartman
2019-08-22 17:18 ` [PATCH 4.14 04/71] mm/memcontrol.c: fix use after free in mem_cgroup_iter() Greg Kroah-Hartman
2019-08-22 17:18 ` [PATCH 4.14 05/71] bpf: get rid of pure_initcall dependency to enable jits Greg Kroah-Hartman
2019-08-22 17:18 ` [PATCH 4.14 06/71] bpf: restrict access to core bpf sysctls Greg Kroah-Hartman
2019-08-22 17:18 ` [PATCH 4.14 07/71] bpf: add bpf_jit_limit knob to restrict unpriv allocations Greg Kroah-Hartman
2019-08-22 17:18 ` [PATCH 4.14 08/71] x86/mm: Use WRITE_ONCE() when setting PTEs Greg Kroah-Hartman
2019-08-22 17:18 ` [PATCH 4.14 09/71] xtensa: add missing isync to the cpu_reset TLB code Greg Kroah-Hartman
2019-08-22 17:18 ` [PATCH 4.14 10/71] ALSA: hda - Apply workaround for another AMD chip 1022:1487 Greg Kroah-Hartman
2019-08-22 17:18 ` [PATCH 4.14 11/71] ALSA: hda - Fix a memory leak bug Greg Kroah-Hartman
2019-08-22 17:18 ` [PATCH 4.14 12/71] ALSA: hda - Add a generic reboot_notify Greg Kroah-Hartman
2019-08-22 17:18 ` [PATCH 4.14 13/71] ALSA: hda - Let all conexant codec enter D3 when rebooting Greg Kroah-Hartman
2019-08-22 17:18 ` [PATCH 4.14 14/71] HID: holtek: test for sanity of intfdata Greg Kroah-Hartman
2019-08-22 17:18 ` [PATCH 4.14 15/71] HID: hiddev: avoid opening a disconnected device Greg Kroah-Hartman
2019-08-22 17:18 ` [PATCH 4.14 16/71] HID: hiddev: do cleanup in failure of opening a device Greg Kroah-Hartman
2019-08-22 17:18 ` [PATCH 4.14 17/71] Input: kbtab - sanity check for endpoint type Greg Kroah-Hartman
2019-08-22 17:18 ` [PATCH 4.14 18/71] Input: iforce - add sanity checks Greg Kroah-Hartman
2019-08-22 17:18 ` [PATCH 4.14 19/71] net: usb: pegasus: fix improper read if get_registers() fail Greg Kroah-Hartman
2019-08-22 17:18 ` [PATCH 4.14 20/71] netfilter: ebtables: also count base chain policies Greg Kroah-Hartman
2019-08-22 17:18 ` [PATCH 4.14 21/71] clk: at91: generated: Truncate divisor to GENERATED_MAX_DIV + 1 Greg Kroah-Hartman
2019-08-22 17:18 ` [PATCH 4.14 22/71] clk: renesas: cpg-mssr: Fix reset control race condition Greg Kroah-Hartman
2019-08-22 17:18 ` [PATCH 4.14 23/71] xen/pciback: remove set but not used variable old_state Greg Kroah-Hartman
2019-08-22 17:18 ` [PATCH 4.14 24/71] irqchip/gic-v3-its: Free unused vpt_page when alloc vpe table fail Greg Kroah-Hartman
2019-08-22 17:19 ` [PATCH 4.14 25/71] irqchip/irq-imx-gpcv2: Forward irq type to parent Greg Kroah-Hartman
2019-08-22 17:19 ` [PATCH 4.14 26/71] perf header: Fix divide by zero error if f_header.attr_size==0 Greg Kroah-Hartman
2019-08-22 17:19 ` [PATCH 4.14 27/71] perf header: Fix use of unitialized value warning Greg Kroah-Hartman
2019-08-22 17:19 ` [PATCH 4.14 28/71] libata: zpodd: Fix small read overflow in zpodd_get_mech_type() Greg Kroah-Hartman
2019-08-22 17:19 ` [PATCH 4.14 29/71] drm/bridge: lvds-encoder: Fix build error while CONFIG_DRM_KMS_HELPER=m Greg Kroah-Hartman
2019-08-22 17:19 ` [PATCH 4.14 30/71] scsi: hpsa: correct scsi command status issue after reset Greg Kroah-Hartman
2019-08-22 17:19 ` [PATCH 4.14 31/71] scsi: qla2xxx: Fix possible fcport null-pointer dereferences Greg Kroah-Hartman
2019-08-22 17:19 ` [PATCH 4.14 32/71] ata: libahci: do not complain in case of deferred probe Greg Kroah-Hartman
2019-08-22 17:19 ` [PATCH 4.14 33/71] kbuild: modpost: handle KBUILD_EXTRA_SYMBOLS only for external modules Greg Kroah-Hartman
2019-08-22 17:19 ` [PATCH 4.14 34/71] arm64/efi: fix variable si set but not used Greg Kroah-Hartman
2019-08-22 17:19 ` [PATCH 4.14 35/71] arm64: unwind: Prohibit probing on return_address() Greg Kroah-Hartman
2019-08-22 17:19 ` [PATCH 4.14 36/71] arm64/mm: fix variable pud set but not used Greg Kroah-Hartman
2019-08-22 17:19 ` [PATCH 4.14 37/71] IB/core: Add mitigation for Spectre V1 Greg Kroah-Hartman
2019-08-22 17:19 ` [PATCH 4.14 38/71] IB/mad: Fix use-after-free in ib mad completion handling Greg Kroah-Hartman
2019-08-22 17:19 ` [PATCH 4.14 39/71] drm: msm: Fix add_gpu_components Greg Kroah-Hartman
2019-08-22 17:19 ` [PATCH 4.14 40/71] ocfs2: remove set but not used variable last_hash Greg Kroah-Hartman
2019-08-22 17:19 ` [PATCH 4.14 41/71] asm-generic: fix -Wtype-limits compiler warnings Greg Kroah-Hartman
2019-08-22 17:19 ` [PATCH 4.14 42/71] KVM: arm/arm64: Sync ICH_VMCR_EL2 back when about to block Greg Kroah-Hartman
2019-08-22 17:19 ` [PATCH 4.14 43/71] staging: comedi: dt3000: Fix signed integer overflow divider * base Greg Kroah-Hartman
2019-08-22 17:19 ` [PATCH 4.14 44/71] staging: comedi: dt3000: Fix rounding up of timer divisor Greg Kroah-Hartman
2019-08-22 17:19 ` [PATCH 4.14 45/71] iio: adc: max9611: Fix temperature reading in probe Greg Kroah-Hartman
2019-08-22 17:19 ` [PATCH 4.14 46/71] USB: core: Fix races in character device registration and deregistraion Greg Kroah-Hartman
2019-08-22 17:19 ` [PATCH 4.14 47/71] usb: gadget: udc: renesas_usb3: Fix sysfs interface of "role" Greg Kroah-Hartman
2019-08-22 17:19 ` [PATCH 4.14 48/71] usb: cdc-acm: make sure a refcount is taken early enough Greg Kroah-Hartman
2019-08-22 17:19 ` [PATCH 4.14 49/71] USB: CDC: fix sanity checks in CDC union parser Greg Kroah-Hartman
2019-08-22 17:19 ` [PATCH 4.14 50/71] USB: serial: option: add D-Link DWM-222 device ID Greg Kroah-Hartman
2019-08-22 17:19 ` [PATCH 4.14 51/71] USB: serial: option: Add support for ZTE MF871A Greg Kroah-Hartman
2019-08-22 17:19 ` [PATCH 4.14 52/71] USB: serial: option: add the BroadMobi BM818 card Greg Kroah-Hartman
2019-08-22 17:19 ` [PATCH 4.14 53/71] USB: serial: option: Add Motorola modem UARTs Greg Kroah-Hartman
2019-08-22 17:19 ` [PATCH 4.14 54/71] bpf: fix bpf_jit_limit knob for PAGE_SIZE >= 64K Greg Kroah-Hartman
2019-08-22 17:19 ` [PATCH 4.14 55/71] Revert "tcp: Clear sk_send_head after purging the write queue" Greg Kroah-Hartman
2019-08-22 17:19 ` [PATCH 4.14 56/71] arm64: compat: Allow single-byte watchpoints on all addresses Greg Kroah-Hartman
2019-08-22 17:19 ` [PATCH 4.14 57/71] arm64: ftrace: Ensure module ftrace trampoline is coherent with I-side Greg Kroah-Hartman
2019-08-22 17:19 ` [PATCH 4.14 58/71] netfilter: conntrack: Use consistent ct id hash calculation Greg Kroah-Hartman
2019-08-22 17:19 ` [PATCH 4.14 59/71] Input: psmouse - fix build error of multiple definition Greg Kroah-Hartman
2019-08-22 17:19 ` [PATCH 4.14 60/71] iommu/amd: Move iommu_init_pci() to .init section Greg Kroah-Hartman
2019-08-22 17:19 ` [PATCH 4.14 61/71] bnx2x: Fix VFs VLAN reconfiguration in reload Greg Kroah-Hartman
2019-08-22 17:19 ` [PATCH 4.14 62/71] net/mlx4_en: fix a memory leak bug Greg Kroah-Hartman
2019-08-22 17:19 ` [PATCH 4.14 63/71] net/packet: fix race in tpacket_snd() Greg Kroah-Hartman
2019-08-22 17:19 ` [PATCH 4.14 64/71] sctp: fix the transport error_count check Greg Kroah-Hartman
2019-08-22 17:19 ` [PATCH 4.14 65/71] xen/netback: Reset nr_frags before freeing skb Greg Kroah-Hartman
2019-08-22 17:19 ` [PATCH 4.14 66/71] net/mlx5e: Only support tx/rx pause setting for port owner Greg Kroah-Hartman
2019-08-22 17:19 ` [PATCH 4.14 67/71] net/mlx5e: Use flow keys dissector to parse packets for ARFS Greg Kroah-Hartman
2019-08-22 17:19 ` [PATCH 4.14 68/71] team: Add vlan tx offload to hw_enc_features Greg Kroah-Hartman
2019-08-22 17:19 ` [PATCH 4.14 69/71] bonding: " Greg Kroah-Hartman
2019-08-22 17:19 ` [PATCH 4.14 70/71] mmc: sdhci-of-arasan: Do now show error message in case of deffered probe Greg Kroah-Hartman
2019-08-22 17:19 ` Greg Kroah-Hartman [this message]
2019-08-22 21:17 ` [PATCH 4.14 00/71] 4.14.140-stable review kernelci.org bot
2019-08-22 23:23   ` Kevin Hilman
2019-08-22 23:39     ` Greg Kroah-Hartman
2019-08-23  2:07 ` Jon Hunter
2019-08-23  8:04 ` Naresh Kamboju
2019-08-23 14:28 ` Guenter Roeck
2019-08-24 17:55 ` shuah

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190822171730.781081830@linuxfoundation.org \
    --to=gregkh@linuxfoundation.org \
    --cc=fw@strlen.de \
    --cc=kristian.evensen@gmail.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=stable@vger.kernel.org \
    --cc=steffen.klassert@secunet.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).