linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
To: linux-kernel@vger.kernel.org
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>,
	stable@vger.kernel.org, Vladimir Davydov <vdavydov.dev@gmail.com>,
	Doug Smythies <dsmythies@telus.net>,
	Joonsoo Kim <iamjoonsoo.kim@lge.com>,
	Christoph Lameter <cl@linux.com>,
	David Rientjes <rientjes@google.com>,
	Johannes Weiner <hannes@cmpxchg.org>,
	Michal Hocko <mhocko@kernel.org>,
	Pekka Enberg <penberg@kernel.org>,
	Andrew Morton <akpm@linux-foundation.org>,
	Linus Torvalds <torvalds@linux-foundation.org>,
	Sasha Levin <alexander.levin@verizon.com>
Subject: [PATCH 4.9 76/93] slub: move synchronize_sched out of slab_mutex on shrink
Date: Mon, 20 Mar 2017 18:51:51 +0100	[thread overview]
Message-ID: <20170320174740.219832036@linuxfoundation.org> (raw)
In-Reply-To: <20170320174735.243147498@linuxfoundation.org>

4.9-stable review patch.  If anyone has any objections, please let me know.

------------------

From: Vladimir Davydov <vdavydov.dev@gmail.com>

[ Upstream commit 89e364db71fb5e7fc8d93228152abfa67daf35fa ]

synchronize_sched() is a heavy operation, and calling it for each cache
owned by a memory cgroup being destroyed may take quite some time.  What
is worse, it's currently called under the slab_mutex, stalling all work
items doing cache creation/destruction.

Actually, there isn't much point in calling synchronize_sched() for each
cache: it's enough to call it just once, after setting cpu_partial for
all caches and before shrinking them.  This way, we can also move it out
of the slab_mutex, which we have to hold for iterating over the slab
cache list.

Link: https://bugzilla.kernel.org/show_bug.cgi?id=172991
Link: http://lkml.kernel.org/r/0a10d71ecae3db00fb4421bcd3f82bcc911f4be4.1475329751.git.vdavydov.dev@gmail.com
Signed-off-by: Vladimir Davydov <vdavydov.dev@gmail.com>
Reported-by: Doug Smythies <dsmythies@telus.net>
Acked-by: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Pekka Enberg <penberg@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Sasha Levin <alexander.levin@verizon.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 mm/slab.c        |    4 ++--
 mm/slab.h        |    2 +-
 mm/slab_common.c |   27 +++++++++++++++++++++++++--
 mm/slob.c        |    2 +-
 mm/slub.c        |   19 ++-----------------
 5 files changed, 31 insertions(+), 23 deletions(-)

--- a/mm/slab.c
+++ b/mm/slab.c
@@ -2332,7 +2332,7 @@ out:
 	return nr_freed;
 }
 
-int __kmem_cache_shrink(struct kmem_cache *cachep, bool deactivate)
+int __kmem_cache_shrink(struct kmem_cache *cachep)
 {
 	int ret = 0;
 	int node;
@@ -2352,7 +2352,7 @@ int __kmem_cache_shrink(struct kmem_cach
 
 int __kmem_cache_shutdown(struct kmem_cache *cachep)
 {
-	return __kmem_cache_shrink(cachep, false);
+	return __kmem_cache_shrink(cachep);
 }
 
 void __kmem_cache_release(struct kmem_cache *cachep)
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -146,7 +146,7 @@ static inline unsigned long kmem_cache_f
 
 int __kmem_cache_shutdown(struct kmem_cache *);
 void __kmem_cache_release(struct kmem_cache *);
-int __kmem_cache_shrink(struct kmem_cache *, bool);
+int __kmem_cache_shrink(struct kmem_cache *);
 void slab_kmem_cache_release(struct kmem_cache *);
 
 struct seq_file;
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -573,6 +573,29 @@ void memcg_deactivate_kmem_caches(struct
 	get_online_cpus();
 	get_online_mems();
 
+#ifdef CONFIG_SLUB
+	/*
+	 * In case of SLUB, we need to disable empty slab caching to
+	 * avoid pinning the offline memory cgroup by freeable kmem
+	 * pages charged to it. SLAB doesn't need this, as it
+	 * periodically purges unused slabs.
+	 */
+	mutex_lock(&slab_mutex);
+	list_for_each_entry(s, &slab_caches, list) {
+		c = is_root_cache(s) ? cache_from_memcg_idx(s, idx) : NULL;
+		if (c) {
+			c->cpu_partial = 0;
+			c->min_partial = 0;
+		}
+	}
+	mutex_unlock(&slab_mutex);
+	/*
+	 * kmem_cache->cpu_partial is checked locklessly (see
+	 * put_cpu_partial()). Make sure the change is visible.
+	 */
+	synchronize_sched();
+#endif
+
 	mutex_lock(&slab_mutex);
 	list_for_each_entry(s, &slab_caches, list) {
 		if (!is_root_cache(s))
@@ -584,7 +607,7 @@ void memcg_deactivate_kmem_caches(struct
 		if (!c)
 			continue;
 
-		__kmem_cache_shrink(c, true);
+		__kmem_cache_shrink(c);
 		arr->entries[idx] = NULL;
 	}
 	mutex_unlock(&slab_mutex);
@@ -755,7 +778,7 @@ int kmem_cache_shrink(struct kmem_cache
 	get_online_cpus();
 	get_online_mems();
 	kasan_cache_shrink(cachep);
-	ret = __kmem_cache_shrink(cachep, false);
+	ret = __kmem_cache_shrink(cachep);
 	put_online_mems();
 	put_online_cpus();
 	return ret;
--- a/mm/slob.c
+++ b/mm/slob.c
@@ -634,7 +634,7 @@ void __kmem_cache_release(struct kmem_ca
 {
 }
 
-int __kmem_cache_shrink(struct kmem_cache *d, bool deactivate)
+int __kmem_cache_shrink(struct kmem_cache *d)
 {
 	return 0;
 }
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -3887,7 +3887,7 @@ EXPORT_SYMBOL(kfree);
  * being allocated from last increasing the chance that the last objects
  * are freed in them.
  */
-int __kmem_cache_shrink(struct kmem_cache *s, bool deactivate)
+int __kmem_cache_shrink(struct kmem_cache *s)
 {
 	int node;
 	int i;
@@ -3899,21 +3899,6 @@ int __kmem_cache_shrink(struct kmem_cach
 	unsigned long flags;
 	int ret = 0;
 
-	if (deactivate) {
-		/*
-		 * Disable empty slabs caching. Used to avoid pinning offline
-		 * memory cgroups by kmem pages that can be freed.
-		 */
-		s->cpu_partial = 0;
-		s->min_partial = 0;
-
-		/*
-		 * s->cpu_partial is checked locklessly (see put_cpu_partial),
-		 * so we have to make sure the change is visible.
-		 */
-		synchronize_sched();
-	}
-
 	flush_all(s);
 	for_each_kmem_cache_node(s, node, n) {
 		INIT_LIST_HEAD(&discard);
@@ -3970,7 +3955,7 @@ static int slab_mem_going_offline_callba
 
 	mutex_lock(&slab_mutex);
 	list_for_each_entry(s, &slab_caches, list)
-		__kmem_cache_shrink(s, false);
+		__kmem_cache_shrink(s);
 	mutex_unlock(&slab_mutex);
 
 	return 0;

  parent reply	other threads:[~2017-03-20 18:26 UTC|newest]

Thread overview: 91+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-03-20 17:50 [PATCH 4.9 00/93] 4.9.17-stable review Greg Kroah-Hartman
2017-03-20 17:50 ` [PATCH 4.9 01/93] net/mlx5e: Register/unregister vport representors on interface attach/detach Greg Kroah-Hartman
2017-03-20 17:50 ` [PATCH 4.9 02/93] net/mlx5e: Do not reduce LRO WQE size when not using build_skb Greg Kroah-Hartman
2017-03-20 17:50 ` [PATCH 4.9 03/93] net/mlx5e: Fix wrong CQE decompression Greg Kroah-Hartman
2017-03-20 17:50 ` [PATCH 4.9 04/93] vxlan: correctly validate VXLAN ID against VXLAN_N_VID Greg Kroah-Hartman
2017-03-20 17:50 ` [PATCH 4.9 05/93] vti6: return GRE_KEY for vti6 Greg Kroah-Hartman
2017-03-20 17:50 ` [PATCH 4.9 06/93] vxlan: dont allow overwrite of config src addr Greg Kroah-Hartman
2017-03-20 17:50 ` [PATCH 4.9 07/93] ipv4: mask tos for input route Greg Kroah-Hartman
2017-03-20 17:50 ` [PATCH 4.9 08/93] net sched actions: decrement module reference count after table flush Greg Kroah-Hartman
2017-03-20 17:50 ` [PATCH 4.9 10/93] net: phy: Avoid deadlock during phy_error() Greg Kroah-Hartman
2017-03-20 17:50 ` [PATCH 4.9 11/93] vxlan: lock RCU on TX path Greg Kroah-Hartman
2017-03-20 17:50 ` [PATCH 4.9 12/93] geneve: " Greg Kroah-Hartman
2017-03-20 17:50 ` [PATCH 4.9 13/93] mlxsw: spectrum_router: Avoid potential packets loss Greg Kroah-Hartman
2017-03-20 17:50 ` [PATCH 4.9 14/93] tcp/dccp: block BH for SYN processing Greg Kroah-Hartman
2017-03-20 17:50 ` [PATCH 4.9 15/93] net: bridge: allow IPv6 when multicast flood is disabled Greg Kroah-Hartman
2017-03-20 17:50 ` [PATCH 4.9 16/93] net: dont call strlen() on the user buffer in packet_bind_spkt() Greg Kroah-Hartman
2017-03-20 17:50 ` [PATCH 4.9 17/93] net: net_enable_timestamp() can be called from irq contexts Greg Kroah-Hartman
2017-03-20 17:50 ` [PATCH 4.9 18/93] ipv6: orphan skbs in reassembly unit Greg Kroah-Hartman
2017-03-20 17:50 ` [PATCH 4.9 19/93] dccp: Unlock sock before calling sk_free() Greg Kroah-Hartman
2017-03-20 17:50 ` [PATCH 4.9 20/93] strparser: destroy workqueue on module exit Greg Kroah-Hartman
2017-03-20 17:50 ` [PATCH 4.9 21/93] tcp: fix various issues for sockets morphing to listen state Greg Kroah-Hartman
2017-03-20 17:50 ` [PATCH 4.9 22/93] net: fix socket refcounting in skb_complete_wifi_ack() Greg Kroah-Hartman
2017-03-20 17:50 ` [PATCH 4.9 23/93] net: fix socket refcounting in skb_complete_tx_timestamp() Greg Kroah-Hartman
2017-03-20 17:50 ` [PATCH 4.9 24/93] net/sched: act_skbmod: remove unneeded rcu_read_unlock in tcf_skbmod_dump Greg Kroah-Hartman
2017-03-20 17:51 ` [PATCH 4.9 25/93] dccp: fix use-after-free in dccp_feat_activate_values Greg Kroah-Hartman
2017-03-20 17:51 ` [PATCH 4.9 26/93] vrf: Fix use-after-free in vrf_xmit Greg Kroah-Hartman
2017-03-20 17:51 ` [PATCH 4.9 27/93] net/tunnel: set inner protocol in network gro hooks Greg Kroah-Hartman
2017-03-20 17:51 ` [PATCH 4.9 28/93] uapi: fix linux/packet_diag.h userspace compilation error Greg Kroah-Hartman
2017-03-20 17:51 ` [PATCH 4.9 30/93] mpls: Send route delete notifications when router module is unloaded Greg Kroah-Hartman
2017-03-20 17:51 ` [PATCH 4.9 31/93] mpls: Do not decrement alive counter for unregister events Greg Kroah-Hartman
2017-03-20 17:51 ` [PATCH 4.9 32/93] ipv6: make ECMP route replacement less greedy Greg Kroah-Hartman
2017-03-20 17:51 ` [PATCH 4.9 33/93] ipv6: avoid write to a possibly cloned skb Greg Kroah-Hartman
2017-03-20 17:51 ` [PATCH 4.9 34/93] bridge: drop netfilter fake rtable unconditionally Greg Kroah-Hartman
2017-03-20 17:51 ` [PATCH 4.9 37/93] dccp: fix memory leak during tear-down of unsuccessful connection request Greg Kroah-Hartman
2017-03-20 17:51 ` [PATCH 4.9 38/93] bpf: Detect identical PTR_TO_MAP_VALUE_OR_NULL registers Greg Kroah-Hartman
2017-03-20 17:51 ` [PATCH 4.9 39/93] bpf: fix state equivalence Greg Kroah-Hartman
2017-03-20 17:51 ` [PATCH 4.9 40/93] bpf: fix regression on verifier pruning wrt map lookups Greg Kroah-Hartman
2017-03-20 17:51 ` [PATCH 4.9 41/93] bpf: fix mark_reg_unknown_value for spilled regs on map value marking Greg Kroah-Hartman
2017-03-20 17:51 ` [PATCH 4.9 42/93] dmaengine: iota: ioat_alloc_chan_resources should not perform sleeping allocations Greg Kroah-Hartman
2017-03-20 17:51 ` [PATCH 4.9 43/93] xen: do not re-use pirq number cached in pci device msi msg data Greg Kroah-Hartman
2017-03-20 17:51 ` [PATCH 4.9 44/93] igb: Workaround for igb i210 firmware issue Greg Kroah-Hartman
2017-03-20 17:51 ` [PATCH 4.9 45/93] igb: add i211 to i210 PHY workaround Greg Kroah-Hartman
2017-03-20 17:51 ` [PATCH 4.9 46/93] scsi: ibmvscsis: Issues from Dan Carpenter/Smatch Greg Kroah-Hartman
2017-03-20 17:51 ` [PATCH 4.9 47/93] scsi: ibmvscsis: Return correct partition name/# to client Greg Kroah-Hartman
2017-03-20 17:51 ` [PATCH 4.9 48/93] scsi: ibmvscsis: Clean up properly if target_submit_cmd/tmr fails Greg Kroah-Hartman
2017-03-20 17:51 ` [PATCH 4.9 49/93] scsi: ibmvscsis: Rearrange functions for future patches Greg Kroah-Hartman
2017-03-20 17:51 ` [PATCH 4.9 50/93] scsi: ibmvscsis: Synchronize cmds at tpg_enable_store time Greg Kroah-Hartman
2017-03-20 17:51 ` [PATCH 4.9 51/93] scsi: ibmvscsis: Synchronize cmds at remove time Greg Kroah-Hartman
2017-03-20 17:51 ` [PATCH 4.9 52/93] x86/hyperv: Handle unknown NMIs on one CPU when unknown_nmi_panic Greg Kroah-Hartman
2017-03-20 17:51 ` [PATCH 4.9 53/93] PCI: Separate VF BAR updates from standard BAR updates Greg Kroah-Hartman
2017-03-20 17:51 ` [PATCH 4.9 54/93] PCI: Remove pci_resource_bar() and pci_iov_resource_bar() Greg Kroah-Hartman
2017-03-20 17:51 ` [PATCH 4.9 55/93] PCI: Add comments about ROM BAR updating Greg Kroah-Hartman
2017-03-20 17:51 ` [PATCH 4.9 56/93] PCI: Decouple IORESOURCE_ROM_ENABLE and PCI_ROM_ADDRESS_ENABLE Greg Kroah-Hartman
2017-03-20 17:51 ` [PATCH 4.9 57/93] PCI: Dont update VF BARs while VF memory space is enabled Greg Kroah-Hartman
2017-03-20 17:51 ` [PATCH 4.9 58/93] PCI: Update BARs using property bits appropriate for type Greg Kroah-Hartman
2017-03-20 17:51 ` [PATCH 4.9 59/93] PCI: Ignore BAR updates on virtual functions Greg Kroah-Hartman
2017-03-20 17:51 ` [PATCH 4.9 60/93] PCI: Do any VF BAR updates before enabling the BARs Greg Kroah-Hartman
2017-03-20 17:51 ` [PATCH 4.9 61/93] ibmveth: calculate gso_segs for large packets Greg Kroah-Hartman
2017-03-20 17:51 ` [PATCH 4.9 62/93] Drivers: hv: ring_buffer: count on wrap around mappings in get_next_pkt_raw() (v2) Greg Kroah-Hartman
2017-03-20 17:51 ` [PATCH 4.9 63/93] vfio/spapr: Postpone allocation of userspace version of TCE table Greg Kroah-Hartman
2017-03-20 17:51 ` [PATCH 4.9 64/93] powerpc/iommu: Pass mm_struct to init/cleanup helpers Greg Kroah-Hartman
2017-03-20 17:51 ` [PATCH 4.9 65/93] powerpc/iommu: Stop using @current in mm_iommu_xxx Greg Kroah-Hartman
2017-03-20 17:51 ` [PATCH 4.9 66/93] vfio/spapr: Reference mm in tce_container Greg Kroah-Hartman
2017-03-20 17:51 ` [PATCH 4.9 67/93] powerpc/mm/iommu, vfio/spapr: Put pages on VFIO container shutdown Greg Kroah-Hartman
2017-03-20 17:51 ` [PATCH 4.9 68/93] vfio/spapr: Add a helper to create default DMA window Greg Kroah-Hartman
2017-03-20 17:51 ` [PATCH 4.9 69/93] vfio/spapr: Postpone default window creation Greg Kroah-Hartman
2017-03-20 17:51 ` [PATCH 4.9 70/93] drm/nouveau/disp/gp102: fix cursor/overlay immediate channel indices Greg Kroah-Hartman
2017-03-20 17:51 ` [PATCH 4.9 71/93] drm/nouveau/disp/nv50-: split chid into chid.ctrl and chid.user Greg Kroah-Hartman
2017-03-20 17:51 ` [PATCH 4.9 72/93] drm/nouveau/disp/nv50-: specify ctrl/user separately when constructing classes Greg Kroah-Hartman
2017-03-20 17:51 ` [PATCH 4.9 73/93] block: allow WRITE_SAME commands with the SG_IO ioctl Greg Kroah-Hartman
2017-03-20 17:51 ` [PATCH 4.9 74/93] s390/zcrypt: Introduce CEX6 toleration Greg Kroah-Hartman
2017-03-20 17:51 ` [PATCH 4.9 75/93] [media] uvcvideo: uvc_scan_fallback() for webcams with broken chain Greg Kroah-Hartman
2017-03-20 17:51 ` Greg Kroah-Hartman [this message]
2017-03-20 17:51 ` [PATCH 4.9 77/93] ACPI / blacklist: add _REV quirks for Dell Precision 5520 and 3520 Greg Kroah-Hartman
2017-03-20 17:51 ` [PATCH 4.9 78/93] ACPI / blacklist: Make Dell Latitude 3350 ethernet work Greg Kroah-Hartman
2017-03-20 17:51 ` [PATCH 4.9 79/93] serial: 8250_pci: Detach low-level driver during PCI error recovery Greg Kroah-Hartman
2017-03-20 17:51 ` [PATCH 4.9 80/93] usb: gadget: udc: atmel: remove memory leak Greg Kroah-Hartman
2017-03-20 17:51 ` [PATCH 4.9 82/93] clk: bcm2835: Fix ->fixed_divider of pllh_aux Greg Kroah-Hartman
2017-03-20 17:51 ` [PATCH 4.9 83/93] drm/vc4: Fix race between page flip completion event and clean-up Greg Kroah-Hartman
2017-03-20 17:51 ` [PATCH 4.9 84/93] drm/vc4: Fix ->clock_select setting for the VEC encoder Greg Kroah-Hartman
2017-03-20 17:52 ` [PATCH 4.9 85/93] arm64: KVM: VHE: Clear HCR_TGE when invalidating guest TLBs Greg Kroah-Hartman
2017-03-20 17:52 ` [PATCH 4.9 86/93] irqchip/gicv3-its: Add workaround for QDF2400 ITS erratum 0065 Greg Kroah-Hartman
2017-03-20 17:52 ` [PATCH 4.9 87/93] x86/tsc: Fix ART for TSC_KNOWN_FREQ Greg Kroah-Hartman
2017-03-20 17:52 ` [PATCH 4.9 88/93] x86/kasan: Fix boot with KASAN=y and PROFILE_ANNOTATED_BRANCHES=y Greg Kroah-Hartman
2017-03-20 17:52 ` [PATCH 4.9 89/93] x86/perf: Fix CR4.PCE propagation to use active_mm instead of mm Greg Kroah-Hartman
2017-03-20 17:52 ` [PATCH 4.9 90/93] futex: Fix potential use-after-free in FUTEX_REQUEUE_PI Greg Kroah-Hartman
2017-03-20 17:52 ` [PATCH 4.9 91/93] futex: Add missing error handling to FUTEX_REQUEUE_PI Greg Kroah-Hartman
2017-03-20 17:52 ` [PATCH 4.9 92/93] locking/rwsem: Fix down_write_killable() for CONFIG_RWSEM_GENERIC_SPINLOCK=y Greg Kroah-Hartman
2017-03-20 17:52 ` [PATCH 4.9 93/93] crypto: powerpc - Fix initialisation of crc32c context Greg Kroah-Hartman
2017-03-21  0:12 ` [PATCH 4.9 00/93] 4.9.17-stable review Shuah Khan
2017-03-21  2:13 ` Guenter Roeck

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20170320174740.219832036@linuxfoundation.org \
    --to=gregkh@linuxfoundation.org \
    --cc=akpm@linux-foundation.org \
    --cc=alexander.levin@verizon.com \
    --cc=cl@linux.com \
    --cc=dsmythies@telus.net \
    --cc=hannes@cmpxchg.org \
    --cc=iamjoonsoo.kim@lge.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mhocko@kernel.org \
    --cc=penberg@kernel.org \
    --cc=rientjes@google.com \
    --cc=stable@vger.kernel.org \
    --cc=torvalds@linux-foundation.org \
    --cc=vdavydov.dev@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).