Linux-mm Archive on lore.kernel.org
 help / color / Atom feed
* [PATCH] mm/slub: Detach node lock from counting free objects
@ 2020-02-01  3:15 Wen Yang
  2020-02-08  3:03 ` Wen Yang
  2020-02-12 22:52 ` Andrew Morton
  0 siblings, 2 replies; 12+ messages in thread
From: Wen Yang @ 2020-02-01  3:15 UTC (permalink / raw)
  To: Christoph Lameter, Pekka Enberg, David Rientjes, Joonsoo Kim,
	Andrew Morton
  Cc: Wen Yang, Xunlei Pang, linux-mm, linux-kernel

The lock protecting the node partial list is taken when counting the free
objects resident in that list. This introduces lock contention when
pages are moved between the CPU and node partial lists in the allocation
path on another CPU. So reading "/proc/slabinfo" can possibly block slab
allocation on another CPU for a while, 200ms in extreme cases. If the
slab objects carry network packets destined for a far-end disk array,
this causes block IO jitter.

This fixes the block IO jitter issue by caching the total number of in-use
objects in the node in advance. The value is then retrieved without taking
the node partial list lock when reading "/proc/slabinfo".

Signed-off-by: Wen Yang <wenyang@linux.alibaba.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Xunlei Pang <xlpang@linux.alibaba.com>
Cc: linux-mm@kvack.org
Cc: linux-kernel@vger.kernel.org
---
 mm/slab.h |  1 +
 mm/slub.c | 42 +++++++++++++++++++++++++-----------------
 2 files changed, 26 insertions(+), 17 deletions(-)

diff --git a/mm/slab.h b/mm/slab.h
index 7e94700aa78c..27d22837f7ff 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -619,6 +619,7 @@ struct kmem_cache_node {
 #ifdef CONFIG_SLUB_DEBUG
 	atomic_long_t nr_slabs;
 	atomic_long_t total_objects;
+	atomic_long_t total_inuse;
 	struct list_head full;
 #endif
 #endif
diff --git a/mm/slub.c b/mm/slub.c
index 503e11b1c4e1..67640e797550 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1060,7 +1060,8 @@ static inline unsigned long node_nr_slabs(struct kmem_cache_node *n)
 	return atomic_long_read(&n->nr_slabs);
 }
 
-static inline void inc_slabs_node(struct kmem_cache *s, int node, int objects)
+static inline void inc_slabs_node(struct kmem_cache *s, int node, int objects,
+				  int inuse)
 {
 	struct kmem_cache_node *n = get_node(s, node);
 
@@ -1073,14 +1074,17 @@ static inline void inc_slabs_node(struct kmem_cache *s, int node, int objects)
 	if (likely(n)) {
 		atomic_long_inc(&n->nr_slabs);
 		atomic_long_add(objects, &n->total_objects);
+		atomic_long_add(inuse, &n->total_inuse);
 	}
 }
-static inline void dec_slabs_node(struct kmem_cache *s, int node, int objects)
+static inline void dec_slabs_node(struct kmem_cache *s, int node, int objects,
+				  int inuse)
 {
 	struct kmem_cache_node *n = get_node(s, node);
 
 	atomic_long_dec(&n->nr_slabs);
 	atomic_long_sub(objects, &n->total_objects);
+	atomic_long_sub(inuse, &n->total_inuse);
 }
 
 /* Object debug checks for alloc/free paths */
@@ -1395,9 +1399,11 @@ static inline unsigned long slabs_node(struct kmem_cache *s, int node)
 static inline unsigned long node_nr_slabs(struct kmem_cache_node *n)
 							{ return 0; }
 static inline void inc_slabs_node(struct kmem_cache *s, int node,
-							int objects) {}
+							int objects,
+							int inuse) {}
 static inline void dec_slabs_node(struct kmem_cache *s, int node,
-							int objects) {}
+							int objects,
+							int inuse) {}
 
 #endif /* CONFIG_SLUB_DEBUG */
 
@@ -1708,7 +1714,7 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
 	if (!page)
 		return NULL;
 
-	inc_slabs_node(s, page_to_nid(page), page->objects);
+	inc_slabs_node(s, page_to_nid(page), page->objects, page->inuse);
 
 	return page;
 }
@@ -1768,7 +1774,9 @@ static void free_slab(struct kmem_cache *s, struct page *page)
 
 static void discard_slab(struct kmem_cache *s, struct page *page)
 {
-	dec_slabs_node(s, page_to_nid(page), page->objects);
+	int inuse = page->objects;
+
+	dec_slabs_node(s, page_to_nid(page), page->objects, inuse);
 	free_slab(s, page);
 }
 
@@ -2396,9 +2404,9 @@ static inline int node_match(struct page *page, int node)
 }
 
 #ifdef CONFIG_SLUB_DEBUG
-static int count_free(struct page *page)
+static inline unsigned long node_nr_inuse(struct kmem_cache_node *n)
 {
-	return page->objects - page->inuse;
+	return atomic_long_read(&n->total_inuse);
 }
 
 static inline unsigned long node_nr_objs(struct kmem_cache_node *n)
@@ -2448,14 +2456,14 @@ slab_out_of_memory(struct kmem_cache *s, gfp_t gfpflags, int nid)
 	for_each_kmem_cache_node(s, node, n) {
 		unsigned long nr_slabs;
 		unsigned long nr_objs;
-		unsigned long nr_free;
+		unsigned long nr_inuse;
 
-		nr_free  = count_partial(n, count_free);
 		nr_slabs = node_nr_slabs(n);
 		nr_objs  = node_nr_objs(n);
+		nr_inuse = node_nr_inuse(n);
 
 		pr_warn("  node %d: slabs: %ld, objs: %ld, free: %ld\n",
-			node, nr_slabs, nr_objs, nr_free);
+			node, nr_slabs, nr_objs, nr_objs - nr_inuse);
 	}
 #endif
 }
@@ -3348,6 +3356,7 @@ init_kmem_cache_node(struct kmem_cache_node *n)
 #ifdef CONFIG_SLUB_DEBUG
 	atomic_long_set(&n->nr_slabs, 0);
 	atomic_long_set(&n->total_objects, 0);
+	atomic_long_set(&n->total_inuse, 0);
 	INIT_LIST_HEAD(&n->full);
 #endif
 }
@@ -3411,7 +3420,7 @@ static void early_kmem_cache_node_alloc(int node)
 	page->frozen = 0;
 	kmem_cache_node->node[node] = n;
 	init_kmem_cache_node(n);
-	inc_slabs_node(kmem_cache_node, node, page->objects);
+	inc_slabs_node(kmem_cache_node, node, page->objects, page->inuse);
 
 	/*
 	 * No locks need to be taken here as it has just been
@@ -4857,8 +4866,7 @@ static ssize_t show_slab_objects(struct kmem_cache *s,
 			if (flags & SO_TOTAL)
 				x = atomic_long_read(&n->total_objects);
 			else if (flags & SO_OBJECTS)
-				x = atomic_long_read(&n->total_objects) -
-					count_partial(n, count_free);
+				x = atomic_long_read(&n->total_inuse);
 			else
 				x = atomic_long_read(&n->nr_slabs);
 			total += x;
@@ -5900,17 +5908,17 @@ void get_slabinfo(struct kmem_cache *s, struct slabinfo *sinfo)
 {
 	unsigned long nr_slabs = 0;
 	unsigned long nr_objs = 0;
-	unsigned long nr_free = 0;
+	unsigned long nr_inuse = 0;
 	int node;
 	struct kmem_cache_node *n;
 
 	for_each_kmem_cache_node(s, node, n) {
 		nr_slabs += node_nr_slabs(n);
 		nr_objs += node_nr_objs(n);
-		nr_free += count_partial(n, count_free);
+		nr_inuse += node_nr_inuse(n);
 	}
 
-	sinfo->active_objs = nr_objs - nr_free;
+	sinfo->active_objs = nr_inuse;
 	sinfo->num_objs = nr_objs;
 	sinfo->active_slabs = nr_slabs;
 	sinfo->num_slabs = nr_slabs;
-- 
2.23.0



^ permalink raw reply	[flat|nested] 12+ messages in thread

end of thread, back to index

Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-02-01  3:15 [PATCH] mm/slub: Detach node lock from counting free objects Wen Yang
2020-02-08  3:03 ` Wen Yang
2020-02-08 21:41   ` Christopher Lameter
2020-02-12 22:56     ` Andrew Morton
2020-02-14  2:16       ` Christopher Lameter
2020-02-12 22:52 ` Andrew Morton
2020-02-16  4:15   ` Wen Yang
2020-02-18 20:53     ` Roman Gushchin
2020-02-20 13:53       ` Wen Yang
2020-02-20 15:40         ` Roman Gushchin
2020-02-22  6:55           ` Wen Yang
2020-02-24 17:01             ` Roman Gushchin

Linux-mm Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/linux-mm/0 linux-mm/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 linux-mm linux-mm/ https://lore.kernel.org/linux-mm \
		linux-mm@kvack.org
	public-inbox-index linux-mm

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kvack.linux-mm


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git