* [PATCH v4 1/6] mm, slab: combine kmalloc_caches and kmalloc_dma_caches
2018-07-31 9:06 [PATCH v4 0/6] kmalloc-reclaimable caches Vlastimil Babka
@ 2018-07-31 9:06 ` Vlastimil Babka
2018-07-31 9:06 ` [PATCH v4 2/6] mm, slab/slub: introduce kmalloc-reclaimable caches Vlastimil Babka
` (4 subsequent siblings)
5 siblings, 0 replies; 7+ messages in thread
From: Vlastimil Babka @ 2018-07-31 9:06 UTC (permalink / raw)
To: Andrew Morton
Cc: linux-mm, linux-kernel, linux-api, Roman Gushchin, Michal Hocko,
Johannes Weiner, Christoph Lameter, David Rientjes, Joonsoo Kim,
Mel Gorman, Matthew Wilcox, Vlastimil Babka
The kmalloc caches currently mainain separate (optional) array
kmalloc_dma_caches for __GFP_DMA allocations. There are tests for __GFP_DMA in
the allocation hotpaths. We can avoid the branches by combining kmalloc_caches
and kmalloc_dma_caches into a single two-dimensional array where the outer
dimension is cache "type". This will also allow to add kmalloc-reclaimable
caches as a third type.
Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
Acked-by: Mel Gorman <mgorman@techsingularity.net>
Acked-by: Christoph Lameter <cl@linux.com>
Acked-by: Roman Gushchin <guro@fb.com>
---
include/linux/slab.h | 42 +++++++++++++++++++++++++++++++-----------
mm/slab.c | 4 ++--
mm/slab_common.c | 31 ++++++++++++-------------------
mm/slub.c | 13 +++++++------
4 files changed, 52 insertions(+), 38 deletions(-)
diff --git a/include/linux/slab.h b/include/linux/slab.h
index 14e3fe4bd6a1..f35f2c1f37b9 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -295,12 +295,29 @@ static inline void __check_heap_object(const void *ptr, unsigned long n,
#define SLAB_OBJ_MIN_SIZE (KMALLOC_MIN_SIZE < 16 ? \
(KMALLOC_MIN_SIZE) : 16)
+enum kmalloc_cache_type {
+ KMALLOC_NORMAL = 0,
+#ifdef CONFIG_ZONE_DMA
+ KMALLOC_DMA,
+#endif
+ NR_KMALLOC_TYPES
+};
+
#ifndef CONFIG_SLOB
-extern struct kmem_cache *kmalloc_caches[KMALLOC_SHIFT_HIGH + 1];
+extern struct kmem_cache *
+kmalloc_caches[NR_KMALLOC_TYPES][KMALLOC_SHIFT_HIGH + 1];
+
+static __always_inline enum kmalloc_cache_type kmalloc_type(gfp_t flags)
+{
+ int is_dma = 0;
+
#ifdef CONFIG_ZONE_DMA
-extern struct kmem_cache *kmalloc_dma_caches[KMALLOC_SHIFT_HIGH + 1];
+ is_dma = !!(flags & __GFP_DMA);
#endif
+ return is_dma;
+}
+
/*
* Figure out which kmalloc slab an allocation of a certain size
* belongs to.
@@ -501,18 +518,20 @@ static __always_inline void *kmalloc_large(size_t size, gfp_t flags)
static __always_inline void *kmalloc(size_t size, gfp_t flags)
{
if (__builtin_constant_p(size)) {
+#ifndef CONFIG_SLOB
+ unsigned int index;
+#endif
if (size > KMALLOC_MAX_CACHE_SIZE)
return kmalloc_large(size, flags);
#ifndef CONFIG_SLOB
- if (!(flags & GFP_DMA)) {
- unsigned int index = kmalloc_index(size);
+ index = kmalloc_index(size);
- if (!index)
- return ZERO_SIZE_PTR;
+ if (!index)
+ return ZERO_SIZE_PTR;
- return kmem_cache_alloc_trace(kmalloc_caches[index],
- flags, size);
- }
+ return kmem_cache_alloc_trace(
+ kmalloc_caches[kmalloc_type(flags)][index],
+ flags, size);
#endif
}
return __kmalloc(size, flags);
@@ -542,13 +561,14 @@ static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node)
{
#ifndef CONFIG_SLOB
if (__builtin_constant_p(size) &&
- size <= KMALLOC_MAX_CACHE_SIZE && !(flags & GFP_DMA)) {
+ size <= KMALLOC_MAX_CACHE_SIZE) {
unsigned int i = kmalloc_index(size);
if (!i)
return ZERO_SIZE_PTR;
- return kmem_cache_alloc_node_trace(kmalloc_caches[i],
+ return kmem_cache_alloc_node_trace(
+ kmalloc_caches[kmalloc_type(flags)][i],
flags, node, size);
}
#endif
diff --git a/mm/slab.c b/mm/slab.c
index aa76a70e087e..9515798f37b2 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -1288,7 +1288,7 @@ void __init kmem_cache_init(void)
* Initialize the caches that provide memory for the kmem_cache_node
* structures first. Without this, further allocations will bug.
*/
- kmalloc_caches[INDEX_NODE] = create_kmalloc_cache(
+ kmalloc_caches[KMALLOC_NORMAL][INDEX_NODE] = create_kmalloc_cache(
kmalloc_info[INDEX_NODE].name,
kmalloc_size(INDEX_NODE), ARCH_KMALLOC_FLAGS,
0, kmalloc_size(INDEX_NODE));
@@ -1304,7 +1304,7 @@ void __init kmem_cache_init(void)
for_each_online_node(nid) {
init_list(kmem_cache, &init_kmem_cache_node[CACHE_CACHE + nid], nid);
- init_list(kmalloc_caches[INDEX_NODE],
+ init_list(kmalloc_caches[KMALLOC_NORMAL][INDEX_NODE],
&init_kmem_cache_node[SIZE_NODE + nid], nid);
}
}
diff --git a/mm/slab_common.c b/mm/slab_common.c
index 2296caf87bfb..4572941440f3 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -973,14 +973,10 @@ struct kmem_cache *__init create_kmalloc_cache(const char *name,
return s;
}
-struct kmem_cache *kmalloc_caches[KMALLOC_SHIFT_HIGH + 1] __ro_after_init;
+struct kmem_cache *
+kmalloc_caches[NR_KMALLOC_TYPES][KMALLOC_SHIFT_HIGH + 1] __ro_after_init;
EXPORT_SYMBOL(kmalloc_caches);
-#ifdef CONFIG_ZONE_DMA
-struct kmem_cache *kmalloc_dma_caches[KMALLOC_SHIFT_HIGH + 1] __ro_after_init;
-EXPORT_SYMBOL(kmalloc_dma_caches);
-#endif
-
/*
* Conversion table for small slabs sizes / 8 to the index in the
* kmalloc array. This is necessary for slabs < 192 since we have non power
@@ -1040,12 +1036,7 @@ struct kmem_cache *kmalloc_slab(size_t size, gfp_t flags)
} else
index = fls(size - 1);
-#ifdef CONFIG_ZONE_DMA
- if (unlikely((flags & GFP_DMA)))
- return kmalloc_dma_caches[index];
-
-#endif
- return kmalloc_caches[index];
+ return kmalloc_caches[kmalloc_type(flags)][index];
}
/*
@@ -1119,7 +1110,8 @@ void __init setup_kmalloc_cache_index_table(void)
static void __init new_kmalloc_cache(int idx, slab_flags_t flags)
{
- kmalloc_caches[idx] = create_kmalloc_cache(kmalloc_info[idx].name,
+ kmalloc_caches[KMALLOC_NORMAL][idx] = create_kmalloc_cache(
+ kmalloc_info[idx].name,
kmalloc_info[idx].size, flags, 0,
kmalloc_info[idx].size);
}
@@ -1132,9 +1124,10 @@ static void __init new_kmalloc_cache(int idx, slab_flags_t flags)
void __init create_kmalloc_caches(slab_flags_t flags)
{
int i;
+ int type = KMALLOC_NORMAL;
for (i = KMALLOC_SHIFT_LOW; i <= KMALLOC_SHIFT_HIGH; i++) {
- if (!kmalloc_caches[i])
+ if (!kmalloc_caches[type][i])
new_kmalloc_cache(i, flags);
/*
@@ -1142,9 +1135,9 @@ void __init create_kmalloc_caches(slab_flags_t flags)
* These have to be created immediately after the
* earlier power of two caches
*/
- if (KMALLOC_MIN_SIZE <= 32 && !kmalloc_caches[1] && i == 6)
+ if (KMALLOC_MIN_SIZE <= 32 && !kmalloc_caches[type][1] && i == 6)
new_kmalloc_cache(1, flags);
- if (KMALLOC_MIN_SIZE <= 64 && !kmalloc_caches[2] && i == 7)
+ if (KMALLOC_MIN_SIZE <= 64 && !kmalloc_caches[type][2] && i == 7)
new_kmalloc_cache(2, flags);
}
@@ -1153,7 +1146,7 @@ void __init create_kmalloc_caches(slab_flags_t flags)
#ifdef CONFIG_ZONE_DMA
for (i = 0; i <= KMALLOC_SHIFT_HIGH; i++) {
- struct kmem_cache *s = kmalloc_caches[i];
+ struct kmem_cache *s = kmalloc_caches[KMALLOC_NORMAL][i];
if (s) {
unsigned int size = kmalloc_size(i);
@@ -1161,8 +1154,8 @@ void __init create_kmalloc_caches(slab_flags_t flags)
"dma-kmalloc-%u", size);
BUG_ON(!n);
- kmalloc_dma_caches[i] = create_kmalloc_cache(n,
- size, SLAB_CACHE_DMA | flags, 0, 0);
+ kmalloc_caches[KMALLOC_DMA][i] = create_kmalloc_cache(
+ n, size, SLAB_CACHE_DMA | flags, 0, 0);
}
}
#endif
diff --git a/mm/slub.c b/mm/slub.c
index 51258eff4178..a7b4657ea8e0 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -4659,6 +4659,7 @@ static int list_locations(struct kmem_cache *s, char *buf,
static void __init resiliency_test(void)
{
u8 *p;
+ int type = KMALLOC_NORMAL;
BUILD_BUG_ON(KMALLOC_MIN_SIZE > 16 || KMALLOC_SHIFT_HIGH < 10);
@@ -4671,7 +4672,7 @@ static void __init resiliency_test(void)
pr_err("\n1. kmalloc-16: Clobber Redzone/next pointer 0x12->0x%p\n\n",
p + 16);
- validate_slab_cache(kmalloc_caches[4]);
+ validate_slab_cache(kmalloc_caches[type][4]);
/* Hmmm... The next two are dangerous */
p = kzalloc(32, GFP_KERNEL);
@@ -4680,33 +4681,33 @@ static void __init resiliency_test(void)
p);
pr_err("If allocated object is overwritten then not detectable\n\n");
- validate_slab_cache(kmalloc_caches[5]);
+ validate_slab_cache(kmalloc_caches[type][5]);
p = kzalloc(64, GFP_KERNEL);
p += 64 + (get_cycles() & 0xff) * sizeof(void *);
*p = 0x56;
pr_err("\n3. kmalloc-64: corrupting random byte 0x56->0x%p\n",
p);
pr_err("If allocated object is overwritten then not detectable\n\n");
- validate_slab_cache(kmalloc_caches[6]);
+ validate_slab_cache(kmalloc_caches[type][6]);
pr_err("\nB. Corruption after free\n");
p = kzalloc(128, GFP_KERNEL);
kfree(p);
*p = 0x78;
pr_err("1. kmalloc-128: Clobber first word 0x78->0x%p\n\n", p);
- validate_slab_cache(kmalloc_caches[7]);
+ validate_slab_cache(kmalloc_caches[type][7]);
p = kzalloc(256, GFP_KERNEL);
kfree(p);
p[50] = 0x9a;
pr_err("\n2. kmalloc-256: Clobber 50th byte 0x9a->0x%p\n\n", p);
- validate_slab_cache(kmalloc_caches[8]);
+ validate_slab_cache(kmalloc_caches[type][8]);
p = kzalloc(512, GFP_KERNEL);
kfree(p);
p[512] = 0xab;
pr_err("\n3. kmalloc-512: Clobber redzone 0xab->0x%p\n\n", p);
- validate_slab_cache(kmalloc_caches[9]);
+ validate_slab_cache(kmalloc_caches[type][9]);
}
#else
#ifdef CONFIG_SYSFS
--
2.18.0
^ permalink raw reply related [flat|nested] 7+ messages in thread
* [PATCH v4 2/6] mm, slab/slub: introduce kmalloc-reclaimable caches
2018-07-31 9:06 [PATCH v4 0/6] kmalloc-reclaimable caches Vlastimil Babka
2018-07-31 9:06 ` [PATCH v4 1/6] mm, slab: combine kmalloc_caches and kmalloc_dma_caches Vlastimil Babka
@ 2018-07-31 9:06 ` Vlastimil Babka
2018-07-31 9:06 ` [PATCH v4 3/6] dcache: allocate external names from reclaimable kmalloc caches Vlastimil Babka
` (3 subsequent siblings)
5 siblings, 0 replies; 7+ messages in thread
From: Vlastimil Babka @ 2018-07-31 9:06 UTC (permalink / raw)
To: Andrew Morton
Cc: linux-mm, linux-kernel, linux-api, Roman Gushchin, Michal Hocko,
Johannes Weiner, Christoph Lameter, David Rientjes, Joonsoo Kim,
Mel Gorman, Matthew Wilcox, Vlastimil Babka
Kmem caches can be created with a SLAB_RECLAIM_ACCOUNT flag, which indicates
they contain objects which can be reclaimed under memory pressure (typically
through a shrinker). This makes the slab pages accounted as NR_SLAB_RECLAIMABLE
in vmstat, which is reflected also the MemAvailable meminfo counter and in
overcommit decisions. The slab pages are also allocated with __GFP_RECLAIMABLE,
which is good for anti-fragmentation through grouping pages by mobility.
The generic kmalloc-X caches are created without this flag, but sometimes are
used also for objects that can be reclaimed, which due to varying size cannot
have a dedicated kmem cache with SLAB_RECLAIM_ACCOUNT flag. A prominent example
are dcache external names, which prompted the creation of a new, manually
managed vmstat counter NR_INDIRECTLY_RECLAIMABLE_BYTES in commit f1782c9bc547
("dcache: account external names as indirectly reclaimable memory").
To better handle this and any other similar cases, this patch introduces
SLAB_RECLAIM_ACCOUNT variants of kmalloc caches, named kmalloc-rcl-X.
They are used whenever the kmalloc() call passes __GFP_RECLAIMABLE among gfp
flags. They are added to the kmalloc_caches array as a new type. Allocations
with both __GFP_DMA and __GFP_RECLAIMABLE will use a dma type cache.
This change only applies to SLAB and SLUB, not SLOB. This is fine, since SLOB's
target are tiny system and this patch does add some overhead of kmem management
objects.
Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
Acked-by: Mel Gorman <mgorman@techsingularity.net>
Acked-by: Christoph Lameter <cl@linux.com>
Acked-by: Roman Gushchin <guro@fb.com>
---
include/linux/slab.h | 16 ++++++++++++++-
mm/slab_common.c | 48 ++++++++++++++++++++++++++++----------------
2 files changed, 46 insertions(+), 18 deletions(-)
diff --git a/include/linux/slab.h b/include/linux/slab.h
index f35f2c1f37b9..b79a7cd59875 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -295,8 +295,13 @@ static inline void __check_heap_object(const void *ptr, unsigned long n,
#define SLAB_OBJ_MIN_SIZE (KMALLOC_MIN_SIZE < 16 ? \
(KMALLOC_MIN_SIZE) : 16)
+/*
+ * Whenever changing this, take care of that kmalloc_type() and
+ * create_kmalloc_caches() still work as intended.
+ */
enum kmalloc_cache_type {
KMALLOC_NORMAL = 0,
+ KMALLOC_RECLAIM,
#ifdef CONFIG_ZONE_DMA
KMALLOC_DMA,
#endif
@@ -310,12 +315,21 @@ kmalloc_caches[NR_KMALLOC_TYPES][KMALLOC_SHIFT_HIGH + 1];
static __always_inline enum kmalloc_cache_type kmalloc_type(gfp_t flags)
{
int is_dma = 0;
+ int type_dma = 0;
+ int is_reclaimable;
#ifdef CONFIG_ZONE_DMA
is_dma = !!(flags & __GFP_DMA);
+ type_dma = is_dma * KMALLOC_DMA;
#endif
- return is_dma;
+ is_reclaimable = !!(flags & __GFP_RECLAIMABLE);
+
+ /*
+ * If an allocation is both __GFP_DMA and __GFP_RECLAIMABLE, return
+ * KMALLOC_DMA and effectively ignore __GFP_RECLAIMABLE
+ */
+ return type_dma + (is_reclaimable & !is_dma) * KMALLOC_RECLAIM;
}
/*
diff --git a/mm/slab_common.c b/mm/slab_common.c
index 4572941440f3..03f40b273ea3 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -1108,10 +1108,21 @@ void __init setup_kmalloc_cache_index_table(void)
}
}
-static void __init new_kmalloc_cache(int idx, slab_flags_t flags)
+static void __init
+new_kmalloc_cache(int idx, int type, slab_flags_t flags)
{
- kmalloc_caches[KMALLOC_NORMAL][idx] = create_kmalloc_cache(
- kmalloc_info[idx].name,
+ const char *name;
+
+ if (type == KMALLOC_RECLAIM) {
+ flags |= SLAB_RECLAIM_ACCOUNT;
+ name = kasprintf(GFP_NOWAIT, "kmalloc-rcl-%u",
+ kmalloc_info[idx].size);
+ BUG_ON(!name);
+ } else {
+ name = kmalloc_info[idx].name;
+ }
+
+ kmalloc_caches[type][idx] = create_kmalloc_cache(name,
kmalloc_info[idx].size, flags, 0,
kmalloc_info[idx].size);
}
@@ -1123,22 +1134,25 @@ static void __init new_kmalloc_cache(int idx, slab_flags_t flags)
*/
void __init create_kmalloc_caches(slab_flags_t flags)
{
- int i;
- int type = KMALLOC_NORMAL;
+ int i, type;
- for (i = KMALLOC_SHIFT_LOW; i <= KMALLOC_SHIFT_HIGH; i++) {
- if (!kmalloc_caches[type][i])
- new_kmalloc_cache(i, flags);
+ for (type = KMALLOC_NORMAL; type <= KMALLOC_RECLAIM; type++) {
+ for (i = KMALLOC_SHIFT_LOW; i <= KMALLOC_SHIFT_HIGH; i++) {
+ if (!kmalloc_caches[type][i])
+ new_kmalloc_cache(i, type, flags);
- /*
- * Caches that are not of the two-to-the-power-of size.
- * These have to be created immediately after the
- * earlier power of two caches
- */
- if (KMALLOC_MIN_SIZE <= 32 && !kmalloc_caches[type][1] && i == 6)
- new_kmalloc_cache(1, flags);
- if (KMALLOC_MIN_SIZE <= 64 && !kmalloc_caches[type][2] && i == 7)
- new_kmalloc_cache(2, flags);
+ /*
+ * Caches that are not of the two-to-the-power-of size.
+ * These have to be created immediately after the
+ * earlier power of two caches
+ */
+ if (KMALLOC_MIN_SIZE <= 32 && i == 6 &&
+ !kmalloc_caches[type][1])
+ new_kmalloc_cache(1, type, flags);
+ if (KMALLOC_MIN_SIZE <= 64 && i == 7 &&
+ !kmalloc_caches[type][2])
+ new_kmalloc_cache(2, type, flags);
+ }
}
/* Kmalloc array is now usable */
--
2.18.0
^ permalink raw reply related [flat|nested] 7+ messages in thread
* [PATCH v4 3/6] dcache: allocate external names from reclaimable kmalloc caches
2018-07-31 9:06 [PATCH v4 0/6] kmalloc-reclaimable caches Vlastimil Babka
2018-07-31 9:06 ` [PATCH v4 1/6] mm, slab: combine kmalloc_caches and kmalloc_dma_caches Vlastimil Babka
2018-07-31 9:06 ` [PATCH v4 2/6] mm, slab/slub: introduce kmalloc-reclaimable caches Vlastimil Babka
@ 2018-07-31 9:06 ` Vlastimil Babka
2018-07-31 9:06 ` [PATCH v4 4/6] mm: rename and change semantics of nr_indirectly_reclaimable_bytes Vlastimil Babka
` (2 subsequent siblings)
5 siblings, 0 replies; 7+ messages in thread
From: Vlastimil Babka @ 2018-07-31 9:06 UTC (permalink / raw)
To: Andrew Morton
Cc: linux-mm, linux-kernel, linux-api, Roman Gushchin, Michal Hocko,
Johannes Weiner, Christoph Lameter, David Rientjes, Joonsoo Kim,
Mel Gorman, Matthew Wilcox, Vlastimil Babka
We can use the newly introduced kmalloc-reclaimable-X caches, to allocate
external names in dcache, which will take care of the proper accounting
automatically, and also improve anti-fragmentation page grouping.
This effectively reverts commit f1782c9bc547 ("dcache: account external names
as indirectly reclaimable memory") and instead passes __GFP_RECLAIMABLE to
kmalloc(). The accounting thus moves from NR_INDIRECTLY_RECLAIMABLE_BYTES to
NR_SLAB_RECLAIMABLE, which is also considered in MemAvailable calculation and
overcommit decisions.
Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
Acked-by: Mel Gorman <mgorman@techsingularity.net>
Acked-by: Roman Gushchin <guro@fb.com>
---
fs/dcache.c | 38 +++++++++-----------------------------
1 file changed, 9 insertions(+), 29 deletions(-)
diff --git a/fs/dcache.c b/fs/dcache.c
index 0e8e5de3c48a..518c9ed8db8c 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -257,24 +257,10 @@ static void __d_free(struct rcu_head *head)
kmem_cache_free(dentry_cache, dentry);
}
-static void __d_free_external_name(struct rcu_head *head)
-{
- struct external_name *name = container_of(head, struct external_name,
- u.head);
-
- mod_node_page_state(page_pgdat(virt_to_page(name)),
- NR_INDIRECTLY_RECLAIMABLE_BYTES,
- -ksize(name));
-
- kfree(name);
-}
-
static void __d_free_external(struct rcu_head *head)
{
struct dentry *dentry = container_of(head, struct dentry, d_u.d_rcu);
-
- __d_free_external_name(&external_name(dentry)->u.head);
-
+ kfree(external_name(dentry));
kmem_cache_free(dentry_cache, dentry);
}
@@ -305,7 +291,7 @@ void release_dentry_name_snapshot(struct name_snapshot *name)
struct external_name *p;
p = container_of(name->name, struct external_name, name[0]);
if (unlikely(atomic_dec_and_test(&p->u.count)))
- call_rcu(&p->u.head, __d_free_external_name);
+ kfree_rcu(p, u.head);
}
}
EXPORT_SYMBOL(release_dentry_name_snapshot);
@@ -1608,7 +1594,6 @@ EXPORT_SYMBOL(d_invalidate);
struct dentry *__d_alloc(struct super_block *sb, const struct qstr *name)
{
- struct external_name *ext = NULL;
struct dentry *dentry;
char *dname;
int err;
@@ -1629,14 +1614,15 @@ struct dentry *__d_alloc(struct super_block *sb, const struct qstr *name)
dname = dentry->d_iname;
} else if (name->len > DNAME_INLINE_LEN-1) {
size_t size = offsetof(struct external_name, name[1]);
-
- ext = kmalloc(size + name->len, GFP_KERNEL_ACCOUNT);
- if (!ext) {
+ struct external_name *p = kmalloc(size + name->len,
+ GFP_KERNEL_ACCOUNT |
+ __GFP_RECLAIMABLE);
+ if (!p) {
kmem_cache_free(dentry_cache, dentry);
return NULL;
}
- atomic_set(&ext->u.count, 1);
- dname = ext->name;
+ atomic_set(&p->u.count, 1);
+ dname = p->name;
} else {
dname = dentry->d_iname;
}
@@ -1675,12 +1661,6 @@ struct dentry *__d_alloc(struct super_block *sb, const struct qstr *name)
}
}
- if (unlikely(ext)) {
- pg_data_t *pgdat = page_pgdat(virt_to_page(ext));
- mod_node_page_state(pgdat, NR_INDIRECTLY_RECLAIMABLE_BYTES,
- ksize(ext));
- }
-
this_cpu_inc(nr_dentry);
return dentry;
@@ -2761,7 +2741,7 @@ static void copy_name(struct dentry *dentry, struct dentry *target)
dentry->d_name.hash_len = target->d_name.hash_len;
}
if (old_name && likely(atomic_dec_and_test(&old_name->u.count)))
- call_rcu(&old_name->u.head, __d_free_external_name);
+ kfree_rcu(old_name, u.head);
}
/*
--
2.18.0
^ permalink raw reply related [flat|nested] 7+ messages in thread
* [PATCH v4 4/6] mm: rename and change semantics of nr_indirectly_reclaimable_bytes
2018-07-31 9:06 [PATCH v4 0/6] kmalloc-reclaimable caches Vlastimil Babka
` (2 preceding siblings ...)
2018-07-31 9:06 ` [PATCH v4 3/6] dcache: allocate external names from reclaimable kmalloc caches Vlastimil Babka
@ 2018-07-31 9:06 ` Vlastimil Babka
2018-07-31 9:06 ` [PATCH v4 5/6] mm, proc: add KReclaimable to /proc/meminfo Vlastimil Babka
2018-07-31 9:06 ` [PATCH v4 6/6] mm, slab: shorten kmalloc cache names for large sizes Vlastimil Babka
5 siblings, 0 replies; 7+ messages in thread
From: Vlastimil Babka @ 2018-07-31 9:06 UTC (permalink / raw)
To: Andrew Morton
Cc: linux-mm, linux-kernel, linux-api, Roman Gushchin, Michal Hocko,
Johannes Weiner, Christoph Lameter, David Rientjes, Joonsoo Kim,
Mel Gorman, Matthew Wilcox, Vlastimil Babka, Vijayanand Jitta,
Laura Abbott, Sumit Semwal
The vmstat counter NR_INDIRECTLY_RECLAIMABLE_BYTES was introduced by commit
eb59254608bc ("mm: introduce NR_INDIRECTLY_RECLAIMABLE_BYTES") with the goal of
accounting objects that can be reclaimed, but cannot be allocated via a
SLAB_RECLAIM_ACCOUNT cache. This is now possible via kmalloc() with
__GFP_RECLAIMABLE flag, and the dcache external names user is converted.
The counter is however still useful for accounting direct page allocations
(i.e. not slab) with a shrinker, such as the ION page pool. So keep it, and:
- change granularity to pages to be more like other counters; sub-page
allocations should be able to use kmalloc
- rename the counter to NR_KERNEL_MISC_RECLAIMABLE
- expose the counter again in vmstat as "nr_kernel_misc_reclaimable"; we can
again remove the check for not printing "hidden" counters
Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Vijayanand Jitta <vjitta@codeaurora.org>
Cc: Laura Abbott <labbott@redhat.com>
Cc: Sumit Semwal <sumit.semwal@linaro.org>
Acked-by: Christoph Lameter <cl@linux.com>
Acked-by: Roman Gushchin <guro@fb.com>
---
drivers/staging/android/ion/ion_page_pool.c | 8 ++++----
include/linux/mmzone.h | 2 +-
mm/page_alloc.c | 19 +++++++------------
mm/util.c | 3 +--
mm/vmstat.c | 6 +-----
5 files changed, 14 insertions(+), 24 deletions(-)
diff --git a/drivers/staging/android/ion/ion_page_pool.c b/drivers/staging/android/ion/ion_page_pool.c
index 9bc56eb48d2a..0d2a95957ee8 100644
--- a/drivers/staging/android/ion/ion_page_pool.c
+++ b/drivers/staging/android/ion/ion_page_pool.c
@@ -33,8 +33,8 @@ static void ion_page_pool_add(struct ion_page_pool *pool, struct page *page)
pool->low_count++;
}
- mod_node_page_state(page_pgdat(page), NR_INDIRECTLY_RECLAIMABLE_BYTES,
- (1 << (PAGE_SHIFT + pool->order)));
+ mod_node_page_state(page_pgdat(page), NR_KERNEL_MISC_RECLAIMABLE,
+ 1 << pool->order);
mutex_unlock(&pool->mutex);
}
@@ -53,8 +53,8 @@ static struct page *ion_page_pool_remove(struct ion_page_pool *pool, bool high)
}
list_del(&page->lru);
- mod_node_page_state(page_pgdat(page), NR_INDIRECTLY_RECLAIMABLE_BYTES,
- -(1 << (PAGE_SHIFT + pool->order)));
+ mod_node_page_state(page_pgdat(page), NR_KERNEL_MISC_RECLAIMABLE,
+ -(1 << pool->order));
return page;
}
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 32699b2dc52a..c2f6bc4c9e8a 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -180,7 +180,7 @@ enum node_stat_item {
NR_VMSCAN_IMMEDIATE, /* Prioritise for reclaim when writeback ends */
NR_DIRTIED, /* page dirtyings since bootup */
NR_WRITTEN, /* page writings since bootup */
- NR_INDIRECTLY_RECLAIMABLE_BYTES, /* measured in bytes */
+ NR_KERNEL_MISC_RECLAIMABLE, /* reclaimable non-slab kernel pages */
NR_VM_NODE_STAT_ITEMS
};
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 5d800d61ddb7..91f75bf4404d 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -4704,6 +4704,7 @@ long si_mem_available(void)
unsigned long pagecache;
unsigned long wmark_low = 0;
unsigned long pages[NR_LRU_LISTS];
+ unsigned long reclaimable;
struct zone *zone;
int lru;
@@ -4729,19 +4730,13 @@ long si_mem_available(void)
available += pagecache;
/*
- * Part of the reclaimable slab consists of items that are in use,
- * and cannot be freed. Cap this estimate at the low watermark.
+ * Part of the reclaimable slab and other kernel memory consists of
+ * items that are in use, and cannot be freed. Cap this estimate at the
+ * low watermark.
*/
- available += global_node_page_state(NR_SLAB_RECLAIMABLE) -
- min(global_node_page_state(NR_SLAB_RECLAIMABLE) / 2,
- wmark_low);
-
- /*
- * Part of the kernel memory, which can be released under memory
- * pressure.
- */
- available += global_node_page_state(NR_INDIRECTLY_RECLAIMABLE_BYTES) >>
- PAGE_SHIFT;
+ reclaimable = global_node_page_state(NR_SLAB_RECLAIMABLE) +
+ global_node_page_state(NR_KERNEL_MISC_RECLAIMABLE);
+ available += reclaimable - min(reclaimable / 2, wmark_low);
if (available < 0)
available = 0;
diff --git a/mm/util.c b/mm/util.c
index 3351659200e6..891f0654e7b5 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -675,8 +675,7 @@ int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin)
* Part of the kernel memory, which can be released
* under memory pressure.
*/
- free += global_node_page_state(
- NR_INDIRECTLY_RECLAIMABLE_BYTES) >> PAGE_SHIFT;
+ free += global_node_page_state(NR_KERNEL_MISC_RECLAIMABLE);
/*
* Leave reserved pages. The pages are not for anonymous pages.
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 8ba0870ecddd..c5e52f94ba5f 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -1161,7 +1161,7 @@ const char * const vmstat_text[] = {
"nr_vmscan_immediate_reclaim",
"nr_dirtied",
"nr_written",
- "", /* nr_indirectly_reclaimable */
+ "nr_kernel_misc_reclaimable",
/* enum writeback_stat_item counters */
"nr_dirty_threshold",
@@ -1704,10 +1704,6 @@ static int vmstat_show(struct seq_file *m, void *arg)
unsigned long *l = arg;
unsigned long off = l - (unsigned long *)m->private;
- /* Skip hidden vmstat items. */
- if (*vmstat_text[off] == '\0')
- return 0;
-
seq_puts(m, vmstat_text[off]);
seq_put_decimal_ull(m, " ", *l);
seq_putc(m, '\n');
--
2.18.0
^ permalink raw reply related [flat|nested] 7+ messages in thread
* [PATCH v4 5/6] mm, proc: add KReclaimable to /proc/meminfo
2018-07-31 9:06 [PATCH v4 0/6] kmalloc-reclaimable caches Vlastimil Babka
` (3 preceding siblings ...)
2018-07-31 9:06 ` [PATCH v4 4/6] mm: rename and change semantics of nr_indirectly_reclaimable_bytes Vlastimil Babka
@ 2018-07-31 9:06 ` Vlastimil Babka
2018-07-31 9:06 ` [PATCH v4 6/6] mm, slab: shorten kmalloc cache names for large sizes Vlastimil Babka
5 siblings, 0 replies; 7+ messages in thread
From: Vlastimil Babka @ 2018-07-31 9:06 UTC (permalink / raw)
To: Andrew Morton
Cc: linux-mm, linux-kernel, linux-api, Roman Gushchin, Michal Hocko,
Johannes Weiner, Christoph Lameter, David Rientjes, Joonsoo Kim,
Mel Gorman, Matthew Wilcox, Vlastimil Babka
The vmstat NR_KERNEL_MISC_RECLAIMABLE counter is for kernel non-slab
allocations that can be reclaimed via shrinker. In /proc/meminfo, we can show
the sum of all reclaimable kernel allocations (including slab) as
"KReclaimable". Add the same counter also to per-node meminfo under /sys
With this counter, users will have more complete information about
kernel memory usage. Non-slab reclaimable pages (currently just the ION
allocator) will not be missing from /proc/meminfo, making users wonder
where part of their memory went. More precisely, they already appear in
MemAvailable, but without the new counter, it's not obvious why the
value in MemAvailable doesn't fully correspond with the sum of other
counters participating in it.
Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
Acked-by: Roman Gushchin <guro@fb.com>
---
Documentation/filesystems/proc.txt | 4 ++++
drivers/base/node.c | 19 ++++++++++++-------
fs/proc/meminfo.c | 16 ++++++++--------
3 files changed, 24 insertions(+), 15 deletions(-)
diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt
index 520f6a84cf50..6a255f960ab5 100644
--- a/Documentation/filesystems/proc.txt
+++ b/Documentation/filesystems/proc.txt
@@ -858,6 +858,7 @@ Writeback: 0 kB
AnonPages: 861800 kB
Mapped: 280372 kB
Shmem: 644 kB
+KReclaimable: 168048 kB
Slab: 284364 kB
SReclaimable: 159856 kB
SUnreclaim: 124508 kB
@@ -921,6 +922,9 @@ AnonHugePages: Non-file backed huge pages mapped into userspace page tables
ShmemHugePages: Memory used by shared memory (shmem) and tmpfs allocated
with huge pages
ShmemPmdMapped: Shared memory mapped into userspace with huge pages
+KReclaimable: Kernel allocations that the kernel will attempt to reclaim
+ under memory pressure. Includes SReclaimable (below), and other
+ direct allocations with a shrinker.
Slab: in-kernel data structures cache
SReclaimable: Part of Slab, that might be reclaimed, such as caches
SUnreclaim: Part of Slab, that cannot be reclaimed on memory pressure
diff --git a/drivers/base/node.c b/drivers/base/node.c
index a5e821d09656..81cef8031eae 100644
--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -67,8 +67,11 @@ static ssize_t node_read_meminfo(struct device *dev,
int nid = dev->id;
struct pglist_data *pgdat = NODE_DATA(nid);
struct sysinfo i;
+ unsigned long sreclaimable, sunreclaimable;
si_meminfo_node(&i, nid);
+ sreclaimable = node_page_state(pgdat, NR_SLAB_RECLAIMABLE);
+ sunreclaimable = node_page_state(pgdat, NR_SLAB_UNRECLAIMABLE);
n = sprintf(buf,
"Node %d MemTotal: %8lu kB\n"
"Node %d MemFree: %8lu kB\n"
@@ -118,6 +121,7 @@ static ssize_t node_read_meminfo(struct device *dev,
"Node %d NFS_Unstable: %8lu kB\n"
"Node %d Bounce: %8lu kB\n"
"Node %d WritebackTmp: %8lu kB\n"
+ "Node %d KReclaimable: %8lu kB\n"
"Node %d Slab: %8lu kB\n"
"Node %d SReclaimable: %8lu kB\n"
"Node %d SUnreclaim: %8lu kB\n"
@@ -138,20 +142,21 @@ static ssize_t node_read_meminfo(struct device *dev,
nid, K(node_page_state(pgdat, NR_UNSTABLE_NFS)),
nid, K(sum_zone_node_page_state(nid, NR_BOUNCE)),
nid, K(node_page_state(pgdat, NR_WRITEBACK_TEMP)),
- nid, K(node_page_state(pgdat, NR_SLAB_RECLAIMABLE) +
- node_page_state(pgdat, NR_SLAB_UNRECLAIMABLE)),
- nid, K(node_page_state(pgdat, NR_SLAB_RECLAIMABLE)),
+ nid, K(sreclaimable +
+ node_page_state(pgdat, NR_KERNEL_MISC_RECLAIMABLE)),
+ nid, K(sreclaimable + sunreclaimable),
+ nid, K(sreclaimable),
+ nid, K(sunreclaimable)
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
- nid, K(node_page_state(pgdat, NR_SLAB_UNRECLAIMABLE)),
+ ,
nid, K(node_page_state(pgdat, NR_ANON_THPS) *
HPAGE_PMD_NR),
nid, K(node_page_state(pgdat, NR_SHMEM_THPS) *
HPAGE_PMD_NR),
nid, K(node_page_state(pgdat, NR_SHMEM_PMDMAPPED) *
- HPAGE_PMD_NR));
-#else
- nid, K(node_page_state(pgdat, NR_SLAB_UNRECLAIMABLE)));
+ HPAGE_PMD_NR)
#endif
+ );
n += hugetlb_report_node_meminfo(nid, buf + n);
return n;
}
diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c
index 2fb04846ed11..61a18477bc07 100644
--- a/fs/proc/meminfo.c
+++ b/fs/proc/meminfo.c
@@ -37,6 +37,7 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
long cached;
long available;
unsigned long pages[NR_LRU_LISTS];
+ unsigned long sreclaimable, sunreclaim;
int lru;
si_meminfo(&i);
@@ -52,6 +53,8 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
pages[lru] = global_node_page_state(NR_LRU_BASE + lru);
available = si_mem_available();
+ sreclaimable = global_node_page_state(NR_SLAB_RECLAIMABLE);
+ sunreclaim = global_node_page_state(NR_SLAB_UNRECLAIMABLE);
show_val_kb(m, "MemTotal: ", i.totalram);
show_val_kb(m, "MemFree: ", i.freeram);
@@ -93,14 +96,11 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
show_val_kb(m, "Mapped: ",
global_node_page_state(NR_FILE_MAPPED));
show_val_kb(m, "Shmem: ", i.sharedram);
- show_val_kb(m, "Slab: ",
- global_node_page_state(NR_SLAB_RECLAIMABLE) +
- global_node_page_state(NR_SLAB_UNRECLAIMABLE));
-
- show_val_kb(m, "SReclaimable: ",
- global_node_page_state(NR_SLAB_RECLAIMABLE));
- show_val_kb(m, "SUnreclaim: ",
- global_node_page_state(NR_SLAB_UNRECLAIMABLE));
+ show_val_kb(m, "KReclaimable: ", sreclaimable +
+ global_node_page_state(NR_KERNEL_MISC_RECLAIMABLE));
+ show_val_kb(m, "Slab: ", sreclaimable + sunreclaim);
+ show_val_kb(m, "SReclaimable: ", sreclaimable);
+ show_val_kb(m, "SUnreclaim: ", sunreclaim);
seq_printf(m, "KernelStack: %8lu kB\n",
global_zone_page_state(NR_KERNEL_STACK_KB));
show_val_kb(m, "PageTables: ",
--
2.18.0
^ permalink raw reply related [flat|nested] 7+ messages in thread
* [PATCH v4 6/6] mm, slab: shorten kmalloc cache names for large sizes
2018-07-31 9:06 [PATCH v4 0/6] kmalloc-reclaimable caches Vlastimil Babka
` (4 preceding siblings ...)
2018-07-31 9:06 ` [PATCH v4 5/6] mm, proc: add KReclaimable to /proc/meminfo Vlastimil Babka
@ 2018-07-31 9:06 ` Vlastimil Babka
5 siblings, 0 replies; 7+ messages in thread
From: Vlastimil Babka @ 2018-07-31 9:06 UTC (permalink / raw)
To: Andrew Morton
Cc: linux-mm, linux-kernel, linux-api, Roman Gushchin, Michal Hocko,
Johannes Weiner, Christoph Lameter, David Rientjes, Joonsoo Kim,
Mel Gorman, Matthew Wilcox, Vlastimil Babka
Kmalloc cache names can get quite long for large object sizes, when the sizes
are expressed in bytes. Use 'k' and 'M' prefixes to make the names as short
as possible e.g. in /proc/slabinfo. This works, as we mostly use power-of-two
sizes, with exceptions only below 1k.
Example: 'kmalloc-4194304' becomes 'kmalloc-4M'
Suggested-by: Matthew Wilcox <willy@infradead.org>
Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
Acked-by: Mel Gorman <mgorman@techsingularity.net>
Acked-by: Christoph Lameter <cl@linux.com>
Acked-by: Roman Gushchin <guro@fb.com>
---
mm/slab_common.c | 38 ++++++++++++++++++++++++++------------
1 file changed, 26 insertions(+), 12 deletions(-)
diff --git a/mm/slab_common.c b/mm/slab_common.c
index 03f40b273ea3..a07fcb2551f6 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -1050,15 +1050,15 @@ const struct kmalloc_info_struct kmalloc_info[] __initconst = {
{"kmalloc-16", 16}, {"kmalloc-32", 32},
{"kmalloc-64", 64}, {"kmalloc-128", 128},
{"kmalloc-256", 256}, {"kmalloc-512", 512},
- {"kmalloc-1024", 1024}, {"kmalloc-2048", 2048},
- {"kmalloc-4096", 4096}, {"kmalloc-8192", 8192},
- {"kmalloc-16384", 16384}, {"kmalloc-32768", 32768},
- {"kmalloc-65536", 65536}, {"kmalloc-131072", 131072},
- {"kmalloc-262144", 262144}, {"kmalloc-524288", 524288},
- {"kmalloc-1048576", 1048576}, {"kmalloc-2097152", 2097152},
- {"kmalloc-4194304", 4194304}, {"kmalloc-8388608", 8388608},
- {"kmalloc-16777216", 16777216}, {"kmalloc-33554432", 33554432},
- {"kmalloc-67108864", 67108864}
+ {"kmalloc-1k", 1024}, {"kmalloc-2k", 2048},
+ {"kmalloc-4k", 4096}, {"kmalloc-8k", 8192},
+ {"kmalloc-16k", 16384}, {"kmalloc-32k", 32768},
+ {"kmalloc-64k", 65536}, {"kmalloc-128k", 131072},
+ {"kmalloc-256k", 262144}, {"kmalloc-512k", 524288},
+ {"kmalloc-1M", 1048576}, {"kmalloc-2M", 2097152},
+ {"kmalloc-4M", 4194304}, {"kmalloc-8M", 8388608},
+ {"kmalloc-16M", 16777216}, {"kmalloc-32M", 33554432},
+ {"kmalloc-64M", 67108864}
};
/*
@@ -1108,6 +1108,21 @@ void __init setup_kmalloc_cache_index_table(void)
}
}
+static const char *
+kmalloc_cache_name(const char *prefix, unsigned int size)
+{
+
+ static const char units[3] = "\0kM";
+ int idx = 0;
+
+ while (size >= 1024 && (size % 1024 == 0)) {
+ size /= 1024;
+ idx++;
+ }
+
+ return kasprintf(GFP_NOWAIT, "%s-%u%c", prefix, size, units[idx]);
+}
+
static void __init
new_kmalloc_cache(int idx, int type, slab_flags_t flags)
{
@@ -1115,7 +1130,7 @@ new_kmalloc_cache(int idx, int type, slab_flags_t flags)
if (type == KMALLOC_RECLAIM) {
flags |= SLAB_RECLAIM_ACCOUNT;
- name = kasprintf(GFP_NOWAIT, "kmalloc-rcl-%u",
+ name = kmalloc_cache_name("kmalloc-rcl",
kmalloc_info[idx].size);
BUG_ON(!name);
} else {
@@ -1164,8 +1179,7 @@ void __init create_kmalloc_caches(slab_flags_t flags)
if (s) {
unsigned int size = kmalloc_size(i);
- char *n = kasprintf(GFP_NOWAIT,
- "dma-kmalloc-%u", size);
+ const char *n = kmalloc_cache_name("dma-kmalloc", size);
BUG_ON(!n);
kmalloc_caches[KMALLOC_DMA][i] = create_kmalloc_cache(
--
2.18.0
^ permalink raw reply related [flat|nested] 7+ messages in thread