* [PATCH 1/3] mm/slab: use percpu allocator for cpu cache
@ 2014-08-21  8:11 ` Joonsoo Kim
  0 siblings, 0 replies; 43+ messages in thread
From: Joonsoo Kim @ 2014-08-21  8:11 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Christoph Lameter, Pekka Enberg, David Rientjes, linux-mm,
	linux-kernel, Joonsoo Kim

Because of a chicken-and-egg problem, initialization of SLAB is quite
complicated: we need to allocate the cpu cache through SLAB to make
kmem_cache work, but before kmem_cache is initialized, allocation
through SLAB is impossible.

SLUB, on the other hand, initializes in a simpler way: it uses the
percpu allocator for its cpu cache, so there is no chicken-and-egg
problem.

So this patch switches SLAB over to the percpu allocator as well. This
simplifies SLAB's initialization steps, making the SLAB code easier to
maintain.
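
As a rough sketch of the idea (this just condenses the mm/slab.c hunks
below; the alloc_cpu_cache() helper name is illustrative, not code from
the patch), the cpu cache becomes a single runtime-sized percpu
allocation and is reached via this_cpu_ptr() instead of an array
indexed by cpu id:

        /* simplified sketch, not the literal patch code */
        static struct array_cache __percpu *alloc_cpu_cache(int entries,
                                                        int batchcount)
        {
                size_t size = sizeof(struct array_cache) +
                                entries * sizeof(void *);
                struct array_cache __percpu *cc = __alloc_percpu(size, 0);
                int cpu;

                if (!cc)
                        return NULL;
                for_each_possible_cpu(cpu)
                        init_arraycache(per_cpu_ptr(cc, cpu),
                                        entries, batchcount);
                return cc;
        }

        static inline struct array_cache *cpu_cache_get(struct kmem_cache *cachep)
        {
                /* fast-path access on the local cpu */
                return this_cpu_ptr(cachep->cpu_cache);
        }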

From my testing, there is no performance difference.

Signed-off-by: Joonsoo Kim <iamjoonsoo.kim@lge.com>
---
 include/linux/slab_def.h |   20 +---
 mm/slab.c                |  237 +++++++++++++++-------------------------------
 mm/slab.h                |    1 -
 3 files changed, 81 insertions(+), 177 deletions(-)

diff --git a/include/linux/slab_def.h b/include/linux/slab_def.h
index 8235dfb..b869d16 100644
--- a/include/linux/slab_def.h
+++ b/include/linux/slab_def.h
@@ -8,6 +8,8 @@
  */
 
 struct kmem_cache {
+	struct array_cache __percpu *cpu_cache;
+
 /* 1) Cache tunables. Protected by slab_mutex */
 	unsigned int batchcount;
 	unsigned int limit;
@@ -71,23 +73,7 @@ struct kmem_cache {
 	struct memcg_cache_params *memcg_params;
 #endif
 
-/* 6) per-cpu/per-node data, touched during every alloc/free */
-	/*
-	 * We put array[] at the end of kmem_cache, because we want to size
-	 * this array to nr_cpu_ids slots instead of NR_CPUS
-	 * (see kmem_cache_init())
-	 * We still use [NR_CPUS] and not [1] or [0] because cache_cache
-	 * is statically defined, so we reserve the max number of cpus.
-	 *
-	 * We also need to guarantee that the list is able to accomodate a
-	 * pointer for each node since "nodelists" uses the remainder of
-	 * available pointers.
-	 */
-	struct kmem_cache_node **node;
-	struct array_cache *array[NR_CPUS + MAX_NUMNODES];
-	/*
-	 * Do not add fields after array[]
-	 */
+	struct kmem_cache_node *node[MAX_NUMNODES];
 };
 
 #endif	/* _LINUX_SLAB_DEF_H */
diff --git a/mm/slab.c b/mm/slab.c
index 5927a17..09b060e 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -237,11 +237,10 @@ struct arraycache_init {
 /*
  * Need this for bootstrapping a per node allocator.
  */
-#define NUM_INIT_LISTS (3 * MAX_NUMNODES)
+#define NUM_INIT_LISTS (2 * MAX_NUMNODES)
 static struct kmem_cache_node __initdata init_kmem_cache_node[NUM_INIT_LISTS];
 #define	CACHE_CACHE 0
-#define	SIZE_AC MAX_NUMNODES
-#define	SIZE_NODE (2 * MAX_NUMNODES)
+#define	SIZE_NODE (MAX_NUMNODES)
 
 static int drain_freelist(struct kmem_cache *cache,
 			struct kmem_cache_node *n, int tofree);
@@ -253,7 +252,6 @@ static void cache_reap(struct work_struct *unused);
 
 static int slab_early_init = 1;
 
-#define INDEX_AC kmalloc_index(sizeof(struct arraycache_init))
 #define INDEX_NODE kmalloc_index(sizeof(struct kmem_cache_node))
 
 static void kmem_cache_node_init(struct kmem_cache_node *parent)
@@ -458,9 +456,6 @@ static inline unsigned int obj_to_index(const struct kmem_cache *cache,
 	return reciprocal_divide(offset, cache->reciprocal_buffer_size);
 }
 
-static struct arraycache_init initarray_generic =
-    { {0, BOOT_CPUCACHE_ENTRIES, 1, 0} };
-
 /* internal cache of cache description objs */
 static struct kmem_cache kmem_cache_boot = {
 	.batchcount = 1,
@@ -476,7 +471,7 @@ static DEFINE_PER_CPU(struct delayed_work, slab_reap_work);
 
 static inline struct array_cache *cpu_cache_get(struct kmem_cache *cachep)
 {
-	return cachep->array[smp_processor_id()];
+	return this_cpu_ptr(cachep->cpu_cache);
 }
 
 static size_t calculate_freelist_size(int nr_objs, size_t align)
@@ -1096,9 +1091,6 @@ static void cpuup_canceled(long cpu)
 		struct alien_cache **alien;
 		LIST_HEAD(list);
 
-		/* cpu is dead; no one can alloc from it. */
-		nc = cachep->array[cpu];
-		cachep->array[cpu] = NULL;
 		n = get_node(cachep, node);
 
 		if (!n)
@@ -1108,6 +1100,9 @@ static void cpuup_canceled(long cpu)
 
 		/* Free limit for this kmem_cache_node */
 		n->free_limit -= cachep->batchcount;
+
+		/* cpu is dead; no one can alloc from it. */
+		nc = per_cpu_ptr(cachep->cpu_cache, cpu);
 		if (nc)
 			free_block(cachep, nc->entry, nc->avail, node, &list);
 
@@ -1135,7 +1130,6 @@ static void cpuup_canceled(long cpu)
 		}
 free_array_cache:
 		slabs_destroy(cachep, &list);
-		kfree(nc);
 	}
 	/*
 	 * In the previous loop, all the objects were freed to
@@ -1172,32 +1166,23 @@ static int cpuup_prepare(long cpu)
 	 * array caches
 	 */
 	list_for_each_entry(cachep, &slab_caches, list) {
-		struct array_cache *nc;
 		struct array_cache *shared = NULL;
 		struct alien_cache **alien = NULL;
 
-		nc = alloc_arraycache(node, cachep->limit,
-					cachep->batchcount, GFP_KERNEL);
-		if (!nc)
-			goto bad;
 		if (cachep->shared) {
 			shared = alloc_arraycache(node,
 				cachep->shared * cachep->batchcount,
 				0xbaadf00d, GFP_KERNEL);
-			if (!shared) {
-				kfree(nc);
+			if (!shared)
 				goto bad;
-			}
 		}
 		if (use_alien_caches) {
 			alien = alloc_alien_cache(node, cachep->limit, GFP_KERNEL);
 			if (!alien) {
 				kfree(shared);
-				kfree(nc);
 				goto bad;
 			}
 		}
-		cachep->array[cpu] = nc;
 		n = get_node(cachep, node);
 		BUG_ON(!n);
 
@@ -1389,15 +1374,6 @@ static void __init set_up_node(struct kmem_cache *cachep, int index)
 }
 
 /*
- * The memory after the last cpu cache pointer is used for the
- * the node pointer.
- */
-static void setup_node_pointer(struct kmem_cache *cachep)
-{
-	cachep->node = (struct kmem_cache_node **)&cachep->array[nr_cpu_ids];
-}
-
-/*
  * Initialisation.  Called after the page allocator have been initialised and
  * before smp_init().
  */
@@ -1408,7 +1384,6 @@ void __init kmem_cache_init(void)
 	BUILD_BUG_ON(sizeof(((struct page *)NULL)->lru) <
 					sizeof(struct rcu_head));
 	kmem_cache = &kmem_cache_boot;
-	setup_node_pointer(kmem_cache);
 
 	if (num_possible_nodes() == 1)
 		use_alien_caches = 0;
@@ -1416,8 +1391,6 @@ void __init kmem_cache_init(void)
 	for (i = 0; i < NUM_INIT_LISTS; i++)
 		kmem_cache_node_init(&init_kmem_cache_node[i]);
 
-	set_up_node(kmem_cache, CACHE_CACHE);
-
 	/*
 	 * Fragmentation resistance on low memory - only use bigger
 	 * page orders on machines with more than 32MB of memory if
@@ -1452,49 +1425,22 @@ void __init kmem_cache_init(void)
 	 * struct kmem_cache size depends on nr_node_ids & nr_cpu_ids
 	 */
 	create_boot_cache(kmem_cache, "kmem_cache",
-		offsetof(struct kmem_cache, array[nr_cpu_ids]) +
+		offsetof(struct kmem_cache, node) +
 				  nr_node_ids * sizeof(struct kmem_cache_node *),
 				  SLAB_HWCACHE_ALIGN);
 	list_add(&kmem_cache->list, &slab_caches);
-
-	/* 2+3) create the kmalloc caches */
+	slab_state = PARTIAL;
 
 	/*
-	 * Initialize the caches that provide memory for the array cache and the
-	 * kmem_cache_node structures first.  Without this, further allocations will
-	 * bug.
+	 * Initialize the caches that provide memory for the  kmem_cache_node
+	 * structures first.  Without this, further allocations will bug.
 	 */
-
-	kmalloc_caches[INDEX_AC] = create_kmalloc_cache("kmalloc-ac",
-					kmalloc_size(INDEX_AC), ARCH_KMALLOC_FLAGS);
-
-	if (INDEX_AC != INDEX_NODE)
-		kmalloc_caches[INDEX_NODE] =
-			create_kmalloc_cache("kmalloc-node",
+	kmalloc_caches[INDEX_NODE] = create_kmalloc_cache("kmalloc-node",
 				kmalloc_size(INDEX_NODE), ARCH_KMALLOC_FLAGS);
+	slab_state = PARTIAL_NODE;
 
 	slab_early_init = 0;
 
-	/* 4) Replace the bootstrap head arrays */
-	{
-		struct array_cache *ptr;
-
-		ptr = kmalloc(sizeof(struct arraycache_init), GFP_NOWAIT);
-
-		memcpy(ptr, cpu_cache_get(kmem_cache),
-		       sizeof(struct arraycache_init));
-
-		kmem_cache->array[smp_processor_id()] = ptr;
-
-		ptr = kmalloc(sizeof(struct arraycache_init), GFP_NOWAIT);
-
-		BUG_ON(cpu_cache_get(kmalloc_caches[INDEX_AC])
-		       != &initarray_generic.cache);
-		memcpy(ptr, cpu_cache_get(kmalloc_caches[INDEX_AC]),
-		       sizeof(struct arraycache_init));
-
-		kmalloc_caches[INDEX_AC]->array[smp_processor_id()] = ptr;
-	}
 	/* 5) Replace the bootstrap kmem_cache_node */
 	{
 		int nid;
@@ -1502,13 +1448,8 @@ void __init kmem_cache_init(void)
 		for_each_online_node(nid) {
 			init_list(kmem_cache, &init_kmem_cache_node[CACHE_CACHE + nid], nid);
 
-			init_list(kmalloc_caches[INDEX_AC],
-				  &init_kmem_cache_node[SIZE_AC + nid], nid);
-
-			if (INDEX_AC != INDEX_NODE) {
-				init_list(kmalloc_caches[INDEX_NODE],
+			init_list(kmalloc_caches[INDEX_NODE],
 					  &init_kmem_cache_node[SIZE_NODE + nid], nid);
-			}
 		}
 	}
 
@@ -2041,56 +1982,63 @@ static size_t calculate_slab_order(struct kmem_cache *cachep,
 	return left_over;
 }
 
+static struct array_cache __percpu *__alloc_kmem_cache_cpus(
+		struct kmem_cache *cachep, int entries, int batchcount)
+{
+	int cpu;
+	size_t size;
+	struct array_cache __percpu *cpu_cache;
+
+	size = sizeof(void *) * entries + sizeof(struct array_cache);
+	cpu_cache = __alloc_percpu(size, 0);
+
+	if (!cpu_cache)
+		return NULL;
+
+	for_each_possible_cpu(cpu) {
+		init_arraycache(per_cpu_ptr(cpu_cache, cpu),
+				entries, batchcount);
+	}
+
+	return cpu_cache;
+}
+
+static int alloc_kmem_cache_cpus(struct kmem_cache *cachep, int entries,
+				int batchcount)
+{
+	cachep->cpu_cache = __alloc_kmem_cache_cpus(cachep, entries,
+							batchcount);
+	if (!cachep->cpu_cache)
+		return 1;
+
+	return 0;
+}
+
 static int __init_refok setup_cpu_cache(struct kmem_cache *cachep, gfp_t gfp)
 {
 	if (slab_state >= FULL)
 		return enable_cpucache(cachep, gfp);
 
+	if (alloc_kmem_cache_cpus(cachep, 1, 1))
+		return 1;
+
 	if (slab_state == DOWN) {
-		/*
-		 * Note: Creation of first cache (kmem_cache).
-		 * The setup_node is taken care
-		 * of by the caller of __kmem_cache_create
-		 */
-		cachep->array[smp_processor_id()] = &initarray_generic.cache;
-		slab_state = PARTIAL;
+		/* Creation of first cache (kmem_cache). */
+		set_up_node(kmem_cache, CACHE_CACHE);
 	} else if (slab_state == PARTIAL) {
-		/*
-		 * Note: the second kmem_cache_create must create the cache
-		 * that's used by kmalloc(24), otherwise the creation of
-		 * further caches will BUG().
-		 */
-		cachep->array[smp_processor_id()] = &initarray_generic.cache;
-
-		/*
-		 * If the cache that's used by kmalloc(sizeof(kmem_cache_node)) is
-		 * the second cache, then we need to set up all its node/,
-		 * otherwise the creation of further caches will BUG().
-		 */
-		set_up_node(cachep, SIZE_AC);
-		if (INDEX_AC == INDEX_NODE)
-			slab_state = PARTIAL_NODE;
-		else
-			slab_state = PARTIAL_ARRAYCACHE;
+		/* For kmem_cache_node */
+		set_up_node(cachep, SIZE_NODE);
 	} else {
-		/* Remaining boot caches */
-		cachep->array[smp_processor_id()] =
-			kmalloc(sizeof(struct arraycache_init), gfp);
+		int node;
 
-		if (slab_state == PARTIAL_ARRAYCACHE) {
-			set_up_node(cachep, SIZE_NODE);
-			slab_state = PARTIAL_NODE;
-		} else {
-			int node;
-			for_each_online_node(node) {
-				cachep->node[node] =
-				    kmalloc_node(sizeof(struct kmem_cache_node),
-						gfp, node);
-				BUG_ON(!cachep->node[node]);
-				kmem_cache_node_init(cachep->node[node]);
-			}
+		for_each_online_node(node) {
+			cachep->node[node] = kmalloc_node(
+				sizeof(struct kmem_cache_node), gfp, node);
+			BUG_ON(!cachep->node[node]);
+			kmem_cache_node_init(cachep->node[node]);
 		}
 	}
+
 	cachep->node[numa_mem_id()]->next_reap =
 			jiffies + REAPTIMEOUT_NODE +
 			((unsigned long)cachep) % REAPTIMEOUT_NODE;
@@ -2194,7 +2142,6 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags)
 	else
 		gfp = GFP_NOWAIT;
 
-	setup_node_pointer(cachep);
 #if DEBUG
 
 	/*
@@ -2451,8 +2398,7 @@ int __kmem_cache_shutdown(struct kmem_cache *cachep)
 	if (rc)
 		return rc;
 
-	for_each_online_cpu(i)
-	    kfree(cachep->array[i]);
+	free_percpu(cachep->cpu_cache);
 
 	/* NUMA: free the node structures */
 	for_each_kmem_cache_node(cachep, i, n) {
@@ -3700,72 +3646,45 @@ fail:
 	return -ENOMEM;
 }
 
-struct ccupdate_struct {
-	struct kmem_cache *cachep;
-	struct array_cache *new[0];
-};
-
-static void do_ccupdate_local(void *info)
-{
-	struct ccupdate_struct *new = info;
-	struct array_cache *old;
-
-	check_irq_off();
-	old = cpu_cache_get(new->cachep);
-
-	new->cachep->array[smp_processor_id()] = new->new[smp_processor_id()];
-	new->new[smp_processor_id()] = old;
-}
-
 /* Always called with the slab_mutex held */
 static int __do_tune_cpucache(struct kmem_cache *cachep, int limit,
 				int batchcount, int shared, gfp_t gfp)
 {
-	struct ccupdate_struct *new;
-	int i;
+	struct array_cache __percpu *cpu_cache, *prev;
+	int cpu;
 
-	new = kzalloc(sizeof(*new) + nr_cpu_ids * sizeof(struct array_cache *),
-		      gfp);
-	if (!new)
+	cpu_cache = __alloc_kmem_cache_cpus(cachep, limit, batchcount);
+	if (!cpu_cache)
 		return -ENOMEM;
 
-	for_each_online_cpu(i) {
-		new->new[i] = alloc_arraycache(cpu_to_mem(i), limit,
-						batchcount, gfp);
-		if (!new->new[i]) {
-			for (i--; i >= 0; i--)
-				kfree(new->new[i]);
-			kfree(new);
-			return -ENOMEM;
-		}
-	}
-	new->cachep = cachep;
-
-	on_each_cpu(do_ccupdate_local, (void *)new, 1);
+	prev = cachep->cpu_cache;
+	cachep->cpu_cache = cpu_cache;
+	kick_all_cpus_sync();
 
 	check_irq_on();
 	cachep->batchcount = batchcount;
 	cachep->limit = limit;
 	cachep->shared = shared;
 
-	for_each_online_cpu(i) {
+	if (!prev)
+		goto alloc_node;
+
+	for_each_online_cpu(cpu) {
 		LIST_HEAD(list);
-		struct array_cache *ccold = new->new[i];
 		int node;
 		struct kmem_cache_node *n;
+		struct array_cache *ac = per_cpu_ptr(prev, cpu);
 
-		if (!ccold)
-			continue;
-
-		node = cpu_to_mem(i);
+		node = cpu_to_mem(cpu);
 		n = get_node(cachep, node);
 		spin_lock_irq(&n->list_lock);
-		free_block(cachep, ccold->entry, ccold->avail, node, &list);
+		free_block(cachep, ac->entry, ac->avail, node, &list);
 		spin_unlock_irq(&n->list_lock);
 		slabs_destroy(cachep, &list);
-		kfree(ccold);
 	}
-	kfree(new);
+	free_percpu(prev);
+
+alloc_node:
 	return alloc_kmem_cache_node(cachep, gfp);
 }
 
diff --git a/mm/slab.h b/mm/slab.h
index bd1c54a..5cb4649 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -48,7 +48,6 @@ struct kmem_cache {
 enum slab_state {
 	DOWN,			/* No slab functionality yet */
 	PARTIAL,		/* SLUB: kmem_cache_node available */
-	PARTIAL_ARRAYCACHE,	/* SLAB: kmalloc size for arraycache available */
 	PARTIAL_NODE,		/* SLAB: kmalloc size for node struct available */
 	UP,			/* Slab caches usable but not all extras yet */
 	FULL			/* Everything is working */
-- 
1.7.9.5


* [PATCH 2/3] mm/slab_common: commonize slab merge logic
  2014-08-21  8:11 ` Joonsoo Kim
@ 2014-08-21  8:11   ` Joonsoo Kim
  -1 siblings, 0 replies; 43+ messages in thread
From: Joonsoo Kim @ 2014-08-21  8:11 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Christoph Lameter, Pekka Enberg, David Rientjes, linux-mm,
	linux-kernel, Joonsoo Kim

Slab merge is a good feature for reducing fragmentation. It is
currently only applied to SLUB, but it would be good to apply it to
SLAB as well. This patch is a preparation step: it moves the slab merge
logic into common code so that it can be applied to SLAB.
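
For illustration, the intended use of the now-shared helper looks like
this (try_merge() is a hypothetical wrapper; the real caller is the
__kmem_cache_alias() that the next patch adds for SLAB):

        static struct kmem_cache *try_merge(const char *name, size_t size,
                        size_t align, unsigned long flags,
                        void (*ctor)(void *))
        {
                struct kmem_cache *s;

                s = find_mergeable(size, align, flags, name, ctor);
                if (s)
                        s->refcount++;  /* reuse the compatible existing cache */

                return s;               /* NULL: no match, create a new cache */
        }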

Signed-off-by: Joonsoo Kim <iamjoonsoo.kim@lge.com>
---
 mm/slab.h        |   15 +++++++++
 mm/slab_common.c |   86 +++++++++++++++++++++++++++++++++++++++++++++++++++
 mm/slub.c        |   91 ++----------------------------------------------------
 3 files changed, 103 insertions(+), 89 deletions(-)

diff --git a/mm/slab.h b/mm/slab.h
index 5cb4649..7c6e1ed 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -85,15 +85,30 @@ extern void create_boot_cache(struct kmem_cache *, const char *name,
 			size_t size, unsigned long flags);
 
 struct mem_cgroup;
+
+int slab_unmergeable(struct kmem_cache *s);
+struct kmem_cache *find_mergeable(size_t size, size_t align,
+		unsigned long flags, const char *name, void (*ctor)(void *));
 #ifdef CONFIG_SLUB
 struct kmem_cache *
 __kmem_cache_alias(const char *name, size_t size, size_t align,
 		   unsigned long flags, void (*ctor)(void *));
+
+unsigned long kmem_cache_flags(unsigned long object_size,
+	unsigned long flags, const char *name,
+	void (*ctor)(void *));
 #else
 static inline struct kmem_cache *
 __kmem_cache_alias(const char *name, size_t size, size_t align,
 		   unsigned long flags, void (*ctor)(void *))
 { return NULL; }
+
+static inline unsigned long kmem_cache_flags(unsigned long object_size,
+	unsigned long flags, const char *name,
+	void (*ctor)(void *))
+{
+	return flags;
+}
 #endif
 
 
diff --git a/mm/slab_common.c b/mm/slab_common.c
index 2088904..65a5811 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -31,6 +31,29 @@ DEFINE_MUTEX(slab_mutex);
 struct kmem_cache *kmem_cache;
 
 /*
+ * Set of flags that will prevent slab merging
+ */
+#define SLAB_NEVER_MERGE (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER | \
+		SLAB_TRACE | SLAB_DESTROY_BY_RCU | SLAB_NOLEAKTRACE | \
+		SLAB_FAILSLAB)
+
+#define SLAB_MERGE_SAME (SLAB_DEBUG_FREE | SLAB_RECLAIM_ACCOUNT | \
+		SLAB_CACHE_DMA | SLAB_NOTRACK)
+
+/*
+ * Merge control. If this is set then no merging of slab caches will occur.
+ * (Could be removed. This was introduced to pacify the merge skeptics.)
+ */
+static int slab_nomerge;
+
+static int __init setup_slab_nomerge(char *str)
+{
+	slab_nomerge = 1;
+	return 1;
+}
+__setup("slub_nomerge", setup_slab_nomerge);
+
+/*
  * Determine the size of a slab object
  */
 unsigned int kmem_cache_size(struct kmem_cache *s)
@@ -115,6 +138,69 @@ out:
 #endif
 
 /*
+ * Find a mergeable slab cache
+ */
+int slab_unmergeable(struct kmem_cache *s)
+{
+	if (slab_nomerge || (s->flags & SLAB_NEVER_MERGE))
+		return 1;
+
+	if (!is_root_cache(s))
+		return 1;
+
+	if (s->ctor)
+		return 1;
+
+	/*
+	 * We may have set a slab to be unmergeable during bootstrap.
+	 */
+	if (s->refcount < 0)
+		return 1;
+
+	return 0;
+}
+
+struct kmem_cache *find_mergeable(size_t size, size_t align,
+		unsigned long flags, const char *name, void (*ctor)(void *))
+{
+	struct kmem_cache *s;
+
+	if (slab_nomerge || (flags & SLAB_NEVER_MERGE))
+		return NULL;
+
+	if (ctor)
+		return NULL;
+
+	size = ALIGN(size, sizeof(void *));
+	align = calculate_alignment(flags, align, size);
+	size = ALIGN(size, align);
+	flags = kmem_cache_flags(size, flags, name, NULL);
+
+	list_for_each_entry(s, &slab_caches, list) {
+		if (slab_unmergeable(s))
+			continue;
+
+		if (size > s->size)
+			continue;
+
+		if ((flags & SLAB_MERGE_SAME) != (s->flags & SLAB_MERGE_SAME))
+			continue;
+		/*
+		 * Check if alignment is compatible.
+		 * Courtesy of Adrian Drzewiecki
+		 */
+		if ((s->size & ~(align - 1)) != s->size)
+			continue;
+
+		if (s->size - size >= sizeof(void *))
+			continue;
+
+		return s;
+	}
+	return NULL;
+}
+
+/*
  * Figure out what the alignment of the objects will be given a set of
  * flags, a user specified alignment and the size of the objects.
  */
diff --git a/mm/slub.c b/mm/slub.c
index 3e8afcc..b29e835 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -169,16 +169,6 @@ static inline bool kmem_cache_has_cpu_partial(struct kmem_cache *s)
  */
 #define DEBUG_METADATA_FLAGS (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER)
 
-/*
- * Set of flags that will prevent slab merging
- */
-#define SLUB_NEVER_MERGE (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER | \
-		SLAB_TRACE | SLAB_DESTROY_BY_RCU | SLAB_NOLEAKTRACE | \
-		SLAB_FAILSLAB)
-
-#define SLUB_MERGE_SAME (SLAB_DEBUG_FREE | SLAB_RECLAIM_ACCOUNT | \
-		SLAB_CACHE_DMA | SLAB_NOTRACK)
-
 #define OO_SHIFT	16
 #define OO_MASK		((1 << OO_SHIFT) - 1)
 #define MAX_OBJS_PER_PAGE	32767 /* since page.objects is u15 */
@@ -1176,7 +1166,7 @@ out:
 
 __setup("slub_debug", setup_slub_debug);
 
-static unsigned long kmem_cache_flags(unsigned long object_size,
+unsigned long kmem_cache_flags(unsigned long object_size,
 	unsigned long flags, const char *name,
 	void (*ctor)(void *))
 {
@@ -1208,7 +1198,7 @@ static inline void add_full(struct kmem_cache *s, struct kmem_cache_node *n,
 					struct page *page) {}
 static inline void remove_full(struct kmem_cache *s, struct kmem_cache_node *n,
 					struct page *page) {}
-static inline unsigned long kmem_cache_flags(unsigned long object_size,
+unsigned long kmem_cache_flags(unsigned long object_size,
 	unsigned long flags, const char *name,
 	void (*ctor)(void *))
 {
@@ -2707,12 +2697,6 @@ static int slub_max_order = PAGE_ALLOC_COSTLY_ORDER;
 static int slub_min_objects;
 
 /*
- * Merge control. If this is set then no merging of slab caches will occur.
- * (Could be removed. This was introduced to pacify the merge skeptics.)
- */
-static int slub_nomerge;
-
-/*
  * Calculate the order of allocation given an slab object size.
  *
  * The order of allocation has significant impact on performance and other
@@ -3240,14 +3224,6 @@ static int __init setup_slub_min_objects(char *str)
 
 __setup("slub_min_objects=", setup_slub_min_objects);
 
-static int __init setup_slub_nomerge(char *str)
-{
-	slub_nomerge = 1;
-	return 1;
-}
-
-__setup("slub_nomerge", setup_slub_nomerge);
-
 void *__kmalloc(size_t size, gfp_t flags)
 {
 	struct kmem_cache *s;
@@ -3625,69 +3601,6 @@ void __init kmem_cache_init_late(void)
 {
 }
 
-/*
- * Find a mergeable slab cache
- */
-static int slab_unmergeable(struct kmem_cache *s)
-{
-	if (slub_nomerge || (s->flags & SLUB_NEVER_MERGE))
-		return 1;
-
-	if (!is_root_cache(s))
-		return 1;
-
-	if (s->ctor)
-		return 1;
-
-	/*
-	 * We may have set a slab to be unmergeable during bootstrap.
-	 */
-	if (s->refcount < 0)
-		return 1;
-
-	return 0;
-}
-
-static struct kmem_cache *find_mergeable(size_t size, size_t align,
-		unsigned long flags, const char *name, void (*ctor)(void *))
-{
-	struct kmem_cache *s;
-
-	if (slub_nomerge || (flags & SLUB_NEVER_MERGE))
-		return NULL;
-
-	if (ctor)
-		return NULL;
-
-	size = ALIGN(size, sizeof(void *));
-	align = calculate_alignment(flags, align, size);
-	size = ALIGN(size, align);
-	flags = kmem_cache_flags(size, flags, name, NULL);
-
-	list_for_each_entry(s, &slab_caches, list) {
-		if (slab_unmergeable(s))
-			continue;
-
-		if (size > s->size)
-			continue;
-
-		if ((flags & SLUB_MERGE_SAME) != (s->flags & SLUB_MERGE_SAME))
-			continue;
-		/*
-		 * Check if alignment is compatible.
-		 * Courtesy of Adrian Drzewiecki
-		 */
-		if ((s->size & ~(align - 1)) != s->size)
-			continue;
-
-		if (s->size - size >= sizeof(void *))
-			continue;
-
-		return s;
-	}
-	return NULL;
-}
-
 struct kmem_cache *
 __kmem_cache_alias(const char *name, size_t size, size_t align,
 		   unsigned long flags, void (*ctor)(void *))
-- 
1.7.9.5


* [PATCH 3/3] mm/slab: support slab merge
  2014-08-21  8:11 ` Joonsoo Kim
@ 2014-08-21  8:11   ` Joonsoo Kim
  -1 siblings, 0 replies; 43+ messages in thread
From: Joonsoo Kim @ 2014-08-21  8:11 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Christoph Lameter, Pekka Enberg, David Rientjes, linux-mm,
	linux-kernel, Joonsoo Kim

Slab merge is a good feature for reducing fragmentation. If a newly
created slab cache has a similar size and properties to an existing
one, this feature reuses the existing cache rather than creating a new
one. As a result, objects are packed into fewer slabs and fragmentation
is reduced.
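
For example (hypothetical caches, purely to illustrate the effect), two
caches created with compatible size and flags can now end up backed by
the same kmem_cache:

        struct kmem_cache *foo_cache, *bar_cache;

        foo_cache = kmem_cache_create("foo", 128, 0, 0, NULL);
        bar_cache = kmem_cache_create("bar", 128, 0, 0, NULL);
        /* with merging, both may refer to the same underlying cache */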

Below is the result of my testing.

* After boot, sleep 20; cat /proc/meminfo | grep Slab

<Before>
Slab: 25136 kB

<After>
Slab: 24364 kB

We can save about 3% of the memory used by slab.
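
(The arithmetic: (25136 - 24364) kB / 25136 kB = 772 / 25136, i.e. a
roughly 3.1% reduction.)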

Signed-off-by: Joonsoo Kim <iamjoonsoo.kim@lge.com>
---
 mm/slab.c |   20 ++++++++++++++++++++
 mm/slab.h |    2 +-
 2 files changed, 21 insertions(+), 1 deletion(-)

diff --git a/mm/slab.c b/mm/slab.c
index 09b060e..a1cc1c9 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -2052,6 +2052,26 @@ static int __init_refok setup_cpu_cache(struct kmem_cache *cachep, gfp_t gfp)
 	return 0;
 }
 
+unsigned long kmem_cache_flags(unsigned long object_size,
+	unsigned long flags, const char *name,
+	void (*ctor)(void *))
+{
+	return flags;
+}
+
+struct kmem_cache *
+__kmem_cache_alias(const char *name, size_t size, size_t align,
+		   unsigned long flags, void (*ctor)(void *))
+{
+	struct kmem_cache *cachep;
+
+	cachep = find_mergeable(size, align, flags, name, ctor);
+	if (cachep)
+		cachep->refcount++;
+
+	return cachep;
+}
+
 /**
  * __kmem_cache_create - Create a cache.
  * @cachep: cache management descriptor
diff --git a/mm/slab.h b/mm/slab.h
index 7c6e1ed..13845d0 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -89,7 +89,7 @@ struct mem_cgroup;
 int slab_unmergeable(struct kmem_cache *s);
 struct kmem_cache *find_mergeable(size_t size, size_t align,
 		unsigned long flags, const char *name, void (*ctor)(void *));
-#ifdef CONFIG_SLUB
+#ifndef CONFIG_SLOB
 struct kmem_cache *
 __kmem_cache_alias(const char *name, size_t size, size_t align,
 		   unsigned long flags, void (*ctor)(void *));
-- 
1.7.9.5


* Re: [PATCH 1/3] mm/slab: use percpu allocator for cpu cache
  2014-08-21  8:11 ` Joonsoo Kim
@ 2014-08-21 14:21   ` Christoph Lameter
  -1 siblings, 0 replies; 43+ messages in thread
From: Christoph Lameter @ 2014-08-21 14:21 UTC (permalink / raw)
  To: Joonsoo Kim
  Cc: Andrew Morton, Pekka Enberg, David Rientjes, linux-mm, Tejun Heo,
	linux-kernel

On Thu, 21 Aug 2014, Joonsoo Kim wrote:

> So, this patch try to use percpu allocator in SLAB. This simplify
> initialization step in SLAB so that we could maintain SLAB code more
> easily.

I thought about this a couple of times, but the amount of memory used
for the per-cpu arrays can be huge. In contrast to SLUB, which needs
just a few pointers, SLAB requires one pointer per object that can be
in the local cache. CC Tj.

Let's say we have 300 caches and we allow 1000 objects to be cached per
cpu. That is 300k pointers per cpu: 1.2M on 32-bit, 2.4M per cpu on
64-bit.
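
Spelled out, that estimate is:

        300 caches * 1000 cached objects/cpu = 300,000 pointers per cpu
        300,000 * 4 bytes (32-bit pointers)  ~ 1.2 MB per cpu
        300,000 * 8 bytes (64-bit pointers)  ~ 2.4 MB per cpu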


* Re: [PATCH 2/3] mm/slab_common: commonize slab merge logic
  2014-08-21  8:11   ` Joonsoo Kim
@ 2014-08-21 14:22     ` Christoph Lameter
  -1 siblings, 0 replies; 43+ messages in thread
From: Christoph Lameter @ 2014-08-21 14:22 UTC (permalink / raw)
  To: Joonsoo Kim
  Cc: Andrew Morton, Pekka Enberg, David Rientjes, linux-mm, linux-kernel

On Thu, 21 Aug 2014, Joonsoo Kim wrote:

> Slab merge is good feature to reduce fragmentation. Now, it is only
> applied to SLUB, but, it would be good to apply it to SLAB. This patch
> is preparation step to apply slab merge to SLAB by commonizing slab
> merge logic.

Oh. Wow. Never thought that would be possible. Need to have some more time
to review this though.


* Re: [PATCH 1/3] mm/slab: use percpu allocator for cpu cache
  2014-08-21 14:21   ` Christoph Lameter
@ 2014-08-25  8:26     ` Joonsoo Kim
  -1 siblings, 0 replies; 43+ messages in thread
From: Joonsoo Kim @ 2014-08-25  8:26 UTC (permalink / raw)
  To: Christoph Lameter
  Cc: Andrew Morton, Pekka Enberg, David Rientjes, linux-mm, Tejun Heo,
	linux-kernel

On Thu, Aug 21, 2014 at 09:21:30AM -0500, Christoph Lameter wrote:
> On Thu, 21 Aug 2014, Joonsoo Kim wrote:
> 
> > So, this patch try to use percpu allocator in SLAB. This simplify
> > initialization step in SLAB so that we could maintain SLAB code more
> > easily.
> 
> I thought about this a couple of times but the amount of memory used for
> the per cpu arrays can be huge. In contrast to slub which needs just a
> few pointers, slab requires one pointer per object that can be in the
> local cache. CC Tj.
> 
> Lets say we have 300 caches and we allow 1000 objects to be cached per
> cpu. That is 300k pointers per cpu. 1.2M on 32 bit. 2.4M per cpu on
> 64bit.

Hello, Christoph.

The amount of memory we need to keep pointers to objects is the same in
either case. I know that the percpu allocator occupies vmalloc space,
so maybe we could exhaust vmalloc space on 32-bit; 64-bit has no
problem with it. How many cores does the largest 32-bit system have?
Is it possible to exhaust vmalloc space if we use the percpu allocator?

Thanks.


* Re: [PATCH 2/3] mm/slab_common: commonize slab merge logic
  2014-08-21 14:22     ` Christoph Lameter
@ 2014-08-25  8:26       ` Joonsoo Kim
  -1 siblings, 0 replies; 43+ messages in thread
From: Joonsoo Kim @ 2014-08-25  8:26 UTC (permalink / raw)
  To: Christoph Lameter
  Cc: Andrew Morton, Pekka Enberg, David Rientjes, linux-mm, linux-kernel

On Thu, Aug 21, 2014 at 09:22:35AM -0500, Christoph Lameter wrote:
> On Thu, 21 Aug 2014, Joonsoo Kim wrote:
> 
> > Slab merge is good feature to reduce fragmentation. Now, it is only
> > applied to SLUB, but, it would be good to apply it to SLAB. This patch
> > is preparation step to apply slab merge to SLAB by commonizing slab
> > merge logic.
> 
> Oh. Wow. Never thought that would be possible. Need to have some more time
> to review this though.

Yes, please review it. :)

Thanks.

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH 1/3] mm/slab: use percpu allocator for cpu cache
  2014-08-25  8:26     ` Joonsoo Kim
@ 2014-08-25 13:13       ` Christoph Lameter
  -1 siblings, 0 replies; 43+ messages in thread
From: Christoph Lameter @ 2014-08-25 13:13 UTC (permalink / raw)
  To: Joonsoo Kim
  Cc: Andrew Morton, Pekka Enberg, David Rientjes, linux-mm, Tejun Heo,
	linux-kernel

On Mon, 25 Aug 2014, Joonsoo Kim wrote:

> On Thu, Aug 21, 2014 at 09:21:30AM -0500, Christoph Lameter wrote:
> > On Thu, 21 Aug 2014, Joonsoo Kim wrote:
> >
> > > So, this patch try to use percpu allocator in SLAB. This simplify
> > > initialization step in SLAB so that we could maintain SLAB code more
> > > easily.
> >
> > I thought about this a couple of times but the amount of memory used for
> > the per cpu arrays can be huge. In contrast to slub which needs just a
> > few pointers, slab requires one pointer per object that can be in the
> > local cache. CC Tj.
> >
> > Lets say we have 300 caches and we allow 1000 objects to be cached per
> > cpu. That is 300k pointers per cpu. 1.2M on 32 bit. 2.4M per cpu on
> > 64bit.
>
> Amount of memory we need to keep pointers for object is same in any case.

What case? SLUB uses a linked list and therefore does not have these
storage requirements.
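
To make the contrast concrete, a simplified sketch of the two kinds of
per-cpu state (illustrative types with paraphrased fields, not the kernel's
actual definitions):

/*
 * SLUB-style per-cpu state is O(1) per cache: free objects on the cpu
 * slab are chained through the free objects themselves.
 */
struct slub_like_cpu_state {
	void **freelist;	/* first free object on the cpu slab */
	struct page *page;	/* the cpu slab */
};

/*
 * SLAB's array_cache instead stores one pointer per cached object, so
 * its size scales with the 'limit' tunable.
 */
struct slab_like_array_cache {
	unsigned int avail;
	unsigned int limit;
	unsigned int batchcount;
	unsigned int touched;
	void *entry[];		/* up to 'limit' object pointers */
};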

> I know that percpu allocator occupy vmalloc space, so maybe we could
> exhaust vmalloc space on 32 bit. 64 bit has no problem on it.
> How many cores does largest 32 bit system have? Is it possible
> to exhaust vmalloc space if we use percpu allocator?

There were NUMA systems on x86 a while back (not sure if they still
exist) with 128 or so processors.

Some people boot 32 bit kernels on contemporary servers. The Intel ones
max out at 18 cores (36 hyperthreaded). I think they support up to 8
sockets. So 8 * 36?


It's different on other platforms with much higher numbers. Power can
easily go up to hundreds of hardware threads, and SGI Altixes 7 years ago
were at 8000 or so.
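
For a rough sense of scale, a back-of-the-envelope using the numbers from
earlier in the thread (300 caches, a limit of 1000 cached objects per cpu)
and a hypothetical 8 * 36 = 288 cpu box; these are illustrative figures,
not measurements:

#include <stdio.h>

int main(void)
{
	unsigned long caches = 300, limit = 1000, cpus = 288;

	/* one pointer per cacheable object, per cpu, per cache */
	printf("per cpu, 32 bit: %lu KB\n", caches * limit * 4 / 1024);
	printf("per cpu, 64 bit: %lu KB\n", caches * limit * 8 / 1024);
	/* total percpu (vmalloc-backed) consumption on 32 bit */
	printf("all cpus, 32 bit: %lu MB\n",
	       caches * limit * 4 * cpus / (1024 * 1024));
	return 0;
}

That works out to roughly 1171 KB per cpu on 32 bit, 2343 KB per cpu on
64 bit, and about 329 MB across all 288 cpus on 32 bit, which would be a
large slice of a 32 bit kernel's vmalloc space.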


^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH 2/3] mm/slab_common: commonize slab merge logic
  2014-08-21  8:11   ` Joonsoo Kim
@ 2014-08-25 15:27     ` Christoph Lameter
  -1 siblings, 0 replies; 43+ messages in thread
From: Christoph Lameter @ 2014-08-25 15:27 UTC (permalink / raw)
  To: Joonsoo Kim
  Cc: Andrew Morton, Pekka Enberg, David Rientjes, linux-mm, linux-kernel

On Thu, 21 Aug 2014, Joonsoo Kim wrote:

> +static int __init setup_slab_nomerge(char *str)
> +{
> +	slab_nomerge = 1;
> +	return 1;
> +}
> +__setup("slub_nomerge", setup_slab_nomerge);

Uhh.. You would have to specify "slub_nomerge" to get slab to not merge
slab caches?

Otherwise this is a straightforward move into the common area.


^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH 3/3] mm/slab: support slab merge
  2014-08-21  8:11   ` Joonsoo Kim
@ 2014-08-25 15:29     ` Christoph Lameter
  -1 siblings, 0 replies; 43+ messages in thread
From: Christoph Lameter @ 2014-08-25 15:29 UTC (permalink / raw)
  To: Joonsoo Kim
  Cc: Andrew Morton, Pekka Enberg, David Rientjes, linux-mm, linux-kernel

On Thu, 21 Aug 2014, Joonsoo Kim wrote:

> diff --git a/mm/slab.c b/mm/slab.c
> index 09b060e..a1cc1c9 100644
> --- a/mm/slab.c
> +++ b/mm/slab.c
> @@ -2052,6 +2052,26 @@ static int __init_refok setup_cpu_cache(struct kmem_cache *cachep, gfp_t gfp)
>  	return 0;
>  }
>
> +unsigned long kmem_cache_flags(unsigned long object_size,
> +	unsigned long flags, const char *name,
> +	void (*ctor)(void *))
> +{
> +	return flags;
> +}
> +
> +struct kmem_cache *
> +__kmem_cache_alias(const char *name, size_t size, size_t align,
> +		   unsigned long flags, void (*ctor)(void *))
> +{
> +	struct kmem_cache *cachep;
> +
> +	cachep = find_mergeable(size, align, flags, name, ctor);
> +	if (cachep)
> +		cachep->refcount++;
> +
> +	return cachep;
> +}
> +

These could be commonized as well. Make refcount a common field and then
the same function can be used for both caches.


^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH 1/3] mm/slab: use percpu allocator for cpu cache
  2014-08-25 13:13       ` Christoph Lameter
@ 2014-08-26  2:19         ` Joonsoo Kim
  -1 siblings, 0 replies; 43+ messages in thread
From: Joonsoo Kim @ 2014-08-26  2:19 UTC (permalink / raw)
  To: Christoph Lameter
  Cc: Andrew Morton, Pekka Enberg, David Rientjes, linux-mm, Tejun Heo,
	linux-kernel

On Mon, Aug 25, 2014 at 08:13:58AM -0500, Christoph Lameter wrote:
> On Mon, 25 Aug 2014, Joonsoo Kim wrote:
> 
> > On Thu, Aug 21, 2014 at 09:21:30AM -0500, Christoph Lameter wrote:
> > > On Thu, 21 Aug 2014, Joonsoo Kim wrote:
> > >
> > > > So, this patch try to use percpu allocator in SLAB. This simplify
> > > > initialization step in SLAB so that we could maintain SLAB code more
> > > > easily.
> > >
> > > I thought about this a couple of times but the amount of memory used for
> > > the per cpu arrays can be huge. In contrast to slub which needs just a
> > > few pointers, slab requires one pointer per object that can be in the
> > > local cache. CC Tj.
> > >
> > > Lets say we have 300 caches and we allow 1000 objects to be cached per
> > > cpu. That is 300k pointers per cpu. 1.2M on 32 bit. 2.4M per cpu on
> > > 64bit.
> >
> > Amount of memory we need to keep pointers for object is same in any case.
> 
> What case? SLUB uses a linked list and therefore does not have these
> storage requirements.

I misunderstood and thought you were referring just to memory usage. My *any case*
means the memory usage of the previous SLAB versus SLAB with this percpu alloc
change. Sorry for the confusion.

> 
> > I know that percpu allocator occupy vmalloc space, so maybe we could
> > exhaust vmalloc space on 32 bit. 64 bit has no problem on it.
> > How many cores does largest 32 bit system have? Is it possible
> > to exhaust vmalloc space if we use percpu allocator?
> 
> There were NUMA systems on x86 a while back (not sure if they still
> exists) with 128 or so processors.
> 
> Some people boot 32 bit kernels on contemporary servers. The Intel ones
> max out at 18 cores (36 hyperthreaded). I think they support up to 8
> scokets. So 8 * 36?
> 
> 
> Its different on other platforms with much higher numbers. Power can
> easily go up to hundreds of hardware threads and SGI Altixes 7 yearsago
> where at 8000 or so.

Okay... These large systems with a 32 bit kernel could break with this
change. I will do more investigation. Possibly, I will drop this patch. :)

Thanks.

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH 2/3] mm/slab_common: commonize slab merge logic
  2014-08-25 15:27     ` Christoph Lameter
@ 2014-08-26  2:23       ` Joonsoo Kim
  -1 siblings, 0 replies; 43+ messages in thread
From: Joonsoo Kim @ 2014-08-26  2:23 UTC (permalink / raw)
  To: Christoph Lameter
  Cc: Andrew Morton, Pekka Enberg, David Rientjes, linux-mm, linux-kernel

On Mon, Aug 25, 2014 at 10:27:58AM -0500, Christoph Lameter wrote:
> On Thu, 21 Aug 2014, Joonsoo Kim wrote:
> 
> > +static int __init setup_slab_nomerge(char *str)
> > +{
> > +	slab_nomerge = 1;
> > +	return 1;
> > +}
> > +__setup("slub_nomerge", setup_slab_nomerge);
> 
> Uhh.. You would have to specify "slub_nomerge" to get slab to not merge
> slab caches?

I should fix it. How about the following change?

#ifdef CONFIG_SLUB
__setup("slub_nomerge", setup_slab_nomerge);
#endif

__setup("slab_nomerge", setup_slab_nomerge);

This makes "slab_nomerge" work for all SL[aou]B.
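
A minimal sketch of how that could read in mm/slab_common.c, assuming the
slab_nomerge flag from this series; one wrinkle is that two plain __setup()
entries pointing at the same handler generate the same symbol name, so the
SLUB spelling likely needs __setup_param() with a distinct id:

static int slab_nomerge;	/* from the patch */

static int __init setup_slab_nomerge(char *str)
{
	slab_nomerge = 1;
	return 1;
}

#ifdef CONFIG_SLUB
/* keep the historical "slub_nomerge" spelling working */
__setup_param("slub_nomerge", slub_nomerge, setup_slab_nomerge, 0);
#endif

__setup("slab_nomerge", setup_slab_nomerge);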

Thanks.

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH 3/3] mm/slab: support slab merge
  2014-08-25 15:29     ` Christoph Lameter
@ 2014-08-26  2:26       ` Joonsoo Kim
  -1 siblings, 0 replies; 43+ messages in thread
From: Joonsoo Kim @ 2014-08-26  2:26 UTC (permalink / raw)
  To: Christoph Lameter
  Cc: Andrew Morton, Pekka Enberg, David Rientjes, linux-mm, linux-kernel

On Mon, Aug 25, 2014 at 10:29:19AM -0500, Christoph Lameter wrote:
> On Thu, 21 Aug 2014, Joonsoo Kim wrote:
> 
> > diff --git a/mm/slab.c b/mm/slab.c
> > index 09b060e..a1cc1c9 100644
> > --- a/mm/slab.c
> > +++ b/mm/slab.c
> > @@ -2052,6 +2052,26 @@ static int __init_refok setup_cpu_cache(struct kmem_cache *cachep, gfp_t gfp)
> >  	return 0;
> >  }
> >
> > +unsigned long kmem_cache_flags(unsigned long object_size,
> > +	unsigned long flags, const char *name,
> > +	void (*ctor)(void *))
> > +{
> > +	return flags;
> > +}
> > +
> > +struct kmem_cache *
> > +__kmem_cache_alias(const char *name, size_t size, size_t align,
> > +		   unsigned long flags, void (*ctor)(void *))
> > +{
> > +	struct kmem_cache *cachep;
> > +
> > +	cachep = find_mergeable(size, align, flags, name, ctor);
> > +	if (cachep)
> > +		cachep->refcount++;
> > +
> > +	return cachep;
> > +}
> > +
> 
> These could be commonized as well. Make refcount a common field and then
> the same function can be used for both caches.

refcount is already a common field. These can't be commonized, because
SLUB needs some other SLUB-specific processing related to debug flags
and object size changes.
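
For readers following along, the SLUB-side extra work looks roughly like
this (paraphrased from mm/slub.c of this era; the memcg and sysfs handling
is elided, and SLUB's kmem_cache_flags() additionally folds in slub_debug
options, which is why SLAB's version above can simply return flags):

struct kmem_cache *
__kmem_cache_alias(const char *name, size_t size, size_t align,
		   unsigned long flags, void (*ctor)(void *))
{
	struct kmem_cache *s;

	s = find_mergeable(size, align, flags, name, ctor);
	if (s) {
		s->refcount++;
		/*
		 * Widen the existing cache so that e.g. kzalloc() through
		 * the alias clears enough bytes.
		 */
		s->object_size = max(s->object_size, (int)size);
		s->inuse = max_t(int, s->inuse,
				 ALIGN(size, sizeof(void *)));
		/* ...plus memcg cache updates and a sysfs alias... */
	}

	return s;
}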

Thanks.


^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH 1/3] mm/slab: use percpu allocator for cpu cache
  2014-08-26  2:19         ` Joonsoo Kim
@ 2014-08-26 21:22           ` Christoph Lameter
  -1 siblings, 0 replies; 43+ messages in thread
From: Christoph Lameter @ 2014-08-26 21:22 UTC (permalink / raw)
  To: Joonsoo Kim
  Cc: Andrew Morton, Pekka Enberg, David Rientjes, linux-mm, Tejun Heo,
	linux-kernel

On Tue, 26 Aug 2014, Joonsoo Kim wrote:

> > What case? SLUB uses a linked list and therefore does not have these
> > storage requirements.
>
> I misunderstand that you mentioned just memory usage. My *any case*
> means memory usage of previous SLAB and SLAB with this percpu alloc
> change. Sorry for confusion.

Ok. True, the total amount of memory used does not increase.

> > > I know that percpu allocator occupy vmalloc space, so maybe we could
> > > exhaust vmalloc space on 32 bit. 64 bit has no problem on it.
> > > How many cores does largest 32 bit system have? Is it possible
> > > to exhaust vmalloc space if we use percpu allocator?
> >
> > There were NUMA systems on x86 a while back (not sure if they still
> > exists) with 128 or so processors.
> >
> > Some people boot 32 bit kernels on contemporary servers. The Intel ones
> > max out at 18 cores (36 hyperthreaded). I think they support up to 8
> > scokets. So 8 * 36?
> >
> >
> > Its different on other platforms with much higher numbers. Power can
> > easily go up to hundreds of hardware threads and SGI Altixes 7 yearsago
> > where at 8000 or so.
>
> Okay... These large systems with 32 bit kernel could be break with this
> change. I will do more investigation. Possibly, I will drop this patch. :)

Wait, the last systems mentioned are 64 bit. SGI definitely, Power probably
too.


^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH 2/3] mm/slab_common: commonize slab merge logic
  2014-08-26  2:23       ` Joonsoo Kim
@ 2014-08-26 21:23         ` Christoph Lameter
  -1 siblings, 0 replies; 43+ messages in thread
From: Christoph Lameter @ 2014-08-26 21:23 UTC (permalink / raw)
  To: Joonsoo Kim
  Cc: Andrew Morton, Pekka Enberg, David Rientjes, linux-mm, linux-kernel

On Tue, 26 Aug 2014, Joonsoo Kim wrote:

> On Mon, Aug 25, 2014 at 10:27:58AM -0500, Christoph Lameter wrote:
> > Uhh.. You would have to specify "slub_nomerge" to get slab to not merge
> > slab caches?
>
> Should fix it. How about following change?
>
> #ifdef CONFIG_SLUB
> __setup("slub_nomerge", setup_slab_nomerge);
> #endif
>
> __setup("slab_nomerge", setup_slab_nomerge);
>
> This makes "slab_nomerge" works for all SL[aou]B.

Ok. At some point we need to make slub_nomerge legacy then.


^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH 1/3] mm/slab: use percpu allocator for cpu cache
  2014-08-21  8:11 ` Joonsoo Kim
@ 2014-08-27 23:37   ` Christoph Lameter
  -1 siblings, 0 replies; 43+ messages in thread
From: Christoph Lameter @ 2014-08-27 23:37 UTC (permalink / raw)
  To: Joonsoo Kim
  Cc: Andrew Morton, Pekka Enberg, David Rientjes, linux-mm, linux-kernel

One minor nit. Otherwise

Acked-by: Christoph Lameter <cl@linux.com>

On Thu, 21 Aug 2014, Joonsoo Kim wrote:

> @@ -2041,56 +1982,63 @@ static size_t calculate_slab_order(struct kmem_cache *cachep,
>  	return left_over;
>  }
>
> +static int alloc_kmem_cache_cpus(struct kmem_cache *cachep, int entries,
> +				int batchcount)
> +{
> +	cachep->cpu_cache = __alloc_kmem_cache_cpus(cachep, entries,
> +							batchcount);
> +	if (!cachep->cpu_cache)
> +		return 1;
> +
> +	return 0;
> +}

Do we really need this trivial function? It doesn't do anything useful as
far as I can tell.
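
In other words, the wrapper could be folded into its only caller; a sketch
of how setup_cpu_cache() might then begin, based on the helpers in the
patch rather than on any posted revision:

static int __init_refok setup_cpu_cache(struct kmem_cache *cachep, gfp_t gfp)
{
	if (slab_state >= FULL)
		return enable_cpucache(cachep, gfp);

	/* allocate the percpu array cache directly, no wrapper needed */
	cachep->cpu_cache = __alloc_kmem_cache_cpus(cachep, 1, 1);
	if (!cachep->cpu_cache)
		return 1;

	/* ...the slab_state handling from the patch continues here... */
	return 0;
}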

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH 1/3] mm/slab: use percpu allocator for cpu cache
  2014-08-27 23:37   ` Christoph Lameter
@ 2014-09-01  0:19     ` Joonsoo Kim
  -1 siblings, 0 replies; 43+ messages in thread
From: Joonsoo Kim @ 2014-09-01  0:19 UTC (permalink / raw)
  To: Christoph Lameter
  Cc: Andrew Morton, Pekka Enberg, David Rientjes, linux-mm, linux-kernel

On Wed, Aug 27, 2014 at 06:37:33PM -0500, Christoph Lameter wrote:
> One minor nit. Otherwise
> 
> Acked-by: Christoph Lameter <cl@linux.com>
> 
> On Thu, 21 Aug 2014, Joonsoo Kim wrote:
> 
> > @@ -2041,56 +1982,63 @@ static size_t calculate_slab_order(struct kmem_cache *cachep,
> >  	return left_over;
> >  }
> >
> > +static int alloc_kmem_cache_cpus(struct kmem_cache *cachep, int entries,
> > +				int batchcount)
> > +{
> > +	cachep->cpu_cache = __alloc_kmem_cache_cpus(cachep, entries,
> > +							batchcount);
> > +	if (!cachep->cpu_cache)
> > +		return 1;
> > +
> > +	return 0;
> > +}
> 
> Do we really need this trivial function? It doesnt do anything useful as
> far as I can tell.

Hello,

You are right. I will remove it in the next spin.

Thanks.

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [REGRESSION] [PATCH 1/3] mm/slab: use percpu allocator for cpu cache
  2014-08-21  8:11 ` Joonsoo Kim
@ 2014-09-28  6:24   ` Jeremiah Mahler
  -1 siblings, 0 replies; 43+ messages in thread
From: Jeremiah Mahler @ 2014-09-28  6:24 UTC (permalink / raw)
  To: Joonsoo Kim
  Cc: Andrew Morton, Christoph Lameter, Pekka Enberg, David Rientjes,
	linux-mm, linux-kernel

On Thu, Aug 21, 2014 at 05:11:13PM +0900, Joonsoo Kim wrote:
> Because of chicken and egg problem, initializaion of SLAB is really
> complicated. We need to allocate cpu cache through SLAB to make
> the kmem_cache works, but, before initialization of kmem_cache,
> allocation through SLAB is impossible.
> 
> On the other hand, SLUB does initialization with more simple way. It
> uses percpu allocator to allocate cpu cache so there is no chicken and
> egg problem.
> 
> So, this patch try to use percpu allocator in SLAB. This simplify
> initialization step in SLAB so that we could maintain SLAB code more
> easily.
> 
> From my testing, there is no performance difference.
> 
> Signed-off-by: Joonsoo Kim <iamjoonsoo.kim@lge.com>
> ---
>  include/linux/slab_def.h |   20 +---
>  mm/slab.c                |  237 +++++++++++++++-------------------------------
>  mm/slab.h                |    1 -
>  3 files changed, 81 insertions(+), 177 deletions(-)
> 
> diff --git a/include/linux/slab_def.h b/include/linux/slab_def.h
> index 8235dfb..b869d16 100644
> --- a/include/linux/slab_def.h
> +++ b/include/linux/slab_def.h
> @@ -8,6 +8,8 @@
>   */
>  
>  struct kmem_cache {
> +	struct array_cache __percpu *cpu_cache;
> +
>  /* 1) Cache tunables. Protected by slab_mutex */
>  	unsigned int batchcount;
>  	unsigned int limit;
> @@ -71,23 +73,7 @@ struct kmem_cache {
>  	struct memcg_cache_params *memcg_params;
>  #endif
>  
> -/* 6) per-cpu/per-node data, touched during every alloc/free */
> -	/*
> -	 * We put array[] at the end of kmem_cache, because we want to size
> -	 * this array to nr_cpu_ids slots instead of NR_CPUS
> -	 * (see kmem_cache_init())
> -	 * We still use [NR_CPUS] and not [1] or [0] because cache_cache
> -	 * is statically defined, so we reserve the max number of cpus.
> -	 *
> -	 * We also need to guarantee that the list is able to accomodate a
> -	 * pointer for each node since "nodelists" uses the remainder of
> -	 * available pointers.
> -	 */
> -	struct kmem_cache_node **node;
> -	struct array_cache *array[NR_CPUS + MAX_NUMNODES];
> -	/*
> -	 * Do not add fields after array[]
> -	 */
> +	struct kmem_cache_node *node[MAX_NUMNODES];
>  };
>  
>  #endif	/* _LINUX_SLAB_DEF_H */
> diff --git a/mm/slab.c b/mm/slab.c
> index 5927a17..09b060e 100644
> --- a/mm/slab.c
> +++ b/mm/slab.c
> @@ -237,11 +237,10 @@ struct arraycache_init {
>  /*
>   * Need this for bootstrapping a per node allocator.
>   */
> -#define NUM_INIT_LISTS (3 * MAX_NUMNODES)
> +#define NUM_INIT_LISTS (2 * MAX_NUMNODES)
>  static struct kmem_cache_node __initdata init_kmem_cache_node[NUM_INIT_LISTS];
>  #define	CACHE_CACHE 0
> -#define	SIZE_AC MAX_NUMNODES
> -#define	SIZE_NODE (2 * MAX_NUMNODES)
> +#define	SIZE_NODE (MAX_NUMNODES)
>  
>  static int drain_freelist(struct kmem_cache *cache,
>  			struct kmem_cache_node *n, int tofree);
> @@ -253,7 +252,6 @@ static void cache_reap(struct work_struct *unused);
>  
>  static int slab_early_init = 1;
>  
> -#define INDEX_AC kmalloc_index(sizeof(struct arraycache_init))
>  #define INDEX_NODE kmalloc_index(sizeof(struct kmem_cache_node))
>  
>  static void kmem_cache_node_init(struct kmem_cache_node *parent)
> @@ -458,9 +456,6 @@ static inline unsigned int obj_to_index(const struct kmem_cache *cache,
>  	return reciprocal_divide(offset, cache->reciprocal_buffer_size);
>  }
>  
> -static struct arraycache_init initarray_generic =
> -    { {0, BOOT_CPUCACHE_ENTRIES, 1, 0} };
> -
>  /* internal cache of cache description objs */
>  static struct kmem_cache kmem_cache_boot = {
>  	.batchcount = 1,
> @@ -476,7 +471,7 @@ static DEFINE_PER_CPU(struct delayed_work, slab_reap_work);
>  
>  static inline struct array_cache *cpu_cache_get(struct kmem_cache *cachep)
>  {
> -	return cachep->array[smp_processor_id()];
> +	return this_cpu_ptr(cachep->cpu_cache);
>  }
>  
>  static size_t calculate_freelist_size(int nr_objs, size_t align)
> @@ -1096,9 +1091,6 @@ static void cpuup_canceled(long cpu)
>  		struct alien_cache **alien;
>  		LIST_HEAD(list);
>  
> -		/* cpu is dead; no one can alloc from it. */
> -		nc = cachep->array[cpu];
> -		cachep->array[cpu] = NULL;
>  		n = get_node(cachep, node);
>  
>  		if (!n)
> @@ -1108,6 +1100,9 @@ static void cpuup_canceled(long cpu)
>  
>  		/* Free limit for this kmem_cache_node */
>  		n->free_limit -= cachep->batchcount;
> +
> +		/* cpu is dead; no one can alloc from it. */
> +		nc = per_cpu_ptr(cachep->cpu_cache, cpu);
>  		if (nc)
>  			free_block(cachep, nc->entry, nc->avail, node, &list);
>  
> @@ -1135,7 +1130,6 @@ static void cpuup_canceled(long cpu)
>  		}
>  free_array_cache:
>  		slabs_destroy(cachep, &list);
> -		kfree(nc);
>  	}
>  	/*
>  	 * In the previous loop, all the objects were freed to
> @@ -1172,32 +1166,23 @@ static int cpuup_prepare(long cpu)
>  	 * array caches
>  	 */
>  	list_for_each_entry(cachep, &slab_caches, list) {
> -		struct array_cache *nc;
>  		struct array_cache *shared = NULL;
>  		struct alien_cache **alien = NULL;
>  
> -		nc = alloc_arraycache(node, cachep->limit,
> -					cachep->batchcount, GFP_KERNEL);
> -		if (!nc)
> -			goto bad;
>  		if (cachep->shared) {
>  			shared = alloc_arraycache(node,
>  				cachep->shared * cachep->batchcount,
>  				0xbaadf00d, GFP_KERNEL);
> -			if (!shared) {
> -				kfree(nc);
> +			if (!shared)
>  				goto bad;
> -			}
>  		}
>  		if (use_alien_caches) {
>  			alien = alloc_alien_cache(node, cachep->limit, GFP_KERNEL);
>  			if (!alien) {
>  				kfree(shared);
> -				kfree(nc);
>  				goto bad;
>  			}
>  		}
> -		cachep->array[cpu] = nc;
>  		n = get_node(cachep, node);
>  		BUG_ON(!n);
>  
> @@ -1389,15 +1374,6 @@ static void __init set_up_node(struct kmem_cache *cachep, int index)
>  }
>  
>  /*
> - * The memory after the last cpu cache pointer is used for the
> - * the node pointer.
> - */
> -static void setup_node_pointer(struct kmem_cache *cachep)
> -{
> -	cachep->node = (struct kmem_cache_node **)&cachep->array[nr_cpu_ids];
> -}
> -
> -/*
>   * Initialisation.  Called after the page allocator have been initialised and
>   * before smp_init().
>   */
> @@ -1408,7 +1384,6 @@ void __init kmem_cache_init(void)
>  	BUILD_BUG_ON(sizeof(((struct page *)NULL)->lru) <
>  					sizeof(struct rcu_head));
>  	kmem_cache = &kmem_cache_boot;
> -	setup_node_pointer(kmem_cache);
>  
>  	if (num_possible_nodes() == 1)
>  		use_alien_caches = 0;
> @@ -1416,8 +1391,6 @@ void __init kmem_cache_init(void)
>  	for (i = 0; i < NUM_INIT_LISTS; i++)
>  		kmem_cache_node_init(&init_kmem_cache_node[i]);
>  
> -	set_up_node(kmem_cache, CACHE_CACHE);
> -
>  	/*
>  	 * Fragmentation resistance on low memory - only use bigger
>  	 * page orders on machines with more than 32MB of memory if
> @@ -1452,49 +1425,22 @@ void __init kmem_cache_init(void)
>  	 * struct kmem_cache size depends on nr_node_ids & nr_cpu_ids
>  	 */
>  	create_boot_cache(kmem_cache, "kmem_cache",
> -		offsetof(struct kmem_cache, array[nr_cpu_ids]) +
> +		offsetof(struct kmem_cache, node) +
>  				  nr_node_ids * sizeof(struct kmem_cache_node *),
>  				  SLAB_HWCACHE_ALIGN);
>  	list_add(&kmem_cache->list, &slab_caches);
> -
> -	/* 2+3) create the kmalloc caches */
> +	slab_state = PARTIAL;
>  
>  	/*
> -	 * Initialize the caches that provide memory for the array cache and the
> -	 * kmem_cache_node structures first.  Without this, further allocations will
> -	 * bug.
> +	 * Initialize the caches that provide memory for the  kmem_cache_node
> +	 * structures first.  Without this, further allocations will bug.
>  	 */
> -
> -	kmalloc_caches[INDEX_AC] = create_kmalloc_cache("kmalloc-ac",
> -					kmalloc_size(INDEX_AC), ARCH_KMALLOC_FLAGS);
> -
> -	if (INDEX_AC != INDEX_NODE)
> -		kmalloc_caches[INDEX_NODE] =
> -			create_kmalloc_cache("kmalloc-node",
> +	kmalloc_caches[INDEX_NODE] = create_kmalloc_cache("kmalloc-node",
>  				kmalloc_size(INDEX_NODE), ARCH_KMALLOC_FLAGS);
> +	slab_state = PARTIAL_NODE;
>  
>  	slab_early_init = 0;
>  
> -	/* 4) Replace the bootstrap head arrays */
> -	{
> -		struct array_cache *ptr;
> -
> -		ptr = kmalloc(sizeof(struct arraycache_init), GFP_NOWAIT);
> -
> -		memcpy(ptr, cpu_cache_get(kmem_cache),
> -		       sizeof(struct arraycache_init));
> -
> -		kmem_cache->array[smp_processor_id()] = ptr;
> -
> -		ptr = kmalloc(sizeof(struct arraycache_init), GFP_NOWAIT);
> -
> -		BUG_ON(cpu_cache_get(kmalloc_caches[INDEX_AC])
> -		       != &initarray_generic.cache);
> -		memcpy(ptr, cpu_cache_get(kmalloc_caches[INDEX_AC]),
> -		       sizeof(struct arraycache_init));
> -
> -		kmalloc_caches[INDEX_AC]->array[smp_processor_id()] = ptr;
> -	}
>  	/* 5) Replace the bootstrap kmem_cache_node */
>  	{
>  		int nid;
> @@ -1502,13 +1448,8 @@ void __init kmem_cache_init(void)
>  		for_each_online_node(nid) {
>  			init_list(kmem_cache, &init_kmem_cache_node[CACHE_CACHE + nid], nid);
>  
> -			init_list(kmalloc_caches[INDEX_AC],
> -				  &init_kmem_cache_node[SIZE_AC + nid], nid);
> -
> -			if (INDEX_AC != INDEX_NODE) {
> -				init_list(kmalloc_caches[INDEX_NODE],
> +			init_list(kmalloc_caches[INDEX_NODE],
>  					  &init_kmem_cache_node[SIZE_NODE + nid], nid);
> -			}
>  		}
>  	}
>  
> @@ -2041,56 +1982,63 @@ static size_t calculate_slab_order(struct kmem_cache *cachep,
>  	return left_over;
>  }
>  
> +static struct array_cache __percpu *__alloc_kmem_cache_cpus(
> +		struct kmem_cache *cachep, int entries, int batchcount)
> +{
> +	int cpu;
> +	size_t size;
> +	struct array_cache __percpu *cpu_cache;
> +
> +	size = sizeof(void *) * entries + sizeof(struct array_cache);
> +	cpu_cache = __alloc_percpu(size, 0);
> +
> +	if (!cpu_cache)
> +		return NULL;
> +
> +	for_each_possible_cpu(cpu) {
> +		init_arraycache(per_cpu_ptr(cpu_cache, cpu),
> +				entries, batchcount);
> +	}
> +
> +	return cpu_cache;
> +}
> +
> +static int alloc_kmem_cache_cpus(struct kmem_cache *cachep, int entries,
> +				int batchcount)
> +{
> +	cachep->cpu_cache = __alloc_kmem_cache_cpus(cachep, entries,
> +							batchcount);
> +	if (!cachep->cpu_cache)
> +		return 1;
> +
> +	return 0;
> +}
> +
>  static int __init_refok setup_cpu_cache(struct kmem_cache *cachep, gfp_t gfp)
>  {
>  	if (slab_state >= FULL)
>  		return enable_cpucache(cachep, gfp);
>  
> +	if (alloc_kmem_cache_cpus(cachep, 1, 1))
> +		return 1;
> +
>  	if (slab_state == DOWN) {
> -		/*
> -		 * Note: Creation of first cache (kmem_cache).
> -		 * The setup_node is taken care
> -		 * of by the caller of __kmem_cache_create
> -		 */
> -		cachep->array[smp_processor_id()] = &initarray_generic.cache;
> -		slab_state = PARTIAL;
> +		/* Creation of first cache (kmem_cache). */
> +		set_up_node(kmem_cache, CACHE_CACHE);
>  	} else if (slab_state == PARTIAL) {
> -		/*
> -		 * Note: the second kmem_cache_create must create the cache
> -		 * that's used by kmalloc(24), otherwise the creation of
> -		 * further caches will BUG().
> -		 */
> -		cachep->array[smp_processor_id()] = &initarray_generic.cache;
> -
> -		/*
> -		 * If the cache that's used by kmalloc(sizeof(kmem_cache_node)) is
> -		 * the second cache, then we need to set up all its node/,
> -		 * otherwise the creation of further caches will BUG().
> -		 */
> -		set_up_node(cachep, SIZE_AC);
> -		if (INDEX_AC == INDEX_NODE)
> -			slab_state = PARTIAL_NODE;
> -		else
> -			slab_state = PARTIAL_ARRAYCACHE;
> +		/* For kmem_cache_node */
> +		set_up_node(cachep, SIZE_NODE);
>  	} else {
> -		/* Remaining boot caches */
> -		cachep->array[smp_processor_id()] =
> -			kmalloc(sizeof(struct arraycache_init), gfp);
> +		int node;
>  
> -		if (slab_state == PARTIAL_ARRAYCACHE) {
> -			set_up_node(cachep, SIZE_NODE);
> -			slab_state = PARTIAL_NODE;
> -		} else {
> -			int node;
> -			for_each_online_node(node) {
> -				cachep->node[node] =
> -				    kmalloc_node(sizeof(struct kmem_cache_node),
> -						gfp, node);
> -				BUG_ON(!cachep->node[node]);
> -				kmem_cache_node_init(cachep->node[node]);
> -			}
> +		for_each_online_node(node) {
> +			cachep->node[node] = kmalloc_node(
> +				sizeof(struct kmem_cache_node), gfp, node);
> +			BUG_ON(!cachep->node[node]);
> +			kmem_cache_node_init(cachep->node[node]);
>  		}
>  	}
> +
>  	cachep->node[numa_mem_id()]->next_reap =
>  			jiffies + REAPTIMEOUT_NODE +
>  			((unsigned long)cachep) % REAPTIMEOUT_NODE;
> @@ -2194,7 +2142,6 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags)
>  	else
>  		gfp = GFP_NOWAIT;
>  
> -	setup_node_pointer(cachep);
>  #if DEBUG
>  
>  	/*
> @@ -2451,8 +2398,7 @@ int __kmem_cache_shutdown(struct kmem_cache *cachep)
>  	if (rc)
>  		return rc;
>  
> -	for_each_online_cpu(i)
> -	    kfree(cachep->array[i]);
> +	free_percpu(cachep->cpu_cache);
>  
>  	/* NUMA: free the node structures */
>  	for_each_kmem_cache_node(cachep, i, n) {
> @@ -3700,72 +3646,45 @@ fail:
>  	return -ENOMEM;
>  }
>  
> -struct ccupdate_struct {
> -	struct kmem_cache *cachep;
> -	struct array_cache *new[0];
> -};
> -
> -static void do_ccupdate_local(void *info)
> -{
> -	struct ccupdate_struct *new = info;
> -	struct array_cache *old;
> -
> -	check_irq_off();
> -	old = cpu_cache_get(new->cachep);
> -
> -	new->cachep->array[smp_processor_id()] = new->new[smp_processor_id()];
> -	new->new[smp_processor_id()] = old;
> -}
> -
>  /* Always called with the slab_mutex held */
>  static int __do_tune_cpucache(struct kmem_cache *cachep, int limit,
>  				int batchcount, int shared, gfp_t gfp)
>  {
> -	struct ccupdate_struct *new;
> -	int i;
> +	struct array_cache __percpu *cpu_cache, *prev;
> +	int cpu;
>  
> -	new = kzalloc(sizeof(*new) + nr_cpu_ids * sizeof(struct array_cache *),
> -		      gfp);
> -	if (!new)
> +	cpu_cache = __alloc_kmem_cache_cpus(cachep, limit, batchcount);
> +	if (!cpu_cache)
>  		return -ENOMEM;
>  
> -	for_each_online_cpu(i) {
> -		new->new[i] = alloc_arraycache(cpu_to_mem(i), limit,
> -						batchcount, gfp);
> -		if (!new->new[i]) {
> -			for (i--; i >= 0; i--)
> -				kfree(new->new[i]);
> -			kfree(new);
> -			return -ENOMEM;
> -		}
> -	}
> -	new->cachep = cachep;
> -
> -	on_each_cpu(do_ccupdate_local, (void *)new, 1);
> +	prev = cachep->cpu_cache;
> +	cachep->cpu_cache = cpu_cache;
> +	kick_all_cpus_sync();
>  
>  	check_irq_on();
>  	cachep->batchcount = batchcount;
>  	cachep->limit = limit;
>  	cachep->shared = shared;
>  
> -	for_each_online_cpu(i) {
> +	if (!prev)
> +		goto alloc_node;
> +
> +	for_each_online_cpu(cpu) {
>  		LIST_HEAD(list);
> -		struct array_cache *ccold = new->new[i];
>  		int node;
>  		struct kmem_cache_node *n;
> +		struct array_cache *ac = per_cpu_ptr(prev, cpu);
>  
> -		if (!ccold)
> -			continue;
> -
> -		node = cpu_to_mem(i);
> +		node = cpu_to_mem(cpu);
>  		n = get_node(cachep, node);
>  		spin_lock_irq(&n->list_lock);
> -		free_block(cachep, ccold->entry, ccold->avail, node, &list);
> +		free_block(cachep, ac->entry, ac->avail, node, &list);
>  		spin_unlock_irq(&n->list_lock);
>  		slabs_destroy(cachep, &list);
> -		kfree(ccold);
>  	}
> -	kfree(new);
> +	free_percpu(prev);
> +
> +alloc_node:
>  	return alloc_kmem_cache_node(cachep, gfp);
>  }
>  
> diff --git a/mm/slab.h b/mm/slab.h
> index bd1c54a..5cb4649 100644
> --- a/mm/slab.h
> +++ b/mm/slab.h
> @@ -48,7 +48,6 @@ struct kmem_cache {
>  enum slab_state {
>  	DOWN,			/* No slab functionality yet */
>  	PARTIAL,		/* SLUB: kmem_cache_node available */
> -	PARTIAL_ARRAYCACHE,	/* SLAB: kmalloc size for arraycache available */
>  	PARTIAL_NODE,		/* SLAB: kmalloc size for node struct available */
>  	UP,			/* Slab caches usable but not all extras yet */
>  	FULL			/* Everything is working */
> -- 
> 1.7.9.5
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/

I just encountered a problem on a Lenovo Carbon X1 where it will
suspend but won't resume.  A bisect indicated that this patch
is causing the problem.

997888488ef92da365b870247de773255227ce1f

I imagine the patch author, Joonsoo Kim, might have a better idea
why this is happening than I do.  But if I can provide any information
or run any tests that might be of help, just let me know.

-- 
Jeremiah Mahler
jmmahler@gmail.com
http://github.com/jmahler

^ permalink raw reply	[flat|nested] 43+ messages in thread

> -		/*
> -		 * If the cache that's used by kmalloc(sizeof(kmem_cache_node)) is
> -		 * the second cache, then we need to set up all its node/,
> -		 * otherwise the creation of further caches will BUG().
> -		 */
> -		set_up_node(cachep, SIZE_AC);
> -		if (INDEX_AC == INDEX_NODE)
> -			slab_state = PARTIAL_NODE;
> -		else
> -			slab_state = PARTIAL_ARRAYCACHE;
> +		/* For kmem_cache_node */
> +		set_up_node(cachep, SIZE_NODE);
>  	} else {
> -		/* Remaining boot caches */
> -		cachep->array[smp_processor_id()] =
> -			kmalloc(sizeof(struct arraycache_init), gfp);
> +		int node;
>  
> -		if (slab_state == PARTIAL_ARRAYCACHE) {
> -			set_up_node(cachep, SIZE_NODE);
> -			slab_state = PARTIAL_NODE;
> -		} else {
> -			int node;
> -			for_each_online_node(node) {
> -				cachep->node[node] =
> -				    kmalloc_node(sizeof(struct kmem_cache_node),
> -						gfp, node);
> -				BUG_ON(!cachep->node[node]);
> -				kmem_cache_node_init(cachep->node[node]);
> -			}
> +		for_each_online_node(node) {
> +			cachep->node[node] = kmalloc_node(
> +				sizeof(struct kmem_cache_node), gfp, node);
> +			BUG_ON(!cachep->node[node]);
> +			kmem_cache_node_init(cachep->node[node]);
>  		}
>  	}
> +
>  	cachep->node[numa_mem_id()]->next_reap =
>  			jiffies + REAPTIMEOUT_NODE +
>  			((unsigned long)cachep) % REAPTIMEOUT_NODE;
> @@ -2194,7 +2142,6 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags)
>  	else
>  		gfp = GFP_NOWAIT;
>  
> -	setup_node_pointer(cachep);
>  #if DEBUG
>  
>  	/*
> @@ -2451,8 +2398,7 @@ int __kmem_cache_shutdown(struct kmem_cache *cachep)
>  	if (rc)
>  		return rc;
>  
> -	for_each_online_cpu(i)
> -	    kfree(cachep->array[i]);
> +	free_percpu(cachep->cpu_cache);
>  
>  	/* NUMA: free the node structures */
>  	for_each_kmem_cache_node(cachep, i, n) {
> @@ -3700,72 +3646,45 @@ fail:
>  	return -ENOMEM;
>  }
>  
> -struct ccupdate_struct {
> -	struct kmem_cache *cachep;
> -	struct array_cache *new[0];
> -};
> -
> -static void do_ccupdate_local(void *info)
> -{
> -	struct ccupdate_struct *new = info;
> -	struct array_cache *old;
> -
> -	check_irq_off();
> -	old = cpu_cache_get(new->cachep);
> -
> -	new->cachep->array[smp_processor_id()] = new->new[smp_processor_id()];
> -	new->new[smp_processor_id()] = old;
> -}
> -
>  /* Always called with the slab_mutex held */
>  static int __do_tune_cpucache(struct kmem_cache *cachep, int limit,
>  				int batchcount, int shared, gfp_t gfp)
>  {
> -	struct ccupdate_struct *new;
> -	int i;
> +	struct array_cache __percpu *cpu_cache, *prev;
> +	int cpu;
>  
> -	new = kzalloc(sizeof(*new) + nr_cpu_ids * sizeof(struct array_cache *),
> -		      gfp);
> -	if (!new)
> +	cpu_cache = __alloc_kmem_cache_cpus(cachep, limit, batchcount);
> +	if (!cpu_cache)
>  		return -ENOMEM;
>  
> -	for_each_online_cpu(i) {
> -		new->new[i] = alloc_arraycache(cpu_to_mem(i), limit,
> -						batchcount, gfp);
> -		if (!new->new[i]) {
> -			for (i--; i >= 0; i--)
> -				kfree(new->new[i]);
> -			kfree(new);
> -			return -ENOMEM;
> -		}
> -	}
> -	new->cachep = cachep;
> -
> -	on_each_cpu(do_ccupdate_local, (void *)new, 1);
> +	prev = cachep->cpu_cache;
> +	cachep->cpu_cache = cpu_cache;
> +	kick_all_cpus_sync();
>  
>  	check_irq_on();
>  	cachep->batchcount = batchcount;
>  	cachep->limit = limit;
>  	cachep->shared = shared;
>  
> -	for_each_online_cpu(i) {
> +	if (!prev)
> +		goto alloc_node;
> +
> +	for_each_online_cpu(cpu) {
>  		LIST_HEAD(list);
> -		struct array_cache *ccold = new->new[i];
>  		int node;
>  		struct kmem_cache_node *n;
> +		struct array_cache *ac = per_cpu_ptr(prev, cpu);
>  
> -		if (!ccold)
> -			continue;
> -
> -		node = cpu_to_mem(i);
> +		node = cpu_to_mem(cpu);
>  		n = get_node(cachep, node);
>  		spin_lock_irq(&n->list_lock);
> -		free_block(cachep, ccold->entry, ccold->avail, node, &list);
> +		free_block(cachep, ac->entry, ac->avail, node, &list);
>  		spin_unlock_irq(&n->list_lock);
>  		slabs_destroy(cachep, &list);
> -		kfree(ccold);
>  	}
> -	kfree(new);
> +	free_percpu(prev);
> +
> +alloc_node:
>  	return alloc_kmem_cache_node(cachep, gfp);
>  }
>  
> diff --git a/mm/slab.h b/mm/slab.h
> index bd1c54a..5cb4649 100644
> --- a/mm/slab.h
> +++ b/mm/slab.h
> @@ -48,7 +48,6 @@ struct kmem_cache {
>  enum slab_state {
>  	DOWN,			/* No slab functionality yet */
>  	PARTIAL,		/* SLUB: kmem_cache_node available */
> -	PARTIAL_ARRAYCACHE,	/* SLAB: kmalloc size for arraycache available */
>  	PARTIAL_NODE,		/* SLAB: kmalloc size for node struct available */
>  	UP,			/* Slab caches usable but not all extras yet */
>  	FULL			/* Everything is working */
> -- 
> 1.7.9.5
> 

I just encountered a problem on a Lenovo Carbon X1 where it will
suspend but won't resume.  A bisect indicated that this patch
is causing the problem.

997888488ef92da365b870247de773255227ce1f

I imagine the patch author, Joonsoo Kim, might have a better idea
why this is happening than I do.  But if I can provide any information
or run any tests that might be of help just let me know.

-- 
Jeremiah Mahler
jmmahler@gmail.com
http://github.com/jmahler


^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [REGRESSION] [PATCH 1/3] mm/slab: use percpu allocator for cpu cache
  2014-09-28  6:24   ` Jeremiah Mahler
@ 2014-09-28 16:38     ` Christoph Lameter
  -1 siblings, 0 replies; 43+ messages in thread
From: Christoph Lameter @ 2014-09-28 16:38 UTC (permalink / raw)
  To: Jeremiah Mahler
  Cc: Joonsoo Kim, Andrew Morton, Pekka Enberg, David Rientjes,
	linux-mm, linux-kernel

On Sat, 27 Sep 2014, Jeremiah Mahler wrote:

> I just encountered a problem on a Lenovo Carbon X1 where it will
> suspend but won't resume.  A bisect indicated that this patch
> is causing the problem.

Could you please not quote the whole patch? It took me a while to find
what you were saying.

> 997888488ef92da365b870247de773255227ce1f
>
> I imagine the patch author, Joonsoo Kim, might have a better idea
> why this is happening than I do.  But if I can provide any information
> or run any tests that might be of help just let me know.

Could you provide more details? Any messages when the system is trying to
resume?


^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [REGRESSION] [PATCH 1/3] mm/slab: use percpu allocator for cpu cache
  2014-09-28 16:38     ` Christoph Lameter
  (?)
@ 2014-09-28 17:52     ` Jeremiah Mahler
  -1 siblings, 0 replies; 43+ messages in thread
From: Jeremiah Mahler @ 2014-09-28 17:52 UTC (permalink / raw)
  To: Christoph Lameter; +Cc: linux-kernel

On Sun, Sep 28, 2014 at 11:38:51AM -0500, Christoph Lameter wrote:
> On Sat, 27 Sep 2014, Jeremiah Mahler wrote:
> 
> > I just encountered a problem on a Lenovo Carbon X1 where it will
> > suspend but won't resume.  A bisect indicated that this patch
> > is causing the problem.
> 
> Could you please not quote the whole patch. Took me a while to find what
> you were saying.
> 
Sorry about that.

> > 997888488ef92da365b870247de773255227ce1f
> >
> > I imagine the patch author, Joonsoo Kim, might have a better idea
> > why this is happening than I do.  But if I can provide any information
> > or run any tests that might be of help just let me know.
> 
> Could you provide more details? Any messages when the system is trying to
> resume?
> 

When I press Fn to resume, there is a very brief flicker of the wireless
indicator light, as if it were trying to resume, but then it remains
suspended.  There are no messages on the screen or anything else.

-- 
Jeremiah Mahler
jmmahler@gmail.com
http://github.com/jmahler

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [REGRESSION] [PATCH 1/3] mm/slab: use percpu allocator for cpu cache
  2014-09-28  6:24   ` Jeremiah Mahler
@ 2014-09-29  7:44     ` Joonsoo Kim
  -1 siblings, 0 replies; 43+ messages in thread
From: Joonsoo Kim @ 2014-09-29  7:44 UTC (permalink / raw)
  To: Jeremiah Mahler, Andrew Morton, Christoph Lameter, Pekka Enberg,
	David Rientjes, linux-mm, linux-kernel

On Sat, Sep 27, 2014 at 11:24:49PM -0700, Jeremiah Mahler wrote:
> On Thu, Aug 21, 2014 at 05:11:13PM +0900, Joonsoo Kim wrote:
> > Because of chicken and egg problem, initializaion of SLAB is really
> > complicated. We need to allocate cpu cache through SLAB to make
> > the kmem_cache works, but, before initialization of kmem_cache,
> > allocation through SLAB is impossible.
> > 
> > On the other hand, SLUB does initialization with more simple way. It
> > uses percpu allocator to allocate cpu cache so there is no chicken and
> > egg problem.
> > 
> > So, this patch try to use percpu allocator in SLAB. This simplify
> > initialization step in SLAB so that we could maintain SLAB code more
> > easily.
> > 
> > From my testing, there is no performance difference.
> > 
> > Signed-off-by: Joonsoo Kim <iamjoonsoo.kim@lge.com>
> 
> I just encountered a problem on a Lenovo Carbon X1 where it will
> suspend but won't resume.  A bisect indicated that this patch
> is causing the problem.
> 
> 997888488ef92da365b870247de773255227ce1f
> 
> I imagine the patch author, Joonsoo Kim, might have a better idea
> why this is happening than I do.  But if I can provide any information
> or run any tests that might be of help just let me know.

Hello,

Yeah, there is a bug. The patch below should fix your issue.
Could you test it and report the result?

Thanks for reporting it.
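
To illustrate the problem the patch below addresses: free_block() returns
the dead cpu's cached objects to the slab lists but does not touch the
array_cache's avail count, so the same objects end up accounted for twice.
Here is a minimal user-space sketch of that double counting, using
hypothetical simplified stand-ins (not the kernel's real array_cache or
free_block()):

#include <stdio.h>

#define CACHE_ENTRIES 4

/* Simplified stand-in for struct array_cache. */
struct toy_array_cache {
	unsigned int avail;		/* objects currently cached */
	void *entry[CACHE_ENTRIES];	/* cached object pointers */
};

/* Stand-in for the node's free-list bookkeeping. */
static unsigned int slab_free_objs;

/* Analogue of free_block(): moves objects back to the "slab" only. */
static void toy_free_block(struct toy_array_cache *ac)
{
	slab_free_objs += ac->avail;
	/* Note: ac->avail is deliberately not adjusted here. */
}

int main(void)
{
	struct toy_array_cache cpu_cache = { .avail = 3 };

	/* cpu goes offline: flush its cached objects back to the slab. */
	toy_free_block(&cpu_cache);

	/* Without the fix, the same 3 objects are now counted twice. */
	printf("slab free objects: %u, still marked cached: %u\n",
	       slab_free_objs, cpu_cache.avail);

	/* The fix resets avail after flushing, as cpuup_canceled() now does. */
	cpu_cache.avail = 0;
	printf("after reset, cached: %u\n", cpu_cache.avail);
	return 0;
}

With any C compiler this prints 3 for both counts before the reset, which
is the duplicate accounting described in the commit message below.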

--------->8---------------
From e03ed6cc554e038b86d7b3a72b89d94e9ea808ba Mon Sep 17 00:00:00 2001
From: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Date: Mon, 29 Sep 2014 16:30:43 +0900
Subject: [PATCH] mm/slab: fix cpu on/off handling

When a cpu goes offline, we flush all of its cached objects back to
their slabs. free_block() is used for this purpose, and its role is
just to flush objects from the array_cache to the proper slab. It
doesn't adjust the array_cache's internal fields, so we must manually
reset them to sane values. Without this fix, free objects are tracked
twice, once in the cpu cache and once in the slab, so the system
would be broken.

Reported-by: Jeremiah Mahler <jmmahler@gmail.com>
Signed-off-by: Joonsoo Kim <iamjoonsoo.kim@lge.com>
---
 mm/slab.c |    4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/mm/slab.c b/mm/slab.c
index 1162f0e..ce289b4 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -1102,8 +1102,10 @@ static void cpuup_canceled(long cpu)
 
 		/* cpu is dead; no one can alloc from it. */
 		nc = per_cpu_ptr(cachep->cpu_cache, cpu);
-		if (nc)
+		if (nc) {
 			free_block(cachep, nc->entry, nc->avail, node, &list);
+			nc->avail = 0;
+		}
 
 		if (!cpumask_empty(mask)) {
 			spin_unlock_irq(&n->list_lock);
-- 
1.7.9.5


^ permalink raw reply related	[flat|nested] 43+ messages in thread

* Re: [REGRESSION] [FIXED] [PATCH 1/3] mm/slab: use percpu allocator for cpu cache
  2014-09-29  7:44     ` Joonsoo Kim
  (?)
@ 2014-09-29 17:03     ` Jeremiah Mahler
  2014-10-02  5:30       ` Joonsoo Kim
  -1 siblings, 1 reply; 43+ messages in thread
From: Jeremiah Mahler @ 2014-09-29 17:03 UTC (permalink / raw)
  To: Joonsoo Kim; +Cc: linux-kernel

Joonsoo,

On Mon, Sep 29, 2014 at 04:44:18PM +0900, Joonsoo Kim wrote:
> On Sat, Sep 27, 2014 at 11:24:49PM -0700, Jeremiah Mahler wrote:
> > On Thu, Aug 21, 2014 at 05:11:13PM +0900, Joonsoo Kim wrote:
> > > Because of chicken and egg problem, initializaion of SLAB is really
> > > complicated. We need to allocate cpu cache through SLAB to make
> > > the kmem_cache works, but, before initialization of kmem_cache,
> > > allocation through SLAB is impossible.
> > > 
> > > On the other hand, SLUB does initialization with more simple way. It
> > > uses percpu allocator to allocate cpu cache so there is no chicken and
> > > egg problem.
> > > 
> > > So, this patch try to use percpu allocator in SLAB. This simplify
> > > initialization step in SLAB so that we could maintain SLAB code more
> > > easily.
> > > 
> > > From my testing, there is no performance difference.
> > > 
> > > Signed-off-by: Joonsoo Kim <iamjoonsoo.kim@lge.com>
> > 
> > I just encountered a problem on a Lenovo Carbon X1 where it will
> > suspend but won't resume.  A bisect indicated that this patch
> > is causing the problem.
> > 
> > 997888488ef92da365b870247de773255227ce1f
> > 
> > I imagine the patch author, Joonsoo Kim, might have a better idea
> > why this is happening than I do.  But if I can provide any information
> > or run any tests that might be of help just let me know.
> 
> Hello,
> 
> Yeah, there is a bug. The patch below should fix your issue.
> Could you test it and report the result?
> 
> Thanks for reporting it.
> 
> --------->8---------------
> From e03ed6cc554e038b86d7b3a72b89d94e9ea808ba Mon Sep 17 00:00:00 2001
> From: Joonsoo Kim <iamjoonsoo.kim@lge.com>
> Date: Mon, 29 Sep 2014 16:30:43 +0900
> Subject: [PATCH] mm/slab: fix cpu on/off handling
> 
> When a cpu goes offline, we flush all of its cached objects back to
> their slabs. free_block() is used for this purpose, and its role is
> just to flush objects from the array_cache to the proper slab. It
> doesn't adjust the array_cache's internal fields, so we must manually
> reset them to sane values. Without this fix, free objects are tracked
> twice, once in the cpu cache and once in the slab, so the system
> would be broken.
> 
> Reported-by: Jeremiah Mahler <jmmahler@gmail.com>
> Signed-off-by: Joonsoo Kim <iamjoonsoo.kim@lge.com>
> ---
>  mm/slab.c |    4 +++-
>  1 file changed, 3 insertions(+), 1 deletion(-)
> 
> diff --git a/mm/slab.c b/mm/slab.c
> index 1162f0e..ce289b4 100644
> --- a/mm/slab.c
> +++ b/mm/slab.c
> @@ -1102,8 +1102,10 @@ static void cpuup_canceled(long cpu)
>  
>  		/* cpu is dead; no one can alloc from it. */
>  		nc = per_cpu_ptr(cachep->cpu_cache, cpu);
> -		if (nc)
> +		if (nc) {
>  			free_block(cachep, nc->entry, nc->avail, node, &list);
> +			nc->avail = 0;
> +		}
>  
>  		if (!cpumask_empty(mask)) {
>  			spin_unlock_irq(&n->list_lock);
> -- 
> 1.7.9.5
> 

That fixed the problem.  Thanks!

Tested-by: Jeremiah Mahler <jmmahler@gmail.com>

-- 
Jeremiah Mahler
jmmahler@gmail.com
http://github.com/jmahler

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [REGRESSION] [FIXED] [PATCH 1/3] mm/slab: use percpu allocator for cpu cache
  2014-09-29 17:03     ` [REGRESSION] [FIXED] " Jeremiah Mahler
@ 2014-10-02  5:30       ` Joonsoo Kim
  0 siblings, 0 replies; 43+ messages in thread
From: Joonsoo Kim @ 2014-10-02  5:30 UTC (permalink / raw)
  To: Jeremiah Mahler, linux-kernel

On Mon, Sep 29, 2014 at 10:03:15AM -0700, Jeremiah Mahler wrote:
> Joonsoo,
> 
> On Mon, Sep 29, 2014 at 04:44:18PM +0900, Joonsoo Kim wrote:
> > On Sat, Sep 27, 2014 at 11:24:49PM -0700, Jeremiah Mahler wrote:
> > > On Thu, Aug 21, 2014 at 05:11:13PM +0900, Joonsoo Kim wrote:
> > > > Because of chicken and egg problem, initializaion of SLAB is really
> > > > complicated. We need to allocate cpu cache through SLAB to make
> > > > the kmem_cache works, but, before initialization of kmem_cache,
> > > > allocation through SLAB is impossible.
> > > > 
> > > > On the other hand, SLUB does initialization with more simple way. It
> > > > uses percpu allocator to allocate cpu cache so there is no chicken and
> > > > egg problem.
> > > > 
> > > > So, this patch try to use percpu allocator in SLAB. This simplify
> > > > initialization step in SLAB so that we could maintain SLAB code more
> > > > easily.
> > > > 
> > > > From my testing, there is no performance difference.
> > > > 
> > > > Signed-off-by: Joonsoo Kim <iamjoonsoo.kim@lge.com>
> > > 
> > > I just encountered a problem on a Lenovo Carbon X1 where it will
> > > suspend but won't resume.  A bisect indicated that this patch
> > > is causing the problem.
> > > 
> > > 997888488ef92da365b870247de773255227ce1f
> > > 
> > > I imagine the patch author, Joonsoo Kim, might have a better idea
> > > why this is happening than I do.  But if I can provide any information
> > > or run any tests that might be of help just let me know.
> > 
> > Hello,
> > 
> > Yeah, there is a bug. The patch below should fix your issue.
> > Could you test it and report the result?
> > 
> > Thanks for reporting it.
> > 
> > --------->8---------------
> > From e03ed6cc554e038b86d7b3a72b89d94e9ea808ba Mon Sep 17 00:00:00 2001
> > From: Joonsoo Kim <iamjoonsoo.kim@lge.com>
> > Date: Mon, 29 Sep 2014 16:30:43 +0900
> > Subject: [PATCH] mm/slab: fix cpu on/off handling
> > 
> > When a cpu goes offline, we flush all of its cached objects back to
> > their slabs. free_block() is used for this purpose, and its role is
> > just to flush objects from the array_cache to the proper slab. It
> > doesn't adjust the array_cache's internal fields, so we must manually
> > reset them to sane values. Without this fix, free objects are tracked
> > twice, once in the cpu cache and once in the slab, so the system
> > would be broken.
> > 
> > Reported-by: Jeremiah Mahler <jmmahler@gmail.com>
> > Signed-off-by: Joonsoo Kim <iamjoonsoo.kim@lge.com>
> > ---
> >  mm/slab.c |    4 +++-
> >  1 file changed, 3 insertions(+), 1 deletion(-)
> > 
> > diff --git a/mm/slab.c b/mm/slab.c
> > index 1162f0e..ce289b4 100644
> > --- a/mm/slab.c
> > +++ b/mm/slab.c
> > @@ -1102,8 +1102,10 @@ static void cpuup_canceled(long cpu)
> >  
> >  		/* cpu is dead; no one can alloc from it. */
> >  		nc = per_cpu_ptr(cachep->cpu_cache, cpu);
> > -		if (nc)
> > +		if (nc) {
> >  			free_block(cachep, nc->entry, nc->avail, node, &list);
> > +			nc->avail = 0;
> > +		}
> >  
> >  		if (!cpumask_empty(mask)) {
> >  			spin_unlock_irq(&n->list_lock);
> > -- 
> > 1.7.9.5
> > 
> 
> That fixed the problem.  Thanks!
> 
> Tested-by: Jeremiah Mahler <jmmahler@gmail.com>

Good!

Thanks.

^ permalink raw reply	[flat|nested] 43+ messages in thread

Thread overview: 43+ messages
2014-08-21  8:11 [PATCH 1/3] mm/slab: use percpu allocator for cpu cache Joonsoo Kim
2014-08-21  8:11 ` Joonsoo Kim
2014-08-21  8:11 ` [PATCH 2/3] mm/slab_common: commonize slab merge logic Joonsoo Kim
2014-08-21  8:11   ` Joonsoo Kim
2014-08-21 14:22   ` Christoph Lameter
2014-08-21 14:22     ` Christoph Lameter
2014-08-25  8:26     ` Joonsoo Kim
2014-08-25  8:26       ` Joonsoo Kim
2014-08-25 15:27   ` Christoph Lameter
2014-08-25 15:27     ` Christoph Lameter
2014-08-26  2:23     ` Joonsoo Kim
2014-08-26  2:23       ` Joonsoo Kim
2014-08-26 21:23       ` Christoph Lameter
2014-08-26 21:23         ` Christoph Lameter
2014-08-21  8:11 ` [PATCH 3/3] mm/slab: support slab merge Joonsoo Kim
2014-08-21  8:11   ` Joonsoo Kim
2014-08-25 15:29   ` Christoph Lameter
2014-08-25 15:29     ` Christoph Lameter
2014-08-26  2:26     ` Joonsoo Kim
2014-08-26  2:26       ` Joonsoo Kim
2014-08-21 14:21 ` [PATCH 1/3] mm/slab: use percpu allocator for cpu cache Christoph Lameter
2014-08-21 14:21   ` Christoph Lameter
2014-08-25  8:26   ` Joonsoo Kim
2014-08-25  8:26     ` Joonsoo Kim
2014-08-25 13:13     ` Christoph Lameter
2014-08-25 13:13       ` Christoph Lameter
2014-08-26  2:19       ` Joonsoo Kim
2014-08-26  2:19         ` Joonsoo Kim
2014-08-26 21:22         ` Christoph Lameter
2014-08-26 21:22           ` Christoph Lameter
2014-08-27 23:37 ` Christoph Lameter
2014-08-27 23:37   ` Christoph Lameter
2014-09-01  0:19   ` Joonsoo Kim
2014-09-01  0:19     ` Joonsoo Kim
2014-09-28  6:24 ` [REGRESSION] " Jeremiah Mahler
2014-09-28  6:24   ` Jeremiah Mahler
2014-09-28 16:38   ` Christoph Lameter
2014-09-28 16:38     ` Christoph Lameter
2014-09-28 17:52     ` Jeremiah Mahler
2014-09-29  7:44   ` Joonsoo Kim
2014-09-29  7:44     ` Joonsoo Kim
2014-09-29 17:03     ` [REGRESSION] [FIXED] " Jeremiah Mahler
2014-10-02  5:30       ` Joonsoo Kim
