From: Yu Zhao <yuzhao@google.com> To: Christoph Lameter <cl@linux.com>, Pekka Enberg <penberg@kernel.org>, David Rientjes <rientjes@google.com>, Joonsoo Kim <iamjoonsoo.kim@lge.com>, Andrew Morton <akpm@linux-foundation.org>, "Kirill A . Shutemov" <kirill@shutemov.name>, Tetsuo Handa <penguin-kernel@i-love.sakura.ne.jp> Cc: linux-mm@kvack.org, linux-kernel@vger.kernel.org, Yu Zhao <yuzhao@google.com> Subject: [PATCH 2/3] mm: avoid slub allocation while holding list_lock Date: Wed, 11 Sep 2019 18:29:28 -0600 Message-ID: <20190912002929.78873-2-yuzhao@google.com> (raw) In-Reply-To: <20190912002929.78873-1-yuzhao@google.com> If we are already under list_lock, don't call kmalloc(). Otherwise we will run into a deadlock because kmalloc() also tries to grab the same lock. Instead, statically allocate the bitmap in struct kmem_cache_node. Given that page->objects currently has 15 bits, we bloat the per-node struct by 4K. So we waste some memory, but only when slub debug is on. WARNING: possible recursive locking detected -------------------------------------------- mount-encrypted/4921 is trying to acquire lock: (&(&n->list_lock)->rlock){-.-.}, at: ___slab_alloc+0x104/0x437 but task is already holding lock: (&(&n->list_lock)->rlock){-.-.}, at: __kmem_cache_shutdown+0x81/0x3cb other info that might help us debug this: Possible unsafe locking scenario: CPU0 ---- lock(&(&n->list_lock)->rlock); lock(&(&n->list_lock)->rlock); *** DEADLOCK *** Signed-off-by: Yu Zhao <yuzhao@google.com> --- include/linux/slub_def.h | 4 ++++ mm/slab.h | 1 + mm/slub.c | 44 ++++++++++++++-------------------------- 3 files changed, 20 insertions(+), 29 deletions(-) diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h index d2153789bd9f..719d43574360 100644 --- a/include/linux/slub_def.h +++ b/include/linux/slub_def.h @@ -9,6 +9,10 @@ */ #include <linux/kobject.h> +#define OO_SHIFT 15 +#define OO_MASK ((1 << OO_SHIFT) - 1) +#define MAX_OBJS_PER_PAGE 32767 /* since page.objects is u15 */ + enum 
stat_item { ALLOC_FASTPATH, /* Allocation from cpu slab */ ALLOC_SLOWPATH, /* Allocation by getting a new cpu slab */ diff --git a/mm/slab.h b/mm/slab.h index 9057b8056b07..2d8639835db1 100644 --- a/mm/slab.h +++ b/mm/slab.h @@ -556,6 +556,7 @@ struct kmem_cache_node { atomic_long_t nr_slabs; atomic_long_t total_objects; struct list_head full; + unsigned long object_map[BITS_TO_LONGS(MAX_OBJS_PER_PAGE)]; #endif #endif diff --git a/mm/slub.c b/mm/slub.c index 62053ceb4464..f28072c9f2ce 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -187,10 +187,6 @@ static inline bool kmem_cache_has_cpu_partial(struct kmem_cache *s) */ #define DEBUG_METADATA_FLAGS (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER) -#define OO_SHIFT 15 -#define OO_MASK ((1 << OO_SHIFT) - 1) -#define MAX_OBJS_PER_PAGE 32767 /* since page.objects is u15 */ - /* Internal SLUB flags */ /* Poison object */ #define __OBJECT_POISON ((slab_flags_t __force)0x80000000U) @@ -454,6 +450,8 @@ static void get_map(struct kmem_cache *s, struct page *page, unsigned long *map) void *p; void *addr = page_address(page); + bitmap_zero(map, page->objects); + for (p = page->freelist; p; p = get_freepointer(s, p)) set_bit(slab_index(p, s, addr), map); } @@ -3680,14 +3678,12 @@ static int kmem_cache_open(struct kmem_cache *s, slab_flags_t flags) } static void list_slab_objects(struct kmem_cache *s, struct page *page, - const char *text) + unsigned long *map, const char *text) { #ifdef CONFIG_SLUB_DEBUG void *addr = page_address(page); void *p; - unsigned long *map = bitmap_zalloc(page->objects, GFP_ATOMIC); - if (!map) - return; + slab_err(s, page, text, s->name); slab_lock(page); @@ -3699,8 +3695,8 @@ static void list_slab_objects(struct kmem_cache *s, struct page *page, print_tracking(s, p); } } + slab_unlock(page); - bitmap_free(map); #endif } @@ -3721,7 +3717,7 @@ static void free_partial(struct kmem_cache *s, struct kmem_cache_node *n) remove_partial(n, page); list_add(&page->slab_list, &discard); } else { - 
list_slab_objects(s, page, + list_slab_objects(s, page, n->object_map, "Objects remaining in %s on __kmem_cache_shutdown()"); } } @@ -4397,7 +4393,6 @@ static int validate_slab(struct kmem_cache *s, struct page *page, return 0; /* Now we know that a valid freelist exists */ - bitmap_zero(map, page->objects); get_map(s, page, map); for_each_object(p, s, addr, page->objects) { @@ -4422,7 +4417,7 @@ static void validate_slab_slab(struct kmem_cache *s, struct page *page, } static int validate_slab_node(struct kmem_cache *s, - struct kmem_cache_node *n, unsigned long *map) + struct kmem_cache_node *n) { unsigned long count = 0; struct page *page; @@ -4431,7 +4426,7 @@ static int validate_slab_node(struct kmem_cache *s, spin_lock_irqsave(&n->list_lock, flags); list_for_each_entry(page, &n->partial, slab_list) { - validate_slab_slab(s, page, map); + validate_slab_slab(s, page, n->object_map); count++; } if (count != n->nr_partial) @@ -4442,7 +4437,7 @@ static int validate_slab_node(struct kmem_cache *s, goto out; list_for_each_entry(page, &n->full, slab_list) { - validate_slab_slab(s, page, map); + validate_slab_slab(s, page, n->object_map); count++; } if (count != atomic_long_read(&n->nr_slabs)) @@ -4459,15 +4454,11 @@ static long validate_slab_cache(struct kmem_cache *s) int node; unsigned long count = 0; struct kmem_cache_node *n; - unsigned long *map = bitmap_alloc(oo_objects(s->max), GFP_KERNEL); - - if (!map) - return -ENOMEM; flush_all(s); for_each_kmem_cache_node(s, node, n) - count += validate_slab_node(s, n, map); - bitmap_free(map); + count += validate_slab_node(s, n); + return count; } /* @@ -4603,9 +4594,7 @@ static void process_slab(struct loc_track *t, struct kmem_cache *s, void *addr = page_address(page); void *p; - bitmap_zero(map, page->objects); get_map(s, page, map); - for_each_object(p, s, addr, page->objects) if (!test_bit(slab_index(p, s, addr), map)) add_location(t, s, get_track(s, p, alloc)); @@ -4619,11 +4608,9 @@ static int list_locations(struct 
kmem_cache *s, char *buf, struct loc_track t = { 0, 0, NULL }; int node; struct kmem_cache_node *n; - unsigned long *map = bitmap_alloc(oo_objects(s->max), GFP_KERNEL); - if (!map || !alloc_loc_track(&t, PAGE_SIZE / sizeof(struct location), - GFP_KERNEL)) { - bitmap_free(map); + if (!alloc_loc_track(&t, PAGE_SIZE / sizeof(struct location), + GFP_KERNEL)) { return sprintf(buf, "Out of memory\n"); } /* Push back cpu slabs */ @@ -4638,9 +4625,9 @@ static int list_locations(struct kmem_cache *s, char *buf, spin_lock_irqsave(&n->list_lock, flags); list_for_each_entry(page, &n->partial, slab_list) - process_slab(&t, s, page, alloc, map); + process_slab(&t, s, page, alloc, n->object_map); list_for_each_entry(page, &n->full, slab_list) - process_slab(&t, s, page, alloc, map); + process_slab(&t, s, page, alloc, n->object_map); spin_unlock_irqrestore(&n->list_lock, flags); } @@ -4689,7 +4676,6 @@ static int list_locations(struct kmem_cache *s, char *buf, } free_loc_track(&t); - bitmap_free(map); if (!t.count) len += sprintf(buf, "No data\n"); return len; -- 2.23.0.162.g0b9fbb3734-goog
next prev parent reply index Thread overview: 40+ messages / expand[flat|nested] mbox.gz Atom feed top 2019-09-09 6:10 [PATCH] " Yu Zhao 2019-09-09 16:00 ` Kirill A. Shutemov [not found] ` <e5e25aa3-651d-92b4-ac82-c5011c66a7cb@I-love.SAKURA.ne.jp> 2019-09-09 21:39 ` Yu Zhao [not found] ` <201909100141.x8A1fVdu048305@www262.sakura.ne.jp> 2019-09-10 2:16 ` Yu Zhao 2019-09-10 9:16 ` Kirill A. Shutemov 2019-09-11 14:13 ` Andrew Morton 2019-09-12 0:29 ` [PATCH 1/3] mm: correct mask size for slub page->objects Yu Zhao 2019-09-12 0:29 ` Yu Zhao [this message] 2019-09-12 0:44 ` [PATCH 2/3] mm: avoid slub allocation while holding list_lock Kirill A. Shutemov 2019-09-12 1:31 ` Yu Zhao 2019-09-12 2:31 ` [PATCH v2 1/4] mm: correct mask size for slub page->objects Yu Zhao 2019-09-12 2:31 ` [PATCH v2 2/4] mm: clean up validate_slab() Yu Zhao 2019-09-12 9:46 ` Kirill A. Shutemov 2019-09-12 2:31 ` [PATCH v2 3/4] mm: avoid slub allocation while holding list_lock Yu Zhao 2019-09-12 10:04 ` Kirill A. Shutemov 2019-09-12 2:31 ` [PATCH v2 4/4] mm: lock slub page when listing objects Yu Zhao 2019-09-12 10:06 ` Kirill A. Shutemov 2019-09-12 21:12 ` Yu Zhao 2019-09-13 14:58 ` Christopher Lameter 2019-09-12 9:40 ` [PATCH v2 1/4] mm: correct mask size for slub page->objects Kirill A. Shutemov 2019-09-12 21:11 ` Yu Zhao 2019-09-12 22:03 ` Kirill A. Shutemov 2019-09-14 0:07 ` [PATCH v3 1/2] mm: clean up validate_slab() Yu Zhao 2019-09-14 0:07 ` [PATCH v3 2/2] mm: avoid slub allocation while holding list_lock Yu Zhao 2019-09-16 8:39 ` [PATCH v3 1/2] mm: clean up validate_slab() Kirill A. 
Shutemov 2019-11-08 19:39 ` [PATCH v4 " Yu Zhao 2019-11-08 19:39 ` [PATCH v4 2/2] mm: avoid slub allocation while holding list_lock Yu Zhao 2019-11-09 20:52 ` Christopher Lameter 2019-11-09 23:01 ` Yu Zhao 2019-11-09 23:16 ` Christopher Lameter 2019-11-10 18:47 ` Yu Zhao 2019-11-11 15:47 ` Christopher Lameter 2019-11-11 15:55 ` [FIX] slub: Remove kmalloc under list_lock from list_slab_objects() V2 Christopher Lameter 2019-11-30 23:09 ` Andrew Morton 2019-12-02 15:12 ` Christopher Lameter 2019-12-07 22:03 ` Yu Zhao 2020-01-10 14:11 ` Vlastimil Babka [not found] ` <e0ed44ae-8dae-e8db-9d14-2b09b239af8e@i-love.sakura.ne.jp> 2020-01-13 1:34 ` Christopher Lameter 2019-11-11 18:15 ` [PATCH v4 2/2] mm: avoid slub allocation while holding list_lock Shakeel Butt 2019-09-12 0:29 ` [PATCH 3/3] mm: lock slub page when listing objects Yu Zhao
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=20190912002929.78873-2-yuzhao@google.com \ --to=yuzhao@google.com \ --cc=akpm@linux-foundation.org \ --cc=cl@linux.com \ --cc=iamjoonsoo.kim@lge.com \ --cc=kirill@shutemov.name \ --cc=linux-kernel@vger.kernel.org \ --cc=linux-mm@kvack.org \ --cc=penberg@kernel.org \ --cc=penguin-kernel@i-love.sakura.ne.jp \ --cc=rientjes@google.com \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: link
Linux-mm Archive on lore.kernel.org Archives are clonable: git clone --mirror https://lore.kernel.org/linux-mm/0 linux-mm/git/0.git # If you have public-inbox 1.1+ installed, you may # initialize and index your mirror using the following commands: public-inbox-init -V2 linux-mm linux-mm/ https://lore.kernel.org/linux-mm \ linux-mm@kvack.org public-inbox-index linux-mm Example config snippet for mirrors Newsgroup available over NNTP: nntp://nntp.lore.kernel.org/org.kvack.linux-mm AGPL code for this site: git clone https://public-inbox.org/public-inbox.git