* [RFC PATCH 1/4] mm: introduce a safer interface to check whether a page is managed by SLxB
@ 2012-07-03  3:57 Jiang Liu
  2012-07-03  3:57 ` [RFC PATCH 2/4] mm: make consistent use of PG_slab flag Jiang Liu
                   ` (4 more replies)
  0 siblings, 5 replies; 17+ messages in thread
From: Jiang Liu @ 2012-07-03  3:57 UTC (permalink / raw)
  To: Christoph Lameter, Pekka Enberg, Matt Mackall, Mel Gorman, Yinghai Lu
  Cc: Jiang Liu, Tony Luck, KAMEZAWA Hiroyuki, KOSAKI Motohiro,
	David Rientjes, Minchan Kim, Keping Chen, linux-mm, linux-kernel,
	Jiang Liu

Several subsystems, including memory-failure, swap, sparse and DRBD,
use PageSlab() to check whether a page is managed by SLAB/SLUB/SLOB,
and they treat slab pages differently from pagecache/anonymous pages.

But it's unsafe to use PageSlab() to detect whether a page is managed by
SLUB. SLUB allocates compound pages when the page order is bigger than 0
and only sets PG_slab on the head page. So if a SLUB object is hosted by
a tail page, PageSlab() on that page incorrectly returns false.
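
For illustration (not taken from any existing caller), assume an object
"obj" that happens to sit in the tail page of an order-1 compound slab
page:

        struct page *page = virt_to_page(obj);

        if (PageSlab(page))             /* false: page is a tail page */
                kfree(obj);             /* never reached */

        if (PageSlab(compound_trans_head(page)))        /* safe variant */
                kfree(obj);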

The following code from sparse.c triggers this issue in practice,
causing a failure when removing a hot-added memory device.
        /*
         * Check to see if allocation came from hot-plug-add
         */
        if (PageSlab(usemap_page)) {
                kfree(usemap);
                if (memmap)
                        __kfree_section_memmap(memmap, PAGES_PER_SECTION);
                return;
        }

So introduce a macro, shown below, that is safe for transparent huge
pages and compound pages and checks whether a page is managed by the
SLAB/SLUB/SLOB allocator.

#define page_managed_by_slab(page)     (!!PageSlab(compound_trans_head(page)))

Signed-off-by: Jiang Liu <liuj97@gmail.com>
---
 arch/arm/mm/init.c             |    3 ++-
 arch/ia64/kernel/mca_drv.c     |    2 +-
 arch/unicore32/mm/init.c       |    3 ++-
 crypto/scatterwalk.c           |    2 +-
 drivers/ata/libata-sff.c       |    3 ++-
 drivers/block/drbd/drbd_main.c |    3 ++-
 fs/proc/page.c                 |    4 ++--
 include/linux/slab.h           |    7 +++++++
 mm/memory-failure.c            |    6 +++---
 mm/sparse.c                    |    4 +---
 10 files changed, 23 insertions(+), 14 deletions(-)

diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c
index f54d592..73ff340 100644
--- a/arch/arm/mm/init.c
+++ b/arch/arm/mm/init.c
@@ -18,6 +18,7 @@
 #include <linux/initrd.h>
 #include <linux/of_fdt.h>
 #include <linux/highmem.h>
+#include <linux/huge_mm.h>
 #include <linux/gfp.h>
 #include <linux/memblock.h>
 #include <linux/dma-contiguous.h>
@@ -116,7 +117,7 @@ void show_mem(unsigned int filter)
 				reserved++;
 			else if (PageSwapCache(page))
 				cached++;
-			else if (PageSlab(page))
+			else if (page_managed_by_slab(page))
 				slab++;
 			else if (!page_count(page))
 				free++;
diff --git a/arch/ia64/kernel/mca_drv.c b/arch/ia64/kernel/mca_drv.c
index 1c2e894..4415bb6 100644
--- a/arch/ia64/kernel/mca_drv.c
+++ b/arch/ia64/kernel/mca_drv.c
@@ -136,7 +136,7 @@ mca_page_isolate(unsigned long paddr)
 		return ISOLATE_NG;
 
 	/* kick pages having attribute 'SLAB' or 'Reserved' */
-	if (PageSlab(p) || PageReserved(p))
+	if (page_managed_by_slab(p) || PageReserved(p))
 		return ISOLATE_NG;
 
 	/* add attribute 'Reserved' and register the page */
diff --git a/arch/unicore32/mm/init.c b/arch/unicore32/mm/init.c
index de186bd..829a0d9 100644
--- a/arch/unicore32/mm/init.c
+++ b/arch/unicore32/mm/init.c
@@ -21,6 +21,7 @@
 #include <linux/sort.h>
 #include <linux/dma-mapping.h>
 #include <linux/export.h>
+#include <linux/huge_mm.h>
 
 #include <asm/sections.h>
 #include <asm/setup.h>
@@ -83,7 +84,7 @@ void show_mem(unsigned int filter)
 				reserved++;
 			else if (PageSwapCache(page))
 				cached++;
-			else if (PageSlab(page))
+			else if (page_managed_by_slab(page))
 				slab++;
 			else if (!page_count(page))
 				free++;
diff --git a/crypto/scatterwalk.c b/crypto/scatterwalk.c
index 7281b8a..a20e019 100644
--- a/crypto/scatterwalk.c
+++ b/crypto/scatterwalk.c
@@ -54,7 +54,7 @@ static void scatterwalk_pagedone(struct scatter_walk *walk, int out,
 		struct page *page;
 
 		page = sg_page(walk->sg) + ((walk->offset - 1) >> PAGE_SHIFT);
-		if (!PageSlab(page))
+		if (!page_managed_by_slab(page))
 			flush_dcache_page(page);
 	}
 
diff --git a/drivers/ata/libata-sff.c b/drivers/ata/libata-sff.c
index d8af325..1ab8378 100644
--- a/drivers/ata/libata-sff.c
+++ b/drivers/ata/libata-sff.c
@@ -38,6 +38,7 @@
 #include <linux/module.h>
 #include <linux/libata.h>
 #include <linux/highmem.h>
+#include <linux/huge_mm.h>
 
 #include "libata.h"
 
@@ -734,7 +735,7 @@ static void ata_pio_sector(struct ata_queued_cmd *qc)
 				       do_write);
 	}
 
-	if (!do_write && !PageSlab(page))
+	if (!do_write && !page_managed_by_slab(page))
 		flush_dcache_page(page);
 
 	qc->curbytes += qc->sect_size;
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 920ede2..de5b395 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -2734,7 +2734,8 @@ static int _drbd_send_page(struct drbd_conf *mdev, struct page *page,
 	 * put_page(); and would cause either a VM_BUG directly, or
 	 * __page_cache_release a page that would actually still be referenced
 	 * by someone, leading to some obscure delayed Oops somewhere else. */
-	if (disable_sendpage || (page_count(page) < 1) || PageSlab(page))
+	if (disable_sendpage || (page_count(page) < 1) ||
+	    page_managed_by_slab(page))
 		return _drbd_no_send_page(mdev, page, offset, size, msg_flags);
 
 	msg_flags |= MSG_NOSIGNAL;
diff --git a/fs/proc/page.c b/fs/proc/page.c
index 7fcd0d6..ae42dc7 100644
--- a/fs/proc/page.c
+++ b/fs/proc/page.c
@@ -40,7 +40,7 @@ static ssize_t kpagecount_read(struct file *file, char __user *buf,
 			ppage = pfn_to_page(pfn);
 		else
 			ppage = NULL;
-		if (!ppage || PageSlab(ppage))
+		if (!ppage || page_managed_by_slab(ppage))
 			pcount = 0;
 		else
 			pcount = page_mapcount(ppage);
@@ -98,7 +98,7 @@ u64 stable_page_flags(struct page *page)
 	 * Note that page->_mapcount is overloaded in SLOB/SLUB/SLQB, so the
 	 * simple test in page_mapped() is not enough.
 	 */
-	if (!PageSlab(page) && page_mapped(page))
+	if (!page_managed_by_slab(page) && page_mapped(page))
 		u |= 1 << KPF_MMAP;
 	if (PageAnon(page))
 		u |= 1 << KPF_ANON;
diff --git a/include/linux/slab.h b/include/linux/slab.h
index 67d5d94..bb26fab 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -364,4 +364,11 @@ static inline void *kzalloc_node(size_t size, gfp_t flags, int node)
 
 void __init kmem_cache_init_late(void);
 
+/*
+ * Check whether a page is allocated/managed by SLAB/SLUB/SLOB allocator.
+ * Defined as macro instead of function to avoid header file pollution.
+ */
+#define page_managed_by_slab(page)	(!!PageSlab(compound_trans_head(page)))
+#define mem_managed_by_slab(addr)	page_managed_by_slab(virt_to_page(addr))
+
 #endif	/* _LINUX_SLAB_H */
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index ab1e714..684e7f7 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -88,7 +88,7 @@ static int hwpoison_filter_dev(struct page *p)
 	/*
 	 * page_mapping() does not accept slab pages.
 	 */
-	if (PageSlab(p))
+	if (page_managed_by_slab(p))
 		return -EINVAL;
 
 	mapping = page_mapping(p);
@@ -233,7 +233,7 @@ static int kill_proc(struct task_struct *t, unsigned long addr, int trapno,
  */
 void shake_page(struct page *p, int access)
 {
-	if (!PageSlab(p)) {
+	if (!page_managed_by_slab(p)) {
 		lru_add_drain_all();
 		if (PageLRU(p))
 			return;
@@ -862,7 +862,7 @@ static int hwpoison_user_mappings(struct page *p, unsigned long pfn,
 	struct page *hpage = compound_head(p);
 	struct page *ppage;
 
-	if (PageReserved(p) || PageSlab(p))
+	if (PageReserved(p) || page_managed_by_slab(p))
 		return SWAP_SUCCESS;
 
 	/*
diff --git a/mm/sparse.c b/mm/sparse.c
index 6a4bf91..32a908b 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -688,17 +688,15 @@ static void free_map_bootmem(struct page *page, unsigned long nr_pages)
 
 static void free_section_usemap(struct page *memmap, unsigned long *usemap)
 {
-	struct page *usemap_page;
 	unsigned long nr_pages;
 
 	if (!usemap)
 		return;
 
-	usemap_page = virt_to_page(usemap);
 	/*
 	 * Check to see if allocation came from hot-plug-add
 	 */
-	if (PageSlab(usemap_page)) {
+	if (mem_managed_by_slab(usemap)) {
 		kfree(usemap);
 		if (memmap)
 			__kfree_section_memmap(memmap, PAGES_PER_SECTION);
-- 
1.7.1




* [RFC PATCH 2/4] mm: make consistent use of PG_slab flag
  2012-07-03  3:57 [RFC PATCH 1/4] mm: introduce a safer interface to check whether a page is managed by SLxB Jiang Liu
@ 2012-07-03  3:57 ` Jiang Liu
  2012-07-05 14:47   ` Christoph Lameter
  2012-07-03  3:57 ` [RFC PATCH 3/4] SLAB: minor code cleanup Jiang Liu
                   ` (3 subsequent siblings)
  4 siblings, 1 reply; 17+ messages in thread
From: Jiang Liu @ 2012-07-03  3:57 UTC (permalink / raw)
  To: Christoph Lameter, Pekka Enberg, Matt Mackall, Mel Gorman, Yinghai Lu
  Cc: Jiang Liu, Tony Luck, KAMEZAWA Hiroyuki, KOSAKI Motohiro,
	David Rientjes, Minchan Kim, Keping Chen, linux-mm, linux-kernel,
	Jiang Liu

Currently there is some inconsistency in the usage of the PG_slab flag.
The SLAB/SLUB/SLOB allocators use PG_slab to mark whether a (compound)
page contains managed objects, but other subsystems use PG_slab to detect
whether a (compound) page is allocated/managed by SLAB/SLUB/SLOB.

This is fine with the SLAB allocator because all pages allocated by SLAB
are used to host SLAB objects and thus have the PG_slab flag set. But it
may run into trouble with the SLUB/SLOB allocators. If the requested
object is big enough, SLUB/SLOB directly depends on the page allocator
to fulfill object allocation/release requests. To distinguish whether a
(compound) page hosts SLUB/SLOB objects, SLUB/SLOB only sets the PG_slab
flag on small pages hosting such objects. So the PG_slab flag won't be
set for large pages allocated/managed by SLUB/SLOB.

This patch splits the traditional PG_slab flag into two flags:
PG_slab:	marks whether a (compound) page is allocated/managed by
                SLAB/SLUB/SLOB.
PG_slabobject:	marks whether a (compound) page hosts SLUB/SLOB objects.

The PG_slabobject flag won't be set on pages allocated by the SLAB
allocator because there it would be redundant.
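
A sketch of the intended usage after this change (illustrative only, not
taken from the patch itself):

        struct page *head = compound_trans_head(page);
        bool from_slab = PageSlab(head);       /* allocated by SLAB/SLUB/SLOB */
        bool slab_objs = PageSlabObject(head); /* hosts SLUB/SLOB-managed objects */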

Signed-off-by: Jiang Liu <liuj97@gmail.com>
---
 include/linux/page-flags.h |    4 +++-
 include/linux/slub_def.h   |    3 +++
 mm/slob.c                  |   21 +++++++++++++++++----
 mm/slub.c                  |   22 ++++++++++++++--------
 4 files changed, 37 insertions(+), 13 deletions(-)

diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index c88d2a9..5fcf0b8 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -123,8 +123,9 @@ enum pageflags {
 	PG_pinned = PG_owner_priv_1,
 	PG_savepinned = PG_dirty,
 
-	/* SLOB */
+	/* SLOB & SLUB */
 	PG_slob_free = PG_private,
+	PG_slab_object = PG_private_2,
 };
 
 #ifndef __GENERATING_BOUNDS_H
@@ -208,6 +209,7 @@ PAGEFLAG(SavePinned, savepinned);			/* Xen */
 PAGEFLAG(Reserved, reserved) __CLEARPAGEFLAG(Reserved, reserved)
 PAGEFLAG(SwapBacked, swapbacked) __CLEARPAGEFLAG(SwapBacked, swapbacked)
 
+__PAGEFLAG(SlabObject, slab_object)
 __PAGEFLAG(SlobFree, slob_free)
 
 /*
diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
index c2f8c8b..e357c8d 100644
--- a/include/linux/slub_def.h
+++ b/include/linux/slub_def.h
@@ -11,6 +11,7 @@
 #include <linux/bug.h>
 #include <linux/workqueue.h>
 #include <linux/kobject.h>
+#include <linux/page-flags.h>
 
 #include <linux/kmemleak.h>
 
@@ -224,6 +225,8 @@ static __always_inline void *
 kmalloc_order(size_t size, gfp_t flags, unsigned int order)
 {
 	void *ret = (void *) __get_free_pages(flags | __GFP_COMP, order);
+	if (ret)
+		__SetPageSlab(virt_to_page(ret));
 	kmemleak_alloc(ret, size, 1, flags);
 	return ret;
 }
diff --git a/mm/slob.c b/mm/slob.c
index 8105be4..2c1fa9c 100644
--- a/mm/slob.c
+++ b/mm/slob.c
@@ -135,17 +135,17 @@ static LIST_HEAD(free_slob_large);
  */
 static inline int is_slob_page(struct slob_page *sp)
 {
-	return PageSlab((struct page *)sp);
+	return PageSlabObject((struct page *)sp);
 }
 
 static inline void set_slob_page(struct slob_page *sp)
 {
-	__SetPageSlab((struct page *)sp);
+	__SetPageSlabObject((struct page *)sp);
 }
 
 static inline void clear_slob_page(struct slob_page *sp)
 {
-	__ClearPageSlab((struct page *)sp);
+	__ClearPageSlabObject((struct page *)sp);
 }
 
 static inline struct slob_page *slob_page(const void *addr)
@@ -242,7 +242,7 @@ static int slob_last(slob_t *s)
 
 static void *slob_new_pages(gfp_t gfp, int order, int node)
 {
-	void *page;
+	struct page *page, *pos;
 
 #ifdef CONFIG_NUMA
 	if (node != -1)
@@ -254,11 +254,24 @@ static void *slob_new_pages(gfp_t gfp, int order, int node)
 	if (!page)
 		return NULL;
 
+	/* Only set PG_slab flag on head page in case of compound page */
+	if (gfp & __GFP_COMP)
+		order = 0;
+	for (pos = page + (1 << order) - 1; pos >= page; pos--)
+		__SetPageSlab(pos);
+
 	return page_address(page);
 }
 
 static void slob_free_pages(void *b, int order)
 {
+	struct page *pos, *end;
+
+	pos = virt_to_page(b);
+	end = pos + (PageCompound(pos) ? 1 : 1 << order);
+	for (; pos < end; pos++)
+		__ClearPageSlab(pos);
+
 	if (current->reclaim_state)
 		current->reclaim_state->reclaimed_slab += 1 << order;
 	free_pages((unsigned long)b, order);
diff --git a/mm/slub.c b/mm/slub.c
index 8c691fa..9dc6524 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -840,7 +840,7 @@ static int check_slab(struct kmem_cache *s, struct page *page)
 
 	VM_BUG_ON(!irqs_disabled());
 
-	if (!PageSlab(page)) {
+	if (!PageSlabObject(page)) {
 		slab_err(s, page, "Not a valid slab page");
 		return 0;
 	}
@@ -1069,7 +1069,7 @@ static noinline int alloc_debug_processing(struct kmem_cache *s, struct page *pa
 	return 1;
 
 bad:
-	if (PageSlab(page)) {
+	if (PageSlabObject(page)) {
 		/*
 		 * If this is a slab page then lets do the best we can
 		 * to avoid issues in the future. Marking all objects
@@ -1108,7 +1108,7 @@ static noinline int free_debug_processing(struct kmem_cache *s,
 		goto out;
 
 	if (unlikely(s != page->slab)) {
-		if (!PageSlab(page)) {
+		if (!PageSlabObject(page)) {
 			slab_err(s, page, "Attempt to free object(0x%p) "
 				"outside of slab", object);
 		} else if (!page->slab) {
@@ -1370,6 +1370,7 @@ static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
 	inc_slabs_node(s, page_to_nid(page), page->objects);
 	page->slab = s;
 	__SetPageSlab(page);
+	__SetPageSlabObject(page);
 
 	start = page_address(page);
 
@@ -1413,6 +1414,7 @@ static void __free_slab(struct kmem_cache *s, struct page *page)
 		NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE,
 		-pages);
 
+	__ClearPageSlabObject(page);
 	__ClearPageSlab(page);
 	reset_page_mapcount(page);
 	if (current->reclaim_state)
@@ -3369,8 +3371,10 @@ static void *kmalloc_large_node(size_t size, gfp_t flags, int node)
 
 	flags |= __GFP_COMP | __GFP_NOTRACK;
 	page = alloc_pages_node(node, flags, get_order(size));
-	if (page)
+	if (page) {
+		__SetPageSlab(page);
 		ptr = page_address(page);
+	}
 
 	kmemleak_alloc(ptr, size, 1, flags);
 	return ptr;
@@ -3414,7 +3418,7 @@ size_t ksize(const void *object)
 
 	page = virt_to_head_page(object);
 
-	if (unlikely(!PageSlab(page))) {
+	if (unlikely(!PageSlabObject(page))) {
 		WARN_ON(!PageCompound(page));
 		return PAGE_SIZE << compound_order(page);
 	}
@@ -3437,7 +3441,7 @@ bool verify_mem_not_deleted(const void *x)
 	local_irq_save(flags);
 
 	page = virt_to_head_page(x);
-	if (unlikely(!PageSlab(page))) {
+	if (unlikely(!PageSlabObject(page))) {
 		/* maybe it was from stack? */
 		rv = true;
 		goto out_unlock;
@@ -3470,9 +3474,10 @@ void kfree(const void *x)
 		return;
 
 	page = virt_to_head_page(x);
-	if (unlikely(!PageSlab(page))) {
+	if (unlikely(!PageSlabObject(page))) {
 		BUG_ON(!PageCompound(page));
 		kmemleak_free(x);
+		__ClearPageSlab(page);
 		put_page(page);
 		return;
 	}
@@ -3715,7 +3720,8 @@ void __init kmem_cache_init(void)
 	/* Allocate two kmem_caches from the page allocator */
 	kmalloc_size = ALIGN(kmem_size, cache_line_size());
 	order = get_order(2 * kmalloc_size);
-	kmem_cache = (void *)__get_free_pages(GFP_NOWAIT, order);
+	kmem_cache = (void *)__get_free_pages(GFP_NOWAIT | __GFP_COMP, order);
+	__SetPageSlab(virt_to_page(kmem_cache));
 
 	/*
 	 * Must first have the slab cache available for the allocations of the
-- 
1.7.1




* [RFC PATCH 3/4] SLAB: minor code cleanup
  2012-07-03  3:57 [RFC PATCH 1/4] mm: introduce a safer interface to check whether a page is managed by SLxB Jiang Liu
  2012-07-03  3:57 ` [RFC PATCH 2/4] mm: make consistent use of PG_slab flag Jiang Liu
@ 2012-07-03  3:57 ` Jiang Liu
  2012-07-03  3:57 ` [RFC PATCH 4/4] mm: change slob's struct page definition to accommodate struct page changes Jiang Liu
                   ` (2 subsequent siblings)
  4 siblings, 0 replies; 17+ messages in thread
From: Jiang Liu @ 2012-07-03  3:57 UTC (permalink / raw)
  To: Christoph Lameter, Pekka Enberg, Matt Mackall, Mel Gorman, Yinghai Lu
  Cc: Jiang Liu, Tony Luck, KAMEZAWA Hiroyuki, KOSAKI Motohiro,
	David Rientjes, Minchan Kim, Keping Chen, linux-mm, linux-kernel,
	Jiang Liu

Minor code cleanup for the SLAB allocator.

Signed-off-by: Jiang Liu <liuj97@gmail.com>
---
 mm/slab.c |   26 ++++++--------------------
 1 files changed, 6 insertions(+), 20 deletions(-)

diff --git a/mm/slab.c b/mm/slab.c
index e901a36..cd163d1 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -499,34 +499,23 @@ static inline void page_set_cache(struct page *page, struct kmem_cache *cache)
 	page->lru.next = (struct list_head *)cache;
 }
 
-static inline struct kmem_cache *page_get_cache(struct page *page)
-{
-	page = compound_head(page);
-	BUG_ON(!PageSlab(page));
-	return (struct kmem_cache *)page->lru.next;
-}
-
 static inline void page_set_slab(struct page *page, struct slab *slab)
 {
 	page->lru.prev = (struct list_head *)slab;
 }
 
-static inline struct slab *page_get_slab(struct page *page)
-{
-	BUG_ON(!PageSlab(page));
-	return (struct slab *)page->lru.prev;
-}
-
 static inline struct kmem_cache *virt_to_cache(const void *obj)
 {
 	struct page *page = virt_to_head_page(obj);
-	return page_get_cache(page);
+	BUG_ON(!PageSlab(page));
+	return (struct kmem_cache *)page->lru.next;
 }
 
 static inline struct slab *virt_to_slab(const void *obj)
 {
 	struct page *page = virt_to_head_page(obj);
-	return page_get_slab(page);
+	BUG_ON(!PageSlab(page));
+	return (struct slab *)page->lru.prev;
 }
 
 static inline void *index_to_obj(struct kmem_cache *cache, struct slab *slab,
@@ -3047,7 +3036,6 @@ static inline void verify_redzone_free(struct kmem_cache *cache, void *obj)
 static void *cache_free_debugcheck(struct kmem_cache *cachep, void *objp,
 				   void *caller)
 {
-	struct page *page;
 	unsigned int objnr;
 	struct slab *slabp;
 
@@ -3055,9 +3043,7 @@ static void *cache_free_debugcheck(struct kmem_cache *cachep, void *objp,
 
 	objp -= obj_offset(cachep);
 	kfree_debugcheck(objp);
-	page = virt_to_head_page(objp);
-
-	slabp = page_get_slab(page);
+	slabp = virt_to_slab(objp);
 
 	if (cachep->flags & SLAB_RED_ZONE) {
 		verify_redzone_free(cachep, objp);
@@ -3261,7 +3247,7 @@ static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep,
 		struct slab *slabp;
 		unsigned objnr;
 
-		slabp = page_get_slab(virt_to_head_page(objp));
+		slabp = virt_to_slab(objp);
 		objnr = (unsigned)(objp - slabp->s_mem) / cachep->buffer_size;
 		slab_bufctl(slabp)[objnr] = BUFCTL_ACTIVE;
 	}
-- 
1.7.1




* [RFC PATCH 4/4] mm: change slob's struct page definition to accommodate struct page changes
  2012-07-03  3:57 [RFC PATCH 1/4] mm: introduce a safer interface to check whether a page is managed by SLxB Jiang Liu
  2012-07-03  3:57 ` [RFC PATCH 2/4] mm: make consistent use of PG_slab flag Jiang Liu
  2012-07-03  3:57 ` [RFC PATCH 3/4] SLAB: minor code cleanup Jiang Liu
@ 2012-07-03  3:57 ` Jiang Liu
  2012-07-05 14:45 ` [RFC PATCH 1/4] mm: introduce a safer interface to check whether a page is managed by SLxB Christoph Lameter
  2012-09-04  9:18 ` Wen Congyang
  4 siblings, 0 replies; 17+ messages in thread
From: Jiang Liu @ 2012-07-03  3:57 UTC (permalink / raw)
  To: Christoph Lameter, Pekka Enberg, Matt Mackall, Mel Gorman, Yinghai Lu
  Cc: Jiang Liu, Tony Luck, KAMEZAWA Hiroyuki, KOSAKI Motohiro,
	David Rientjes, Minchan Kim, Keping Chen, linux-mm, linux-kernel,
	Jiang Liu

Changeset fc9bb8c768abe7ae10861c3510e01a95f98d5933 "mm: Rearrange struct page"
rearranges fields in struct page, so change slob's "struct page" definition
to accommodate the changes.

Signed-off-by: Jiang Liu <liuj97@gmail.com>
---
 mm/slob.c |    6 +++---
 1 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/mm/slob.c b/mm/slob.c
index 2c1fa9c..e5515bb 100644
--- a/mm/slob.c
+++ b/mm/slob.c
@@ -100,10 +100,10 @@ struct slob_page {
 	union {
 		struct {
 			unsigned long flags;	/* mandatory */
-			atomic_t _count;	/* mandatory */
-			slobidx_t units;	/* free units left in page */
-			unsigned long pad[2];
+			unsigned long pad1;
 			slob_t *free;		/* first free slob_t in page */
+			slobidx_t units;	/* free units left in page */
+			atomic_t _count;	/* mandatory */
 			struct list_head list;	/* linked list of free pages */
 		};
 		struct page page;
-- 
1.7.1




* Re: [RFC PATCH 1/4] mm: introduce a safer interface to check whether a page is managed by SLxB
  2012-07-03  3:57 [RFC PATCH 1/4] mm: introduce a safer interface to check whether a page is managed by SLxB Jiang Liu
                   ` (2 preceding siblings ...)
  2012-07-03  3:57 ` [RFC PATCH 4/4] mm: change slob's struct page definition to accommodate struct page changes Jiang Liu
@ 2012-07-05 14:45 ` Christoph Lameter
  2012-07-05 15:55   ` Jiang Liu
  2012-09-04  9:18 ` Wen Congyang
  4 siblings, 1 reply; 17+ messages in thread
From: Christoph Lameter @ 2012-07-05 14:45 UTC (permalink / raw)
  To: Jiang Liu
  Cc: Pekka Enberg, Matt Mackall, Mel Gorman, Yinghai Lu, Tony Luck,
	KAMEZAWA Hiroyuki, KOSAKI Motohiro, David Rientjes, Minchan Kim,
	Keping Chen, linux-mm, linux-kernel, Jiang Liu

On Tue, 3 Jul 2012, Jiang Liu wrote:

> Several subsystems, including memory-failure, swap, sparse, DRBD etc,
> use PageSlab() to check whether a page is managed by SLAB/SLUB/SLOB.
> And they treat slab pages differently from pagecache/anonymous pages.
>
> But it's unsafe to use PageSlab() to detect whether a page is managed by
> SLUB. SLUB allocates compound pages when page order is bigger than 0 and
> only sets PG_slab on head pages. So if a SLUB object is hosted by a tail
> page, PageSlab() will incorrectly return false for that object.

This is not an issue only with slab allocators. Multiple kernel systems
may do a compound order allocation for some metadata or other and
will not mark the page in any special way. What makes the slab allocators
so special that you need to do this?

> So introduce a transparent huge page and compound page safe macro as below
> to check whether a page is managed by SLAB/SLUB/SLOB allocator.

Why? Any page is unsafe to touch unless you can account for all references to
the page.



* Re: [RFC PATCH 2/4] mm: make consistent use of PG_slab flag
  2012-07-03  3:57 ` [RFC PATCH 2/4] mm: make consistent use of PG_slab flag Jiang Liu
@ 2012-07-05 14:47   ` Christoph Lameter
  2012-07-05 16:15     ` Jiang Liu
  0 siblings, 1 reply; 17+ messages in thread
From: Christoph Lameter @ 2012-07-05 14:47 UTC (permalink / raw)
  To: Jiang Liu
  Cc: Pekka Enberg, Matt Mackall, Mel Gorman, Yinghai Lu, Tony Luck,
	KAMEZAWA Hiroyuki, KOSAKI Motohiro, David Rientjes, Minchan Kim,
	Keping Chen, linux-mm, linux-kernel, Jiang Liu

On Tue, 3 Jul 2012, Jiang Liu wrote:

> PG_slabobject:	mark whether a (compound) page hosts SLUB/SLOB objects.

Any subsystem may allocate a compound page to store metadata.

The compound pages used by SLOB and SLUB are not managed in any way but
the calls to kfree and kmalloc are converted to calls to the page
allocator. There is no "management" by the slab allocators for these
cases and it's inaccurate to say that these are SLUB/SLOB objects since the
allocators never deal with these objects.


* Re: [RFC PATCH 1/4] mm: introduce a safer interface to check whether a page is managed by SLxB
  2012-07-05 14:45 ` [RFC PATCH 1/4] mm: introduce a safer interface to check whether a page is managed by SLxB Christoph Lameter
@ 2012-07-05 15:55   ` Jiang Liu
  2012-07-05 17:36     ` Christoph Lameter
  0 siblings, 1 reply; 17+ messages in thread
From: Jiang Liu @ 2012-07-05 15:55 UTC (permalink / raw)
  To: Christoph Lameter
  Cc: Jiang Liu, Pekka Enberg, Matt Mackall, Mel Gorman, Yinghai Lu,
	Tony Luck, KAMEZAWA Hiroyuki, KOSAKI Motohiro, David Rientjes,
	Minchan Kim, Keping Chen, linux-mm, linux-kernel

On 07/05/2012 10:45 PM, Christoph Lameter wrote:
> On Tue, 3 Jul 2012, Jiang Liu wrote:
> 
>> Several subsystems, including memory-failure, swap, sparse, DRBD etc,
>> use PageSlab() to check whether a page is managed by SLAB/SLUB/SLOB.
>> And they treat slab pages differently from pagecache/anonymous pages.
>>
>> But it's unsafe to use PageSlab() to detect whether a page is managed by
>> SLUB. SLUB allocates compound pages when page order is bigger than 0 and
>> only sets PG_slab on head pages. So if a SLUB object is hosted by a tail
>> page, PageSlab() will incorrectly return false for that object.
> 
> This is not an issue only with slab allocators. Multiple kernel systems
> may do a compound order allocation for some or the other metadata and
> will not mark the page in any special way. What makes the slab allocators
> so special that you need to do this?
Hi Chris,
	I think here PageSlab() is used to check whether a page hosting a memory
object is managed/allocated by the slab allocator. If it's allocated by the
slab allocator, we can use kfree() to free the object.
	For the SLUB allocator, if the memory needed to host an object is bigger
than two pages, it directly depends on the page allocator to fulfill the
request. But SLUB may allocate a two-page compound page and only set PG_slab
on the head page. So if a memory object is hosted by the second page, we
reach the wrong conclusion that the object wasn't allocated by slab.
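	As a minimal illustration (not a quote from the kernel source), the kind
of call site this is about is:

	if (page_managed_by_slab(virt_to_page(obj)))
		kfree(obj);	/* object came from the slab allocator */
	/* otherwise the object came from bootmem and is freed elsewhere */
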
	We encountered this issue when trying to implement physical memory
hot-removal. After removing a memory device, we need to tear down the memory
management structures of the removed memory device. Those structures may be
allocated by the bootmem allocator at boot time, or by the slab allocator at
runtime when hot-adding a memory device. So in our case, PageSlab() is used
to distinguish between the bootmem allocator and the slab allocator. With
SLUB, some pages will never be released due to the issue described above.
	Thanks!
	Gerry


* Re: [RFC PATCH 2/4] mm: make consistent use of PG_slab flag
  2012-07-05 14:47   ` Christoph Lameter
@ 2012-07-05 16:15     ` Jiang Liu
  2012-07-05 17:37       ` Christoph Lameter
  0 siblings, 1 reply; 17+ messages in thread
From: Jiang Liu @ 2012-07-05 16:15 UTC (permalink / raw)
  To: Christoph Lameter
  Cc: Jiang Liu, Pekka Enberg, Matt Mackall, Mel Gorman, Yinghai Lu,
	Tony Luck, KAMEZAWA Hiroyuki, KOSAKI Motohiro, David Rientjes,
	Minchan Kim, Keping Chen, linux-mm, linux-kernel

On 07/05/2012 10:47 PM, Christoph Lameter wrote:
> On Tue, 3 Jul 2012, Jiang Liu wrote:
> 
>> PG_slabobject:	mark whether a (compound) page hosts SLUB/SLOB objects.
> 
> Any subsystem may allocate a compound page to store metadata.
> 
> The compound pages used by SLOB and SLUB are not managed in any way but
> the calls to kfree and kmalloc are converted to calls to the page
> allocator. There is no "management" by the slab allocators for these
> cases and its inaccurate to say that these are SLUB/SLOB objects since the
> allocators never deal with these objects.
> 
Hi Chris,
	I think there's a small difference between SLUB and SLOB for compound
pages. SLOB relies on the page allocator to allocate a compound page for any
request bigger than one page. SLUB relies on the page allocator only if the
request is bigger than two pages, so SLUB may allocate a two-page compound
page to host SLUB-managed objects.
	My proposal may be summarized as below:
	1) The PG_slab flag marks that memory is allocated from a slab allocator.
	2) PG_slabobject marks that a (compound) page hosts SLUB/SLOB-managed objects.
	3) Only set PG_slab/PG_slabobject on the head page of compound pages.
	4) For SLAB, PG_slabobject is redundant and so not used.

	A summary of the proposed usage of PG_slab (S) and PG_slabobject (O)
with the SLAB/SLUB/SLOB allocators is as below:
pagesize	SLAB			SLUB			SLOB
1page		S			S,O			S,O
2page		S			S,O			S
>=4page		S			S			S

	Thanks!
	Gerry




* Re: [RFC PATCH 1/4] mm: introduce a safer interface to check whether a page is managed by SLxB
  2012-07-05 15:55   ` Jiang Liu
@ 2012-07-05 17:36     ` Christoph Lameter
  2012-07-06  7:29       ` Jiang Liu
  0 siblings, 1 reply; 17+ messages in thread
From: Christoph Lameter @ 2012-07-05 17:36 UTC (permalink / raw)
  To: Jiang Liu
  Cc: Jiang Liu, Pekka Enberg, Matt Mackall, Mel Gorman, Yinghai Lu,
	Tony Luck, KAMEZAWA Hiroyuki, KOSAKI Motohiro, David Rientjes,
	Minchan Kim, Keping Chen, linux-mm, linux-kernel

On Thu, 5 Jul 2012, Jiang Liu wrote:

> 	I think here PageSlab() is used to check whether a page hosting a memory
> object is managed/allocated by the slab allocator. If it's allocated by slab
> allocator, we could use kfree() to free the object.

This is BS (here? what does that refer to). Could you please respond to my
email?

> 	We encountered this issue when trying to implement physical memory hot-removal.
> After removing a memory device, we need to tear down memory management structures
> of the removed memory device. Those memory management structures may be allocated
> by bootmem allocator at boot time, or allocated by slab allocator at runtime when
> hot-adding memory device. So in our case, PageSlab() is used to distinguish between
> bootmem allocator and slab allocator. With SLUB, some pages will never be released
> due to the issue described above.

Trying to be more detailed than in my last email:

These compound pages could also be allocated by any other kernel subsystem
for metadata purposes and they will never be marked as slab pages. These
generic structures generally cannot be removed.

For the slab allocators: Only kmalloc memory uses the unmarked compound
pages and those kmalloc objects are never recoverable. You can only
recover objects that are in slabs marked reclaimable and those are
properly marked as slab pages.

AFAICT the patchset is pointless.


* Re: [RFC PATCH 2/4] mm: make consistent use of PG_slab flag
  2012-07-05 16:15     ` Jiang Liu
@ 2012-07-05 17:37       ` Christoph Lameter
  2012-07-06  8:30         ` Jiang Liu
  0 siblings, 1 reply; 17+ messages in thread
From: Christoph Lameter @ 2012-07-05 17:37 UTC (permalink / raw)
  To: Jiang Liu
  Cc: Jiang Liu, Pekka Enberg, Matt Mackall, Mel Gorman, Yinghai Lu,
	Tony Luck, KAMEZAWA Hiroyuki, KOSAKI Motohiro, David Rientjes,
	Minchan Kim, Keping Chen, linux-mm, linux-kernel

On Fri, 6 Jul 2012, Jiang Liu wrote:

> On 07/05/2012 10:47 PM, Christoph Lameter wrote:
> > On Tue, 3 Jul 2012, Jiang Liu wrote:
> >
> >> PG_slabobject:	mark whether a (compound) page hosts SLUB/SLOB objects.
> >
> > Any subsystem may allocate a compound page to store metadata.
> >
> > The compound pages used by SLOB and SLUB are not managed in any way but
> > the calls to kfree and kmalloc are converted to calls to the page
> > allocator. There is no "management" by the slab allocators for these
> > cases and its inaccurate to say that these are SLUB/SLOB objects since the
> > allocators never deal with these objects.
> >
> Hi Chris,
> 	I think there's a little difference with SLUB and SLOB for compound page.
> For SLOB, it relies on the page allocator to allocate compound page to fulfill
> request bigger than one page. For SLUB, it relies on the page allocator if the
> request is bigger than two pages. So SLUB may allocate a 2-pages compound page
> to host SLUB managed objects.
> 	My proposal may be summarized as below:
> 	1) PG_slab flag marks a memory object is allocated from slab allocator.
> 	2) PG_slabobject marks a (compound) page hosts SLUB/SLOB managed objects.
> 	3) Only set PG_slab/PG_slabobject on the head page of compound pages.
> 	4) For SLAB, PG_slabobject is redundant and so not used.
>
> 	A summary of proposed usage of PG_slab(S) and PG_slabobject(O) with
> SLAB/SLUB/SLOB allocators as below:
> pagesize	SLAB			SLUB			SLOB
> 1page		S			S,O			S,O
> 2page		S			S,O			S
> >=4page		S			S			S

There is no point in recognizing such objects because those will be
kmalloc objects and they can only be freed in a subsystem specific way.
There is no standard way to even figure out which subsystem allocated
them. So for all practical purposes those are unrecoverable.




* Re: [RFC PATCH 1/4] mm: introduce a safer interface to check whether a page is managed by SLxB
  2012-07-05 17:36     ` Christoph Lameter
@ 2012-07-06  7:29       ` Jiang Liu
  2012-07-06 13:50         ` Christoph Lameter
  0 siblings, 1 reply; 17+ messages in thread
From: Jiang Liu @ 2012-07-06  7:29 UTC (permalink / raw)
  To: Christoph Lameter
  Cc: Jiang Liu, Pekka Enberg, Matt Mackall, Mel Gorman, Yinghai Lu,
	Tony Luck, KAMEZAWA Hiroyuki, KOSAKI Motohiro, David Rientjes,
	Minchan Kim, Keping Chen, linux-mm, linux-kernel

On 2012-7-6 1:36, Christoph Lameter wrote:
> On Thu, 5 Jul 2012, Jiang Liu wrote:
> 
>> 	I think here PageSlab() is used to check whether a page hosting a memory
>> object is managed/allocated by the slab allocator. If it's allocated by slab
>> allocator, we could use kfree() to free the object.
> 
> This is BS (here? what does that refer to). Could you please respond to my
> email?
> 
>> 	We encountered this issue when trying to implement physical memory hot-removal.
>> After removing a memory device, we need to tear down memory management structures
>> of the removed memory device. Those memory management structures may be allocated
>> by bootmem allocator at boot time, or allocated by slab allocator at runtime when
>> hot-adding memory device. So in our case, PageSlab() is used to distinguish between
>> bootmem allocator and slab allocator. With SLUB, some pages will never be released
>> due to the issue described above.
> 
> Trying to be more detailed that in my last email:
> 
> These compound pages could also be allocated by any other kernel subsystem
> for metadata purposes and they will never be marked as slab pages. These
> generic structures generally cannot be removed.
> 
> For the slab allocators: Only kmalloc memory uses the unmarked compound
> pages and those kmalloc objects are never recoverable. You can only
> recover objects that are in slabs marked reclaimable and those are
> properly marked as slab pages.
> 
> AFAICT the patchset is pointless.
Hi Chris,
	It seems there's a big misunderstanding here.
	We are not trying to use PageSlab() as a common mechanism to check
whether a page could be migrated/removed. For that, we still rely on 
ZONE_MOVABLE, MIGRATE_RECLAIM, MIGRATE_MOVABLE, MIGRATE_CMA to migrate or
reclaim pages for hot-removing.
	Originally the patch was aimed at fixing an issue encountered when
hot-removing a hot-added memory device. Currently memory hotplug is only
supported with the SPARSE memory model. After offlining all pages of a memory
section, we need to free the resources used by "struct mem_section" itself,
that is, to free section_mem_map and pageblock_flags. For memory sections
created at boot time, section_mem_map and pageblock_flags are allocated
from bootmem. For memory sections created at runtime, section_mem_map
and pageblock_flags are allocated from slab. So when freeing these
resources, we use PageSlab() to tell whether they were allocated from slab.
So free_section_usemap() has the following code snippet.
{
        usemap_page = virt_to_page(usemap);
        /*
         * Check to see if allocation came from hot-plug-add
         */
        if (PageSlab(usemap_page)) {
                kfree(usemap);
                if (memmap)
                        __kfree_section_memmap(memmap, PAGES_PER_SECTION);
                return;
        }

        /*
         * The usemap came from bootmem. This is packed with other usemaps
         * on the section which has pgdat at boot time. Just keep it as is now.
         */

        if (memmap) {
                struct page *memmap_page;
                memmap_page = virt_to_page(memmap);

                nr_pages = PAGE_ALIGN(PAGES_PER_SECTION * sizeof(struct page))
                        >> PAGE_SHIFT;

                free_map_bootmem(memmap_page, nr_pages);
        }
}

	Here if usemap is allocated from SLUB but PageSlab() incorrectly returns
false, we will try to free pages allocated from slab as if they were bootmem
pages. That will confuse the memory hotplug logic.

	And when fixing this issue, we found some other usages of PageSlab()
that may have the same issue. For example:
	1) /proc/kpageflags and /proc/kpagecount may return incorrect results
for pages allocated by slab.
	2) DRBD has the following comment. At first glance, it seems
	dangerous if PageSlab() returns false for pages allocated by slab.
	(On further thought, the comment is a little out of date because
	put_page/get_page now handle compound pages correctly, so it should
	be OK to send pages allocated from slab.)
        /* e.g. XFS meta- & log-data is in slab pages, which have a
         * page_count of 0 and/or have PageSlab() set.
         * we cannot use send_page for those, as that does get_page();
         * put_page(); and would cause either a VM_BUG directly, or
         * __page_cache_release a page that would actually still be referenced
         * by someone, leading to some obscure delayed Oops somewhere else. */
        if (disable_sendpage || (page_count(page) < 1) || PageSlab(page))
                return _drbd_no_send_page(mdev, page, offset, size, msg_flags);

	3) show_mem() on ARM and unicore32 reports far fewer pages used by slab
	if SLUB/SLOB is used instead of SLAB, because SLUB/SLOB doesn't mark big
	compound pages with the PG_slab flag.

	So we worked out the patch set to address these issues. We also found
other possible usages of the proposed macro.
	For example, if the memory backing a "struct resource" structure is
allocated from bootmem, __release_region() shouldn't free the memory into the
slab allocator, otherwise it will trigger a panic as below. This issue is
reproducible when hot-removing a memory device present at boot time on x86
platforms. There, e820_reserve_resources() allocates bootmem for all physical
memory resources present at boot time. Later, when those memory devices are
hot-removed, __release_region() will try to free memory from bootmem into
slab and so trigger the panic. A proposed fix is:
diff --git a/kernel/resource.c b/kernel/resource.c
index e1d2b8e..a40c11b 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -19,6 +19,7 @@
 #include <linux/seq_file.h>
 #include <linux/device.h>
 #include <linux/pfn.h>
+#include <linux/mm.h>
 #include <asm/io.h>


@@ -947,7 +948,8 @@ void __release_region(struct resource *parent, resource_size_t start,
                        write_unlock(&resource_lock);
                        if (res->flags & IORESOURCE_MUXED)
                                wake_up(&muxed_resource_wait);
-                       kfree(res);
+                       if (mem_managed_by_slab(res))
+                               kfree(res);
                        return;
                }
                p = &res->sibling;


------------[ cut here ]------------
kernel BUG at mm/slub.c:3471!
invalid opcode: 0000 [#1] SMP
CPU 2
Modules linked in: module(O+) cpufreq_conservative cpufreq_userspace
cpufreq_powersave acpi_cpufreq mperf fuse loop dm_mod coretemp igb bnx2
tpm_tis i2c_i801 serio_raw microcode tpm sg tpm_bios i2c_core dca iTCO_wdt
iTCO_vendor_support pcspkr button mptctl usbhid hid uhci_hcd ehci_hcd usbcore
usb_common sd_mod crc_t10dif edd ext3 mbcache jbd fan ide_pci_generic ide_core
ata_generic ata_piix libata thermal processor thermal_sys hwmon mptsas
mptscsih mptbase scsi_transport_sas scsi_mod

Pid: 30857, comm: insmod Tainted: G           O 3.4.0-rc4-memory-hotplug+ #10
Huawei Technologies Co., Ltd. Tecal RH2285          /BC11BTSA
RIP: 0010:[<ffffffff810d245f>]  [<ffffffff810d245f>] kfree+0x49/0xb1
RSP: 0018:ffff880c1bbd1ec8  EFLAGS: 00010246
RAX: 0060000000000400 RBX: ffff880c3ffbee60 RCX: 0000000000000082
RDX: ffffea0000000000 RSI: 0000000100000000 RDI: ffffea0030ffef80
RBP: ffff880c1bbd1ed8 R08: ffff880c1bbd1de8 R09: ffff880627802700
R10: ffffffff810c16a4 R11: 0000000000013a90 R12: ffff880c3ffbee50
R13: 0000000100000000 R14: 0000000c3fffffff R15: 0000000000000002
FS:  00007fe63ca906f0(0000) GS:ffff880627c40000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 000000008005003b
CR2: 00007fe63caca000 CR3: 000000061b2e4000 CR4: 00000000000007e0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
Process insmod (pid: 30857, threadinfo ffff880c1bbd0000, task
ffff880c1e08ca70)
Stack:
 ffff880c3ffbee60 ffff880c3ffbee50 ffff880c1bbd1f08 ffffffff810313d0
 ffffffffa00e8090 0000000000025feb ffffffffa00ea000 0000000000000000
 ffff880c1bbd1f18 ffffffffa00ea032 ffff880c1bbd1f48 ffffffff8100020c
Call Trace:
 [<ffffffff810313d0>] __release_region+0x88/0xb4
 [<ffffffffa00ea000>] ? 0xffffffffa00e9fff
 [<ffffffffa00ea032>] test_module_init+0x32/0x36 [module]
 [<ffffffff8100020c>] do_one_initcall+0x7c/0x130
 [<ffffffff8106e9ac>] sys_init_module+0x7c/0x1c4
 [<ffffffff81310a22>] system_call_fastpath+0x16/0x1b
Code: ba 00 00 00 00 00 ea ff ff 48 c1 e0 06 48 8d 3c 10 48 8b 07 66 85 c0 79
04 48 8b 7f 30 48 8b 07 84 c0 78 12 66 f7 07 00 c0 75 04 <0f> 0b eb fe e8 44
30 fd ff eb 58 4c 8b 55 08 4c 8b 4f 30 49 8b
RIP  [<ffffffff810d245f>] kfree+0x49/0xb1
 RSP <ffff880c1bbd1ec8>
---[ end trace f5e0eba731c4d41e ]---

	Thanks!
	Gerry



* Re: [RFC PATCH 2/4] mm: make consistent use of PG_slab flag
  2012-07-05 17:37       ` Christoph Lameter
@ 2012-07-06  8:30         ` Jiang Liu
  2012-07-06 13:53           ` Christoph Lameter
  0 siblings, 1 reply; 17+ messages in thread
From: Jiang Liu @ 2012-07-06  8:30 UTC (permalink / raw)
  To: Christoph Lameter
  Cc: Jiang Liu, Pekka Enberg, Matt Mackall, Mel Gorman, Yinghai Lu,
	Tony Luck, KAMEZAWA Hiroyuki, KOSAKI Motohiro, David Rientjes,
	Minchan Kim, Keping Chen, linux-mm, linux-kernel

On 2012-7-6 1:37, Christoph Lameter wrote:
>> Hi Chris,
>> 	I think there's a little difference with SLUB and SLOB for compound page.
>> For SLOB, it relies on the page allocator to allocate compound page to fulfill
>> request bigger than one page. For SLUB, it relies on the page allocator if the
>> request is bigger than two pages. So SLUB may allocate a 2-pages compound page
>> to host SLUB managed objects.
>> 	My proposal may be summarized as below:
>> 	1) PG_slab flag marks a memory object is allocated from slab allocator.
>> 	2) PG_slabobject marks a (compound) page hosts SLUB/SLOB managed objects.
>> 	3) Only set PG_slab/PG_slabobject on the head page of compound pages.
>> 	4) For SLAB, PG_slabobject is redundant and so not used.
>>
>> 	A summary of proposed usage of PG_slab(S) and PG_slabobject(O) with
>> SLAB/SLUB/SLOB allocators as below:
>> pagesize	SLAB			SLUB			SLOB
>> 1page		S			S,O			S,O
>> 2page		S			S,O			S
>>> =4page		S			S			S
> 
> There is no point of recognizing such objects because those will be
> kmalloc objects and they can only be freed in a subsystem specific way.
> There is no standard way to even figure out which subsystem allocated
> them. So for all practical purposes those are unrecoverable.

Hi Chris,
	This patch is not for hotplug, but to fix some issues in the current
kernel, such as:
	1) make show_mem() on ARM and unicore32 report consistent information
no matter which slab allocator is used.
	2) make /proc/kpagecount and /proc/kpageflags return accurate information.
	3) get rid of risks in mm/memory_failure.c and arch/ia64/kernel/mca_drv.c.
	Thanks!



* Re: [RFC PATCH 1/4] mm: introduce a safer interface to check whether a page is managed by SLxB
  2012-07-06  7:29       ` Jiang Liu
@ 2012-07-06 13:50         ` Christoph Lameter
  2012-07-06 15:36           ` Jiang Liu
  0 siblings, 1 reply; 17+ messages in thread
From: Christoph Lameter @ 2012-07-06 13:50 UTC (permalink / raw)
  To: Jiang Liu
  Cc: Jiang Liu, Pekka Enberg, Matt Mackall, Mel Gorman, Yinghai Lu,
	Tony Luck, KAMEZAWA Hiroyuki, KOSAKI Motohiro, David Rientjes,
	Minchan Kim, Keping Chen, linux-mm, linux-kernel

On Fri, 6 Jul 2012, Jiang Liu wrote:

> 	Originally the patch is aimed to fix an issue encountered when
> hot-removing a hot-added memory device. Currently memory hotplug is only
> supported with SPARSE memory model. After offlining all pages of a memory
> section, we need to free resources used by "struct mem_section" itself.
> That is to free section_mem_map and pageblock_flags. For memory section
> created at boot time, section_mem_map and pageblock_flags are allocated
> from bootmem. For memory section created at runtime, section_mem_map
> and pageblock_flags are allocated from slab. So when freeing these
> resources, we use PageSlab() to tell whether they were allocated from slab.
> So free_section_usemap() has following code snippet.
> {
>         usemap_page = virt_to_page(usemap);
>         /*
>          * Check to see if allocation came from hot-plug-add
>          */
>         if (PageSlab(usemap_page)) {

Change this to PageSlab(usemap_page) || PageCompound(usemap_page) and then
the code segment will work. Fallback to the page allocator always implied
the use of compound pages. It would be cleaner if memory hotplug had an
indicator of which allocation mechanism was used and would use the
corresponding free action. Slab allocators could put multiple objects into
the slab page (if the structures are sufficiently small). So this is not
that good of a solution.
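
In code, the check suggested above would look something like this (a sketch
only, reusing the names from the quoted snippet):

	usemap_page = virt_to_page(usemap);
	/* kmalloc'ed: either a real slab page or, for a large SLUB/SLOB
	 * allocation, a plain compound page from the page allocator */
	if (PageSlab(usemap_page) || PageCompound(usemap_page)) {
		kfree(usemap);
		if (memmap)
			__kfree_section_memmap(memmap, PAGES_PER_SECTION);
		return;
	}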


> 	And when fixing this issue, we found some other usages of PageSlab() may
> have the same issue. For example:
> 	1) /proc/kpageflags and /proc/kpagecount may return incorrect result for
> pages allocated by slab.

Ok then the compound page handling is broken in those.

> 	2) DRBD has following comments. At first glance, it seems that it's
> 	dangerous if PageSlab() to return false for pages allocated by slab.

Again the pages that do not have PageSlab set were not allocated using a
slab allocator. They were allocated by calls to the page allocator.

> 	(With more thinking, the comments is a little out of date because now
> 	put_page/get_page already correctly handle compound pages, so it should
> 	be OK to send pages allocated from slab.)

AFAICT they always handled compound pages correctly.

> 	3) show_mem() on ARM and unicore32 reports much less pages used by slab
> 	if SLUB/SLOB is used instead of SLAB because SLUB/SLOB doesn't mark big
> 	compound pages with PG_slab flag.

Right. That is because SLUB/SLOB lets the page allocator directly
allocate large structures where it would not make sense to use the slab
allocators. The main purpose of the slab allocators is to allocate
objects in fractions of pages. This does not seem to be a use case for
slab objects. Maybe it would be better to directly call the page allocator
for your large structures?
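
For example (illustrative only, using the names from mm/sparse.c), the
usemap could be taken from the page allocator directly and freed
symmetrically:

	usemap = (unsigned long *)__get_free_pages(GFP_KERNEL,
						   get_order(usemap_size()));
	/* ... and on the teardown path ... */
	free_pages((unsigned long)usemap, get_order(usemap_size()));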

> 	For example, if the memory backing a "struct resource" structure is
> allocated from bootmem, __release_region() shouldn't free the memory into
> slab allocator, otherwise it will trigger panic as below. This issue is
> reproducible when hot-removing a memory device present at boot time on x86
> platforms. On x86 platforms, e820_reserve_resources() allocates bootmem for
> all physical memory resources present at boot time. Later when those memory
> devices are hot-removed, __release_region() will try to free  memory from
> bootmem into slab, so trigger the panic. And a proposed fix is:

Working out how a certain memory structure was allocated could be most
easily done by setting a flag somewhere instead of checking the page flags
of a page that may potentially include multiple slab objects.
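
For instance, something along these lines (purely hypothetical, the
structure and field names are made up):

	struct usemap_alloc_info {
		unsigned long *usemap;
		bool from_slab;		/* recorded at allocation time */
	};

	if (info->from_slab)
		kfree(info->usemap);
	/* else it came from bootmem and is handled by free_map_bootmem() */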


* Re: [RFC PATCH 2/4] mm: make consistent use of PG_slab flag
  2012-07-06  8:30         ` Jiang Liu
@ 2012-07-06 13:53           ` Christoph Lameter
  0 siblings, 0 replies; 17+ messages in thread
From: Christoph Lameter @ 2012-07-06 13:53 UTC (permalink / raw)
  To: Jiang Liu
  Cc: Jiang Liu, Pekka Enberg, Matt Mackall, Mel Gorman, Yinghai Lu,
	Tony Luck, KAMEZAWA Hiroyuki, KOSAKI Motohiro, David Rientjes,
	Minchan Kim, Keping Chen, linux-mm, linux-kernel

On Fri, 6 Jul 2012, Jiang Liu wrote:

> 	This patch is not for hotplug, but is to fix some issues in current
> kernel, such as:
> 	1) make show_mem() on ARM and unicore32 report consistent information
> no matter which slab allocator is used.

The information is only different because allocations do not go through
the slab allocators for SLUB/SLOB.

> 	2) make /proc/kpagecount and /proc/kpageflags return accurate information.

Fix the compound handling in those and the numbers will be correct. This
is also good for other issues that may arise because the flags in the
compound head are not considered.

> 	3) Get rid of risks in mm/memory_failure.c and arch/ia64/kernel/mca_drv.c

Assuming that a slab allocation fits into a page is a dangerous
assumption. There are arches with much larger page sizes. Please fix the
code.




* Re: [RFC PATCH 1/4] mm: introduce a safer interface to check whether a page is managed by SLxB
  2012-07-06 13:50         ` Christoph Lameter
@ 2012-07-06 15:36           ` Jiang Liu
  0 siblings, 0 replies; 17+ messages in thread
From: Jiang Liu @ 2012-07-06 15:36 UTC (permalink / raw)
  To: Christoph Lameter
  Cc: Jiang Liu, Pekka Enberg, Matt Mackall, Mel Gorman, Yinghai Lu,
	Tony Luck, KAMEZAWA Hiroyuki, KOSAKI Motohiro, David Rientjes,
	Minchan Kim, Keping Chen, linux-mm, linux-kernel

Hi Chris,
	Really appreciate your suggestions and I will work out a new version to
fix callers of PageSlab() instead of changing the slab allocator.
	Thanks!
	Gerry

On 07/06/2012 09:50 PM, Christoph Lameter wrote:
> On Fri, 6 Jul 2012, Jiang Liu wrote:
> 
>> 	Originally the patch is aimed to fix an issue encountered when
>> hot-removing a hot-added memory device. Currently memory hotplug is only
>> supported with SPARSE memory model. After offlining all pages of a memory
>> section, we need to free resources used by "struct mem_section" itself.
>> That is to free section_mem_map and pageblock_flags. For memory section
>> created at boot time, section_mem_map and pageblock_flags are allocated
>> from bootmem. For memory section created at runtime, section_mem_map
>> and pageblock_flags are allocated from slab. So when freeing these
>> resources, we use PageSlab() to tell whether they were allocated from slab.
>> So free_section_usemap() has following code snippet.
>> {
>>         usemap_page = virt_to_page(usemap);
>>         /*
>>          * Check to see if allocation came from hot-plug-add
>>          */
>>         if (PageSlab(usemap_page)) {
> 
> Change this to PageSlab(usemap_page) || PageCompound(usemap_page) and then
> the code segment will work. Fallback to the page allocator always implied
> the use of compound pages. It would be cleaner if memory hotplug had an
> indicator which allocation mechanism was used and would use the
> corresponding free action. Slab allocators could put multiple objects into
> the slab page (if the structures are sufficiently small). So this is not
> that good of a solution.
> 
> 
>> 	And when fixing this issue, we found some other usages of PageSlab() may
>> have the same issue. For example:
>> 	1) /proc/kpageflags and /proc/kpagecount may return incorrect result for
>> pages allocated by slab.
> 
> Ok then the compound page handling is broken in those.
> 
>> 	2) DRBD has following comments. At first glance, it seems that it's
>> 	dangerous if PageSlab() to return false for pages allocated by slab.
> 
> Again the pages that do not have PageSlab set were not allocated using a
> slab allocator. They were allocated by calls to the page allocator.
> 
>> 	(With more thinking, the comments is a little out of date because now
>> 	put_page/get_page already correctly handle compound pages, so it should
>> 	be OK to send pages allocated from slab.)
> 
> AFAICT they always handled compound pages correctly.
> 
>> 	3) show_mem() on ARM and unicore32 reports much less pages used by slab
>> 	if SLUB/SLOB is used instead of SLAB because SLUB/SLOB doesn't mark big
>> 	compound pages with PG_slab flag.
> 
> Right. That is because SLUB/SLOB lets the page allocator directly
> allocate large structures where it would not make sense to use the slab
> allocators. The main purpose of the slab allocators is to allocate
> objects in fractions of pages. This does not seem to be a use case for
> slab objects. Maybe it would be better to directly call the page allocator
> for your large structures?
> 
>> 	For example, if the memory backing a "struct resource" structure is
>> allocated from bootmem, __release_region() shouldn't free the memory into
>> slab allocator, otherwise it will trigger panic as below. This issue is
>> reproducible when hot-removing a memory device present at boot time on x86
>> platforms. On x86 platforms, e820_reserve_resources() allocates bootmem for
>> all physical memory resources present at boot time. Later when those memory
>> devices are hot-removed, __release_region() will try to free  memory from
>> bootmem into slab, so trigger the panic. And a proposed fix is:
> 
> Working out how a certain memory structure was allocated could be most
> easily done by setting a flag somewhere instead of checking the page flags
> of a page that may potentially include multiple slab objects.
> 




* Re: [RFC PATCH 1/4] mm: introduce a safer interface to check whether a page is managed by SLxB
  2012-07-03  3:57 [RFC PATCH 1/4] mm: introduce a safer interface to check whether a page is managed by SLxB Jiang Liu
                   ` (3 preceding siblings ...)
  2012-07-05 14:45 ` [RFC PATCH 1/4] mm: introduce a safer interface to check whether a page is managed by SLxB Christoph Lameter
@ 2012-09-04  9:18 ` Wen Congyang
  2012-09-04 12:13   ` Jiang Liu
  4 siblings, 1 reply; 17+ messages in thread
From: Wen Congyang @ 2012-09-04  9:18 UTC (permalink / raw)
  To: Jiang Liu
  Cc: Christoph Lameter, Pekka Enberg, Matt Mackall, Mel Gorman,
	Yinghai Lu, Tony Luck, KAMEZAWA Hiroyuki, KOSAKI Motohiro,
	David Rientjes, Minchan Kim, Keping Chen, linux-mm, linux-kernel,
	Jiang Liu

At 07/03/2012 11:57 AM, Jiang Liu Wrote:
> Several subsystems, including memory-failure, swap, sparse, DRBD etc,
> use PageSlab() to check whether a page is managed by SLAB/SLUB/SLOB.
> And they treat slab pages differently from pagecache/anonymous pages.
> 
> But it's unsafe to use PageSlab() to detect whether a page is managed by
> SLUB. SLUB allocates compound pages when page order is bigger than 0 and
> only sets PG_slab on head pages. So if a SLUB object is hosted by a tail
> page, PageSlab() will incorrectly return false for that object.
> 
> Following code from sparse.c triggers this issue, which causes failure
> when removing a hot-added memory device.

Hi, Liu

What is the status of this patch?
I encountered the same problem when removing a hot-added memory device. It
causes a kernel panic.

Thanks
Wen Congyang

>         /*
>          * Check to see if allocation came from hot-plug-add
>          */
>         if (PageSlab(usemap_page)) {
>                 kfree(usemap);
>                 if (memmap)
>                         __kfree_section_memmap(memmap, PAGES_PER_SECTION);
>                 return;
>         }
> 
> So introduce a transparent huge page and compound page safe macro as below
> to check whether a page is managed by SLAB/SLUB/SLOB allocator.
> 
> #define page_managed_by_slab(page)     (!!PageSlab(compound_trans_head(page)))
> 
> Signed-off-by: Jiang Liu <liuj97@gmail.com>
> ---
>  arch/arm/mm/init.c             |    3 ++-
>  arch/ia64/kernel/mca_drv.c     |    2 +-
>  arch/unicore32/mm/init.c       |    3 ++-
>  crypto/scatterwalk.c           |    2 +-
>  drivers/ata/libata-sff.c       |    3 ++-
>  drivers/block/drbd/drbd_main.c |    3 ++-
>  fs/proc/page.c                 |    4 ++--
>  include/linux/slab.h           |    7 +++++++
>  mm/memory-failure.c            |    6 +++---
>  mm/sparse.c                    |    4 +---
>  10 files changed, 23 insertions(+), 14 deletions(-)
> 
> diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c
> index f54d592..73ff340 100644
> --- a/arch/arm/mm/init.c
> +++ b/arch/arm/mm/init.c
> @@ -18,6 +18,7 @@
>  #include <linux/initrd.h>
>  #include <linux/of_fdt.h>
>  #include <linux/highmem.h>
> +#include <linux/huge_mm.h>
>  #include <linux/gfp.h>
>  #include <linux/memblock.h>
>  #include <linux/dma-contiguous.h>
> @@ -116,7 +117,7 @@ void show_mem(unsigned int filter)
>  				reserved++;
>  			else if (PageSwapCache(page))
>  				cached++;
> -			else if (PageSlab(page))
> +			else if (page_managed_by_slab(page))
>  				slab++;
>  			else if (!page_count(page))
>  				free++;
> diff --git a/arch/ia64/kernel/mca_drv.c b/arch/ia64/kernel/mca_drv.c
> index 1c2e894..4415bb6 100644
> --- a/arch/ia64/kernel/mca_drv.c
> +++ b/arch/ia64/kernel/mca_drv.c
> @@ -136,7 +136,7 @@ mca_page_isolate(unsigned long paddr)
>  		return ISOLATE_NG;
>  
>  	/* kick pages having attribute 'SLAB' or 'Reserved' */
> -	if (PageSlab(p) || PageReserved(p))
> +	if (page_managed_by_slab(p) || PageReserved(p))
>  		return ISOLATE_NG;
>  
>  	/* add attribute 'Reserved' and register the page */
> diff --git a/arch/unicore32/mm/init.c b/arch/unicore32/mm/init.c
> index de186bd..829a0d9 100644
> --- a/arch/unicore32/mm/init.c
> +++ b/arch/unicore32/mm/init.c
> @@ -21,6 +21,7 @@
>  #include <linux/sort.h>
>  #include <linux/dma-mapping.h>
>  #include <linux/export.h>
> +#include <linux/huge_mm.h>
>  
>  #include <asm/sections.h>
>  #include <asm/setup.h>
> @@ -83,7 +84,7 @@ void show_mem(unsigned int filter)
>  				reserved++;
>  			else if (PageSwapCache(page))
>  				cached++;
> -			else if (PageSlab(page))
> +			else if (page_managed_by_slab(page))
>  				slab++;
>  			else if (!page_count(page))
>  				free++;
> diff --git a/crypto/scatterwalk.c b/crypto/scatterwalk.c
> index 7281b8a..a20e019 100644
> --- a/crypto/scatterwalk.c
> +++ b/crypto/scatterwalk.c
> @@ -54,7 +54,7 @@ static void scatterwalk_pagedone(struct scatter_walk *walk, int out,
>  		struct page *page;
>  
>  		page = sg_page(walk->sg) + ((walk->offset - 1) >> PAGE_SHIFT);
> -		if (!PageSlab(page))
> +		if (!page_managed_by_slab(page))
>  			flush_dcache_page(page);
>  	}
>  
> diff --git a/drivers/ata/libata-sff.c b/drivers/ata/libata-sff.c
> index d8af325..1ab8378 100644
> --- a/drivers/ata/libata-sff.c
> +++ b/drivers/ata/libata-sff.c
> @@ -38,6 +38,7 @@
>  #include <linux/module.h>
>  #include <linux/libata.h>
>  #include <linux/highmem.h>
> +#include <linux/huge_mm.h>
>  
>  #include "libata.h"
>  
> @@ -734,7 +735,7 @@ static void ata_pio_sector(struct ata_queued_cmd *qc)
>  				       do_write);
>  	}
>  
> -	if (!do_write && !PageSlab(page))
> +	if (!do_write && !page_managed_by_slab(page))
>  		flush_dcache_page(page);
>  
>  	qc->curbytes += qc->sect_size;
> diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
> index 920ede2..de5b395 100644
> --- a/drivers/block/drbd/drbd_main.c
> +++ b/drivers/block/drbd/drbd_main.c
> @@ -2734,7 +2734,8 @@ static int _drbd_send_page(struct drbd_conf *mdev, struct page *page,
>  	 * put_page(); and would cause either a VM_BUG directly, or
>  	 * __page_cache_release a page that would actually still be referenced
>  	 * by someone, leading to some obscure delayed Oops somewhere else. */
> -	if (disable_sendpage || (page_count(page) < 1) || PageSlab(page))
> +	if (disable_sendpage || (page_count(page) < 1) ||
> +	    page_managed_by_slab(page))
>  		return _drbd_no_send_page(mdev, page, offset, size, msg_flags);
>  
>  	msg_flags |= MSG_NOSIGNAL;
> diff --git a/fs/proc/page.c b/fs/proc/page.c
> index 7fcd0d6..ae42dc7 100644
> --- a/fs/proc/page.c
> +++ b/fs/proc/page.c
> @@ -40,7 +40,7 @@ static ssize_t kpagecount_read(struct file *file, char __user *buf,
>  			ppage = pfn_to_page(pfn);
>  		else
>  			ppage = NULL;
> -		if (!ppage || PageSlab(ppage))
> +		if (!ppage || page_managed_by_slab(ppage))
>  			pcount = 0;
>  		else
>  			pcount = page_mapcount(ppage);
> @@ -98,7 +98,7 @@ u64 stable_page_flags(struct page *page)
>  	 * Note that page->_mapcount is overloaded in SLOB/SLUB/SLQB, so the
>  	 * simple test in page_mapped() is not enough.
>  	 */
> -	if (!PageSlab(page) && page_mapped(page))
> +	if (!page_managed_by_slab(page) && page_mapped(page))
>  		u |= 1 << KPF_MMAP;
>  	if (PageAnon(page))
>  		u |= 1 << KPF_ANON;
> diff --git a/include/linux/slab.h b/include/linux/slab.h
> index 67d5d94..bb26fab 100644
> --- a/include/linux/slab.h
> +++ b/include/linux/slab.h
> @@ -364,4 +364,11 @@ static inline void *kzalloc_node(size_t size, gfp_t flags, int node)
>  
>  void __init kmem_cache_init_late(void);
>  
> +/*
> + * Check whether a page is allocated/managed by SLAB/SLUB/SLOB allocator.
> + * Defined as macro instead of function to avoid header file pollution.
> + */
> +#define page_managed_by_slab(page)	(!!PageSlab(compound_trans_head(page)))
> +#define mem_managed_by_slab(addr)	page_managed_by_slab(virt_to_page(addr))
> +
>  #endif	/* _LINUX_SLAB_H */
> diff --git a/mm/memory-failure.c b/mm/memory-failure.c
> index ab1e714..684e7f7 100644
> --- a/mm/memory-failure.c
> +++ b/mm/memory-failure.c
> @@ -88,7 +88,7 @@ static int hwpoison_filter_dev(struct page *p)
>  	/*
>  	 * page_mapping() does not accept slab pages.
>  	 */
> -	if (PageSlab(p))
> +	if (page_managed_by_slab(p))
>  		return -EINVAL;
>  
>  	mapping = page_mapping(p);
> @@ -233,7 +233,7 @@ static int kill_proc(struct task_struct *t, unsigned long addr, int trapno,
>   */
>  void shake_page(struct page *p, int access)
>  {
> -	if (!PageSlab(p)) {
> +	if (!page_managed_by_slab(p)) {
>  		lru_add_drain_all();
>  		if (PageLRU(p))
>  			return;
> @@ -862,7 +862,7 @@ static int hwpoison_user_mappings(struct page *p, unsigned long pfn,
>  	struct page *hpage = compound_head(p);
>  	struct page *ppage;
>  
> -	if (PageReserved(p) || PageSlab(p))
> +	if (PageReserved(p) || page_managed_by_slab(p))
>  		return SWAP_SUCCESS;
>  
>  	/*
> diff --git a/mm/sparse.c b/mm/sparse.c
> index 6a4bf91..32a908b 100644
> --- a/mm/sparse.c
> +++ b/mm/sparse.c
> @@ -688,17 +688,15 @@ static void free_map_bootmem(struct page *page, unsigned long nr_pages)
>  
>  static void free_section_usemap(struct page *memmap, unsigned long *usemap)
>  {
> -	struct page *usemap_page;
>  	unsigned long nr_pages;
>  
>  	if (!usemap)
>  		return;
>  
> -	usemap_page = virt_to_page(usemap);
>  	/*
>  	 * Check to see if allocation came from hot-plug-add
>  	 */
> -	if (PageSlab(usemap_page)) {
> +	if (mem_managed_by_slab(usemap)) {
>  		kfree(usemap);
>  		if (memmap)
>  			__kfree_section_memmap(memmap, PAGES_PER_SECTION);


^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [RFC PATCH 1/4] mm: introduce a safer interface to check whether a page is managed by SLxB
  2012-09-04  9:18 ` Wen Congyang
@ 2012-09-04 12:13   ` Jiang Liu
  0 siblings, 0 replies; 17+ messages in thread
From: Jiang Liu @ 2012-09-04 12:13 UTC (permalink / raw)
  To: Wen Congyang
  Cc: Christoph Lameter, Pekka Enberg, Matt Mackall, Mel Gorman,
	Yinghai Lu, Tony Luck, KAMEZAWA Hiroyuki, KOSAKI Motohiro,
	David Rientjes, Minchan Kim, Keping Chen, linux-mm, linux-kernel,
	Jiang Liu

I need to simplify the patch set :)
I've been busy with PCI patches recently and will get back to this once the
PCI work is done.

On 2012-9-4 17:18, Wen Congyang wrote:
> At 07/03/2012 11:57 AM, Jiang Liu Wrote:
>> Several subsystems, including memory-failure, swap, sparse, DRBD etc,
>> use PageSlab() to check whether a page is managed by SLAB/SLUB/SLOB.
>> And they treat slab pages differently from pagecache/anonymous pages.
>>
>> But it's unsafe to use PageSlab() to detect whether a page is managed by
>> SLUB. SLUB allocates compound pages when page order is bigger than 0 and
>> only sets PG_slab on head pages. So if a SLUB object is hosted by a tail
>> page, PageSlab() will incorrectly return false for that object.
>>
>> Following code from sparse.c triggers this issue, which causes failure
>> when removing a hot-added memory device.
> 
> Hi, Liu
> 
> What is the status of this patch?
> I encountered the same problem when removing a hot-added memory device. It
> causes a kernel panic.
> 
> Thanks
> Wen Congyang
> 
>>         /*
>>          * Check to see if allocation came from hot-plug-add
>>          */
>>         if (PageSlab(usemap_page)) {
>>                 kfree(usemap);
>>                 if (memmap)
>>                         __kfree_section_memmap(memmap, PAGES_PER_SECTION);
>>                 return;
>>         }
>>
>> So introduce a transparent huge page and compound page safe macro as below
>> to check whether a page is managed by SLAB/SLUB/SLOB allocator.
>>
>> #define page_managed_by_slab(page)     (!!PageSlab(compound_trans_head(page)))
>>
>> Signed-off-by: Jiang Liu <liuj97@gmail.com>
>> ---
>>  arch/arm/mm/init.c             |    3 ++-
>>  arch/ia64/kernel/mca_drv.c     |    2 +-
>>  arch/unicore32/mm/init.c       |    3 ++-
>>  crypto/scatterwalk.c           |    2 +-
>>  drivers/ata/libata-sff.c       |    3 ++-
>>  drivers/block/drbd/drbd_main.c |    3 ++-
>>  fs/proc/page.c                 |    4 ++--
>>  include/linux/slab.h           |    7 +++++++
>>  mm/memory-failure.c            |    6 +++---
>>  mm/sparse.c                    |    4 +---
>>  10 files changed, 23 insertions(+), 14 deletions(-)
>>
>> diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c
>> index f54d592..73ff340 100644
>> --- a/arch/arm/mm/init.c
>> +++ b/arch/arm/mm/init.c
>> @@ -18,6 +18,7 @@
>>  #include <linux/initrd.h>
>>  #include <linux/of_fdt.h>
>>  #include <linux/highmem.h>
>> +#include <linux/huge_mm.h>
>>  #include <linux/gfp.h>
>>  #include <linux/memblock.h>
>>  #include <linux/dma-contiguous.h>
>> @@ -116,7 +117,7 @@ void show_mem(unsigned int filter)
>>  				reserved++;
>>  			else if (PageSwapCache(page))
>>  				cached++;
>> -			else if (PageSlab(page))
>> +			else if (page_managed_by_slab(page))
>>  				slab++;
>>  			else if (!page_count(page))
>>  				free++;
>> diff --git a/arch/ia64/kernel/mca_drv.c b/arch/ia64/kernel/mca_drv.c
>> index 1c2e894..4415bb6 100644
>> --- a/arch/ia64/kernel/mca_drv.c
>> +++ b/arch/ia64/kernel/mca_drv.c
>> @@ -136,7 +136,7 @@ mca_page_isolate(unsigned long paddr)
>>  		return ISOLATE_NG;
>>  
>>  	/* kick pages having attribute 'SLAB' or 'Reserved' */
>> -	if (PageSlab(p) || PageReserved(p))
>> +	if (page_managed_by_slab(p) || PageReserved(p))
>>  		return ISOLATE_NG;
>>  
>>  	/* add attribute 'Reserved' and register the page */
>> diff --git a/arch/unicore32/mm/init.c b/arch/unicore32/mm/init.c
>> index de186bd..829a0d9 100644
>> --- a/arch/unicore32/mm/init.c
>> +++ b/arch/unicore32/mm/init.c
>> @@ -21,6 +21,7 @@
>>  #include <linux/sort.h>
>>  #include <linux/dma-mapping.h>
>>  #include <linux/export.h>
>> +#include <linux/huge_mm.h>
>>  
>>  #include <asm/sections.h>
>>  #include <asm/setup.h>
>> @@ -83,7 +84,7 @@ void show_mem(unsigned int filter)
>>  				reserved++;
>>  			else if (PageSwapCache(page))
>>  				cached++;
>> -			else if (PageSlab(page))
>> +			else if (page_managed_by_slab(page))
>>  				slab++;
>>  			else if (!page_count(page))
>>  				free++;
>> diff --git a/crypto/scatterwalk.c b/crypto/scatterwalk.c
>> index 7281b8a..a20e019 100644
>> --- a/crypto/scatterwalk.c
>> +++ b/crypto/scatterwalk.c
>> @@ -54,7 +54,7 @@ static void scatterwalk_pagedone(struct scatter_walk *walk, int out,
>>  		struct page *page;
>>  
>>  		page = sg_page(walk->sg) + ((walk->offset - 1) >> PAGE_SHIFT);
>> -		if (!PageSlab(page))
>> +		if (!page_managed_by_slab(page))
>>  			flush_dcache_page(page);
>>  	}
>>  
>> diff --git a/drivers/ata/libata-sff.c b/drivers/ata/libata-sff.c
>> index d8af325..1ab8378 100644
>> --- a/drivers/ata/libata-sff.c
>> +++ b/drivers/ata/libata-sff.c
>> @@ -38,6 +38,7 @@
>>  #include <linux/module.h>
>>  #include <linux/libata.h>
>>  #include <linux/highmem.h>
>> +#include <linux/huge_mm.h>
>>  
>>  #include "libata.h"
>>  
>> @@ -734,7 +735,7 @@ static void ata_pio_sector(struct ata_queued_cmd *qc)
>>  				       do_write);
>>  	}
>>  
>> -	if (!do_write && !PageSlab(page))
>> +	if (!do_write && !page_managed_by_slab(page))
>>  		flush_dcache_page(page);
>>  
>>  	qc->curbytes += qc->sect_size;
>> diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
>> index 920ede2..de5b395 100644
>> --- a/drivers/block/drbd/drbd_main.c
>> +++ b/drivers/block/drbd/drbd_main.c
>> @@ -2734,7 +2734,8 @@ static int _drbd_send_page(struct drbd_conf *mdev, struct page *page,
>>  	 * put_page(); and would cause either a VM_BUG directly, or
>>  	 * __page_cache_release a page that would actually still be referenced
>>  	 * by someone, leading to some obscure delayed Oops somewhere else. */
>> -	if (disable_sendpage || (page_count(page) < 1) || PageSlab(page))
>> +	if (disable_sendpage || (page_count(page) < 1) ||
>> +	    page_managed_by_slab(page))
>>  		return _drbd_no_send_page(mdev, page, offset, size, msg_flags);
>>  
>>  	msg_flags |= MSG_NOSIGNAL;
>> diff --git a/fs/proc/page.c b/fs/proc/page.c
>> index 7fcd0d6..ae42dc7 100644
>> --- a/fs/proc/page.c
>> +++ b/fs/proc/page.c
>> @@ -40,7 +40,7 @@ static ssize_t kpagecount_read(struct file *file, char __user *buf,
>>  			ppage = pfn_to_page(pfn);
>>  		else
>>  			ppage = NULL;
>> -		if (!ppage || PageSlab(ppage))
>> +		if (!ppage || page_managed_by_slab(ppage))
>>  			pcount = 0;
>>  		else
>>  			pcount = page_mapcount(ppage);
>> @@ -98,7 +98,7 @@ u64 stable_page_flags(struct page *page)
>>  	 * Note that page->_mapcount is overloaded in SLOB/SLUB/SLQB, so the
>>  	 * simple test in page_mapped() is not enough.
>>  	 */
>> -	if (!PageSlab(page) && page_mapped(page))
>> +	if (!page_managed_by_slab(page) && page_mapped(page))
>>  		u |= 1 << KPF_MMAP;
>>  	if (PageAnon(page))
>>  		u |= 1 << KPF_ANON;
>> diff --git a/include/linux/slab.h b/include/linux/slab.h
>> index 67d5d94..bb26fab 100644
>> --- a/include/linux/slab.h
>> +++ b/include/linux/slab.h
>> @@ -364,4 +364,11 @@ static inline void *kzalloc_node(size_t size, gfp_t flags, int node)
>>  
>>  void __init kmem_cache_init_late(void);
>>  
>> +/*
>> + * Check whether a page is allocated/managed by SLAB/SLUB/SLOB allocator.
>> + * Defined as macro instead of function to avoid header file pollution.
>> + */
>> +#define page_managed_by_slab(page)	(!!PageSlab(compound_trans_head(page)))
>> +#define mem_managed_by_slab(addr)	page_managed_by_slab(virt_to_page(addr))
>> +
>>  #endif	/* _LINUX_SLAB_H */
>> diff --git a/mm/memory-failure.c b/mm/memory-failure.c
>> index ab1e714..684e7f7 100644
>> --- a/mm/memory-failure.c
>> +++ b/mm/memory-failure.c
>> @@ -88,7 +88,7 @@ static int hwpoison_filter_dev(struct page *p)
>>  	/*
>>  	 * page_mapping() does not accept slab pages.
>>  	 */
>> -	if (PageSlab(p))
>> +	if (page_managed_by_slab(p))
>>  		return -EINVAL;
>>  
>>  	mapping = page_mapping(p);
>> @@ -233,7 +233,7 @@ static int kill_proc(struct task_struct *t, unsigned long addr, int trapno,
>>   */
>>  void shake_page(struct page *p, int access)
>>  {
>> -	if (!PageSlab(p)) {
>> +	if (!page_managed_by_slab(p)) {
>>  		lru_add_drain_all();
>>  		if (PageLRU(p))
>>  			return;
>> @@ -862,7 +862,7 @@ static int hwpoison_user_mappings(struct page *p, unsigned long pfn,
>>  	struct page *hpage = compound_head(p);
>>  	struct page *ppage;
>>  
>> -	if (PageReserved(p) || PageSlab(p))
>> +	if (PageReserved(p) || page_managed_by_slab(p))
>>  		return SWAP_SUCCESS;
>>  
>>  	/*
>> diff --git a/mm/sparse.c b/mm/sparse.c
>> index 6a4bf91..32a908b 100644
>> --- a/mm/sparse.c
>> +++ b/mm/sparse.c
>> @@ -688,17 +688,15 @@ static void free_map_bootmem(struct page *page, unsigned long nr_pages)
>>  
>>  static void free_section_usemap(struct page *memmap, unsigned long *usemap)
>>  {
>> -	struct page *usemap_page;
>>  	unsigned long nr_pages;
>>  
>>  	if (!usemap)
>>  		return;
>>  
>> -	usemap_page = virt_to_page(usemap);
>>  	/*
>>  	 * Check to see if allocation came from hot-plug-add
>>  	 */
>> -	if (PageSlab(usemap_page)) {
>> +	if (mem_managed_by_slab(usemap)) {
>>  		kfree(usemap);
>>  		if (memmap)
>>  			__kfree_section_memmap(memmap, PAGES_PER_SECTION);
> 
> 
> .
> 



^ permalink raw reply	[flat|nested] 17+ messages in thread

end of thread, other threads:[~2012-09-04 12:15 UTC | newest]

Thread overview: 17+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2012-07-03  3:57 [RFC PATCH 1/4] mm: introduce a safer interface to check whether a page is managed by SLxB Jiang Liu
2012-07-03  3:57 ` [RFC PATCH 2/4] mm: make consistent use of PG_slab flag Jiang Liu
2012-07-05 14:47   ` Christoph Lameter
2012-07-05 16:15     ` Jiang Liu
2012-07-05 17:37       ` Christoph Lameter
2012-07-06  8:30         ` Jiang Liu
2012-07-06 13:53           ` Christoph Lameter
2012-07-03  3:57 ` [RFC PATCH 3/4] SLAB: minor code cleanup Jiang Liu
2012-07-03  3:57 ` [RFC PATCH 4/4] mm: change slob's struct page definition to accomodate struct page changes Jiang Liu
2012-07-05 14:45 ` [RFC PATCH 1/4] mm: introduce a safer interface to check whether a page is managed by SLxB Christoph Lameter
2012-07-05 15:55   ` Jiang Liu
2012-07-05 17:36     ` Christoph Lameter
2012-07-06  7:29       ` Jiang Liu
2012-07-06 13:50         ` Christoph Lameter
2012-07-06 15:36           ` Jiang Liu
2012-09-04  9:18 ` Wen Congyang
2012-09-04 12:13   ` Jiang Liu

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).