* [PATCH 1/3] mm, debug_pagelloc: use static keys to enable debugging
2019-06-03 14:34 [PATCH 0/3] debug_pagealloc improvements Vlastimil Babka
@ 2019-06-03 14:34 ` Vlastimil Babka
2019-06-03 14:34 ` [PATCH 2/3] mm, page_alloc: more extensive free page checking with debug_pagealloc Vlastimil Babka
2019-06-03 14:34 ` [PATCH 3/3] mm, debug_pagealloc: use a page type instead of page_ext flag Vlastimil Babka
2 siblings, 0 replies; 4+ messages in thread
From: Vlastimil Babka @ 2019-06-03 14:34 UTC (permalink / raw)
To: linux-mm
Cc: linux-kernel, Andrew Morton, Kirill A. Shutemov, Michal Hocko,
Vlastimil Babka, Joonsoo Kim, Matthew Wilcox, Mel Gorman
CONFIG_DEBUG_PAGEALLOC has been redesigned by 031bc5743f15
("mm/debug-pagealloc: make debug-pagealloc boottime configurable") to allow
being always enabled in a distro kernel, but only perform its expensive
functionality when booted with debug_pagelloc=on. We can further reduce
the overhead when not boot-enabled (including page allocator fast paths) using
static keys. This patch introduces one for debug_pagealloc core functionality,
and another for the optional guard page functionality (enabled by booting with
debug_guardpage_minorder=X).
Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
---
include/linux/mm.h | 15 +++++++++++----
mm/page_alloc.c | 23 +++++++++++++++++------
2 files changed, 28 insertions(+), 10 deletions(-)
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 0e8834ac32b7..c71ed22769f3 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2685,11 +2685,18 @@ static inline void kernel_poison_pages(struct page *page, int numpages,
int enable) { }
#endif
-extern bool _debug_pagealloc_enabled;
+#ifdef CONFIG_DEBUG_PAGEALLOC_ENABLE_DEFAULT
+DECLARE_STATIC_KEY_TRUE(_debug_pagealloc_enabled);
+#else
+DECLARE_STATIC_KEY_FALSE(_debug_pagealloc_enabled);
+#endif
static inline bool debug_pagealloc_enabled(void)
{
- return IS_ENABLED(CONFIG_DEBUG_PAGEALLOC) && _debug_pagealloc_enabled;
+ if (!IS_ENABLED(CONFIG_DEBUG_PAGEALLOC))
+ return false;
+
+ return static_branch_unlikely(&_debug_pagealloc_enabled);
}
#if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_ARCH_HAS_SET_DIRECT_MAP)
@@ -2843,7 +2850,7 @@ extern struct page_ext_operations debug_guardpage_ops;
#ifdef CONFIG_DEBUG_PAGEALLOC
extern unsigned int _debug_guardpage_minorder;
-extern bool _debug_guardpage_enabled;
+DECLARE_STATIC_KEY_FALSE(_debug_guardpage_enabled);
static inline unsigned int debug_guardpage_minorder(void)
{
@@ -2852,7 +2859,7 @@ static inline unsigned int debug_guardpage_minorder(void)
static inline bool debug_guardpage_enabled(void)
{
- return _debug_guardpage_enabled;
+ return static_branch_unlikely(&_debug_guardpage_enabled);
}
static inline bool page_is_guard(struct page *page)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index d66bc8abe0af..639f1f9e74c5 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -646,16 +646,27 @@ void prep_compound_page(struct page *page, unsigned int order)
#ifdef CONFIG_DEBUG_PAGEALLOC
unsigned int _debug_guardpage_minorder;
-bool _debug_pagealloc_enabled __read_mostly
- = IS_ENABLED(CONFIG_DEBUG_PAGEALLOC_ENABLE_DEFAULT);
+
+#ifdef CONFIG_DEBUG_PAGEALLOC_ENABLE_DEFAULT
+DEFINE_STATIC_KEY_TRUE(_debug_pagealloc_enabled);
+#else
+DEFINE_STATIC_KEY_FALSE(_debug_pagealloc_enabled);
+#endif
EXPORT_SYMBOL(_debug_pagealloc_enabled);
-bool _debug_guardpage_enabled __read_mostly;
+
+DEFINE_STATIC_KEY_FALSE(_debug_guardpage_enabled);
static int __init early_debug_pagealloc(char *buf)
{
- if (!buf)
+ bool enable = false;
+
+ if (kstrtobool(buf, &enable))
return -EINVAL;
- return kstrtobool(buf, &_debug_pagealloc_enabled);
+
+ if (enable)
+ static_branch_enable(&_debug_pagealloc_enabled);
+
+ return 0;
}
early_param("debug_pagealloc", early_debug_pagealloc);
@@ -679,7 +690,7 @@ static void init_debug_guardpage(void)
if (!debug_guardpage_minorder())
return;
- _debug_guardpage_enabled = true;
+ static_branch_enable(&_debug_guardpage_enabled);
}
struct page_ext_operations debug_guardpage_ops = {
--
2.21.0
^ permalink raw reply related [flat|nested] 4+ messages in thread
* [PATCH 2/3] mm, page_alloc: more extensive free page checking with debug_pagealloc
2019-06-03 14:34 [PATCH 0/3] debug_pagealloc improvements Vlastimil Babka
2019-06-03 14:34 ` [PATCH 1/3] mm, debug_pagelloc: use static keys to enable debugging Vlastimil Babka
@ 2019-06-03 14:34 ` Vlastimil Babka
2019-06-03 14:34 ` [PATCH 3/3] mm, debug_pagealloc: use a page type instead of page_ext flag Vlastimil Babka
2 siblings, 0 replies; 4+ messages in thread
From: Vlastimil Babka @ 2019-06-03 14:34 UTC (permalink / raw)
To: linux-mm
Cc: linux-kernel, Andrew Morton, Kirill A. Shutemov, Michal Hocko,
Vlastimil Babka, Mel Gorman, Joonsoo Kim, Matthew Wilcox
The page allocator checks struct pages for expected state (mapcount, flags etc)
as pages are being allocated (check_new_page()) and freed (free_pages_check())
to provide some defense against errors in page allocator users. Prior commits
479f854a207c ("mm, page_alloc: defer debugging checks of pages allocated from
the PCP") and 4db7548ccbd9 ("mm, page_alloc: defer debugging checks of freed
pages until a PCP drain") this has happened for order-0 pages as they were
allocated from or freed to the per-cpu caches (pcplists). Since those are fast
paths, the checks are now performed only when pages are moved between pcplists
and global free lists. This however lowers the chances of catching errors soon
enough.
In order to increase the chances of the checks to catch errors, the kernel has
to be rebuilt with CONFIG_DEBUG_VM, which also enables multiple other internal
debug checks (VM_BUG_ON() etc), which is suboptimal when the goal is to catch
errors in mm users, not in mm code itself.
To catch some wrong users of page allocator, we have CONFIG_DEBUG_PAGEALLOC,
which is designed to have virtually no overhead unless enabled at boot time.
Memory corruptions when writing to freed pages have often the same underlying
errors (use-after-free, double free) as corrupting the corresponding struct
pages, so this existing debugging functionality is a good fit to extend by
also perform struct page checks at least as often as if CONFIG_DEBUG_VM was
enabled.
Specifically, after this patch, when debug_pagealloc is enabled on boot, and
CONFIG_DEBUG_VM disabled, pages are checked when allocated from or freed to the
pcplists *in addition* to being moved between pcplists and free lists. When
both debug_pagealloc and CONFIG_DEBUG_VM are enabled, pages are checked when
being moved between pcplists and free lists *in addition* to when allocated
from or freed to the pcplists.
When debug_pagealloc is not enabled on boot, the overhead in fast paths should
be virtually none thanks to the use of static key.
Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Mel Gorman <mgorman@techsingularity.net>
---
mm/Kconfig.debug | 13 ++++++++----
mm/page_alloc.c | 53 +++++++++++++++++++++++++++++++++++++++---------
2 files changed, 52 insertions(+), 14 deletions(-)
diff --git a/mm/Kconfig.debug b/mm/Kconfig.debug
index fa6d79281368..a35ab6c55192 100644
--- a/mm/Kconfig.debug
+++ b/mm/Kconfig.debug
@@ -19,12 +19,17 @@ config DEBUG_PAGEALLOC
Depending on runtime enablement, this results in a small or large
slowdown, but helps to find certain types of memory corruption.
+ Also, the state of page tracking structures is checked more often as
+ pages are being allocated and freed, as unexpected state changes
+ often happen for same reasons as memory corruption (e.g. double free,
+ use-after-free).
+
For architectures which don't enable ARCH_SUPPORTS_DEBUG_PAGEALLOC,
fill the pages with poison patterns after free_pages() and verify
- the patterns before alloc_pages(). Additionally,
- this option cannot be enabled in combination with hibernation as
- that would result in incorrect warnings of memory corruption after
- a resume because free pages are not saved to the suspend image.
+ the patterns before alloc_pages(). Additionally, this option cannot
+ be enabled in combination with hibernation as that would result in
+ incorrect warnings of memory corruption after a resume because free
+ pages are not saved to the suspend image.
By default this option will have a small overhead, e.g. by not
allowing the kernel mapping to be backed by large pages on some
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 639f1f9e74c5..e6248e391358 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1162,19 +1162,36 @@ static __always_inline bool free_pages_prepare(struct page *page,
}
#ifdef CONFIG_DEBUG_VM
-static inline bool free_pcp_prepare(struct page *page)
+/*
+ * With DEBUG_VM enabled, order-0 pages are checked immediately when being freed
+ * to pcp lists. With debug_pagealloc also enabled, they are also rechecked when
+ * moved from pcp lists to free lists.
+ */
+static bool free_pcp_prepare(struct page *page)
{
return free_pages_prepare(page, 0, true);
}
-static inline bool bulkfree_pcp_prepare(struct page *page)
+static bool bulkfree_pcp_prepare(struct page *page)
{
- return false;
+ if (debug_pagealloc_enabled())
+ return free_pages_check(page);
+ else
+ return false;
}
#else
+/*
+ * With DEBUG_VM disabled, order-0 pages being freed are checked only when
+ * moving from pcp lists to free list in order to reduce overhead. With
+ * debug_pagealloc enabled, they are checked also immediately when being freed
+ * to the pcp lists.
+ */
static bool free_pcp_prepare(struct page *page)
{
- return free_pages_prepare(page, 0, false);
+ if (debug_pagealloc_enabled())
+ return free_pages_prepare(page, 0, true);
+ else
+ return free_pages_prepare(page, 0, false);
}
static bool bulkfree_pcp_prepare(struct page *page)
@@ -2036,23 +2053,39 @@ static inline bool free_pages_prezeroed(void)
}
#ifdef CONFIG_DEBUG_VM
-static bool check_pcp_refill(struct page *page)
+/*
+ * With DEBUG_VM enabled, order-0 pages are checked for expected state when
+ * being allocated from pcp lists. With debug_pagealloc also enabled, they are
+ * also checked when pcp lists are refilled from the free lists.
+ */
+static inline bool check_pcp_refill(struct page *page)
{
- return false;
+ if (debug_pagealloc_enabled())
+ return check_new_page(page);
+ else
+ return false;
}
-static bool check_new_pcp(struct page *page)
+static inline bool check_new_pcp(struct page *page)
{
return check_new_page(page);
}
#else
-static bool check_pcp_refill(struct page *page)
+/*
+ * With DEBUG_VM disabled, free order-0 pages are checked for expected state
+ * when pcp lists are being refilled from the free lists. With debug_pagealloc
+ * enabled, they are also checked when being allocated from the pcp lists.
+ */
+static inline bool check_pcp_refill(struct page *page)
{
return check_new_page(page);
}
-static bool check_new_pcp(struct page *page)
+static inline bool check_new_pcp(struct page *page)
{
- return false;
+ if (debug_pagealloc_enabled())
+ return check_new_page(page);
+ else
+ return false;
}
#endif /* CONFIG_DEBUG_VM */
--
2.21.0
^ permalink raw reply related [flat|nested] 4+ messages in thread
* [PATCH 3/3] mm, debug_pagealloc: use a page type instead of page_ext flag
2019-06-03 14:34 [PATCH 0/3] debug_pagealloc improvements Vlastimil Babka
2019-06-03 14:34 ` [PATCH 1/3] mm, debug_pagelloc: use static keys to enable debugging Vlastimil Babka
2019-06-03 14:34 ` [PATCH 2/3] mm, page_alloc: more extensive free page checking with debug_pagealloc Vlastimil Babka
@ 2019-06-03 14:34 ` Vlastimil Babka
2 siblings, 0 replies; 4+ messages in thread
From: Vlastimil Babka @ 2019-06-03 14:34 UTC (permalink / raw)
To: linux-mm
Cc: linux-kernel, Andrew Morton, Kirill A. Shutemov, Michal Hocko,
Vlastimil Babka, Joonsoo Kim, Matthew Wilcox, Mel Gorman
When debug_pagealloc is enabled, we currently allocate the page_ext array to
mark guard pages with the PAGE_EXT_DEBUG_GUARD flag. Now that we have the
page_type field in struct page, we can use that instead, as guard pages are
neither PageSlab nor mapped to userspace. This reduces memory overhead when
debug_pagealloc is enabled and there are no other features requiring the
page_ext array.
Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Matthew Wilcox <willy@infradead.org>
---
.../admin-guide/kernel-parameters.txt | 10 ++---
include/linux/mm.h | 10 +----
include/linux/page-flags.h | 6 +++
include/linux/page_ext.h | 1 -
mm/Kconfig.debug | 1 -
mm/page_alloc.c | 40 +++----------------
mm/page_ext.c | 3 --
7 files changed, 17 insertions(+), 54 deletions(-)
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 138f6664b2e2..32003e76ba3b 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -805,12 +805,10 @@
tracking down these problems.
debug_pagealloc=
- [KNL] When CONFIG_DEBUG_PAGEALLOC is set, this
- parameter enables the feature at boot time. In
- default, it is disabled. We can avoid allocating huge
- chunk of memory for debug pagealloc if we don't enable
- it at boot time and the system will work mostly same
- with the kernel built without CONFIG_DEBUG_PAGEALLOC.
+ [KNL] When CONFIG_DEBUG_PAGEALLOC is set, this parameter
+ enables the feature at boot time. By default, it is
+ disabled and the system will work mostly the same as a
+ kernel built without CONFIG_DEBUG_PAGEALLOC.
on: enable the feature
debugpat [X86] Enable PAT debugging
diff --git a/include/linux/mm.h b/include/linux/mm.h
index c71ed22769f3..2ba991e687db 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2846,8 +2846,6 @@ extern long copy_huge_page_from_user(struct page *dst_page,
bool allow_pagefault);
#endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_HUGETLBFS */
-extern struct page_ext_operations debug_guardpage_ops;
-
#ifdef CONFIG_DEBUG_PAGEALLOC
extern unsigned int _debug_guardpage_minorder;
DECLARE_STATIC_KEY_FALSE(_debug_guardpage_enabled);
@@ -2864,16 +2862,10 @@ static inline bool debug_guardpage_enabled(void)
static inline bool page_is_guard(struct page *page)
{
- struct page_ext *page_ext;
-
if (!debug_guardpage_enabled())
return false;
- page_ext = lookup_page_ext(page);
- if (unlikely(!page_ext))
- return false;
-
- return test_bit(PAGE_EXT_DEBUG_GUARD, &page_ext->flags);
+ return PageGuard(page);
}
#else
static inline unsigned int debug_guardpage_minorder(void) { return 0; }
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 9f8712a4b1a5..b848517da64c 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -703,6 +703,7 @@ PAGEFLAG_FALSE(DoubleMap)
#define PG_offline 0x00000100
#define PG_kmemcg 0x00000200
#define PG_table 0x00000400
+#define PG_guard 0x00000800
#define PageType(page, flag) \
((page->page_type & (PAGE_TYPE_BASE | flag)) == PAGE_TYPE_BASE)
@@ -754,6 +755,11 @@ PAGE_TYPE_OPS(Kmemcg, kmemcg)
*/
PAGE_TYPE_OPS(Table, table)
+/*
+ * Marks guardpages used with debug_pagealloc.
+ */
+PAGE_TYPE_OPS(Guard, guard)
+
extern bool is_free_buddy_page(struct page *page);
__PAGEFLAG(Isolated, isolated, PF_ANY);
diff --git a/include/linux/page_ext.h b/include/linux/page_ext.h
index f84f167ec04c..09592951725c 100644
--- a/include/linux/page_ext.h
+++ b/include/linux/page_ext.h
@@ -17,7 +17,6 @@ struct page_ext_operations {
#ifdef CONFIG_PAGE_EXTENSION
enum page_ext_flags {
- PAGE_EXT_DEBUG_GUARD,
PAGE_EXT_OWNER,
#if defined(CONFIG_IDLE_PAGE_TRACKING) && !defined(CONFIG_64BIT)
PAGE_EXT_YOUNG,
diff --git a/mm/Kconfig.debug b/mm/Kconfig.debug
index a35ab6c55192..82b6a20898bd 100644
--- a/mm/Kconfig.debug
+++ b/mm/Kconfig.debug
@@ -12,7 +12,6 @@ config DEBUG_PAGEALLOC
bool "Debug page memory allocations"
depends on DEBUG_KERNEL
depends on !HIBERNATION || ARCH_SUPPORTS_DEBUG_PAGEALLOC && !PPC && !SPARC
- select PAGE_EXTENSION
select PAGE_POISONING if !ARCH_SUPPORTS_DEBUG_PAGEALLOC
---help---
Unmap pages from the kernel linear mapping after free_pages().
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index e6248e391358..b178f297df68 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -50,7 +50,6 @@
#include <linux/backing-dev.h>
#include <linux/fault-inject.h>
#include <linux/page-isolation.h>
-#include <linux/page_ext.h>
#include <linux/debugobjects.h>
#include <linux/kmemleak.h>
#include <linux/compaction.h>
@@ -670,18 +669,6 @@ static int __init early_debug_pagealloc(char *buf)
}
early_param("debug_pagealloc", early_debug_pagealloc);
-static bool need_debug_guardpage(void)
-{
- /* If we don't use debug_pagealloc, we don't need guard page */
- if (!debug_pagealloc_enabled())
- return false;
-
- if (!debug_guardpage_minorder())
- return false;
-
- return true;
-}
-
static void init_debug_guardpage(void)
{
if (!debug_pagealloc_enabled())
@@ -693,11 +680,6 @@ static void init_debug_guardpage(void)
static_branch_enable(&_debug_guardpage_enabled);
}
-struct page_ext_operations debug_guardpage_ops = {
- .need = need_debug_guardpage,
- .init = init_debug_guardpage,
-};
-
static int __init debug_guardpage_minorder_setup(char *buf)
{
unsigned long res;
@@ -715,20 +697,13 @@ early_param("debug_guardpage_minorder", debug_guardpage_minorder_setup);
static inline bool set_page_guard(struct zone *zone, struct page *page,
unsigned int order, int migratetype)
{
- struct page_ext *page_ext;
-
if (!debug_guardpage_enabled())
return false;
if (order >= debug_guardpage_minorder())
return false;
- page_ext = lookup_page_ext(page);
- if (unlikely(!page_ext))
- return false;
-
- __set_bit(PAGE_EXT_DEBUG_GUARD, &page_ext->flags);
-
+ __SetPageGuard(page);
INIT_LIST_HEAD(&page->lru);
set_page_private(page, order);
/* Guard pages are not available for any usage */
@@ -740,23 +715,16 @@ static inline bool set_page_guard(struct zone *zone, struct page *page,
static inline void clear_page_guard(struct zone *zone, struct page *page,
unsigned int order, int migratetype)
{
- struct page_ext *page_ext;
-
if (!debug_guardpage_enabled())
return;
- page_ext = lookup_page_ext(page);
- if (unlikely(!page_ext))
- return;
-
- __clear_bit(PAGE_EXT_DEBUG_GUARD, &page_ext->flags);
+ __ClearPageGuard(page);
set_page_private(page, 0);
if (!is_migrate_isolate(migratetype))
__mod_zone_freepage_state(zone, (1 << order), migratetype);
}
#else
-struct page_ext_operations debug_guardpage_ops;
static inline bool set_page_guard(struct zone *zone, struct page *page,
unsigned int order, int migratetype) { return false; }
static inline void clear_page_guard(struct zone *zone, struct page *page,
@@ -1931,6 +1899,10 @@ void __init page_alloc_init_late(void)
for_each_populated_zone(zone)
set_zone_contiguous(zone);
+
+#ifdef CONFIG_DEBUG_PAGEALLOC
+ init_debug_guardpage();
+#endif
}
#ifdef CONFIG_CMA
diff --git a/mm/page_ext.c b/mm/page_ext.c
index d8f1aca4ad43..5f5769c7db3b 100644
--- a/mm/page_ext.c
+++ b/mm/page_ext.c
@@ -59,9 +59,6 @@
*/
static struct page_ext_operations *page_ext_ops[] = {
-#ifdef CONFIG_DEBUG_PAGEALLOC
- &debug_guardpage_ops,
-#endif
#ifdef CONFIG_PAGE_OWNER
&page_owner_ops,
#endif
--
2.21.0
^ permalink raw reply related [flat|nested] 4+ messages in thread