On 06.10.16 11:52:07, Robert Richter wrote: > There is a memory setup problem on ThunderX systems with certain > memory configurations. The symptom is > > kernel BUG at mm/page_alloc.c:1848! > > This happens for some configs with 64k page size enabled. The bug > triggers for page zones with some pages in the zone not assigned to > this particular zone. In my case some pages that are marked as nomap > were not reassigned to the new zone of node 1, so those are still > assigned to node 0. > > The reason for the mis-configuration is a change in pfn_valid() which > reports pages marked nomap as invalid: > > 68709f45385a arm64: only consider memblocks with NOMAP cleared for linear mapping > > This causes pages marked as nomap to no longer be reassigned to the new > zone in memmap_init_zone() by calling __init_single_pfn(). > > Fixing this by restoring the old behavior of pfn_valid() to use > memblock_is_memory(). Also changing users of pfn_valid() in arm64 code > to use memblock_is_map_memory() where necessary. This only affects > code in ioremap.c. The code in mmu.c still can use the new version of > pfn_valid(). Below is a reproducer for non-NUMA systems. Note that invalidating the node id merely simulates what would be a different node on a real NUMA system. The patch injects a NOMAP memory range that is not aligned to pageblock_order at the end of a memory block and then tries to free that area. This triggers a BUG_ON() (log attached). 
-Robert >From 20d853e300c99be5420c7ee3f072c318804cac1b Mon Sep 17 00:00:00 2001 From: root Date: Tue, 1 Nov 2016 15:04:43 +0000 Subject: [PATCH] mm-fault-reproducer Signed-off-by: root --- arch/arm64/mm/init.c | 78 ++++++++++++++++++++++++++++++++++++++++++++++++++++ mm/page_alloc.c | 4 ++- 2 files changed, 81 insertions(+), 1 deletion(-) diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 21c489bdeb4e..feaa7ab97551 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #include @@ -301,6 +302,80 @@ void __init arm64_memblock_init(void) memblock_allow_resize(); } +static struct page *inject_pageblock; + +static void __init inject_nomap_create(void) +{ + phys_addr_t start, end; + unsigned long start_pfn, end_pfn; + u64 i; + int ret = -ENOMEM; + + pr_info("%s: PAGES_PER_SECTION=%08lx pageblock_nr_pages=%08lx PAGE_SIZE=%08lx\n", + __func__, PAGES_PER_SECTION, pageblock_nr_pages, PAGE_SIZE); + + /* + * find a mem range with a complet pageblock in it + */ + for_each_free_mem_range(i, NUMA_NO_NODE, MEMBLOCK_NONE, &start, &end, NULL) { + start_pfn = PFN_DOWN(start); + end_pfn = PFN_UP(end); + if (end_pfn - (start_pfn & ~(pageblock_nr_pages-1)) > 2 * pageblock_nr_pages) + break; + } + + if (i == ULLONG_MAX) + goto fail; + + start = PFN_PHYS(start_pfn); + end = PFN_PHYS(end_pfn) - 1; + + pr_info("%s: Injecting into range: [%pa-%pa]\n", __func__, &start, &end); + + /* mark the upper 5 pages nomap of a complete pageblock */ + start_pfn = end_pfn & ~(pageblock_nr_pages-1); + start_pfn -= 5; /* unalign by 5 pages */ + + start = PFN_PHYS(start_pfn); + end = PFN_PHYS(end_pfn) - 1; + + ret = memblock_mark_nomap(start, end - start + 1); + if (ret) + goto fail; + + inject_pageblock = pfn_to_page(start_pfn & ~(pageblock_nr_pages-1)); + + pr_info("%s: Injected nomap range at: [%pa-%pa] zones: %p %p\n", __func__, + &start, &end, page_zone(inject_pageblock), + page_zone(inject_pageblock + pageblock_nr_pages 
- 1)); + + return; +fail: + pr_err("%s: Could not inject_unaligned_range: %d\n", __func__, ret); +} + +static void __init inject_nomap_move(void) +{ + phys_addr_t start, end; + int ret; + + if (!inject_pageblock) + return; + + start = PFN_PHYS(page_to_pfn(inject_pageblock)); + end = PFN_PHYS(page_to_pfn(inject_pageblock) + pageblock_nr_pages) - 1; + + pr_info("%s: Moving [%pa-%pa] zones: %p %p\n", __func__, + &start, &end, page_zone(inject_pageblock), + page_zone(inject_pageblock + pageblock_nr_pages - 1)); + + ret = move_freepages_block(page_zone(inject_pageblock), + inject_pageblock, + gfpflags_to_migratetype(GFP_KERNEL)); + + pr_info("%s: Moved %d pages\n", __func__, ret); +} + void __init bootmem_init(void) { unsigned long min, max; @@ -320,6 +395,7 @@ void __init bootmem_init(void) arm64_memory_present(); sparse_init(); + inject_nomap_create(); zone_sizes_init(min, max); high_memory = __va((max << PAGE_SHIFT) - 1) + 1; @@ -479,6 +555,8 @@ void __init mem_init(void) */ sysctl_overcommit_memory = OVERCOMMIT_ALWAYS; } + + inject_nomap_move(); } void free_initmem(void) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 2b3bf6767d54..19d74637e242 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -5077,8 +5077,10 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone, if (context != MEMMAP_EARLY) goto not_early; - if (!early_pfn_valid(pfn)) + if (!early_pfn_valid(pfn)) { + set_page_node(pfn_to_page(pfn), NUMA_NO_NODE); continue; + } if (!early_pfn_in_nid(pfn, nid)) continue; if (!update_defer_init(pgdat, pfn, end_pfn, &nr_initialised)) -- 2.9.3