From: akpm@linux-foundation.org
To: akpm@linux-foundation.org, cai@lca.pw, dan.j.williams@intel.com,
david@redhat.com, linux-mm@kvack.org, mm-commits@vger.kernel.org,
osalvador@suse.de, richardw.yang@linux.intel.com,
rppt@linux.ibm.com, torvalds@linux-foundation.org,
yeyunfeng@huawei.com
Subject: [patch 122/158] mm: support memblock alloc on the exact node for sparse_buffer_init()
Date: Sat, 30 Nov 2019 17:56:27 -0800 [thread overview]
Message-ID: <20191201015627.rHM34V-TS%akpm@linux-foundation.org> (raw)
From: Yunfeng Ye <yeyunfeng@huawei.com>
Subject: mm: support memblock alloc on the exact node for sparse_buffer_init()
sparse_buffer_init() uses memblock_alloc_try_nid_raw() to allocate memory
for the page management structures.  If the allocation from the specified
node fails, it falls back to allocating from other nodes.
Normally, the page management structures do not exceed 2% of total
memory, but they need one large contiguous allocation.  In most cases,
allocating from the specified node succeeds, but it fails on a node whose
memory has become highly fragmented.  In that case we would rather
allocate memory section by section than allocate one large block of
memory from other NUMA nodes.
Add memblock_alloc_exact_nid_raw() for this situation; it allocates a boot
memory block on the exact node only.  If the large contiguous allocation
fails in sparse_buffer_init(), allocation falls back to small per-section
blocks of memory.
Link: http://lkml.kernel.org/r/66755ea7-ab10-8882-36fd-3e02b03775d5@huawei.com
Signed-off-by: Yunfeng Ye <yeyunfeng@huawei.com>
Reviewed-by: Mike Rapoport <rppt@linux.ibm.com>
Cc: Wei Yang <richardw.yang@linux.intel.com>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Qian Cai <cai@lca.pw>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
include/linux/memblock.h | 3 +
mm/memblock.c | 65 ++++++++++++++++++++++++++++++-------
mm/sparse.c | 2 -
3 files changed, 58 insertions(+), 12 deletions(-)
--- a/include/linux/memblock.h~mm-support-memblock-alloc-on-the-exact-node-for-sparse_buffer_init
+++ a/include/linux/memblock.h
@@ -358,6 +358,9 @@ static inline phys_addr_t memblock_phys_
MEMBLOCK_ALLOC_ACCESSIBLE);
}
+void *memblock_alloc_exact_nid_raw(phys_addr_t size, phys_addr_t align,
+ phys_addr_t min_addr, phys_addr_t max_addr,
+ int nid);
void *memblock_alloc_try_nid_raw(phys_addr_t size, phys_addr_t align,
phys_addr_t min_addr, phys_addr_t max_addr,
int nid);
--- a/mm/memblock.c~mm-support-memblock-alloc-on-the-exact-node-for-sparse_buffer_init
+++ a/mm/memblock.c
@@ -1319,12 +1319,13 @@ __next_mem_pfn_range_in_zone(u64 *idx, s
* @start: the lower bound of the memory region to allocate (phys address)
* @end: the upper bound of the memory region to allocate (phys address)
* @nid: nid of the free area to find, %NUMA_NO_NODE for any node
+ * @exact_nid: control the allocation fall back to other nodes
*
* The allocation is performed from memory region limited by
* memblock.current_limit if @end == %MEMBLOCK_ALLOC_ACCESSIBLE.
*
- * If the specified node can not hold the requested memory the
- * allocation falls back to any node in the system
+ * If the specified node can not hold the requested memory and @exact_nid
+ * is false, the allocation falls back to any node in the system.
*
* For systems with memory mirroring, the allocation is attempted first
* from the regions with mirroring enabled and then retried from any
@@ -1338,7 +1339,8 @@ __next_mem_pfn_range_in_zone(u64 *idx, s
*/
static phys_addr_t __init memblock_alloc_range_nid(phys_addr_t size,
phys_addr_t align, phys_addr_t start,
- phys_addr_t end, int nid)
+ phys_addr_t end, int nid,
+ bool exact_nid)
{
enum memblock_flags flags = choose_memblock_flags();
phys_addr_t found;
@@ -1358,7 +1360,7 @@ again:
if (found && !memblock_reserve(found, size))
goto done;
- if (nid != NUMA_NO_NODE) {
+ if (nid != NUMA_NO_NODE && !exact_nid) {
found = memblock_find_in_range_node(size, align, start,
end, NUMA_NO_NODE,
flags);
@@ -1406,7 +1408,8 @@ phys_addr_t __init memblock_phys_alloc_r
phys_addr_t start,
phys_addr_t end)
{
- return memblock_alloc_range_nid(size, align, start, end, NUMA_NO_NODE);
+ return memblock_alloc_range_nid(size, align, start, end, NUMA_NO_NODE,
+ false);
}
/**
@@ -1425,7 +1428,7 @@ phys_addr_t __init memblock_phys_alloc_r
phys_addr_t __init memblock_phys_alloc_try_nid(phys_addr_t size, phys_addr_t align, int nid)
{
return memblock_alloc_range_nid(size, align, 0,
- MEMBLOCK_ALLOC_ACCESSIBLE, nid);
+ MEMBLOCK_ALLOC_ACCESSIBLE, nid, false);
}
/**
@@ -1435,6 +1438,7 @@ phys_addr_t __init memblock_phys_alloc_t
* @min_addr: the lower bound of the memory region to allocate (phys address)
* @max_addr: the upper bound of the memory region to allocate (phys address)
* @nid: nid of the free area to find, %NUMA_NO_NODE for any node
+ * @exact_nid: control the allocation fall back to other nodes
*
* Allocates memory block using memblock_alloc_range_nid() and
* converts the returned physical address to virtual.
@@ -1450,7 +1454,7 @@ phys_addr_t __init memblock_phys_alloc_t
static void * __init memblock_alloc_internal(
phys_addr_t size, phys_addr_t align,
phys_addr_t min_addr, phys_addr_t max_addr,
- int nid)
+ int nid, bool exact_nid)
{
phys_addr_t alloc;
@@ -1465,11 +1469,13 @@ static void * __init memblock_alloc_inte
if (max_addr > memblock.current_limit)
max_addr = memblock.current_limit;
- alloc = memblock_alloc_range_nid(size, align, min_addr, max_addr, nid);
+ alloc = memblock_alloc_range_nid(size, align, min_addr, max_addr, nid,
+ exact_nid);
/* retry allocation without lower limit */
if (!alloc && min_addr)
- alloc = memblock_alloc_range_nid(size, align, 0, max_addr, nid);
+ alloc = memblock_alloc_range_nid(size, align, 0, max_addr, nid,
+ exact_nid);
if (!alloc)
return NULL;
@@ -1478,6 +1484,43 @@ static void * __init memblock_alloc_inte
}
/**
+ * memblock_alloc_exact_nid_raw - allocate boot memory block on the exact node
+ * without zeroing memory
+ * @size: size of memory block to be allocated in bytes
+ * @align: alignment of the region and block's size
+ * @min_addr: the lower bound of the memory region from where the allocation
+ * is preferred (phys address)
+ * @max_addr: the upper bound of the memory region from where the allocation
+ * is preferred (phys address), or %MEMBLOCK_ALLOC_ACCESSIBLE to
+ * allocate only from memory limited by memblock.current_limit value
+ * @nid: nid of the free area to find, %NUMA_NO_NODE for any node
+ *
+ * Public function, provides additional debug information (including caller
+ * info), if enabled. Does not zero allocated memory.
+ *
+ * Return:
+ * Virtual address of allocated memory block on success, NULL on failure.
+ */
+void * __init memblock_alloc_exact_nid_raw(
+ phys_addr_t size, phys_addr_t align,
+ phys_addr_t min_addr, phys_addr_t max_addr,
+ int nid)
+{
+ void *ptr;
+
+ memblock_dbg("%s: %llu bytes align=0x%llx nid=%d from=%pa max_addr=%pa %pS\n",
+ __func__, (u64)size, (u64)align, nid, &min_addr,
+ &max_addr, (void *)_RET_IP_);
+
+ ptr = memblock_alloc_internal(size, align,
+ min_addr, max_addr, nid, true);
+ if (ptr && size > 0)
+ page_init_poison(ptr, size);
+
+ return ptr;
+}
+
+/**
* memblock_alloc_try_nid_raw - allocate boot memory block without zeroing
* memory and without panicking
* @size: size of memory block to be allocated in bytes
@@ -1508,7 +1551,7 @@ void * __init memblock_alloc_try_nid_raw
&max_addr, (void *)_RET_IP_);
ptr = memblock_alloc_internal(size, align,
- min_addr, max_addr, nid);
+ min_addr, max_addr, nid, false);
if (ptr && size > 0)
page_init_poison(ptr, size);
@@ -1543,7 +1586,7 @@ void * __init memblock_alloc_try_nid(
__func__, (u64)size, (u64)align, nid, &min_addr,
&max_addr, (void *)_RET_IP_);
ptr = memblock_alloc_internal(size, align,
- min_addr, max_addr, nid);
+ min_addr, max_addr, nid, false);
if (ptr)
memset(ptr, 0, size);
--- a/mm/sparse.c~mm-support-memblock-alloc-on-the-exact-node-for-sparse_buffer_init
+++ a/mm/sparse.c
@@ -486,7 +486,7 @@ static void __init sparse_buffer_init(un
* and we want it to be properly aligned to the section size - this is
* especially the case for VMEMMAP which maps memmap to PMDs
*/
- sparsemap_buf = memblock_alloc_try_nid_raw(size, section_map_size(),
+ sparsemap_buf = memblock_alloc_exact_nid_raw(size, section_map_size(),
addr, MEMBLOCK_ALLOC_ACCESSIBLE, nid);
sparsemap_buf_end = sparsemap_buf + size;
}
_
reply other threads:[~2019-12-01 1:56 UTC|newest]
Thread overview: [no followups] expand[flat|nested] mbox.gz Atom feed
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20191201015627.rHM34V-TS%akpm@linux-foundation.org \
--to=akpm@linux-foundation.org \
--cc=cai@lca.pw \
--cc=dan.j.williams@intel.com \
--cc=david@redhat.com \
--cc=linux-mm@kvack.org \
--cc=mm-commits@vger.kernel.org \
--cc=osalvador@suse.de \
--cc=richardw.yang@linux.intel.com \
--cc=rppt@linux.ibm.com \
--cc=torvalds@linux-foundation.org \
--cc=yeyunfeng@huawei.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).