* [PATCH V2] mm: Support memblock alloc on the exact node for sparse_buffer_init()
@ 2019-09-24 8:09 Yunfeng Ye
From: Yunfeng Ye @ 2019-09-24 8:09 UTC (permalink / raw)
To: rppt, richardw.yang, akpm, osalvador, mhocko, dan.j.williams, david, cai
Cc: linux-mm, linux-kernel
sparse_buffer_init() uses memblock_alloc_try_nid_raw() to allocate memory
for the page management structures. If the allocation from the specified
node fails, it falls back to allocating from other nodes.

Normally the page management structures take no more than 2% of total
memory, but they need one large contiguous block. In most cases the
allocation from the specified node succeeds; it only fails when that
node's memory is highly fragmented. In this case we would rather allocate
the memory section by section on the requested node than allocate one
large block from other NUMA nodes.

Add memblock_alloc_exact_nid_raw() for this situation, which allocates a
boot memory block strictly on the requested node. If the large contiguous
allocation in sparse_buffer_init() fails, the code falls back to
allocating smaller, per-section blocks on that node.
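
The intended flow, roughly sketched (illustrative only, simplified from
mm/sparse.c; sparse_buffer_alloc() and section_map_size() are the existing
per-section helpers):

	/* Try one large, node-local buffer; no cross-node fallback. */
	sparsemap_buf = memblock_alloc_exact_nid_raw(size, PAGE_SIZE,
					__pa(MAX_DMA_ADDRESS),
					MEMBLOCK_ALLOC_ACCESSIBLE, nid);

	/* Later, for each section: small, node-local fallback allocations. */
	map = sparse_buffer_alloc(section_map_size());
	if (!map)
		map = memblock_alloc_try_nid(section_map_size(), PAGE_SIZE,
					     __pa(MAX_DMA_ADDRESS),
					     MEMBLOCK_ALLOC_ACCESSIBLE, nid);
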
Signed-off-by: Yunfeng Ye <yeyunfeng@huawei.com>
---
v1 -> v2:
- use memblock_alloc_exact_nid_raw() rather than using a flag
include/linux/memblock.h | 3 +++
mm/memblock.c | 66 ++++++++++++++++++++++++++++++++++++++++--------
mm/sparse.c | 2 +-
3 files changed, 59 insertions(+), 12 deletions(-)
diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index f491690..b38bbef 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -358,6 +358,9 @@ static inline phys_addr_t memblock_phys_alloc(phys_addr_t size,
MEMBLOCK_ALLOC_ACCESSIBLE);
}
+void *memblock_alloc_exact_nid_raw(phys_addr_t size, phys_addr_t align,
+ phys_addr_t min_addr, phys_addr_t max_addr,
+ int nid);
void *memblock_alloc_try_nid_raw(phys_addr_t size, phys_addr_t align,
phys_addr_t min_addr, phys_addr_t max_addr,
int nid);
diff --git a/mm/memblock.c b/mm/memblock.c
index 7d4f61a..a71869e 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -1323,12 +1323,13 @@ __next_mem_pfn_range_in_zone(u64 *idx, struct zone *zone,
* @start: the lower bound of the memory region to allocate (phys address)
* @end: the upper bound of the memory region to allocate (phys address)
* @nid: nid of the free area to find, %NUMA_NO_NODE for any node
+ * @need_exact_nid: control the allocation fall back to other nodes
*
* The allocation is performed from memory region limited by
* memblock.current_limit if @max_addr == %MEMBLOCK_ALLOC_ACCESSIBLE.
*
- * If the specified node can not hold the requested memory the
- * allocation falls back to any node in the system
+ * If the specified node can not hold the requested memory and @need_exact_nid
+ * is zero, the allocation falls back to any node in the system
*
* For systems with memory mirroring, the allocation is attempted first
* from the regions with mirroring enabled and then retried from any
@@ -1342,7 +1343,8 @@ __next_mem_pfn_range_in_zone(u64 *idx, struct zone *zone,
*/
static phys_addr_t __init memblock_alloc_range_nid(phys_addr_t size,
phys_addr_t align, phys_addr_t start,
- phys_addr_t end, int nid)
+ phys_addr_t end, int nid,
+ int need_exact_nid)
{
enum memblock_flags flags = choose_memblock_flags();
phys_addr_t found;
@@ -1365,7 +1367,7 @@ static phys_addr_t __init memblock_alloc_range_nid(phys_addr_t size,
if (found && !memblock_reserve(found, size))
goto done;
- if (nid != NUMA_NO_NODE) {
+ if (nid != NUMA_NO_NODE && !need_exact_nid) {
found = memblock_find_in_range_node(size, align, start,
end, NUMA_NO_NODE,
flags);
@@ -1413,7 +1415,8 @@ phys_addr_t __init memblock_phys_alloc_range(phys_addr_t size,
phys_addr_t start,
phys_addr_t end)
{
- return memblock_alloc_range_nid(size, align, start, end, NUMA_NO_NODE);
+ return memblock_alloc_range_nid(size, align, start, end, NUMA_NO_NODE,
+ 0);
}
/**
@@ -1432,7 +1435,7 @@ phys_addr_t __init memblock_phys_alloc_range(phys_addr_t size,
phys_addr_t __init memblock_phys_alloc_try_nid(phys_addr_t size, phys_addr_t align, int nid)
{
return memblock_alloc_range_nid(size, align, 0,
- MEMBLOCK_ALLOC_ACCESSIBLE, nid);
+ MEMBLOCK_ALLOC_ACCESSIBLE, nid, 0);
}
/**
@@ -1442,6 +1445,7 @@ phys_addr_t __init memblock_phys_alloc_try_nid(phys_addr_t size, phys_addr_t ali
* @min_addr: the lower bound of the memory region to allocate (phys address)
* @max_addr: the upper bound of the memory region to allocate (phys address)
* @nid: nid of the free area to find, %NUMA_NO_NODE for any node
+ * @need_exact_nid: control the allocation fall back to other nodes
*
* Allocates memory block using memblock_alloc_range_nid() and
* converts the returned physical address to virtual.
@@ -1457,7 +1461,7 @@ phys_addr_t __init memblock_phys_alloc_try_nid(phys_addr_t size, phys_addr_t ali
static void * __init memblock_alloc_internal(
phys_addr_t size, phys_addr_t align,
phys_addr_t min_addr, phys_addr_t max_addr,
- int nid)
+ int nid, int need_exact_nid)
{
phys_addr_t alloc;
@@ -1469,11 +1473,13 @@ static void * __init memblock_alloc_internal(
if (WARN_ON_ONCE(slab_is_available()))
return kzalloc_node(size, GFP_NOWAIT, nid);
- alloc = memblock_alloc_range_nid(size, align, min_addr, max_addr, nid);
+ alloc = memblock_alloc_range_nid(size, align, min_addr, max_addr, nid,
+ need_exact_nid);
/* retry allocation without lower limit */
if (!alloc && min_addr)
- alloc = memblock_alloc_range_nid(size, align, 0, max_addr, nid);
+ alloc = memblock_alloc_range_nid(size, align, 0, max_addr, nid,
+ need_exact_nid);
if (!alloc)
return NULL;
@@ -1482,6 +1488,44 @@ static void * __init memblock_alloc_internal(
}
/**
+ * memblock_alloc_exact_nid_raw - allocate boot memory block on the exact node,
+ * without zeroing memory and without panicking
+ * @size: size of memory block to be allocated in bytes
+ * @align: alignment of the region and block's size
+ * @min_addr: the lower bound of the memory region from where the allocation
+ * is preferred (phys address)
+ * @max_addr: the upper bound of the memory region from where the allocation
+ * is preferred (phys address), or %MEMBLOCK_ALLOC_ACCESSIBLE to
+ * allocate only from memory limited by memblock.current_limit value
+ * @nid: nid of the free area to find, %NUMA_NO_NODE for any node
+ *
+ * Public function, provides additional debug information (including caller
+ * info), if enabled. Does not zero allocated memory, does not panic if request
+ * cannot be satisfied.
+ *
+ * Return:
+ * Virtual address of allocated memory block on success, NULL on failure.
+ */
+void * __init memblock_alloc_exact_nid_raw(
+ phys_addr_t size, phys_addr_t align,
+ phys_addr_t min_addr, phys_addr_t max_addr,
+ int nid)
+{
+ void *ptr;
+
+ memblock_dbg("%s: %llu bytes align=0x%llx nid=%d from=%pa max_addr=%pa %pS\n",
+ __func__, (u64)size, (u64)align, nid, &min_addr,
+ &max_addr, (void *)_RET_IP_);
+
+ ptr = memblock_alloc_internal(size, align,
+ min_addr, max_addr, nid, 1);
+ if (ptr && size > 0)
+ page_init_poison(ptr, size);
+
+ return ptr;
+}
+
+/**
* memblock_alloc_try_nid_raw - allocate boot memory block without zeroing
* memory and without panicking
* @size: size of memory block to be allocated in bytes
@@ -1512,7 +1556,7 @@ void * __init memblock_alloc_try_nid_raw(
&max_addr, (void *)_RET_IP_);
ptr = memblock_alloc_internal(size, align,
- min_addr, max_addr, nid);
+ min_addr, max_addr, nid, 0);
if (ptr && size > 0)
page_init_poison(ptr, size);
@@ -1547,7 +1591,7 @@ void * __init memblock_alloc_try_nid(
__func__, (u64)size, (u64)align, nid, &min_addr,
&max_addr, (void *)_RET_IP_);
ptr = memblock_alloc_internal(size, align,
- min_addr, max_addr, nid);
+ min_addr, max_addr, nid, 0);
if (ptr)
memset(ptr, 0, size);
diff --git a/mm/sparse.c b/mm/sparse.c
index 72f010d..1a06471 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -475,7 +475,7 @@ static void __init sparse_buffer_init(unsigned long size, int nid)
phys_addr_t addr = __pa(MAX_DMA_ADDRESS);
WARN_ON(sparsemap_buf); /* forgot to call sparse_buffer_fini()? */
sparsemap_buf =
- memblock_alloc_try_nid_raw(size, PAGE_SIZE,
+ memblock_alloc_exact_nid_raw(size, PAGE_SIZE,
addr,
MEMBLOCK_ALLOC_ACCESSIBLE, nid);
sparsemap_buf_end = sparsemap_buf + size;
--
2.7.4.huawei.3
* Re: [PATCH V2] mm: Support memblock alloc on the exact node for sparse_buffer_init()
From: Mike Rapoport @ 2019-09-25 6:36 UTC (permalink / raw)
To: Yunfeng Ye
Cc: richardw.yang, akpm, osalvador, mhocko, dan.j.williams, david,
cai, linux-mm, linux-kernel
On Tue, Sep 24, 2019 at 04:09:32PM +0800, Yunfeng Ye wrote:
> sparse_buffer_init() uses memblock_alloc_try_nid_raw() to allocate memory
> for the page management structures. If the allocation from the specified
> node fails, it falls back to allocating from other nodes.
>
> Normally the page management structures take no more than 2% of total
> memory, but they need one large contiguous block. In most cases the
> allocation from the specified node succeeds; it only fails when that
> node's memory is highly fragmented. In this case we would rather allocate
> the memory section by section on the requested node than allocate one
> large block from other NUMA nodes.
>
> Add memblock_alloc_exact_nid_raw() for this situation, which allocates a
> boot memory block strictly on the requested node. If the large contiguous
> allocation in sparse_buffer_init() fails, the code falls back to
> allocating smaller, per-section blocks on that node.
>
> Signed-off-by: Yunfeng Ye <yeyunfeng@huawei.com>
> ---
> v1 -> v2:
> - use memblock_alloc_exact_nid_raw() rather than using a flag
>
> include/linux/memblock.h | 3 +++
> mm/memblock.c | 66 ++++++++++++++++++++++++++++++++++++++++--------
> mm/sparse.c | 2 +-
> 3 files changed, 59 insertions(+), 12 deletions(-)
>
> diff --git a/include/linux/memblock.h b/include/linux/memblock.h
> index f491690..b38bbef 100644
> --- a/include/linux/memblock.h
> +++ b/include/linux/memblock.h
> @@ -358,6 +358,9 @@ static inline phys_addr_t memblock_phys_alloc(phys_addr_t size,
> MEMBLOCK_ALLOC_ACCESSIBLE);
> }
>
> +void *memblock_alloc_exact_nid_raw(phys_addr_t size, phys_addr_t align,
> + phys_addr_t min_addr, phys_addr_t max_addr,
> + int nid);
> void *memblock_alloc_try_nid_raw(phys_addr_t size, phys_addr_t align,
> phys_addr_t min_addr, phys_addr_t max_addr,
> int nid);
> diff --git a/mm/memblock.c b/mm/memblock.c
> index 7d4f61a..a71869e 100644
> --- a/mm/memblock.c
> +++ b/mm/memblock.c
> @@ -1323,12 +1323,13 @@ __next_mem_pfn_range_in_zone(u64 *idx, struct zone *zone,
> * @start: the lower bound of the memory region to allocate (phys address)
> * @end: the upper bound of the memory region to allocate (phys address)
> * @nid: nid of the free area to find, %NUMA_NO_NODE for any node
> + * @need_exact_nid: control the allocation fall back to other nodes
> *
> * The allocation is performed from memory region limited by
> * memblock.current_limit if @max_addr == %MEMBLOCK_ALLOC_ACCESSIBLE.
> *
> - * If the specified node can not hold the requested memory the
> - * allocation falls back to any node in the system
> + * If the specified node can not hold the requested memory and @need_exact_nid
> + * is zero, the allocation falls back to any node in the system
> *
> * For systems with memory mirroring, the allocation is attempted first
> * from the regions with mirroring enabled and then retried from any
> @@ -1342,7 +1343,8 @@ __next_mem_pfn_range_in_zone(u64 *idx, struct zone *zone,
> */
> static phys_addr_t __init memblock_alloc_range_nid(phys_addr_t size,
> phys_addr_t align, phys_addr_t start,
> - phys_addr_t end, int nid)
> + phys_addr_t end, int nid,
> + int need_exact_nid)
Please make it 'bool exact_nid'
> {
> enum memblock_flags flags = choose_memblock_flags();
> phys_addr_t found;
> @@ -1365,7 +1367,7 @@ static phys_addr_t __init memblock_alloc_range_nid(phys_addr_t size,
> if (found && !memblock_reserve(found, size))
> goto done;
>
> - if (nid != NUMA_NO_NODE) {
> + if (nid != NUMA_NO_NODE && !need_exact_nid) {
> found = memblock_find_in_range_node(size, align, start,
> end, NUMA_NO_NODE,
> flags);
> @@ -1413,7 +1415,8 @@ phys_addr_t __init memblock_phys_alloc_range(phys_addr_t size,
> phys_addr_t start,
> phys_addr_t end)
> {
> - return memblock_alloc_range_nid(size, align, start, end, NUMA_NO_NODE);
> + return memblock_alloc_range_nid(size, align, start, end, NUMA_NO_NODE,
> + 0);
> }
>
> /**
> @@ -1432,7 +1435,7 @@ phys_addr_t __init memblock_phys_alloc_range(phys_addr_t size,
> phys_addr_t __init memblock_phys_alloc_try_nid(phys_addr_t size, phys_addr_t align, int nid)
> {
> return memblock_alloc_range_nid(size, align, 0,
> - MEMBLOCK_ALLOC_ACCESSIBLE, nid);
> + MEMBLOCK_ALLOC_ACCESSIBLE, nid, 0);
> }
>
> /**
> @@ -1442,6 +1445,7 @@ phys_addr_t __init memblock_phys_alloc_try_nid(phys_addr_t size, phys_addr_t ali
> * @min_addr: the lower bound of the memory region to allocate (phys address)
> * @max_addr: the upper bound of the memory region to allocate (phys address)
> * @nid: nid of the free area to find, %NUMA_NO_NODE for any node
> + * @need_exact_nid: control the allocation fall back to other nodes
> *
> * Allocates memory block using memblock_alloc_range_nid() and
> * converts the returned physical address to virtual.
> @@ -1457,7 +1461,7 @@ phys_addr_t __init memblock_phys_alloc_try_nid(phys_addr_t size, phys_addr_t ali
> static void * __init memblock_alloc_internal(
> phys_addr_t size, phys_addr_t align,
> phys_addr_t min_addr, phys_addr_t max_addr,
> - int nid)
> + int nid, int need_exact_nid)
Ditto.
> {
> phys_addr_t alloc;
>
> @@ -1469,11 +1473,13 @@ static void * __init memblock_alloc_internal(
> if (WARN_ON_ONCE(slab_is_available()))
> return kzalloc_node(size, GFP_NOWAIT, nid);
>
> - alloc = memblock_alloc_range_nid(size, align, min_addr, max_addr, nid);
> + alloc = memblock_alloc_range_nid(size, align, min_addr, max_addr, nid,
> + need_exact_nid);
>
> /* retry allocation without lower limit */
> if (!alloc && min_addr)
> - alloc = memblock_alloc_range_nid(size, align, 0, max_addr, nid);
> + alloc = memblock_alloc_range_nid(size, align, 0, max_addr, nid,
> + need_exact_nid);
>
> if (!alloc)
> return NULL;
> @@ -1482,6 +1488,44 @@ static void * __init memblock_alloc_internal(
> }
>
> /**
> + * memblock_alloc_exact_nid_raw - allocate boot memory block on the exact node,
> + * without zeroing memory and without panicking
No need to mention "without panicking" as none of the memblock APIs panic
any more.
> + * @size: size of memory block to be allocated in bytes
> + * @align: alignment of the region and block's size
> + * @min_addr: the lower bound of the memory region from where the allocation
> + * is preferred (phys address)
> + * @max_addr: the upper bound of the memory region from where the allocation
> + * is preferred (phys address), or %MEMBLOCK_ALLOC_ACCESSIBLE to
> + * allocate only from memory limited by memblock.current_limit value
> + * @nid: nid of the free area to find, %NUMA_NO_NODE for any node
> + *
> + * Public function, provides additional debug information (including caller
> + * info), if enabled. Does not zero allocated memory, does not panic if request
> + * cannot be satisfied.
> + *
> + * Return:
> + * Virtual address of allocated memory block on success, NULL on failure.
> + */
> +void * __init memblock_alloc_exact_nid_raw(
> + phys_addr_t size, phys_addr_t align,
> + phys_addr_t min_addr, phys_addr_t max_addr,
> + int nid)
> +{
> + void *ptr;
> +
> + memblock_dbg("%s: %llu bytes align=0x%llx nid=%d from=%pa max_addr=%pa %pS\n",
> + __func__, (u64)size, (u64)align, nid, &min_addr,
> + &max_addr, (void *)_RET_IP_);
> +
> + ptr = memblock_alloc_internal(size, align,
> + min_addr, max_addr, nid, 1);
> + if (ptr && size > 0)
> + page_init_poison(ptr, size);
> +
> + return ptr;
> +}
> +
> +/**
> * memblock_alloc_try_nid_raw - allocate boot memory block without zeroing
> * memory and without panicking
> * @size: size of memory block to be allocated in bytes
> @@ -1512,7 +1556,7 @@ void * __init memblock_alloc_try_nid_raw(
> &max_addr, (void *)_RET_IP_);
>
> ptr = memblock_alloc_internal(size, align,
> - min_addr, max_addr, nid);
> + min_addr, max_addr, nid, 0);
> if (ptr && size > 0)
> page_init_poison(ptr, size);
>
> @@ -1547,7 +1591,7 @@ void * __init memblock_alloc_try_nid(
> __func__, (u64)size, (u64)align, nid, &min_addr,
> &max_addr, (void *)_RET_IP_);
> ptr = memblock_alloc_internal(size, align,
> - min_addr, max_addr, nid);
> + min_addr, max_addr, nid, 0);
> if (ptr)
> memset(ptr, 0, size);
>
> diff --git a/mm/sparse.c b/mm/sparse.c
> index 72f010d..1a06471 100644
> --- a/mm/sparse.c
> +++ b/mm/sparse.c
> @@ -475,7 +475,7 @@ static void __init sparse_buffer_init(unsigned long size, int nid)
> phys_addr_t addr = __pa(MAX_DMA_ADDRESS);
> WARN_ON(sparsemap_buf); /* forgot to call sparse_buffer_fini()? */
> sparsemap_buf =
> - memblock_alloc_try_nid_raw(size, PAGE_SIZE,
> + memblock_alloc_exact_nid_raw(size, PAGE_SIZE,
> addr,
> MEMBLOCK_ALLOC_ACCESSIBLE, nid);
> sparsemap_buf_end = sparsemap_buf + size;
> --
> 2.7.4.huawei.3
>
--
Sincerely yours,
Mike.
* Re: [PATCH V2] mm: Support memblock alloc on the exact node for sparse_buffer_init()
From: Yunfeng Ye @ 2019-09-25 6:52 UTC (permalink / raw)
To: Mike Rapoport
Cc: richardw.yang, akpm, osalvador, mhocko, dan.j.williams, david,
cai, linux-mm, linux-kernel
On 2019/9/25 14:36, Mike Rapoport wrote:
> On Tue, Sep 24, 2019 at 04:09:32PM +0800, Yunfeng Ye wrote:
>> sparse_buffer_init() uses memblock_alloc_try_nid_raw() to allocate memory
>> for the page management structures. If the allocation from the specified
>> node fails, it falls back to allocating from other nodes.
>>
>> Normally the page management structures take no more than 2% of total
>> memory, but they need one large contiguous block. In most cases the
>> allocation from the specified node succeeds; it only fails when that
>> node's memory is highly fragmented. In this case we would rather allocate
>> the memory section by section on the requested node than allocate one
>> large block from other NUMA nodes.
>>
>> Add memblock_alloc_exact_nid_raw() for this situation, which allocates a
>> boot memory block strictly on the requested node. If the large contiguous
>> allocation in sparse_buffer_init() fails, the code falls back to
>> allocating smaller, per-section blocks on that node.
>>
>> Signed-off-by: Yunfeng Ye <yeyunfeng@huawei.com>
>> ---
>> v1 -> v2:
>> - use memblock_alloc_exact_nid_raw() rather than using a flag
>>
>> include/linux/memblock.h | 3 +++
>> mm/memblock.c | 66 ++++++++++++++++++++++++++++++++++++++++--------
>> mm/sparse.c | 2 +-
>> 3 files changed, 59 insertions(+), 12 deletions(-)
>>
>> diff --git a/include/linux/memblock.h b/include/linux/memblock.h
>> index f491690..b38bbef 100644
>> --- a/include/linux/memblock.h
>> +++ b/include/linux/memblock.h
>> @@ -358,6 +358,9 @@ static inline phys_addr_t memblock_phys_alloc(phys_addr_t size,
>> MEMBLOCK_ALLOC_ACCESSIBLE);
>> }
>>
>> +void *memblock_alloc_exact_nid_raw(phys_addr_t size, phys_addr_t align,
>> + phys_addr_t min_addr, phys_addr_t max_addr,
>> + int nid);
>> void *memblock_alloc_try_nid_raw(phys_addr_t size, phys_addr_t align,
>> phys_addr_t min_addr, phys_addr_t max_addr,
>> int nid);
>> diff --git a/mm/memblock.c b/mm/memblock.c
>> index 7d4f61a..a71869e 100644
>> --- a/mm/memblock.c
>> +++ b/mm/memblock.c
>> @@ -1323,12 +1323,13 @@ __next_mem_pfn_range_in_zone(u64 *idx, struct zone *zone,
>> * @start: the lower bound of the memory region to allocate (phys address)
>> * @end: the upper bound of the memory region to allocate (phys address)
>> * @nid: nid of the free area to find, %NUMA_NO_NODE for any node
>> + * @need_exact_nid: control the allocation fall back to other nodes
>> *
>> * The allocation is performed from memory region limited by
>> * memblock.current_limit if @max_addr == %MEMBLOCK_ALLOC_ACCESSIBLE.
>> *
>> - * If the specified node can not hold the requested memory the
>> - * allocation falls back to any node in the system
>> + * If the specified node can not hold the requested memory and @need_exact_nid
>> + * is zero, the allocation falls back to any node in the system
>> *
>> * For systems with memory mirroring, the allocation is attempted first
>> * from the regions with mirroring enabled and then retried from any
>> @@ -1342,7 +1343,8 @@ __next_mem_pfn_range_in_zone(u64 *idx, struct zone *zone,
>> */
>> static phys_addr_t __init memblock_alloc_range_nid(phys_addr_t size,
>> phys_addr_t align, phys_addr_t start,
>> - phys_addr_t end, int nid)
>> + phys_addr_t end, int nid,
>> + int need_exact_nid)
>
> Please make it 'bool exact_nid'
>
OK, I will change it as you suggest, thanks.
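
For v3 the prototype would then look roughly like this (sketch only, final
form subject to review):

	static phys_addr_t __init memblock_alloc_range_nid(phys_addr_t size,
					phys_addr_t align, phys_addr_t start,
					phys_addr_t end, int nid,
					bool exact_nid);

	/* inside the function: only fall back when an exact node was not asked for */
	if (nid != NUMA_NO_NODE && !exact_nid) {
		...
	}
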
>> {
>> enum memblock_flags flags = choose_memblock_flags();
>> phys_addr_t found;
>> @@ -1365,7 +1367,7 @@ static phys_addr_t __init memblock_alloc_range_nid(phys_addr_t size,
>> if (found && !memblock_reserve(found, size))
>> goto done;
>>
>> - if (nid != NUMA_NO_NODE) {
>> + if (nid != NUMA_NO_NODE && !need_exact_nid) {
>> found = memblock_find_in_range_node(size, align, start,
>> end, NUMA_NO_NODE,
>> flags);
>> @@ -1413,7 +1415,8 @@ phys_addr_t __init memblock_phys_alloc_range(phys_addr_t size,
>> phys_addr_t start,
>> phys_addr_t end)
>> {
>> - return memblock_alloc_range_nid(size, align, start, end, NUMA_NO_NODE);
>> + return memblock_alloc_range_nid(size, align, start, end, NUMA_NO_NODE,
>> + 0);
>> }
>>
>> /**
>> @@ -1432,7 +1435,7 @@ phys_addr_t __init memblock_phys_alloc_range(phys_addr_t size,
>> phys_addr_t __init memblock_phys_alloc_try_nid(phys_addr_t size, phys_addr_t align, int nid)
>> {
>> return memblock_alloc_range_nid(size, align, 0,
>> - MEMBLOCK_ALLOC_ACCESSIBLE, nid);
>> + MEMBLOCK_ALLOC_ACCESSIBLE, nid, 0);
>> }
>>
>> /**
>> @@ -1442,6 +1445,7 @@ phys_addr_t __init memblock_phys_alloc_try_nid(phys_addr_t size, phys_addr_t ali
>> * @min_addr: the lower bound of the memory region to allocate (phys address)
>> * @max_addr: the upper bound of the memory region to allocate (phys address)
>> * @nid: nid of the free area to find, %NUMA_NO_NODE for any node
>> + * @need_exact_nid: control the allocation fall back to other nodes
>> *
>> * Allocates memory block using memblock_alloc_range_nid() and
>> * converts the returned physical address to virtual.
>> @@ -1457,7 +1461,7 @@ phys_addr_t __init memblock_phys_alloc_try_nid(phys_addr_t size, phys_addr_t ali
>> static void * __init memblock_alloc_internal(
>> phys_addr_t size, phys_addr_t align,
>> phys_addr_t min_addr, phys_addr_t max_addr,
>> - int nid)
>> + int nid, int need_exact_nid)
>
> Ditto.
>
>> {
>> phys_addr_t alloc;
>>
>> @@ -1469,11 +1473,13 @@ static void * __init memblock_alloc_internal(
>> if (WARN_ON_ONCE(slab_is_available()))
>> return kzalloc_node(size, GFP_NOWAIT, nid);
>>
>> - alloc = memblock_alloc_range_nid(size, align, min_addr, max_addr, nid);
>> + alloc = memblock_alloc_range_nid(size, align, min_addr, max_addr, nid,
>> + need_exact_nid);
>>
>> /* retry allocation without lower limit */
>> if (!alloc && min_addr)
>> - alloc = memblock_alloc_range_nid(size, align, 0, max_addr, nid);
>> + alloc = memblock_alloc_range_nid(size, align, 0, max_addr, nid,
>> + need_exact_nid);
>>
>> if (!alloc)
>> return NULL;
>> @@ -1482,6 +1488,44 @@ static void * __init memblock_alloc_internal(
>> }
>>
>> /**
>> + * memblock_alloc_exact_nid_raw - allocate boot memory block on the exact node,
>> + * without zeroing memory and without panicking
>
> No need to mention "without panicking" as none of the memblock APIs panic
> any more.
>
OK, I will drop the "without panicking" wording as you suggest, thanks.
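
The kernel-doc heading for the new helper would then read roughly (sketch):

	/**
	 * memblock_alloc_exact_nid_raw - allocate boot memory block on the
	 * exact node, without zeroing memory
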
>> + * @size: size of memory block to be allocated in bytes
>> + * @align: alignment of the region and block's size
>> + * @min_addr: the lower bound of the memory region from where the allocation
>> + * is preferred (phys address)
>> + * @max_addr: the upper bound of the memory region from where the allocation
>> + * is preferred (phys address), or %MEMBLOCK_ALLOC_ACCESSIBLE to
>> + * allocate only from memory limited by memblock.current_limit value
>> + * @nid: nid of the free area to find, %NUMA_NO_NODE for any node
>> + *
>> + * Public function, provides additional debug information (including caller
>> + * info), if enabled. Does not zero allocated memory, does not panic if request
>> + * cannot be satisfied.
>> + *
>> + * Return:
>> + * Virtual address of allocated memory block on success, NULL on failure.
>> + */
>> +void * __init memblock_alloc_exact_nid_raw(
>> + phys_addr_t size, phys_addr_t align,
>> + phys_addr_t min_addr, phys_addr_t max_addr,
>> + int nid)
>> +{
>> + void *ptr;
>> +
>> + memblock_dbg("%s: %llu bytes align=0x%llx nid=%d from=%pa max_addr=%pa %pS\n",
>> + __func__, (u64)size, (u64)align, nid, &min_addr,
>> + &max_addr, (void *)_RET_IP_);
>> +
>> + ptr = memblock_alloc_internal(size, align,
>> + min_addr, max_addr, nid, 1);
>> + if (ptr && size > 0)
>> + page_init_poison(ptr, size);
>> +
>> + return ptr;
>> +}
>> +
>> +/**
>> * memblock_alloc_try_nid_raw - allocate boot memory block without zeroing
>> * memory and without panicking
>> * @size: size of memory block to be allocated in bytes
>> @@ -1512,7 +1556,7 @@ void * __init memblock_alloc_try_nid_raw(
>> &max_addr, (void *)_RET_IP_);
>>
>> ptr = memblock_alloc_internal(size, align,
>> - min_addr, max_addr, nid);
>> + min_addr, max_addr, nid, 0);
>> if (ptr && size > 0)
>> page_init_poison(ptr, size);
>>
>> @@ -1547,7 +1591,7 @@ void * __init memblock_alloc_try_nid(
>> __func__, (u64)size, (u64)align, nid, &min_addr,
>> &max_addr, (void *)_RET_IP_);
>> ptr = memblock_alloc_internal(size, align,
>> - min_addr, max_addr, nid);
>> + min_addr, max_addr, nid, 0);
>> if (ptr)
>> memset(ptr, 0, size);
>>
>> diff --git a/mm/sparse.c b/mm/sparse.c
>> index 72f010d..1a06471 100644
>> --- a/mm/sparse.c
>> +++ b/mm/sparse.c
>> @@ -475,7 +475,7 @@ static void __init sparse_buffer_init(unsigned long size, int nid)
>> phys_addr_t addr = __pa(MAX_DMA_ADDRESS);
>> WARN_ON(sparsemap_buf); /* forgot to call sparse_buffer_fini()? */
>> sparsemap_buf =
>> - memblock_alloc_try_nid_raw(size, PAGE_SIZE,
>> + memblock_alloc_exact_nid_raw(size, PAGE_SIZE,
>> addr,
>> MEMBLOCK_ALLOC_ACCESSIBLE, nid);
>> sparsemap_buf_end = sparsemap_buf + size;
>> --
>> 2.7.4.huawei.3
>>
>