* [PATCH v3 2/2] mm, sparse: pass nid instead of pgdat to sparse_add_one_section()
2018-11-29 15:53 ` [PATCH v3 1/2] " Wei Yang
@ 2018-11-29 15:53 ` Wei Yang
2018-11-29 16:01 ` David Hildenbrand
2018-11-29 17:15 ` Michal Hocko
2018-11-29 16:06 ` [PATCH v3 1/2] mm, sparse: drop pgdat_resize_lock in sparse_add/remove_one_section() David Hildenbrand
` (2 subsequent siblings)
3 siblings, 2 replies; 38+ messages in thread
From: Wei Yang @ 2018-11-29 15:53 UTC (permalink / raw)
To: mhocko, dave.hansen, osalvador, david; +Cc: akpm, linux-mm, Wei Yang
Since the information needed in sparse_add_one_section() is node id to
allocate proper memory, it is not necessary to pass its pgdat.
This patch changes the prototype of sparse_add_one_section() to pass
node id directly. This is intended to reduce misleading that
sparse_add_one_section() would touch pgdat.
Signed-off-by: Wei Yang <richard.weiyang@gmail.com>
---
include/linux/memory_hotplug.h | 2 +-
mm/memory_hotplug.c | 2 +-
mm/sparse.c | 6 +++---
3 files changed, 5 insertions(+), 5 deletions(-)
diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
index 45a5affcab8a..3787d4e913e6 100644
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -333,7 +333,7 @@ extern void move_pfn_range_to_zone(struct zone *zone, unsigned long start_pfn,
unsigned long nr_pages, struct vmem_altmap *altmap);
extern int offline_pages(unsigned long start_pfn, unsigned long nr_pages);
extern bool is_memblock_offlined(struct memory_block *mem);
-extern int sparse_add_one_section(struct pglist_data *pgdat,
+extern int sparse_add_one_section(int nid,
unsigned long start_pfn, struct vmem_altmap *altmap);
extern void sparse_remove_one_section(struct zone *zone, struct mem_section *ms,
unsigned long map_offset, struct vmem_altmap *altmap);
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index f626e7e5f57b..5b3a3d7b4466 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -253,7 +253,7 @@ static int __meminit __add_section(int nid, unsigned long phys_start_pfn,
if (pfn_valid(phys_start_pfn))
return -EEXIST;
- ret = sparse_add_one_section(NODE_DATA(nid), phys_start_pfn, altmap);
+ ret = sparse_add_one_section(nid, phys_start_pfn, altmap);
if (ret < 0)
return ret;
diff --git a/mm/sparse.c b/mm/sparse.c
index 5825f276485f..2472bf23278a 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -662,7 +662,7 @@ static void free_map_bootmem(struct page *memmap)
* set. If this is <=0, then that means that the passed-in
* map was not consumed and must be freed.
*/
-int __meminit sparse_add_one_section(struct pglist_data *pgdat,
+int __meminit sparse_add_one_section(int nid,
unsigned long start_pfn, struct vmem_altmap *altmap)
{
unsigned long section_nr = pfn_to_section_nr(start_pfn);
@@ -675,11 +675,11 @@ int __meminit sparse_add_one_section(struct pglist_data *pgdat,
* no locking for this, because it does its own
* plus, it does a kmalloc
*/
- ret = sparse_index_init(section_nr, pgdat->node_id);
+ ret = sparse_index_init(section_nr, nid);
if (ret < 0 && ret != -EEXIST)
return ret;
ret = 0;
- memmap = kmalloc_section_memmap(section_nr, pgdat->node_id, altmap);
+ memmap = kmalloc_section_memmap(section_nr, nid, altmap);
if (!memmap)
return -ENOMEM;
usemap = __kmalloc_section_usemap();
--
2.15.1
^ permalink raw reply related [flat|nested] 38+ messages in thread
* Re: [PATCH v3 2/2] mm, sparse: pass nid instead of pgdat to sparse_add_one_section()
2018-11-29 15:53 ` [PATCH v3 2/2] mm, sparse: pass nid instead of pgdat to sparse_add_one_section() Wei Yang
@ 2018-11-29 16:01 ` David Hildenbrand
2018-11-30 1:22 ` Wei Yang
2018-11-29 17:15 ` Michal Hocko
1 sibling, 1 reply; 38+ messages in thread
From: David Hildenbrand @ 2018-11-29 16:01 UTC (permalink / raw)
To: Wei Yang, mhocko, dave.hansen, osalvador; +Cc: akpm, linux-mm
On 29.11.18 16:53, Wei Yang wrote:
> Since the information needed in sparse_add_one_section() is node id to
> allocate proper memory, it is not necessary to pass its pgdat.
>
> This patch changes the prototype of sparse_add_one_section() to pass
> node id directly. This is intended to reduce misleading that
> sparse_add_one_section() would touch pgdat.
>
> Signed-off-by: Wei Yang <richard.weiyang@gmail.com>
> ---
> include/linux/memory_hotplug.h | 2 +-
> mm/memory_hotplug.c | 2 +-
> mm/sparse.c | 6 +++---
> 3 files changed, 5 insertions(+), 5 deletions(-)
>
> diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
> index 45a5affcab8a..3787d4e913e6 100644
> --- a/include/linux/memory_hotplug.h
> +++ b/include/linux/memory_hotplug.h
> @@ -333,7 +333,7 @@ extern void move_pfn_range_to_zone(struct zone *zone, unsigned long start_pfn,
> unsigned long nr_pages, struct vmem_altmap *altmap);
> extern int offline_pages(unsigned long start_pfn, unsigned long nr_pages);
> extern bool is_memblock_offlined(struct memory_block *mem);
> -extern int sparse_add_one_section(struct pglist_data *pgdat,
> +extern int sparse_add_one_section(int nid,
> unsigned long start_pfn, struct vmem_altmap *altmap);
While you touch that, can you fixup the alignment of the other parameters?
Apart from that
Reviewed-by: David Hildenbrand <david@redhat.com>
> extern void sparse_remove_one_section(struct zone *zone, struct mem_section *ms,
> unsigned long map_offset, struct vmem_altmap *altmap);
> diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
> index f626e7e5f57b..5b3a3d7b4466 100644
> --- a/mm/memory_hotplug.c
> +++ b/mm/memory_hotplug.c
> @@ -253,7 +253,7 @@ static int __meminit __add_section(int nid, unsigned long phys_start_pfn,
> if (pfn_valid(phys_start_pfn))
> return -EEXIST;
>
> - ret = sparse_add_one_section(NODE_DATA(nid), phys_start_pfn, altmap);
> + ret = sparse_add_one_section(nid, phys_start_pfn, altmap);
> if (ret < 0)
> return ret;
>
> diff --git a/mm/sparse.c b/mm/sparse.c
> index 5825f276485f..2472bf23278a 100644
> --- a/mm/sparse.c
> +++ b/mm/sparse.c
> @@ -662,7 +662,7 @@ static void free_map_bootmem(struct page *memmap)
> * set. If this is <=0, then that means that the passed-in
> * map was not consumed and must be freed.
> */
> -int __meminit sparse_add_one_section(struct pglist_data *pgdat,
> +int __meminit sparse_add_one_section(int nid,
> unsigned long start_pfn, struct vmem_altmap *altmap)
> {
> unsigned long section_nr = pfn_to_section_nr(start_pfn);
> @@ -675,11 +675,11 @@ int __meminit sparse_add_one_section(struct pglist_data *pgdat,
> * no locking for this, because it does its own
> * plus, it does a kmalloc
> */
> - ret = sparse_index_init(section_nr, pgdat->node_id);
> + ret = sparse_index_init(section_nr, nid);
> if (ret < 0 && ret != -EEXIST)
> return ret;
> ret = 0;
> - memmap = kmalloc_section_memmap(section_nr, pgdat->node_id, altmap);
> + memmap = kmalloc_section_memmap(section_nr, nid, altmap);
> if (!memmap)
> return -ENOMEM;
> usemap = __kmalloc_section_usemap();
>
--
Thanks,
David / dhildenb
^ permalink raw reply [flat|nested] 38+ messages in thread
* Re: [PATCH v3 2/2] mm, sparse: pass nid instead of pgdat to sparse_add_one_section()
2018-11-29 16:01 ` David Hildenbrand
@ 2018-11-30 1:22 ` Wei Yang
2018-11-30 9:20 ` David Hildenbrand
0 siblings, 1 reply; 38+ messages in thread
From: Wei Yang @ 2018-11-30 1:22 UTC (permalink / raw)
To: David Hildenbrand
Cc: Wei Yang, mhocko, dave.hansen, osalvador, akpm, linux-mm
On Thu, Nov 29, 2018 at 05:01:51PM +0100, David Hildenbrand wrote:
>On 29.11.18 16:53, Wei Yang wrote:
>> Since the information needed in sparse_add_one_section() is node id to
>> allocate proper memory, it is not necessary to pass its pgdat.
>>
>> This patch changes the prototype of sparse_add_one_section() to pass
>> node id directly. This is intended to reduce misleading that
>> sparse_add_one_section() would touch pgdat.
>>
>> Signed-off-by: Wei Yang <richard.weiyang@gmail.com>
>> ---
>> include/linux/memory_hotplug.h | 2 +-
>> mm/memory_hotplug.c | 2 +-
>> mm/sparse.c | 6 +++---
>> 3 files changed, 5 insertions(+), 5 deletions(-)
>>
>> diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
>> index 45a5affcab8a..3787d4e913e6 100644
>> --- a/include/linux/memory_hotplug.h
>> +++ b/include/linux/memory_hotplug.h
>> @@ -333,7 +333,7 @@ extern void move_pfn_range_to_zone(struct zone *zone, unsigned long start_pfn,
>> unsigned long nr_pages, struct vmem_altmap *altmap);
>> extern int offline_pages(unsigned long start_pfn, unsigned long nr_pages);
>> extern bool is_memblock_offlined(struct memory_block *mem);
>> -extern int sparse_add_one_section(struct pglist_data *pgdat,
>> +extern int sparse_add_one_section(int nid,
>> unsigned long start_pfn, struct vmem_altmap *altmap);
>
>While you touch that, can you fixup the alignment of the other parameters?
>
If I am correct, the code style of alignment is like this?
extern int sparse_add_one_section(int nid, unsigned long start_pfn,
struct vmem_altmap *altmap);
>Apart from that
>
>Reviewed-by: David Hildenbrand <david@redhat.com>
>
--
Wei Yang
Help you, Help me
^ permalink raw reply [flat|nested] 38+ messages in thread
* Re: [PATCH v3 2/2] mm, sparse: pass nid instead of pgdat to sparse_add_one_section()
2018-11-30 1:22 ` Wei Yang
@ 2018-11-30 9:20 ` David Hildenbrand
0 siblings, 0 replies; 38+ messages in thread
From: David Hildenbrand @ 2018-11-30 9:20 UTC (permalink / raw)
To: Wei Yang; +Cc: mhocko, dave.hansen, osalvador, akpm, linux-mm
On 30.11.18 02:22, Wei Yang wrote:
> On Thu, Nov 29, 2018 at 05:01:51PM +0100, David Hildenbrand wrote:
>> On 29.11.18 16:53, Wei Yang wrote:
>>> Since the information needed in sparse_add_one_section() is node id to
>>> allocate proper memory, it is not necessary to pass its pgdat.
>>>
>>> This patch changes the prototype of sparse_add_one_section() to pass
>>> node id directly. This is intended to reduce misleading that
>>> sparse_add_one_section() would touch pgdat.
>>>
>>> Signed-off-by: Wei Yang <richard.weiyang@gmail.com>
>>> ---
>>> include/linux/memory_hotplug.h | 2 +-
>>> mm/memory_hotplug.c | 2 +-
>>> mm/sparse.c | 6 +++---
>>> 3 files changed, 5 insertions(+), 5 deletions(-)
>>>
>>> diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
>>> index 45a5affcab8a..3787d4e913e6 100644
>>> --- a/include/linux/memory_hotplug.h
>>> +++ b/include/linux/memory_hotplug.h
>>> @@ -333,7 +333,7 @@ extern void move_pfn_range_to_zone(struct zone *zone, unsigned long start_pfn,
>>> unsigned long nr_pages, struct vmem_altmap *altmap);
>>> extern int offline_pages(unsigned long start_pfn, unsigned long nr_pages);
>>> extern bool is_memblock_offlined(struct memory_block *mem);
>>> -extern int sparse_add_one_section(struct pglist_data *pgdat,
>>> +extern int sparse_add_one_section(int nid,
>>> unsigned long start_pfn, struct vmem_altmap *altmap);
>>
>> While you touch that, can you fixup the alignment of the other parameters?
>>
>
> If I am correct, the code style of alignment is like this?
>
> extern int sparse_add_one_section(int nid, unsigned long start_pfn,
> struct vmem_altmap *altmap);
Yes, all parameters should start at the same indentation. (some people
don't care and produce this "mess", I tend to care :) )
>
>> Apart from that
>>
>> Reviewed-by: David Hildenbrand <david@redhat.com>
>>
>
--
Thanks,
David / dhildenb
^ permalink raw reply [flat|nested] 38+ messages in thread
* Re: [PATCH v3 2/2] mm, sparse: pass nid instead of pgdat to sparse_add_one_section()
2018-11-29 15:53 ` [PATCH v3 2/2] mm, sparse: pass nid instead of pgdat to sparse_add_one_section() Wei Yang
2018-11-29 16:01 ` David Hildenbrand
@ 2018-11-29 17:15 ` Michal Hocko
2018-11-29 23:57 ` Wei Yang
1 sibling, 1 reply; 38+ messages in thread
From: Michal Hocko @ 2018-11-29 17:15 UTC (permalink / raw)
To: Wei Yang; +Cc: dave.hansen, osalvador, david, akpm, linux-mm
On Thu 29-11-18 23:53:16, Wei Yang wrote:
> Since the information needed in sparse_add_one_section() is node id to
> allocate proper memory, it is not necessary to pass its pgdat.
>
> This patch changes the prototype of sparse_add_one_section() to pass
> node id directly. This is intended to reduce misleading that
> sparse_add_one_section() would touch pgdat.
>
> Signed-off-by: Wei Yang <richard.weiyang@gmail.com>
>From a quick look, this looks ok.
Acked-by: Michal Hocko <mhocko@suse.com>
Thanks for splitting up it from the original patch.
> ---
> include/linux/memory_hotplug.h | 2 +-
> mm/memory_hotplug.c | 2 +-
> mm/sparse.c | 6 +++---
> 3 files changed, 5 insertions(+), 5 deletions(-)
>
> diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
> index 45a5affcab8a..3787d4e913e6 100644
> --- a/include/linux/memory_hotplug.h
> +++ b/include/linux/memory_hotplug.h
> @@ -333,7 +333,7 @@ extern void move_pfn_range_to_zone(struct zone *zone, unsigned long start_pfn,
> unsigned long nr_pages, struct vmem_altmap *altmap);
> extern int offline_pages(unsigned long start_pfn, unsigned long nr_pages);
> extern bool is_memblock_offlined(struct memory_block *mem);
> -extern int sparse_add_one_section(struct pglist_data *pgdat,
> +extern int sparse_add_one_section(int nid,
> unsigned long start_pfn, struct vmem_altmap *altmap);
> extern void sparse_remove_one_section(struct zone *zone, struct mem_section *ms,
> unsigned long map_offset, struct vmem_altmap *altmap);
> diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
> index f626e7e5f57b..5b3a3d7b4466 100644
> --- a/mm/memory_hotplug.c
> +++ b/mm/memory_hotplug.c
> @@ -253,7 +253,7 @@ static int __meminit __add_section(int nid, unsigned long phys_start_pfn,
> if (pfn_valid(phys_start_pfn))
> return -EEXIST;
>
> - ret = sparse_add_one_section(NODE_DATA(nid), phys_start_pfn, altmap);
> + ret = sparse_add_one_section(nid, phys_start_pfn, altmap);
> if (ret < 0)
> return ret;
>
> diff --git a/mm/sparse.c b/mm/sparse.c
> index 5825f276485f..2472bf23278a 100644
> --- a/mm/sparse.c
> +++ b/mm/sparse.c
> @@ -662,7 +662,7 @@ static void free_map_bootmem(struct page *memmap)
> * set. If this is <=0, then that means that the passed-in
> * map was not consumed and must be freed.
> */
> -int __meminit sparse_add_one_section(struct pglist_data *pgdat,
> +int __meminit sparse_add_one_section(int nid,
> unsigned long start_pfn, struct vmem_altmap *altmap)
> {
> unsigned long section_nr = pfn_to_section_nr(start_pfn);
> @@ -675,11 +675,11 @@ int __meminit sparse_add_one_section(struct pglist_data *pgdat,
> * no locking for this, because it does its own
> * plus, it does a kmalloc
> */
> - ret = sparse_index_init(section_nr, pgdat->node_id);
> + ret = sparse_index_init(section_nr, nid);
> if (ret < 0 && ret != -EEXIST)
> return ret;
> ret = 0;
> - memmap = kmalloc_section_memmap(section_nr, pgdat->node_id, altmap);
> + memmap = kmalloc_section_memmap(section_nr, nid, altmap);
> if (!memmap)
> return -ENOMEM;
> usemap = __kmalloc_section_usemap();
> --
> 2.15.1
>
--
Michal Hocko
SUSE Labs
^ permalink raw reply [flat|nested] 38+ messages in thread
* Re: [PATCH v3 2/2] mm, sparse: pass nid instead of pgdat to sparse_add_one_section()
2018-11-29 17:15 ` Michal Hocko
@ 2018-11-29 23:57 ` Wei Yang
0 siblings, 0 replies; 38+ messages in thread
From: Wei Yang @ 2018-11-29 23:57 UTC (permalink / raw)
To: Michal Hocko; +Cc: Wei Yang, dave.hansen, osalvador, david, akpm, linux-mm
On Thu, Nov 29, 2018 at 06:15:42PM +0100, Michal Hocko wrote:
>On Thu 29-11-18 23:53:16, Wei Yang wrote:
>> Since the information needed in sparse_add_one_section() is node id to
>> allocate proper memory, it is not necessary to pass its pgdat.
>>
>> This patch changes the prototype of sparse_add_one_section() to pass
>> node id directly. This is intended to reduce misleading that
>> sparse_add_one_section() would touch pgdat.
>>
>> Signed-off-by: Wei Yang <richard.weiyang@gmail.com>
>
>>>From a quick look, this looks ok.
>
>Acked-by: Michal Hocko <mhocko@suse.com>
>
>Thanks for splitting up it from the original patch.
>
Thanks for your suggestion.
>> ---
>> include/linux/memory_hotplug.h | 2 +-
>> mm/memory_hotplug.c | 2 +-
>> mm/sparse.c | 6 +++---
>> 3 files changed, 5 insertions(+), 5 deletions(-)
>>
>> diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
>> index 45a5affcab8a..3787d4e913e6 100644
>> --- a/include/linux/memory_hotplug.h
>> +++ b/include/linux/memory_hotplug.h
>> @@ -333,7 +333,7 @@ extern void move_pfn_range_to_zone(struct zone *zone, unsigned long start_pfn,
>> unsigned long nr_pages, struct vmem_altmap *altmap);
>> extern int offline_pages(unsigned long start_pfn, unsigned long nr_pages);
>> extern bool is_memblock_offlined(struct memory_block *mem);
>> -extern int sparse_add_one_section(struct pglist_data *pgdat,
>> +extern int sparse_add_one_section(int nid,
>> unsigned long start_pfn, struct vmem_altmap *altmap);
>> extern void sparse_remove_one_section(struct zone *zone, struct mem_section *ms,
>> unsigned long map_offset, struct vmem_altmap *altmap);
>> diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
>> index f626e7e5f57b..5b3a3d7b4466 100644
>> --- a/mm/memory_hotplug.c
>> +++ b/mm/memory_hotplug.c
>> @@ -253,7 +253,7 @@ static int __meminit __add_section(int nid, unsigned long phys_start_pfn,
>> if (pfn_valid(phys_start_pfn))
>> return -EEXIST;
>>
>> - ret = sparse_add_one_section(NODE_DATA(nid), phys_start_pfn, altmap);
>> + ret = sparse_add_one_section(nid, phys_start_pfn, altmap);
>> if (ret < 0)
>> return ret;
>>
>> diff --git a/mm/sparse.c b/mm/sparse.c
>> index 5825f276485f..2472bf23278a 100644
>> --- a/mm/sparse.c
>> +++ b/mm/sparse.c
>> @@ -662,7 +662,7 @@ static void free_map_bootmem(struct page *memmap)
>> * set. If this is <=0, then that means that the passed-in
>> * map was not consumed and must be freed.
>> */
>> -int __meminit sparse_add_one_section(struct pglist_data *pgdat,
>> +int __meminit sparse_add_one_section(int nid,
>> unsigned long start_pfn, struct vmem_altmap *altmap)
>> {
>> unsigned long section_nr = pfn_to_section_nr(start_pfn);
>> @@ -675,11 +675,11 @@ int __meminit sparse_add_one_section(struct pglist_data *pgdat,
>> * no locking for this, because it does its own
>> * plus, it does a kmalloc
>> */
>> - ret = sparse_index_init(section_nr, pgdat->node_id);
>> + ret = sparse_index_init(section_nr, nid);
>> if (ret < 0 && ret != -EEXIST)
>> return ret;
>> ret = 0;
>> - memmap = kmalloc_section_memmap(section_nr, pgdat->node_id, altmap);
>> + memmap = kmalloc_section_memmap(section_nr, nid, altmap);
>> if (!memmap)
>> return -ENOMEM;
>> usemap = __kmalloc_section_usemap();
>> --
>> 2.15.1
>>
>
>--
>Michal Hocko
>SUSE Labs
--
Wei Yang
Help you, Help me
^ permalink raw reply [flat|nested] 38+ messages in thread
* Re: [PATCH v3 1/2] mm, sparse: drop pgdat_resize_lock in sparse_add/remove_one_section()
2018-11-29 15:53 ` [PATCH v3 1/2] " Wei Yang
2018-11-29 15:53 ` [PATCH v3 2/2] mm, sparse: pass nid instead of pgdat to sparse_add_one_section() Wei Yang
@ 2018-11-29 16:06 ` David Hildenbrand
2018-11-29 17:17 ` Michal Hocko
2018-11-30 4:28 ` Wei Yang
2018-11-29 17:14 ` Michal Hocko
2018-12-04 8:56 ` [PATCH v4 " Wei Yang
3 siblings, 2 replies; 38+ messages in thread
From: David Hildenbrand @ 2018-11-29 16:06 UTC (permalink / raw)
To: Wei Yang, mhocko, dave.hansen, osalvador; +Cc: akpm, linux-mm
On 29.11.18 16:53, Wei Yang wrote:
> pgdat_resize_lock is used to protect pgdat's memory region information
> like: node_start_pfn, node_present_pages, etc. While in function
> sparse_add/remove_one_section(), pgdat_resize_lock is used to protect
> initialization/release of one mem_section. This looks not proper.
>
> Based on current implementation, even remove this lock, mem_section
> is still away from contention, because it is protected by global
> mem_hotpulg_lock.
s/mem_hotpulg_lock/mem_hotplug_lock/
>
> Following is the current call trace of sparse_add/remove_one_section()
>
> mem_hotplug_begin()
> arch_add_memory()
> add_pages()
> __add_pages()
> __add_section()
> sparse_add_one_section()
> mem_hotplug_done()
>
> mem_hotplug_begin()
> arch_remove_memory()
> __remove_pages()
> __remove_section()
> sparse_remove_one_section()
> mem_hotplug_done()
>
> The comment above the pgdat_resize_lock also mentions "Holding this will
> also guarantee that any pfn_valid() stays that way.", which is true with
> the current implementation and false after this patch. But current
> implementation doesn't meet this comment. There isn't any pfn walkers
> to take the lock so this looks like a relict from the past. This patch
> also removes this comment.
Should we start to document which lock is expected to protect what?
I suggest adding what you just found out to
Documentation/admin-guide/mm/memory-hotplug.rst "Locking Internals".
Maybe a new subsection for mem_hotplug_lock. And eventually also
pgdat_resize_lock.
>
> Signed-off-by: Wei Yang <richard.weiyang@gmail.com>
>
> ---
> v3:
> * adjust the changelog with the reason for this change
> * remove a comment for pgdat_resize_lock
> * separate the prototype change of sparse_add_one_section() to
> another one
> v2:
> * adjust changelog to show this procedure is serialized by global
> mem_hotplug_lock
> ---
> include/linux/mmzone.h | 2 --
> mm/sparse.c | 9 +--------
> 2 files changed, 1 insertion(+), 10 deletions(-)
>
> diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
> index 1bb749bee284..0a66085d7ced 100644
> --- a/include/linux/mmzone.h
> +++ b/include/linux/mmzone.h
> @@ -638,8 +638,6 @@ typedef struct pglist_data {
> /*
> * Must be held any time you expect node_start_pfn,
> * node_present_pages, node_spanned_pages or nr_zones stay constant.
> - * Holding this will also guarantee that any pfn_valid() stays that
> - * way.
> *
> * pgdat_resize_lock() and pgdat_resize_unlock() are provided to
> * manipulate node_size_lock without checking for CONFIG_MEMORY_HOTPLUG
> diff --git a/mm/sparse.c b/mm/sparse.c
> index 33307fc05c4d..5825f276485f 100644
> --- a/mm/sparse.c
> +++ b/mm/sparse.c
> @@ -669,7 +669,6 @@ int __meminit sparse_add_one_section(struct pglist_data *pgdat,
> struct mem_section *ms;
> struct page *memmap;
> unsigned long *usemap;
> - unsigned long flags;
> int ret;
>
> /*
> @@ -689,8 +688,6 @@ int __meminit sparse_add_one_section(struct pglist_data *pgdat,
> return -ENOMEM;
> }
>
> - pgdat_resize_lock(pgdat, &flags);
> -
> ms = __pfn_to_section(start_pfn);
> if (ms->section_mem_map & SECTION_MARKED_PRESENT) {
> ret = -EEXIST;
> @@ -707,7 +704,6 @@ int __meminit sparse_add_one_section(struct pglist_data *pgdat,
> sparse_init_one_section(ms, section_nr, memmap, usemap);
>
> out:
> - pgdat_resize_unlock(pgdat, &flags);
> if (ret < 0) {
> kfree(usemap);
> __kfree_section_memmap(memmap, altmap);
> @@ -769,10 +765,8 @@ void sparse_remove_one_section(struct zone *zone, struct mem_section *ms,
> unsigned long map_offset, struct vmem_altmap *altmap)
> {
> struct page *memmap = NULL;
> - unsigned long *usemap = NULL, flags;
> - struct pglist_data *pgdat = zone->zone_pgdat;
> + unsigned long *usemap = NULL;
>
> - pgdat_resize_lock(pgdat, &flags);
> if (ms->section_mem_map) {
> usemap = ms->pageblock_flags;
> memmap = sparse_decode_mem_map(ms->section_mem_map,
> @@ -780,7 +774,6 @@ void sparse_remove_one_section(struct zone *zone, struct mem_section *ms,
> ms->section_mem_map = 0;
> ms->pageblock_flags = NULL;
> }
> - pgdat_resize_unlock(pgdat, &flags);
>
> clear_hwpoisoned_pages(memmap + map_offset,
> PAGES_PER_SECTION - map_offset);
>
--
Thanks,
David / dhildenb
^ permalink raw reply [flat|nested] 38+ messages in thread
* Re: [PATCH v3 1/2] mm, sparse: drop pgdat_resize_lock in sparse_add/remove_one_section()
2018-11-29 16:06 ` [PATCH v3 1/2] mm, sparse: drop pgdat_resize_lock in sparse_add/remove_one_section() David Hildenbrand
@ 2018-11-29 17:17 ` Michal Hocko
2018-11-30 4:28 ` Wei Yang
1 sibling, 0 replies; 38+ messages in thread
From: Michal Hocko @ 2018-11-29 17:17 UTC (permalink / raw)
To: David Hildenbrand; +Cc: Wei Yang, dave.hansen, osalvador, akpm, linux-mm
On Thu 29-11-18 17:06:15, David Hildenbrand wrote:
> I suggest adding what you just found out to
[...]
> Documentation/admin-guide/mm/memory-hotplug.rst "Locking Internals".
> Maybe a new subsection for mem_hotplug_lock. And eventually also
> pgdat_resize_lock.
That would be really great! I guess I have suggested something like that
to Oscar already and he provided a highlevel overview.
--
Michal Hocko
SUSE Labs
^ permalink raw reply [flat|nested] 38+ messages in thread
* Re: [PATCH v3 1/2] mm, sparse: drop pgdat_resize_lock in sparse_add/remove_one_section()
2018-11-29 16:06 ` [PATCH v3 1/2] mm, sparse: drop pgdat_resize_lock in sparse_add/remove_one_section() David Hildenbrand
2018-11-29 17:17 ` Michal Hocko
@ 2018-11-30 4:28 ` Wei Yang
2018-11-30 9:19 ` David Hildenbrand
2018-12-03 11:25 ` David Hildenbrand
1 sibling, 2 replies; 38+ messages in thread
From: Wei Yang @ 2018-11-30 4:28 UTC (permalink / raw)
To: David Hildenbrand
Cc: Wei Yang, mhocko, dave.hansen, osalvador, akpm, linux-mm
On Thu, Nov 29, 2018 at 05:06:15PM +0100, David Hildenbrand wrote:
>On 29.11.18 16:53, Wei Yang wrote:
>> pgdat_resize_lock is used to protect pgdat's memory region information
>> like: node_start_pfn, node_present_pages, etc. While in function
>> sparse_add/remove_one_section(), pgdat_resize_lock is used to protect
>> initialization/release of one mem_section. This looks not proper.
>>
>> Based on current implementation, even remove this lock, mem_section
>> is still away from contention, because it is protected by global
>> mem_hotpulg_lock.
>
>s/mem_hotpulg_lock/mem_hotplug_lock/
>
>>
>> Following is the current call trace of sparse_add/remove_one_section()
>>
>> mem_hotplug_begin()
>> arch_add_memory()
>> add_pages()
>> __add_pages()
>> __add_section()
>> sparse_add_one_section()
>> mem_hotplug_done()
>>
>> mem_hotplug_begin()
>> arch_remove_memory()
>> __remove_pages()
>> __remove_section()
>> sparse_remove_one_section()
>> mem_hotplug_done()
>>
>> The comment above the pgdat_resize_lock also mentions "Holding this will
>> also guarantee that any pfn_valid() stays that way.", which is true with
>> the current implementation and false after this patch. But current
>> implementation doesn't meet this comment. There isn't any pfn walkers
>> to take the lock so this looks like a relict from the past. This patch
>> also removes this comment.
>
>Should we start to document which lock is expected to protect what?
>
>I suggest adding what you just found out to
>Documentation/admin-guide/mm/memory-hotplug.rst "Locking Internals".
>Maybe a new subsection for mem_hotplug_lock. And eventually also
>pgdat_resize_lock.
Well, I am not good at document writting. Below is my first trial. Look
forward your comments.
BTW, in case I would send a new version with this, would I put this into
a separate one or merge this into current one?
diff --git a/Documentation/admin-guide/mm/memory-hotplug.rst b/Documentation/admin-guide/mm/memory-hotplug.rst
index 5c4432c96c4b..1548820a0762 100644
--- a/Documentation/admin-guide/mm/memory-hotplug.rst
+++ b/Documentation/admin-guide/mm/memory-hotplug.rst
@@ -396,6 +396,20 @@ Need more implementation yet....
Locking Internals
=================
+There are three locks involved in memory-hotplug, two global lock and one local
+lock:
+
+- device_hotplug_lock
+- mem_hotplug_lock
+- device_lock
+
+Currently, they are twisted together for all kinds of reasons. The following
+part is divded into device_hotplug_lock and mem_hotplug_lock parts
+respectively to describe those tricky situations.
+
+device_hotplug_lock
+---------------------
+
When adding/removing memory that uses memory block devices (i.e. ordinary RAM),
the device_hotplug_lock should be held to:
@@ -417,14 +431,21 @@ memory faster than expected:
As the device is visible to user space before taking the device_lock(), this
can result in a lock inversion.
+mem_hotplug_lock
+---------------------
+
onlining/offlining of memory should be done via device_online()/
-device_offline() - to make sure it is properly synchronized to actions
-via sysfs. Holding device_hotplug_lock is advised (to e.g. protect online_type)
+device_offline() - to make sure it is properly synchronized to actions via
+sysfs. Even mem_hotplug_lock is used to protect the process, because of the
+lock inversion described above, holding device_hotplug_lock is still advised
+(to e.g. protect online_type)
When adding/removing/onlining/offlining memory or adding/removing
heterogeneous/device memory, we should always hold the mem_hotplug_lock in
write mode to serialise memory hotplug (e.g. access to global/zone
-variables).
+variables). Currently, we take advantage of this to serialise sparsemem's
+mem_section handling in sparse_add_one_section() and
+sparse_remove_one_section().
In addition, mem_hotplug_lock (in contrast to device_hotplug_lock) in read
mode allows for a quite efficient get_online_mems/put_online_mems
>
>
>Thanks,
>
>David / dhildenb
--
Wei Yang
Help you, Help me
^ permalink raw reply related [flat|nested] 38+ messages in thread
* Re: [PATCH v3 1/2] mm, sparse: drop pgdat_resize_lock in sparse_add/remove_one_section()
2018-11-30 4:28 ` Wei Yang
@ 2018-11-30 9:19 ` David Hildenbrand
2018-11-30 9:52 ` Michal Hocko
2018-12-01 0:31 ` Wei Yang
2018-12-03 11:25 ` David Hildenbrand
1 sibling, 2 replies; 38+ messages in thread
From: David Hildenbrand @ 2018-11-30 9:19 UTC (permalink / raw)
To: Wei Yang; +Cc: mhocko, dave.hansen, osalvador, akpm, linux-mm
>> I suggest adding what you just found out to
>> Documentation/admin-guide/mm/memory-hotplug.rst "Locking Internals".
>> Maybe a new subsection for mem_hotplug_lock. And eventually also
>> pgdat_resize_lock.
>
> Well, I am not good at document writting. Below is my first trial. Look
> forward your comments.
I'll have a look, maybe also Oscar and Michal can have a look. I guess
we don't have to cover all now, we can add more details as we discover them.
>
> BTW, in case I would send a new version with this, would I put this into
> a separate one or merge this into current one?
I would put this into a separate patch.
>
> diff --git a/Documentation/admin-guide/mm/memory-hotplug.rst b/Documentation/admin-guide/mm/memory-hotplug.rst
> index 5c4432c96c4b..1548820a0762 100644
> --- a/Documentation/admin-guide/mm/memory-hotplug.rst
> +++ b/Documentation/admin-guide/mm/memory-hotplug.rst
> @@ -396,6 +396,20 @@ Need more implementation yet....
> Locking Internals
> =================
>
> +There are three locks involved in memory-hotplug, two global lock and one local
> +lock:
> +
> +- device_hotplug_lock
> +- mem_hotplug_lock
> +- device_lock
> +
> +Currently, they are twisted together for all kinds of reasons. The following
> +part is divded into device_hotplug_lock and mem_hotplug_lock parts
s/divded/divided/
> +respectively to describe those tricky situations.
> +
> +device_hotplug_lock
> +---------------------
> +
> When adding/removing memory that uses memory block devices (i.e. ordinary RAM),
> the device_hotplug_lock should be held to:
>
> @@ -417,14 +431,21 @@ memory faster than expected:
> As the device is visible to user space before taking the device_lock(), this
> can result in a lock inversion.
>
> +mem_hotplug_lock
> +---------------------
> +
I would this section start after the following paragraph, as most of
that paragraph belongs to the device_hotplug_lock.
> onlining/offlining of memory should be done via device_online()/
> -device_offline() - to make sure it is properly synchronized to actions
> -via sysfs. Holding device_hotplug_lock is advised (to e.g. protect online_type)
> +device_offline() - to make sure it is properly synchronized to actions via
> +sysfs. Even mem_hotplug_lock is used to protect the process, because of the
> +lock inversion described above, holding device_hotplug_lock is still advised
> +(to e.g. protect online_type)
>
> When adding/removing/onlining/offlining memory or adding/removing
> heterogeneous/device memory, we should always hold the mem_hotplug_lock in
> write mode to serialise memory hotplug (e.g. access to global/zone
> -variables).
> +variables). Currently, we take advantage of this to serialise sparsemem's
> +mem_section handling in sparse_add_one_section() and
> +sparse_remove_one_section().
>
> In addition, mem_hotplug_lock (in contrast to device_hotplug_lock) in read
> mode allows for a quite efficient get_online_mems/put_online_mems
>
>>
>>
>> Thanks,
>>
>> David / dhildenb
>
Apart from that looks good to me, thanks!
--
Thanks,
David / dhildenb
^ permalink raw reply [flat|nested] 38+ messages in thread
* Re: [PATCH v3 1/2] mm, sparse: drop pgdat_resize_lock in sparse_add/remove_one_section()
2018-11-30 9:19 ` David Hildenbrand
@ 2018-11-30 9:52 ` Michal Hocko
2018-12-04 8:53 ` Wei Yang
2018-12-01 0:31 ` Wei Yang
1 sibling, 1 reply; 38+ messages in thread
From: Michal Hocko @ 2018-11-30 9:52 UTC (permalink / raw)
To: osalvador; +Cc: Wei Yang, dave.hansen, David Hildenbrand, akpm, linux-mm
On Fri 30-11-18 10:19:13, David Hildenbrand wrote:
> >> I suggest adding what you just found out to
> >> Documentation/admin-guide/mm/memory-hotplug.rst "Locking Internals".
> >> Maybe a new subsection for mem_hotplug_lock. And eventually also
> >> pgdat_resize_lock.
> >
> > Well, I am not good at document writting. Below is my first trial. Look
> > forward your comments.
>
> I'll have a look, maybe also Oscar and Michal can have a look. I guess
> we don't have to cover all now, we can add more details as we discover them.
Oscar, didn't you have something already?
--
Michal Hocko
SUSE Labs
^ permalink raw reply [flat|nested] 38+ messages in thread
* Re: [PATCH v3 1/2] mm, sparse: drop pgdat_resize_lock in sparse_add/remove_one_section()
2018-11-30 9:52 ` Michal Hocko
@ 2018-12-04 8:53 ` Wei Yang
0 siblings, 0 replies; 38+ messages in thread
From: Wei Yang @ 2018-12-04 8:53 UTC (permalink / raw)
To: Michal Hocko
Cc: osalvador, Wei Yang, dave.hansen, David Hildenbrand, akpm, linux-mm
On Fri, Nov 30, 2018 at 10:52:30AM +0100, Michal Hocko wrote:
>On Fri 30-11-18 10:19:13, David Hildenbrand wrote:
>> >> I suggest adding what you just found out to
>> >> Documentation/admin-guide/mm/memory-hotplug.rst "Locking Internals".
>> >> Maybe a new subsection for mem_hotplug_lock. And eventually also
>> >> pgdat_resize_lock.
>> >
>> > Well, I am not good at document writting. Below is my first trial. Look
>> > forward your comments.
>>
>> I'll have a look, maybe also Oscar and Michal can have a look. I guess
>> we don't have to cover all now, we can add more details as we discover them.
>
>Oscar, didn't you have something already?
Since we prefer to address the document in a separate patch, I will send
out v4 with changes suggested from Michal and David first.
>--
>Michal Hocko
>SUSE Labs
--
Wei Yang
Help you, Help me
^ permalink raw reply [flat|nested] 38+ messages in thread
* Re: [PATCH v3 1/2] mm, sparse: drop pgdat_resize_lock in sparse_add/remove_one_section()
2018-11-30 9:19 ` David Hildenbrand
2018-11-30 9:52 ` Michal Hocko
@ 2018-12-01 0:31 ` Wei Yang
1 sibling, 0 replies; 38+ messages in thread
From: Wei Yang @ 2018-12-01 0:31 UTC (permalink / raw)
To: David Hildenbrand
Cc: Wei Yang, mhocko, dave.hansen, osalvador, akpm, linux-mm
On Fri, Nov 30, 2018 at 10:19:13AM +0100, David Hildenbrand wrote:
>>> I suggest adding what you just found out to
>>> Documentation/admin-guide/mm/memory-hotplug.rst "Locking Internals".
>>> Maybe a new subsection for mem_hotplug_lock. And eventually also
>>> pgdat_resize_lock.
>>
>> Well, I am not good at document writting. Below is my first trial. Look
>> forward your comments.
>
>I'll have a look, maybe also Oscar and Michal can have a look. I guess
>we don't have to cover all now, we can add more details as we discover them.
>
>>
>> BTW, in case I would send a new version with this, would I put this into
>> a separate one or merge this into current one?
>
>I would put this into a separate patch.
>
>>
>> diff --git a/Documentation/admin-guide/mm/memory-hotplug.rst b/Documentation/admin-guide/mm/memory-hotplug.rst
>> index 5c4432c96c4b..1548820a0762 100644
>> --- a/Documentation/admin-guide/mm/memory-hotplug.rst
>> +++ b/Documentation/admin-guide/mm/memory-hotplug.rst
>> @@ -396,6 +396,20 @@ Need more implementation yet....
>> Locking Internals
>> =================
>>
>> +There are three locks involved in memory-hotplug, two global lock and one local
>> +lock:
>> +
>> +- device_hotplug_lock
>> +- mem_hotplug_lock
>> +- device_lock
>> +
>> +Currently, they are twisted together for all kinds of reasons. The following
>> +part is divded into device_hotplug_lock and mem_hotplug_lock parts
>
>s/divded/divided/
>
>> +respectively to describe those tricky situations.
>> +
>> +device_hotplug_lock
>> +---------------------
>> +
>> When adding/removing memory that uses memory block devices (i.e. ordinary RAM),
>> the device_hotplug_lock should be held to:
>>
>> @@ -417,14 +431,21 @@ memory faster than expected:
>> As the device is visible to user space before taking the device_lock(), this
>> can result in a lock inversion.
>>
>> +mem_hotplug_lock
>> +---------------------
>> +
>
>I would this section start after the following paragraph, as most of
>that paragraph belongs to the device_hotplug_lock.
>
>
>> onlining/offlining of memory should be done via device_online()/
>> -device_offline() - to make sure it is properly synchronized to actions
>> -via sysfs. Holding device_hotplug_lock is advised (to e.g. protect online_type)
>> +device_offline() - to make sure it is properly synchronized to actions via
>> +sysfs. Even mem_hotplug_lock is used to protect the process, because of the
>> +lock inversion described above, holding device_hotplug_lock is still advised
>> +(to e.g. protect online_type)
>>
>> When adding/removing/onlining/offlining memory or adding/removing
>> heterogeneous/device memory, we should always hold the mem_hotplug_lock in
>> write mode to serialise memory hotplug (e.g. access to global/zone
>> -variables).
>> +variables). Currently, we take advantage of this to serialise sparsemem's
>> +mem_section handling in sparse_add_one_section() and
>> +sparse_remove_one_section().
>>
>> In addition, mem_hotplug_lock (in contrast to device_hotplug_lock) in read
>> mode allows for a quite efficient get_online_mems/put_online_mems
>>
>>>
>>>
>>> Thanks,
>>>
>>> David / dhildenb
>>
>
>Apart from that looks good to me, thanks!
>
Thanks :-)
>
>--
>
>Thanks,
>
>David / dhildenb
--
Wei Yang
Help you, Help me
^ permalink raw reply [flat|nested] 38+ messages in thread
* Re: [PATCH v3 1/2] mm, sparse: drop pgdat_resize_lock in sparse_add/remove_one_section()
2018-11-30 4:28 ` Wei Yang
2018-11-30 9:19 ` David Hildenbrand
@ 2018-12-03 11:25 ` David Hildenbrand
2018-12-03 21:06 ` Wei Yang
1 sibling, 1 reply; 38+ messages in thread
From: David Hildenbrand @ 2018-12-03 11:25 UTC (permalink / raw)
To: Wei Yang; +Cc: mhocko, dave.hansen, osalvador, akpm, linux-mm
On 30.11.18 05:28, Wei Yang wrote:
> On Thu, Nov 29, 2018 at 05:06:15PM +0100, David Hildenbrand wrote:
>> On 29.11.18 16:53, Wei Yang wrote:
>>> pgdat_resize_lock is used to protect pgdat's memory region information
>>> like: node_start_pfn, node_present_pages, etc. While in function
>>> sparse_add/remove_one_section(), pgdat_resize_lock is used to protect
>>> initialization/release of one mem_section. This looks not proper.
>>>
>>> Based on current implementation, even remove this lock, mem_section
>>> is still away from contention, because it is protected by global
>>> mem_hotpulg_lock.
>>
>> s/mem_hotpulg_lock/mem_hotplug_lock/
>>
>>>
>>> Following is the current call trace of sparse_add/remove_one_section()
>>>
>>> mem_hotplug_begin()
>>> arch_add_memory()
>>> add_pages()
>>> __add_pages()
>>> __add_section()
>>> sparse_add_one_section()
>>> mem_hotplug_done()
>>>
>>> mem_hotplug_begin()
>>> arch_remove_memory()
>>> __remove_pages()
>>> __remove_section()
>>> sparse_remove_one_section()
>>> mem_hotplug_done()
>>>
>>> The comment above the pgdat_resize_lock also mentions "Holding this will
>>> also guarantee that any pfn_valid() stays that way.", which is true with
>>> the current implementation and false after this patch. But current
>>> implementation doesn't meet this comment. There isn't any pfn walkers
>>> to take the lock so this looks like a relict from the past. This patch
>>> also removes this comment.
>>
>> Should we start to document which lock is expected to protect what?
>>
>> I suggest adding what you just found out to
>> Documentation/admin-guide/mm/memory-hotplug.rst "Locking Internals".
>> Maybe a new subsection for mem_hotplug_lock. And eventually also
>> pgdat_resize_lock.
>
> Well, I am not good at document writting. Below is my first trial. Look
> forward your comments.
>
> BTW, in case I would send a new version with this, would I put this into
> a separate one or merge this into current one?
>
> diff --git a/Documentation/admin-guide/mm/memory-hotplug.rst b/Documentation/admin-guide/mm/memory-hotplug.rst
> index 5c4432c96c4b..1548820a0762 100644
> --- a/Documentation/admin-guide/mm/memory-hotplug.rst
> +++ b/Documentation/admin-guide/mm/memory-hotplug.rst
BTW, it really should go into
Documentation/core-api/memory-hotplug.rst
Something got wrong while merging this in linux-next, so now we have
duplicate documentation and the one in
Documentation/admin-guide/mm/memory-hotplug.rst about locking internals
has to go.
> @@ -396,6 +396,20 @@ Need more implementation yet....
> Locking Internals
> =================
>
> +There are three locks involved in memory-hotplug, two global lock and one local
> +lock:
> +
> +- device_hotplug_lock
> +- mem_hotplug_lock
> +- device_lock
> +
> +Currently, they are twisted together for all kinds of reasons. The following
> +part is divded into device_hotplug_lock and mem_hotplug_lock parts
> +respectively to describe those tricky situations.
> +
> +device_hotplug_lock
> +---------------------
> +
> When adding/removing memory that uses memory block devices (i.e. ordinary RAM),
> the device_hotplug_lock should be held to:
>
> @@ -417,14 +431,21 @@ memory faster than expected:
> As the device is visible to user space before taking the device_lock(), this
> can result in a lock inversion.
>
> +mem_hotplug_lock
> +---------------------
> +
> onlining/offlining of memory should be done via device_online()/
> -device_offline() - to make sure it is properly synchronized to actions
> -via sysfs. Holding device_hotplug_lock is advised (to e.g. protect online_type)
> +device_offline() - to make sure it is properly synchronized to actions via
> +sysfs. Even mem_hotplug_lock is used to protect the process, because of the
> +lock inversion described above, holding device_hotplug_lock is still advised
> +(to e.g. protect online_type)
>
> When adding/removing/onlining/offlining memory or adding/removing
> heterogeneous/device memory, we should always hold the mem_hotplug_lock in
> write mode to serialise memory hotplug (e.g. access to global/zone
> -variables).
> +variables). Currently, we take advantage of this to serialise sparsemem's
> +mem_section handling in sparse_add_one_section() and
> +sparse_remove_one_section().
>
> In addition, mem_hotplug_lock (in contrast to device_hotplug_lock) in read
> mode allows for a quite efficient get_online_mems/put_online_mems
>
>>
>>
>> Thanks,
>>
>> David / dhildenb
>
--
Thanks,
David / dhildenb
^ permalink raw reply [flat|nested] 38+ messages in thread
* Re: [PATCH v3 1/2] mm, sparse: drop pgdat_resize_lock in sparse_add/remove_one_section()
2018-12-03 11:25 ` David Hildenbrand
@ 2018-12-03 21:06 ` Wei Yang
0 siblings, 0 replies; 38+ messages in thread
From: Wei Yang @ 2018-12-03 21:06 UTC (permalink / raw)
To: David Hildenbrand
Cc: Wei Yang, mhocko, dave.hansen, osalvador, akpm, linux-mm
On Mon, Dec 03, 2018 at 12:25:20PM +0100, David Hildenbrand wrote:
>On 30.11.18 05:28, Wei Yang wrote:
>> On Thu, Nov 29, 2018 at 05:06:15PM +0100, David Hildenbrand wrote:
>>> On 29.11.18 16:53, Wei Yang wrote:
>>>> pgdat_resize_lock is used to protect pgdat's memory region information
>>>> like: node_start_pfn, node_present_pages, etc. While in function
>>>> sparse_add/remove_one_section(), pgdat_resize_lock is used to protect
>>>> initialization/release of one mem_section. This looks not proper.
>>>>
>>>> Based on current implementation, even remove this lock, mem_section
>>>> is still away from contention, because it is protected by global
>>>> mem_hotpulg_lock.
>>>
>>> s/mem_hotpulg_lock/mem_hotplug_lock/
>>>
>>>>
>>>> Following is the current call trace of sparse_add/remove_one_section()
>>>>
>>>> mem_hotplug_begin()
>>>> arch_add_memory()
>>>> add_pages()
>>>> __add_pages()
>>>> __add_section()
>>>> sparse_add_one_section()
>>>> mem_hotplug_done()
>>>>
>>>> mem_hotplug_begin()
>>>> arch_remove_memory()
>>>> __remove_pages()
>>>> __remove_section()
>>>> sparse_remove_one_section()
>>>> mem_hotplug_done()
>>>>
>>>> The comment above the pgdat_resize_lock also mentions "Holding this will
>>>> also guarantee that any pfn_valid() stays that way.", which is true with
>>>> the current implementation and false after this patch. But current
>>>> implementation doesn't meet this comment. There isn't any pfn walkers
>>>> to take the lock so this looks like a relict from the past. This patch
>>>> also removes this comment.
>>>
>>> Should we start to document which lock is expected to protect what?
>>>
>>> I suggest adding what you just found out to
>>> Documentation/admin-guide/mm/memory-hotplug.rst "Locking Internals".
>>> Maybe a new subsection for mem_hotplug_lock. And eventually also
>>> pgdat_resize_lock.
>>
>> Well, I am not good at document writting. Below is my first trial. Look
>> forward your comments.
>>
>> BTW, in case I would send a new version with this, would I put this into
>> a separate one or merge this into current one?
>>
>> diff --git a/Documentation/admin-guide/mm/memory-hotplug.rst b/Documentation/admin-guide/mm/memory-hotplug.rst
>> index 5c4432c96c4b..1548820a0762 100644
>> --- a/Documentation/admin-guide/mm/memory-hotplug.rst
>> +++ b/Documentation/admin-guide/mm/memory-hotplug.rst
>
>BTW, it really should go into
>
>Documentation/core-api/memory-hotplug.rst
>
>Something got wrong while merging this in linux-next, so now we have
>duplicate documentation and the one in
>Documentation/admin-guide/mm/memory-hotplug.rst about locking internals
>has to go.
>
Sounds reasonable.
Admin may not necessary need to understand the internal locking.
--
Wei Yang
Help you, Help me
^ permalink raw reply [flat|nested] 38+ messages in thread
* Re: [PATCH v3 1/2] mm, sparse: drop pgdat_resize_lock in sparse_add/remove_one_section()
2018-11-29 15:53 ` [PATCH v3 1/2] " Wei Yang
2018-11-29 15:53 ` [PATCH v3 2/2] mm, sparse: pass nid instead of pgdat to sparse_add_one_section() Wei Yang
2018-11-29 16:06 ` [PATCH v3 1/2] mm, sparse: drop pgdat_resize_lock in sparse_add/remove_one_section() David Hildenbrand
@ 2018-11-29 17:14 ` Michal Hocko
2018-12-04 8:56 ` [PATCH v4 " Wei Yang
3 siblings, 0 replies; 38+ messages in thread
From: Michal Hocko @ 2018-11-29 17:14 UTC (permalink / raw)
To: Wei Yang; +Cc: dave.hansen, osalvador, david, akpm, linux-mm
On Thu 29-11-18 23:53:15, Wei Yang wrote:
> pgdat_resize_lock is used to protect pgdat's memory region information
> like: node_start_pfn, node_present_pages, etc. While in function
> sparse_add/remove_one_section(), pgdat_resize_lock is used to protect
> initialization/release of one mem_section. This looks not proper.
>
> Based on current implementation, even remove this lock, mem_section
> is still away from contention, because it is protected by global
> mem_hotpulg_lock.
I guess you wanted to say.
These code paths are currently protected by mem_hotpulg_lock currently
but should there ever be any reason for locking at the sparse layer a
dedicated lock should be introduced.
>
> Following is the current call trace of sparse_add/remove_one_section()
>
> mem_hotplug_begin()
> arch_add_memory()
> add_pages()
> __add_pages()
> __add_section()
> sparse_add_one_section()
> mem_hotplug_done()
>
> mem_hotplug_begin()
> arch_remove_memory()
> __remove_pages()
> __remove_section()
> sparse_remove_one_section()
> mem_hotplug_done()
>
> The comment above the pgdat_resize_lock also mentions "Holding this will
> also guarantee that any pfn_valid() stays that way.", which is true with
> the current implementation and false after this patch. But current
> implementation doesn't meet this comment. There isn't any pfn walkers
> to take the lock so this looks like a relict from the past. This patch
> also removes this comment.
>
> Signed-off-by: Wei Yang <richard.weiyang@gmail.com>
Other than that
Acked-by: Michal Hocko <mhocko@suse.com>
>
> ---
> v3:
> * adjust the changelog with the reason for this change
> * remove a comment for pgdat_resize_lock
> * separate the prototype change of sparse_add_one_section() to
> another one
> v2:
> * adjust changelog to show this procedure is serialized by global
> mem_hotplug_lock
> ---
> include/linux/mmzone.h | 2 --
> mm/sparse.c | 9 +--------
> 2 files changed, 1 insertion(+), 10 deletions(-)
>
> diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
> index 1bb749bee284..0a66085d7ced 100644
> --- a/include/linux/mmzone.h
> +++ b/include/linux/mmzone.h
> @@ -638,8 +638,6 @@ typedef struct pglist_data {
> /*
> * Must be held any time you expect node_start_pfn,
> * node_present_pages, node_spanned_pages or nr_zones stay constant.
> - * Holding this will also guarantee that any pfn_valid() stays that
> - * way.
> *
> * pgdat_resize_lock() and pgdat_resize_unlock() are provided to
> * manipulate node_size_lock without checking for CONFIG_MEMORY_HOTPLUG
> diff --git a/mm/sparse.c b/mm/sparse.c
> index 33307fc05c4d..5825f276485f 100644
> --- a/mm/sparse.c
> +++ b/mm/sparse.c
> @@ -669,7 +669,6 @@ int __meminit sparse_add_one_section(struct pglist_data *pgdat,
> struct mem_section *ms;
> struct page *memmap;
> unsigned long *usemap;
> - unsigned long flags;
> int ret;
>
> /*
> @@ -689,8 +688,6 @@ int __meminit sparse_add_one_section(struct pglist_data *pgdat,
> return -ENOMEM;
> }
>
> - pgdat_resize_lock(pgdat, &flags);
> -
> ms = __pfn_to_section(start_pfn);
> if (ms->section_mem_map & SECTION_MARKED_PRESENT) {
> ret = -EEXIST;
> @@ -707,7 +704,6 @@ int __meminit sparse_add_one_section(struct pglist_data *pgdat,
> sparse_init_one_section(ms, section_nr, memmap, usemap);
>
> out:
> - pgdat_resize_unlock(pgdat, &flags);
> if (ret < 0) {
> kfree(usemap);
> __kfree_section_memmap(memmap, altmap);
> @@ -769,10 +765,8 @@ void sparse_remove_one_section(struct zone *zone, struct mem_section *ms,
> unsigned long map_offset, struct vmem_altmap *altmap)
> {
> struct page *memmap = NULL;
> - unsigned long *usemap = NULL, flags;
> - struct pglist_data *pgdat = zone->zone_pgdat;
> + unsigned long *usemap = NULL;
>
> - pgdat_resize_lock(pgdat, &flags);
> if (ms->section_mem_map) {
> usemap = ms->pageblock_flags;
> memmap = sparse_decode_mem_map(ms->section_mem_map,
> @@ -780,7 +774,6 @@ void sparse_remove_one_section(struct zone *zone, struct mem_section *ms,
> ms->section_mem_map = 0;
> ms->pageblock_flags = NULL;
> }
> - pgdat_resize_unlock(pgdat, &flags);
>
> clear_hwpoisoned_pages(memmap + map_offset,
> PAGES_PER_SECTION - map_offset);
> --
> 2.15.1
>
--
Michal Hocko
SUSE Labs
^ permalink raw reply [flat|nested] 38+ messages in thread
* [PATCH v4 1/2] mm, sparse: drop pgdat_resize_lock in sparse_add/remove_one_section()
2018-11-29 15:53 ` [PATCH v3 1/2] " Wei Yang
` (2 preceding siblings ...)
2018-11-29 17:14 ` Michal Hocko
@ 2018-12-04 8:56 ` Wei Yang
2018-12-04 8:56 ` [PATCH v4 2/2] mm, sparse: pass nid instead of pgdat to sparse_add_one_section() Wei Yang
2018-12-04 9:24 ` [PATCH v4 1/2] mm, sparse: drop pgdat_resize_lock in sparse_add/remove_one_section() David Hildenbrand
3 siblings, 2 replies; 38+ messages in thread
From: Wei Yang @ 2018-12-04 8:56 UTC (permalink / raw)
To: mhocko, dave.hansen, osalvador, david; +Cc: akpm, linux-mm, Wei Yang
pgdat_resize_lock is used to protect pgdat's memory region information
like: node_start_pfn, node_present_pages, etc. While in function
sparse_add/remove_one_section(), pgdat_resize_lock is used to protect
initialization/release of one mem_section. This looks not proper.
These code paths are currently protected by mem_hotplug_lock currently
but should there ever be any reason for locking at the sparse layer a
dedicated lock should be introduced.
Following is the current call trace of sparse_add/remove_one_section()
mem_hotplug_begin()
arch_add_memory()
add_pages()
__add_pages()
__add_section()
sparse_add_one_section()
mem_hotplug_done()
mem_hotplug_begin()
arch_remove_memory()
__remove_pages()
__remove_section()
sparse_remove_one_section()
mem_hotplug_done()
The comment above the pgdat_resize_lock also mentions "Holding this will
also guarantee that any pfn_valid() stays that way.", which is true with
the current implementation and false after this patch. But current
implementation doesn't meet this comment. There isn't any pfn walkers
to take the lock so this looks like a relict from the past. This patch
also removes this comment.
[Michal: changelog suggestion]
Signed-off-by: Wei Yang <richard.weiyang@gmail.com>
Acked-by: Michal Hocko <mhocko@suse.com>
---
v4:
* fix typo in changelog
* adjust second paragraph of changelog
v3:
* adjust the changelog with the reason for this change
* remove a comment for pgdat_resize_lock
* separate the prototype change of sparse_add_one_section() to
another one
v2:
* adjust changelog to show this procedure is serialized by global
mem_hotplug_lock
---
include/linux/mmzone.h | 2 --
mm/sparse.c | 9 +--------
2 files changed, 1 insertion(+), 10 deletions(-)
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index d76177cb8436..be126113b499 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -639,8 +639,6 @@ typedef struct pglist_data {
/*
* Must be held any time you expect node_start_pfn,
* node_present_pages, node_spanned_pages or nr_zones stay constant.
- * Holding this will also guarantee that any pfn_valid() stays that
- * way.
*
* pgdat_resize_lock() and pgdat_resize_unlock() are provided to
* manipulate node_size_lock without checking for CONFIG_MEMORY_HOTPLUG
diff --git a/mm/sparse.c b/mm/sparse.c
index 33307fc05c4d..5825f276485f 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -669,7 +669,6 @@ int __meminit sparse_add_one_section(struct pglist_data *pgdat,
struct mem_section *ms;
struct page *memmap;
unsigned long *usemap;
- unsigned long flags;
int ret;
/*
@@ -689,8 +688,6 @@ int __meminit sparse_add_one_section(struct pglist_data *pgdat,
return -ENOMEM;
}
- pgdat_resize_lock(pgdat, &flags);
-
ms = __pfn_to_section(start_pfn);
if (ms->section_mem_map & SECTION_MARKED_PRESENT) {
ret = -EEXIST;
@@ -707,7 +704,6 @@ int __meminit sparse_add_one_section(struct pglist_data *pgdat,
sparse_init_one_section(ms, section_nr, memmap, usemap);
out:
- pgdat_resize_unlock(pgdat, &flags);
if (ret < 0) {
kfree(usemap);
__kfree_section_memmap(memmap, altmap);
@@ -769,10 +765,8 @@ void sparse_remove_one_section(struct zone *zone, struct mem_section *ms,
unsigned long map_offset, struct vmem_altmap *altmap)
{
struct page *memmap = NULL;
- unsigned long *usemap = NULL, flags;
- struct pglist_data *pgdat = zone->zone_pgdat;
+ unsigned long *usemap = NULL;
- pgdat_resize_lock(pgdat, &flags);
if (ms->section_mem_map) {
usemap = ms->pageblock_flags;
memmap = sparse_decode_mem_map(ms->section_mem_map,
@@ -780,7 +774,6 @@ void sparse_remove_one_section(struct zone *zone, struct mem_section *ms,
ms->section_mem_map = 0;
ms->pageblock_flags = NULL;
}
- pgdat_resize_unlock(pgdat, &flags);
clear_hwpoisoned_pages(memmap + map_offset,
PAGES_PER_SECTION - map_offset);
--
2.15.1
^ permalink raw reply related [flat|nested] 38+ messages in thread
* [PATCH v4 2/2] mm, sparse: pass nid instead of pgdat to sparse_add_one_section()
2018-12-04 8:56 ` [PATCH v4 " Wei Yang
@ 2018-12-04 8:56 ` Wei Yang
2018-12-04 9:24 ` [PATCH v4 1/2] mm, sparse: drop pgdat_resize_lock in sparse_add/remove_one_section() David Hildenbrand
1 sibling, 0 replies; 38+ messages in thread
From: Wei Yang @ 2018-12-04 8:56 UTC (permalink / raw)
To: mhocko, dave.hansen, osalvador, david; +Cc: akpm, linux-mm, Wei Yang
Since the information needed in sparse_add_one_section() is node id to
allocate proper memory, it is not necessary to pass its pgdat.
This patch changes the prototype of sparse_add_one_section() to pass
node id directly. This is intended to reduce misleading that
sparse_add_one_section() would touch pgdat.
Signed-off-by: Wei Yang <richard.weiyang@gmail.com>
Reviewed-by: David Hildenbrand <david@redhat.com>
Acked-by: Michal Hocko <mhocko@suse.com>
---
* adjust parameter alignment
---
include/linux/memory_hotplug.h | 4 ++--
mm/memory_hotplug.c | 2 +-
mm/sparse.c | 8 ++++----
3 files changed, 7 insertions(+), 7 deletions(-)
diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
index 45a5affcab8a..b81cc29482d8 100644
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -333,8 +333,8 @@ extern void move_pfn_range_to_zone(struct zone *zone, unsigned long start_pfn,
unsigned long nr_pages, struct vmem_altmap *altmap);
extern int offline_pages(unsigned long start_pfn, unsigned long nr_pages);
extern bool is_memblock_offlined(struct memory_block *mem);
-extern int sparse_add_one_section(struct pglist_data *pgdat,
- unsigned long start_pfn, struct vmem_altmap *altmap);
+extern int sparse_add_one_section(int nid, unsigned long start_pfn,
+ struct vmem_altmap *altmap);
extern void sparse_remove_one_section(struct zone *zone, struct mem_section *ms,
unsigned long map_offset, struct vmem_altmap *altmap);
extern struct page *sparse_decode_mem_map(unsigned long coded_mem_map,
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index f626e7e5f57b..5b3a3d7b4466 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -253,7 +253,7 @@ static int __meminit __add_section(int nid, unsigned long phys_start_pfn,
if (pfn_valid(phys_start_pfn))
return -EEXIST;
- ret = sparse_add_one_section(NODE_DATA(nid), phys_start_pfn, altmap);
+ ret = sparse_add_one_section(nid, phys_start_pfn, altmap);
if (ret < 0)
return ret;
diff --git a/mm/sparse.c b/mm/sparse.c
index 5825f276485f..a4fdbcb21514 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -662,8 +662,8 @@ static void free_map_bootmem(struct page *memmap)
* set. If this is <=0, then that means that the passed-in
* map was not consumed and must be freed.
*/
-int __meminit sparse_add_one_section(struct pglist_data *pgdat,
- unsigned long start_pfn, struct vmem_altmap *altmap)
+int __meminit sparse_add_one_section(int nid, unsigned long start_pfn,
+ struct vmem_altmap *altmap)
{
unsigned long section_nr = pfn_to_section_nr(start_pfn);
struct mem_section *ms;
@@ -675,11 +675,11 @@ int __meminit sparse_add_one_section(struct pglist_data *pgdat,
* no locking for this, because it does its own
* plus, it does a kmalloc
*/
- ret = sparse_index_init(section_nr, pgdat->node_id);
+ ret = sparse_index_init(section_nr, nid);
if (ret < 0 && ret != -EEXIST)
return ret;
ret = 0;
- memmap = kmalloc_section_memmap(section_nr, pgdat->node_id, altmap);
+ memmap = kmalloc_section_memmap(section_nr, nid, altmap);
if (!memmap)
return -ENOMEM;
usemap = __kmalloc_section_usemap();
--
2.15.1
^ permalink raw reply related [flat|nested] 38+ messages in thread
* Re: [PATCH v4 1/2] mm, sparse: drop pgdat_resize_lock in sparse_add/remove_one_section()
2018-12-04 8:56 ` [PATCH v4 " Wei Yang
2018-12-04 8:56 ` [PATCH v4 2/2] mm, sparse: pass nid instead of pgdat to sparse_add_one_section() Wei Yang
@ 2018-12-04 9:24 ` David Hildenbrand
1 sibling, 0 replies; 38+ messages in thread
From: David Hildenbrand @ 2018-12-04 9:24 UTC (permalink / raw)
To: Wei Yang, mhocko, dave.hansen, osalvador; +Cc: akpm, linux-mm
On 04.12.18 09:56, Wei Yang wrote:
> pgdat_resize_lock is used to protect pgdat's memory region information
> like: node_start_pfn, node_present_pages, etc. While in function
> sparse_add/remove_one_section(), pgdat_resize_lock is used to protect
> initialization/release of one mem_section. This looks not proper.
>
> These code paths are currently protected by mem_hotplug_lock currently
> but should there ever be any reason for locking at the sparse layer a
> dedicated lock should be introduced.
>
> Following is the current call trace of sparse_add/remove_one_section()
>
> mem_hotplug_begin()
> arch_add_memory()
> add_pages()
> __add_pages()
> __add_section()
> sparse_add_one_section()
> mem_hotplug_done()
>
> mem_hotplug_begin()
> arch_remove_memory()
> __remove_pages()
> __remove_section()
> sparse_remove_one_section()
> mem_hotplug_done()
>
> The comment above the pgdat_resize_lock also mentions "Holding this will
> also guarantee that any pfn_valid() stays that way.", which is true with
> the current implementation and false after this patch. But current
> implementation doesn't meet this comment. There isn't any pfn walkers
> to take the lock so this looks like a relict from the past. This patch
> also removes this comment.
>
> [Michal: changelog suggestion]
>
> Signed-off-by: Wei Yang <richard.weiyang@gmail.com>
> Acked-by: Michal Hocko <mhocko@suse.com>
>
> ---
> v4:
> * fix typo in changelog
> * adjust second paragraph of changelog
> v3:
> * adjust the changelog with the reason for this change
> * remove a comment for pgdat_resize_lock
> * separate the prototype change of sparse_add_one_section() to
> another one
> v2:
> * adjust changelog to show this procedure is serialized by global
> mem_hotplug_lock
> ---
> include/linux/mmzone.h | 2 --
> mm/sparse.c | 9 +--------
> 2 files changed, 1 insertion(+), 10 deletions(-)
>
> diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
> index d76177cb8436..be126113b499 100644
> --- a/include/linux/mmzone.h
> +++ b/include/linux/mmzone.h
> @@ -639,8 +639,6 @@ typedef struct pglist_data {
> /*
> * Must be held any time you expect node_start_pfn,
> * node_present_pages, node_spanned_pages or nr_zones stay constant.
> - * Holding this will also guarantee that any pfn_valid() stays that
> - * way.
> *
> * pgdat_resize_lock() and pgdat_resize_unlock() are provided to
> * manipulate node_size_lock without checking for CONFIG_MEMORY_HOTPLUG
> diff --git a/mm/sparse.c b/mm/sparse.c
> index 33307fc05c4d..5825f276485f 100644
> --- a/mm/sparse.c
> +++ b/mm/sparse.c
> @@ -669,7 +669,6 @@ int __meminit sparse_add_one_section(struct pglist_data *pgdat,
> struct mem_section *ms;
> struct page *memmap;
> unsigned long *usemap;
> - unsigned long flags;
> int ret;
>
> /*
> @@ -689,8 +688,6 @@ int __meminit sparse_add_one_section(struct pglist_data *pgdat,
> return -ENOMEM;
> }
>
> - pgdat_resize_lock(pgdat, &flags);
> -
> ms = __pfn_to_section(start_pfn);
> if (ms->section_mem_map & SECTION_MARKED_PRESENT) {
> ret = -EEXIST;
> @@ -707,7 +704,6 @@ int __meminit sparse_add_one_section(struct pglist_data *pgdat,
> sparse_init_one_section(ms, section_nr, memmap, usemap);
>
> out:
> - pgdat_resize_unlock(pgdat, &flags);
> if (ret < 0) {
> kfree(usemap);
> __kfree_section_memmap(memmap, altmap);
> @@ -769,10 +765,8 @@ void sparse_remove_one_section(struct zone *zone, struct mem_section *ms,
> unsigned long map_offset, struct vmem_altmap *altmap)
> {
> struct page *memmap = NULL;
> - unsigned long *usemap = NULL, flags;
> - struct pglist_data *pgdat = zone->zone_pgdat;
> + unsigned long *usemap = NULL;
>
> - pgdat_resize_lock(pgdat, &flags);
> if (ms->section_mem_map) {
> usemap = ms->pageblock_flags;
> memmap = sparse_decode_mem_map(ms->section_mem_map,
> @@ -780,7 +774,6 @@ void sparse_remove_one_section(struct zone *zone, struct mem_section *ms,
> ms->section_mem_map = 0;
> ms->pageblock_flags = NULL;
> }
> - pgdat_resize_unlock(pgdat, &flags);
>
> clear_hwpoisoned_pages(memmap + map_offset,
> PAGES_PER_SECTION - map_offset);
>
Reviewed-by: David Hildenbrand <david@redhat.com>
--
Thanks,
David / dhildenb
^ permalink raw reply [flat|nested] 38+ messages in thread