[v3] mm/sparse.c: Use kvmalloc_node/kvfree to alloc/free memmap for the classic sparse
diff mbox series

Message ID 20200312141749.GL27711@MiWiFi-R3L-srv
State In Next
Commit e1624434ca251b55099942cd9ef63c65edbce75e
Headers show
Series
  • [v3] mm/sparse.c: Use kvmalloc_node/kvfree to alloc/free memmap for the classic sparse
Related show

Commit Message

Baoquan He March 12, 2020, 2:17 p.m. UTC
This change makes populate_section_memmap()/depopulate_section_memmap()
much simpler.

Suggested-by: Michal Hocko <mhocko@kernel.org>
Signed-off-by: Baoquan He <bhe@redhat.com>
---
v2->v3:
  Remove __GFP_NOWARN and use array_size when calling kvmalloc_node()
  per Matthew's comments.

 mm/sparse.c | 27 +++------------------------
 1 file changed, 3 insertions(+), 24 deletions(-)

Comments

Michal Hocko March 13, 2020, 2:56 p.m. UTC | #1
On Thu 12-03-20 22:17:49, Baoquan He wrote:
> This change makes populate_section_memmap()/depopulate_section_memmap
> much simpler.

Not only that, and you should make it more explicit: it also tries to
allocate memmaps from the target NUMA node, so this is a functional change.
I would prefer to have that in a separate patch in case we hit some weird
NUMA setups which would choke on memoryless nodes and similar horrors.

> Suggested-by: Michal Hocko <mhocko@kernel.org>
> Signed-off-by: Baoquan He <bhe@redhat.com>

I do not see any reason this shouldn't work. Btw. did you get to test
it?

Feel free to add
Acked-by: Michal Hocko <mhocko@suse.com>
to both patches if you go and split.

> ---
> v2->v3:
>   Remove __GFP_NOWARN and use array_size when calling kvmalloc_node()
>   per Matthew's comments.
> 
>  mm/sparse.c | 27 +++------------------------
>  1 file changed, 3 insertions(+), 24 deletions(-)
> 
> diff --git a/mm/sparse.c b/mm/sparse.c
> index bf6c00a28045..bb99633575b5 100644
> --- a/mm/sparse.c
> +++ b/mm/sparse.c
> @@ -734,35 +734,14 @@ static void free_map_bootmem(struct page *memmap)
>  struct page * __meminit populate_section_memmap(unsigned long pfn,
>  		unsigned long nr_pages, int nid, struct vmem_altmap *altmap)
>  {
> -	struct page *page, *ret;
> -	unsigned long memmap_size = sizeof(struct page) * PAGES_PER_SECTION;
> -
> -	page = alloc_pages(GFP_KERNEL|__GFP_NOWARN, get_order(memmap_size));
> -	if (page)
> -		goto got_map_page;
> -
> -	ret = vmalloc(memmap_size);
> -	if (ret)
> -		goto got_map_ptr;
> -
> -	return NULL;
> -got_map_page:
> -	ret = (struct page *)pfn_to_kaddr(page_to_pfn(page));
> -got_map_ptr:
> -
> -	return ret;
> +	return kvmalloc_node(array_size(sizeof(struct page),
> +			PAGES_PER_SECTION), GFP_KERNEL, nid);
>  }
>  
>  static void depopulate_section_memmap(unsigned long pfn, unsigned long nr_pages,
>  		struct vmem_altmap *altmap)
>  {
> -	struct page *memmap = pfn_to_page(pfn);
> -
> -	if (is_vmalloc_addr(memmap))
> -		vfree(memmap);
> -	else
> -		free_pages((unsigned long)memmap,
> -			   get_order(sizeof(struct page) * PAGES_PER_SECTION));
> +	kvfree(pfn_to_page(pfn));
>  }
>  
>  static void free_map_bootmem(struct page *memmap)
> -- 
> 2.17.2
>
David Hildenbrand March 13, 2020, 3:04 p.m. UTC | #2
On 12.03.20 15:17, Baoquan He wrote:
> This change makes populate_section_memmap()/depopulate_section_memmap
> much simpler.
> 
> Suggested-by: Michal Hocko <mhocko@kernel.org>
> Signed-off-by: Baoquan He <bhe@redhat.com>
> ---
> v2->v3:
>   Remove __GFP_NOWARN and use array_size when calling kvmalloc_node()
>   per Matthew's comments.
> 
>  mm/sparse.c | 27 +++------------------------
>  1 file changed, 3 insertions(+), 24 deletions(-)
> 
> diff --git a/mm/sparse.c b/mm/sparse.c
> index bf6c00a28045..bb99633575b5 100644
> --- a/mm/sparse.c
> +++ b/mm/sparse.c
> @@ -734,35 +734,14 @@ static void free_map_bootmem(struct page *memmap)
>  struct page * __meminit populate_section_memmap(unsigned long pfn,
>  		unsigned long nr_pages, int nid, struct vmem_altmap *altmap)
>  {
> -	struct page *page, *ret;
> -	unsigned long memmap_size = sizeof(struct page) * PAGES_PER_SECTION;
> -
> -	page = alloc_pages(GFP_KERNEL|__GFP_NOWARN, get_order(memmap_size));
> -	if (page)
> -		goto got_map_page;
> -
> -	ret = vmalloc(memmap_size);
> -	if (ret)
> -		goto got_map_ptr;
> -
> -	return NULL;
> -got_map_page:
> -	ret = (struct page *)pfn_to_kaddr(page_to_pfn(page));
> -got_map_ptr:
> -
> -	return ret;
> +	return kvmalloc_node(array_size(sizeof(struct page),
> +			PAGES_PER_SECTION), GFP_KERNEL, nid);


Indentation of the parameters looks wrong/weird. Maybe just calculate
memmap_size outside of the call; that makes it easier to read IMHO.

Apart from that, looks good to me.

Reviewed-by: David Hildenbrand <david@redhat.com>
Baoquan He March 14, 2020, 12:53 a.m. UTC | #3
On 03/13/20 at 03:56pm, Michal Hocko wrote:
> On Thu 12-03-20 22:17:49, Baoquan He wrote:
> > This change makes populate_section_memmap()/depopulate_section_memmap
> > much simpler.
> 
> Not only and you should make it more explicit. It also tries to allocate
> memmaps from the target numa node so this is a functional change. I
> would prefer to have that in a separate patch in case we hit some weird
> NUMA setups which would choke on memory less nodes and similar horrors.

Yes, splitting sounds more reasonable, and I would love to do that. One
question: I noticed Andrew has already picked this into the -mm tree. If I
post a new patchset containing these two small patches, is it convenient
to drop the old one and get these two merged instead?

Sorry, I don't know very well how mm maintenance works.

> 
> > Suggested-by: Michal Hocko <mhocko@kernel.org>
> > Signed-off-by: Baoquan He <bhe@redhat.com>
> 
> I do not see any reason this shouldn't work. Btw. did you get to test
> it?
> 
> Feel free to add
> Acked-by: Michal Hocko <mhocko@suse.com>
> to both patches if you go and split.
> 
> > ---
> > v2->v3:
> >   Remove __GFP_NOWARN and use array_size when calling kvmalloc_node()
> >   per Matthew's comments.
> > 
> >  mm/sparse.c | 27 +++------------------------
> >  1 file changed, 3 insertions(+), 24 deletions(-)
> > 
> > diff --git a/mm/sparse.c b/mm/sparse.c
> > index bf6c00a28045..bb99633575b5 100644
> > --- a/mm/sparse.c
> > +++ b/mm/sparse.c
> > @@ -734,35 +734,14 @@ static void free_map_bootmem(struct page *memmap)
> >  struct page * __meminit populate_section_memmap(unsigned long pfn,
> >  		unsigned long nr_pages, int nid, struct vmem_altmap *altmap)
> >  {
> > -	struct page *page, *ret;
> > -	unsigned long memmap_size = sizeof(struct page) * PAGES_PER_SECTION;
> > -
> > -	page = alloc_pages(GFP_KERNEL|__GFP_NOWARN, get_order(memmap_size));
> > -	if (page)
> > -		goto got_map_page;
> > -
> > -	ret = vmalloc(memmap_size);
> > -	if (ret)
> > -		goto got_map_ptr;
> > -
> > -	return NULL;
> > -got_map_page:
> > -	ret = (struct page *)pfn_to_kaddr(page_to_pfn(page));
> > -got_map_ptr:
> > -
> > -	return ret;
> > +	return kvmalloc_node(array_size(sizeof(struct page),
> > +			PAGES_PER_SECTION), GFP_KERNEL, nid);
> >  }
> >  
> >  static void depopulate_section_memmap(unsigned long pfn, unsigned long nr_pages,
> >  		struct vmem_altmap *altmap)
> >  {
> > -	struct page *memmap = pfn_to_page(pfn);
> > -
> > -	if (is_vmalloc_addr(memmap))
> > -		vfree(memmap);
> > -	else
> > -		free_pages((unsigned long)memmap,
> > -			   get_order(sizeof(struct page) * PAGES_PER_SECTION));
> > +	kvfree(pfn_to_page(pfn));
> >  }
> >  
> >  static void free_map_bootmem(struct page *memmap)
> > -- 
> > 2.17.2
> > 
> 
> -- 
> Michal Hocko
> SUSE Labs
>
Baoquan He March 14, 2020, 1:12 a.m. UTC | #4
On 03/13/20 at 03:56pm, Michal Hocko wrote:
> On Thu 12-03-20 22:17:49, Baoquan He wrote:
> > This change makes populate_section_memmap()/depopulate_section_memmap
> > much simpler.
> 
> Not only and you should make it more explicit. It also tries to allocate
> memmaps from the target numa node so this is a functional change. I
> would prefer to have that in a separate patch in case we hit some weird
> NUMA setups which would choke on memory less nodes and similar horrors.
> 
> > Suggested-by: Michal Hocko <mhocko@kernel.org>
> > Signed-off-by: Baoquan He <bhe@redhat.com>
> 
> I do not see any reason this shouldn't work. Btw. did you get to test
> it?

Forgot to reply to this comment. Yes, I have tested it before each post.
Michal Hocko March 14, 2020, 12:56 p.m. UTC | #5
On Sat 14-03-20 08:53:34, Baoquan He wrote:
> On 03/13/20 at 03:56pm, Michal Hocko wrote:
> > On Thu 12-03-20 22:17:49, Baoquan He wrote:
> > > This change makes populate_section_memmap()/depopulate_section_memmap
> > > much simpler.
> > 
> > Not only and you should make it more explicit. It also tries to allocate
> > memmaps from the target numa node so this is a functional change. I
> > would prefer to have that in a separate patch in case we hit some weird
> > NUMA setups which would choke on memory less nodes and similar horrors.
> 
> Yes, splitting sounds more reasonable, I would love to do that. One
> question is I noticed Andrew had picked this into -mm tree, if I post a
> new patchset including these two small patches, whether it's convenient
> to drop the old one and get these two merged.

Andrew usually just drops the previous version and replaces it with the
new one. So just post a new version. Thanks!
Baoquan He March 15, 2020, 1:01 p.m. UTC | #6
On 03/14/20 at 01:56pm, Michal Hocko wrote:
> On Sat 14-03-20 08:53:34, Baoquan He wrote:
> > On 03/13/20 at 03:56pm, Michal Hocko wrote:
> > > On Thu 12-03-20 22:17:49, Baoquan He wrote:
> > > > This change makes populate_section_memmap()/depopulate_section_memmap
> > > > much simpler.
> > > 
> > > Not only and you should make it more explicit. It also tries to allocate
> > > memmaps from the target numa node so this is a functional change. I
> > > would prefer to have that in a separate patch in case we hit some weird
> > > NUMA setups which would choke on memory less nodes and similar horrors.
> > 
> > Yes, splitting sounds more reasonable, I would love to do that. One
> > question is I noticed Andrew had picked this into -mm tree, if I post a
> > new patchset including these two small patches, whether it's convenient
> > to drop the old one and get these two merged.
> 
> Andrew usually just drops the previous version and replaces it by the
> new one. So just post a new version. Thanks!

I see, will post a new version, thanks.
Baoquan He March 16, 2020, 7:14 a.m. UTC | #7
On 03/13/20 at 04:04pm, David Hildenbrand wrote:
> On 12.03.20 15:17, Baoquan He wrote:
> > This change makes populate_section_memmap()/depopulate_section_memmap
> > much simpler.
> > 
> > Suggested-by: Michal Hocko <mhocko@kernel.org>
> > Signed-off-by: Baoquan He <bhe@redhat.com>
> > ---
> > v2->v3:
> >   Remove __GFP_NOWARN and use array_size when calling kvmalloc_node()
> >   per Matthew's comments.
> > 
> >  mm/sparse.c | 27 +++------------------------
> >  1 file changed, 3 insertions(+), 24 deletions(-)
> > 
> > diff --git a/mm/sparse.c b/mm/sparse.c
> > index bf6c00a28045..bb99633575b5 100644
> > --- a/mm/sparse.c
> > +++ b/mm/sparse.c
> > @@ -734,35 +734,14 @@ static void free_map_bootmem(struct page *memmap)
> >  struct page * __meminit populate_section_memmap(unsigned long pfn,
> >  		unsigned long nr_pages, int nid, struct vmem_altmap *altmap)
> >  {
> > -	struct page *page, *ret;
> > -	unsigned long memmap_size = sizeof(struct page) * PAGES_PER_SECTION;
> > -
> > -	page = alloc_pages(GFP_KERNEL|__GFP_NOWARN, get_order(memmap_size));
> > -	if (page)
> > -		goto got_map_page;
> > -
> > -	ret = vmalloc(memmap_size);
> > -	if (ret)
> > -		goto got_map_ptr;
> > -
> > -	return NULL;
> > -got_map_page:
> > -	ret = (struct page *)pfn_to_kaddr(page_to_pfn(page));
> > -got_map_ptr:
> > -
> > -	return ret;
> > +	return kvmalloc_node(array_size(sizeof(struct page),
> > +			PAGES_PER_SECTION), GFP_KERNEL, nid);
> 
> 
> Indentation of the parameters looks wrong/weird. Maybe just calculate
> memmap_size outside of the call, makes it easier to read IMHO.

I'll fix the indentation issue. Adding a memmap_size variable doesn't seem
necessary.

> 
> Apart from that, looks good to me.
> 
> Reviewed-by: David Hildenbrand <david@redhat.com>
> 
> -- 
> Thanks,
> 
> David / dhildenb

Patch
diff mbox series

diff --git a/mm/sparse.c b/mm/sparse.c
index bf6c00a28045..bb99633575b5 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -734,35 +734,14 @@  static void free_map_bootmem(struct page *memmap)
 struct page * __meminit populate_section_memmap(unsigned long pfn,
 		unsigned long nr_pages, int nid, struct vmem_altmap *altmap)
 {
-	struct page *page, *ret;
-	unsigned long memmap_size = sizeof(struct page) * PAGES_PER_SECTION;
-
-	page = alloc_pages(GFP_KERNEL|__GFP_NOWARN, get_order(memmap_size));
-	if (page)
-		goto got_map_page;
-
-	ret = vmalloc(memmap_size);
-	if (ret)
-		goto got_map_ptr;
-
-	return NULL;
-got_map_page:
-	ret = (struct page *)pfn_to_kaddr(page_to_pfn(page));
-got_map_ptr:
-
-	return ret;
+	return kvmalloc_node(array_size(sizeof(struct page),
+			PAGES_PER_SECTION), GFP_KERNEL, nid);
 }
 
 static void depopulate_section_memmap(unsigned long pfn, unsigned long nr_pages,
 		struct vmem_altmap *altmap)
 {
-	struct page *memmap = pfn_to_page(pfn);
-
-	if (is_vmalloc_addr(memmap))
-		vfree(memmap);
-	else
-		free_pages((unsigned long)memmap,
-			   get_order(sizeof(struct page) * PAGES_PER_SECTION));
+	kvfree(pfn_to_page(pfn));
 }
 
 static void free_map_bootmem(struct page *memmap)