Linux-Fsdevel Archive on lore.kernel.org
 help / color / Atom feed
From: Muchun Song <songmuchun@bytedance.com>
To: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Jonathan Corbet <corbet@lwn.net>,
	Thomas Gleixner <tglx@linutronix.de>,
	mingo@redhat.com, bp@alien8.de, x86@kernel.org, hpa@zytor.com,
	dave.hansen@linux.intel.com, luto@kernel.org,
	Peter Zijlstra <peterz@infradead.org>,
	viro@zeniv.linux.org.uk,
	Andrew Morton <akpm@linux-foundation.org>,
	paulmck@kernel.org, mchehab+huawei@kernel.org,
	pawan.kumar.gupta@linux.intel.com,
	Randy Dunlap <rdunlap@infradead.org>,
	oneukum@suse.com, anshuman.khandual@arm.com, jroedel@suse.de,
	Mina Almasry <almasrymina@google.com>,
	David Rientjes <rientjes@google.com>,
	Matthew Wilcox <willy@infradead.org>,
	Xiongchun duan <duanxiongchun@bytedance.com>,
	linux-doc@vger.kernel.org, LKML <linux-kernel@vger.kernel.org>,
	Linux Memory Management List <linux-mm@kvack.org>,
	linux-fsdevel <linux-fsdevel@vger.kernel.org>
Subject: Re: [External] Re: [PATCH v2 07/19] mm/hugetlb: Free the vmemmap pages associated with each hugetlb page
Date: Thu, 29 Oct 2020 14:13:36 +0800
Message-ID: <CAMZfGtUUkkkeENXOOLPacverqyudxntTenMKrtpfHnLOBJaX5Q@mail.gmail.com> (raw)
In-Reply-To: <8658f431-56c4-9774-861a-9c3b54d1910a@oracle.com>

On Thu, Oct 29, 2020 at 7:42 AM Mike Kravetz <mike.kravetz@oracle.com> wrote:
>
> On 10/26/20 7:51 AM, Muchun Song wrote:
> > When we allocate a hugetlb page from the buddy, we should free the
> > unused vmemmap pages associated with it. We can do that in the
> > prep_new_huge_page().
> >
> > Signed-off-by: Muchun Song <songmuchun@bytedance.com>
> > ---
> >  arch/x86/include/asm/hugetlb.h          |   7 +
> >  arch/x86/include/asm/pgtable_64_types.h |   8 +
> >  include/linux/hugetlb.h                 |   7 +
> >  mm/hugetlb.c                            | 190 ++++++++++++++++++++++++
> >  4 files changed, 212 insertions(+)
> >
> > diff --git a/arch/x86/include/asm/hugetlb.h b/arch/x86/include/asm/hugetlb.h
> > index f5e882f999cd..7c3eb60c2198 100644
> > --- a/arch/x86/include/asm/hugetlb.h
> > +++ b/arch/x86/include/asm/hugetlb.h
> > @@ -4,10 +4,17 @@
> >
> >  #include <asm/page.h>
> >  #include <asm-generic/hugetlb.h>
> > +#include <asm/pgtable.h>
> >
> >  #ifdef CONFIG_HUGETLB_PAGE_FREE_VMEMMAP
> >  #define VMEMMAP_HPAGE_SHIFT                  PMD_SHIFT
> >  #define arch_vmemmap_support_huge_mapping()  boot_cpu_has(X86_FEATURE_PSE)
> > +
> > +#define vmemmap_pmd_huge vmemmap_pmd_huge
> > +static inline bool vmemmap_pmd_huge(pmd_t *pmd)
> > +{
> > +     return pmd_large(*pmd);
> > +}
> >  #endif
> >
> >  #define hugepages_supported() boot_cpu_has(X86_FEATURE_PSE)
> > diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h
> > index 52e5f5f2240d..bedbd2e7d06c 100644
> > --- a/arch/x86/include/asm/pgtable_64_types.h
> > +++ b/arch/x86/include/asm/pgtable_64_types.h
> > @@ -139,6 +139,14 @@ extern unsigned int ptrs_per_p4d;
> >  # define VMEMMAP_START               __VMEMMAP_BASE_L4
> >  #endif /* CONFIG_DYNAMIC_MEMORY_LAYOUT */
> >
> > +/*
> > + * VMEMMAP_SIZE - allows the whole linear region to be covered by
> > + *                a struct page array.
> > + */
> > +#define VMEMMAP_SIZE         (1UL << (__VIRTUAL_MASK_SHIFT - PAGE_SHIFT - \
> > +                                      1 + ilog2(sizeof(struct page))))
> > +#define VMEMMAP_END          (VMEMMAP_START + VMEMMAP_SIZE)
> > +
> >  #define VMALLOC_END          (VMALLOC_START + (VMALLOC_SIZE_TB << 40) - 1)
> >
> >  #define MODULES_VADDR                (__START_KERNEL_map + KERNEL_IMAGE_SIZE)
> > diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
> > index ace304a6196c..919f47d77117 100644
> > --- a/include/linux/hugetlb.h
> > +++ b/include/linux/hugetlb.h
> > @@ -601,6 +601,13 @@ static inline bool arch_vmemmap_support_huge_mapping(void)
> >  }
> >  #endif
> >
> > +#ifndef vmemmap_pmd_huge
>
> Let's add
> #define vmemmap_pmd_huge vmemmap_pmd_huge
> just in case code gets moved around in header file.

OK, will do.

>
> > +static inline bool vmemmap_pmd_huge(pmd_t *pmd)
> > +{
> > +     return pmd_huge(*pmd);
> > +}
> > +#endif
> > +
> >  #ifndef VMEMMAP_HPAGE_SHIFT
> >  #define VMEMMAP_HPAGE_SHIFT          PMD_SHIFT
> >  #endif
> > diff --git a/mm/hugetlb.c b/mm/hugetlb.c
> > index d6ae9b6876be..aa012d603e06 100644
> > --- a/mm/hugetlb.c
> > +++ b/mm/hugetlb.c
> > @@ -1293,10 +1293,20 @@ static inline void destroy_compound_gigantic_page(struct page *page,
> >  #endif
> >
> >  #ifdef CONFIG_HUGETLB_PAGE_FREE_VMEMMAP
> > +#include <linux/bootmem_info.h>
> > +
> >  #define RESERVE_VMEMMAP_NR   2U
> > +#define RESERVE_VMEMMAP_SIZE (RESERVE_VMEMMAP_NR << PAGE_SHIFT)
>
> Since RESERVE_VMEMMAP_SIZE is not used here, perhaps it should be added
> in the patch where it is first used.

Will do.

>
> >
> >  #define page_huge_pte(page)  ((page)->pmd_huge_pte)
> >
> > +#define vmemmap_hpage_addr_end(addr, end)                            \
> > +({                                                                   \
> > +     unsigned long __boundary;                                       \
> > +     __boundary = ((addr) + VMEMMAP_HPAGE_SIZE) & VMEMMAP_HPAGE_MASK;\
> > +     (__boundary - 1 < (end) - 1) ? __boundary : (end);              \
> > +})
> > +
> >  static inline unsigned int nr_free_vmemmap(struct hstate *h)
> >  {
> >       return h->nr_free_vmemmap_pages;
> > @@ -1416,6 +1426,181 @@ static void __init hugetlb_vmemmap_init(struct hstate *h)
> >       pr_info("HugeTLB: can free %d vmemmap pages for %s\n",
> >               h->nr_free_vmemmap_pages, h->name);
> >  }
> > +
> > +static inline spinlock_t *vmemmap_pmd_lockptr(pmd_t *pmd)
> > +{
> > +     static DEFINE_SPINLOCK(pgtable_lock);
> > +
> > +     return &pgtable_lock;
> > +}
>
> This is just a global lock.  Correct?  And hugetlb specific?

Yes, it is a global lock. Originally, I wanted to use the pmd lock(e.g.
pmd_lockptr()). But we need to allocate memory for the spinlock and
initialize it when ALLOC_SPLIT_PTLOCKS. It may increase the
complexity.

And I think that here alloc/free hugetlb pages is not a frequent operation.
So I finally use a global lock. Maybe it is enough.

>
> It should be OK as the page table entries for huegtlb pages will not
> overlap with other entries.

Does "hugetlb specific" mean the pmd lock? or per hugetlb lock?
If it is pmd lock, this is fine to me. If not, it may not be enough.
Because the lock also guards the splitting of pmd pgtable.

Thanks.
>
> > +
> > +/*
> > + * Walk a vmemmap address to the pmd it maps.
> > + */
> > +static pmd_t *vmemmap_to_pmd(const void *page)
> > +{
> > +     unsigned long addr = (unsigned long)page;
> > +     pgd_t *pgd;
> > +     p4d_t *p4d;
> > +     pud_t *pud;
> > +     pmd_t *pmd;
> > +
> > +     if (addr < VMEMMAP_START || addr >= VMEMMAP_END)
> > +             return NULL;
> > +
> > +     pgd = pgd_offset_k(addr);
> > +     if (pgd_none(*pgd))
> > +             return NULL;
> > +     p4d = p4d_offset(pgd, addr);
> > +     if (p4d_none(*p4d))
> > +             return NULL;
> > +     pud = pud_offset(p4d, addr);
> > +
> > +     WARN_ON_ONCE(pud_bad(*pud));
> > +     if (pud_none(*pud) || pud_bad(*pud))
> > +             return NULL;
> > +     pmd = pmd_offset(pud, addr);
> > +
> > +     return pmd;
> > +}
>
> That routine is not really hugetlb specific.  Perhaps we could move it
> to sparse-vmemmap.c?  Or elsewhere?

Yeah, we can move it to sparse-vmemmap.c, maybe better.

>
> --
> Mike Kravetz



-- 
Yours,
Muchun

  reply index

Thread overview: 43+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-10-26 14:50 [PATCH v2 00/19] Free some vmemmap pages of " Muchun Song
2020-10-26 14:50 ` [PATCH v2 01/19] mm/memory_hotplug: Move bootmem info registration API to bootmem_info.c Muchun Song
2020-10-26 14:50 ` [PATCH v2 02/19] mm/memory_hotplug: Move {get,put}_page_bootmem() " Muchun Song
2020-10-26 14:50 ` [PATCH v2 03/19] mm/hugetlb: Introduce a new config HUGETLB_PAGE_FREE_VMEMMAP Muchun Song
2020-10-29 10:29   ` Oscar Salvador
2020-10-29 13:34     ` [External] " Muchun Song
2020-10-26 14:50 ` [PATCH v2 04/19] mm/hugetlb: Introduce nr_free_vmemmap_pages in the struct hstate Muchun Song
2020-10-27 22:03   ` Mike Kravetz
2020-10-29 13:26   ` Oscar Salvador
2020-10-29 13:41     ` [External] " Muchun Song
2020-10-26 14:51 ` [PATCH v2 05/19] mm/hugetlb: Introduce pgtable allocation/freeing helpers Muchun Song
2020-10-28  0:32   ` Mike Kravetz
2020-10-28  7:26     ` [External] " Muchun Song
2020-10-28 23:42       ` Mike Kravetz
2020-11-05 13:23   ` Oscar Salvador
2020-11-05 16:08     ` [External] " Muchun Song
2020-11-06  9:46       ` Oscar Salvador
2020-11-06 16:43         ` Muchun Song
2020-10-26 14:51 ` [PATCH v2 06/19] mm/bootmem_info: Introduce {free,prepare}_vmemmap_page() Muchun Song
2020-10-26 14:51 ` [PATCH v2 07/19] mm/hugetlb: Free the vmemmap pages associated with each hugetlb page Muchun Song
2020-10-26 16:01   ` Matthew Wilcox
2020-10-27  2:58     ` [External] " Muchun Song
2020-10-28 23:42   ` Mike Kravetz
2020-10-29  6:13     ` Muchun Song [this message]
2020-10-29 21:59       ` [External] " Mike Kravetz
2020-10-30  2:58         ` Muchun Song
2020-10-26 14:51 ` [PATCH v2 08/19] mm/hugetlb: Defer freeing of hugetlb pages Muchun Song
2020-10-26 14:51 ` [PATCH v2 09/19] mm/hugetlb: Allocate the vmemmap pages associated with each hugetlb page Muchun Song
2020-10-26 14:51 ` [PATCH v2 10/19] mm/hugetlb: Introduce remap_huge_page_pmd_vmemmap helper Muchun Song
2020-10-26 14:51 ` [PATCH v2 11/19] mm/hugetlb: Use PG_slab to indicate split pmd Muchun Song
2020-10-26 14:51 ` [PATCH v2 12/19] mm/hugetlb: Support freeing vmemmap pages of gigantic page Muchun Song
2020-10-26 14:51 ` [PATCH v2 13/19] mm/hugetlb: Add a BUILD_BUG_ON to check if struct page size is a power of two Muchun Song
2020-10-26 14:51 ` [PATCH v2 14/19] mm/hugetlb: Clear PageHWPoison on the non-error memory page Muchun Song
2020-10-26 14:51 ` [PATCH v2 15/19] mm/hugetlb: Flush work when dissolving hugetlb page Muchun Song
2020-10-26 14:51 ` [PATCH v2 16/19] mm/hugetlb: Add a kernel parameter hugetlb_free_vmemmap Muchun Song
2020-10-26 14:51 ` [PATCH v2 17/19] mm/hugetlb: Merge pte to huge pmd only for gigantic page Muchun Song
2020-10-26 14:51 ` [PATCH v2 18/19] mm/hugetlb: Gather discrete indexes of tail page Muchun Song
2020-10-26 14:51 ` [PATCH v2 19/19] mm/hugetlb: Add BUILD_BUG_ON to catch invalid usage of tail struct page Muchun Song
2020-10-26 15:53 ` [PATCH v2 00/19] Free some vmemmap pages of hugetlb page Matthew Wilcox
2020-10-27  2:54   ` [External] " Muchun Song
2020-10-30  9:14 ` Michal Hocko
2020-10-30 10:24   ` [External] " Muchun Song
2020-10-30 15:19     ` Michal Hocko

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=CAMZfGtUUkkkeENXOOLPacverqyudxntTenMKrtpfHnLOBJaX5Q@mail.gmail.com \
    --to=songmuchun@bytedance.com \
    --cc=akpm@linux-foundation.org \
    --cc=almasrymina@google.com \
    --cc=anshuman.khandual@arm.com \
    --cc=bp@alien8.de \
    --cc=corbet@lwn.net \
    --cc=dave.hansen@linux.intel.com \
    --cc=duanxiongchun@bytedance.com \
    --cc=hpa@zytor.com \
    --cc=jroedel@suse.de \
    --cc=linux-doc@vger.kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=luto@kernel.org \
    --cc=mchehab+huawei@kernel.org \
    --cc=mike.kravetz@oracle.com \
    --cc=mingo@redhat.com \
    --cc=oneukum@suse.com \
    --cc=paulmck@kernel.org \
    --cc=pawan.kumar.gupta@linux.intel.com \
    --cc=peterz@infradead.org \
    --cc=rdunlap@infradead.org \
    --cc=rientjes@google.com \
    --cc=tglx@linutronix.de \
    --cc=viro@zeniv.linux.org.uk \
    --cc=willy@infradead.org \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Linux-Fsdevel Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/linux-fsdevel/0 linux-fsdevel/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 linux-fsdevel linux-fsdevel/ https://lore.kernel.org/linux-fsdevel \
		linux-fsdevel@vger.kernel.org
	public-inbox-index linux-fsdevel

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.linux-fsdevel


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git