On 24 Mar 2020, at 1:22, Anshuman Khandual wrote:

> This adds new tests validating arch page table helpers for these following
> core memory features. These tests create and test specific mapping types at
> various page table levels.
>
> 1. SPECIAL mapping
> 2. PROTNONE mapping
> 3. DEVMAP mapping
> 4. SOFTDIRTY mapping
> 5. SWAP mapping
> 6. MIGRATION mapping
> 7. HUGETLB mapping
> 8. THP mapping
>
> Cc: Andrew Morton
> Cc: Mike Rapoport
> Cc: Vineet Gupta
> Cc: Catalin Marinas
> Cc: Will Deacon
> Cc: Benjamin Herrenschmidt
> Cc: Paul Mackerras
> Cc: Michael Ellerman
> Cc: Heiko Carstens
> Cc: Vasily Gorbik
> Cc: Christian Borntraeger
> Cc: Thomas Gleixner
> Cc: Ingo Molnar
> Cc: Borislav Petkov
> Cc: "H. Peter Anvin"
> Cc: Kirill A. Shutemov
> Cc: Paul Walmsley
> Cc: Palmer Dabbelt
> Cc: linux-snps-arc@lists.infradead.org
> Cc: linux-arm-kernel@lists.infradead.org
> Cc: linuxppc-dev@lists.ozlabs.org
> Cc: linux-s390@vger.kernel.org
> Cc: linux-riscv@lists.infradead.org
> Cc: x86@kernel.org
> Cc: linux-arch@vger.kernel.org
> Cc: linux-kernel@vger.kernel.org
> Suggested-by: Catalin Marinas
> Signed-off-by: Anshuman Khandual
> ---
>  mm/debug_vm_pgtable.c | 291 +++++++++++++++++++++++++++++++++++++++++-
>  1 file changed, 290 insertions(+), 1 deletion(-)
>
> diff --git a/mm/debug_vm_pgtable.c b/mm/debug_vm_pgtable.c
> index 98990a515268..15055a8f6478 100644
> --- a/mm/debug_vm_pgtable.c
> +++ b/mm/debug_vm_pgtable.c
> @@ -289,6 +289,267 @@ static void __init pmd_populate_tests(struct mm_struct *mm, pmd_t *pmdp,
>  	WARN_ON(pmd_bad(pmd));
>  }
>
> +static void __init pte_special_tests(unsigned long pfn, pgprot_t prot)
> +{
> +	pte_t pte = pfn_pte(pfn, prot);
> +
> +	if (!IS_ENABLED(CONFIG_ARCH_HAS_PTE_SPECIAL))
> +		return;
> +
> +	WARN_ON(!pte_special(pte_mkspecial(pte)));
> +}
> +
> +static void __init pte_protnone_tests(unsigned long pfn, pgprot_t prot)
> +{
> +	pte_t pte = pfn_pte(pfn, prot);
> +
> +	if (!IS_ENABLED(CONFIG_NUMA_BALANCING))
> +		return;
> +
> +	WARN_ON(!pte_protnone(pte));
> +	WARN_ON(!pte_present(pte));
> +}
> +
> +#ifdef CONFIG_TRANSPARENT_HUGEPAGE
> +static void __init pmd_protnone_tests(unsigned long pfn, pgprot_t prot)
> +{
> +	pmd_t pmd = pfn_pmd(pfn, prot);
> +
> +	if (!IS_ENABLED(CONFIG_NUMA_BALANCING))
> +		return;
> +
> +	WARN_ON(!pmd_protnone(pmd));
> +	WARN_ON(!pmd_present(pmd));
> +}
> +#else
> +static void __init pmd_protnone_tests(unsigned long pfn, pgprot_t prot) { }
> +#endif
> +
> +#ifdef CONFIG_ARCH_HAS_PTE_DEVMAP
> +static void __init pte_devmap_tests(unsigned long pfn, pgprot_t prot)
> +{
> +	pte_t pte = pfn_pte(pfn, prot);
> +
> +	WARN_ON(!pte_devmap(pte_mkdevmap(pte)));
> +}
> +
> +#ifdef CONFIG_TRANSPARENT_HUGEPAGE
> +static void __init pmd_devmap_tests(unsigned long pfn, pgprot_t prot)
> +{
> +	pmd_t pmd = pfn_pmd(pfn, prot);
> +
> +	WARN_ON(!pmd_devmap(pmd_mkdevmap(pmd)));
> +}
> +
> +#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
> +static void __init pud_devmap_tests(unsigned long pfn, pgprot_t prot)
> +{
> +	pud_t pud = pfn_pud(pfn, prot);
> +
> +	WARN_ON(!pud_devmap(pud_mkdevmap(pud)));
> +}
> +#else
> +static void __init pud_devmap_tests(unsigned long pfn, pgprot_t prot) { }
> +#endif
> +#else
> +static void __init pmd_devmap_tests(unsigned long pfn, pgprot_t prot) { }
> +static void __init pud_devmap_tests(unsigned long pfn, pgprot_t prot) { }
> +#endif
> +#else
> +static void __init pte_devmap_tests(unsigned long pfn, pgprot_t prot) { }
> +static void __init pmd_devmap_tests(unsigned long pfn, pgprot_t prot) { }
> +static void __init pud_devmap_tests(unsigned long pfn, pgprot_t prot) { }
> +#endif
> +
> +static void __init pte_soft_dirty_tests(unsigned long pfn, pgprot_t prot)
> +{
> +	pte_t pte = pfn_pte(pfn, prot);
> +
> +	if (!IS_ENABLED(CONFIG_HAVE_ARCH_SOFT_DIRTY))
> +		return;
> +
> +	WARN_ON(!pte_soft_dirty(pte_mksoft_dirty(pte)));
> +	WARN_ON(pte_soft_dirty(pte_clear_soft_dirty(pte)));
> +}
> +
> +static void __init pte_swap_soft_dirty_tests(unsigned long pfn, pgprot_t prot)
> +{
> +	pte_t pte = pfn_pte(pfn, prot);
> +
> +	if (!IS_ENABLED(CONFIG_HAVE_ARCH_SOFT_DIRTY))
> +		return;
> +
> +	WARN_ON(!pte_swp_soft_dirty(pte_swp_mksoft_dirty(pte)));
> +	WARN_ON(pte_swp_soft_dirty(pte_swp_clear_soft_dirty(pte)));
> +}
> +
> +#ifdef CONFIG_TRANSPARENT_HUGEPAGE
> +static void __init pmd_soft_dirty_tests(unsigned long pfn, pgprot_t prot)
> +{
> +	pmd_t pmd = pfn_pmd(pfn, prot);
> +
> +	if (!IS_ENABLED(CONFIG_HAVE_ARCH_SOFT_DIRTY))
> +		return;
> +
> +	WARN_ON(!pmd_soft_dirty(pmd_mksoft_dirty(pmd)));
> +	WARN_ON(pmd_soft_dirty(pmd_clear_soft_dirty(pmd)));
> +}
> +
> +static void __init pmd_swap_soft_dirty_tests(unsigned long pfn, pgprot_t prot)
> +{
> +	pmd_t pmd = pfn_pmd(pfn, prot);
> +
> +	if (!IS_ENABLED(CONFIG_HAVE_ARCH_SOFT_DIRTY) ||
> +	    !IS_ENABLED(CONFIG_ARCH_ENABLE_THP_MIGRATION))
> +		return;
> +
> +	WARN_ON(!pmd_swp_soft_dirty(pmd_swp_mksoft_dirty(pmd)));
> +	WARN_ON(pmd_swp_soft_dirty(pmd_swp_clear_soft_dirty(pmd)));
> +}
> +#else
> +static void __init pmd_soft_dirty_tests(unsigned long pfn, pgprot_t prot) { }
> +static void __init pmd_swap_soft_dirty_tests(unsigned long pfn, pgprot_t prot)
> +{
> +}
> +#endif
> +
> +static void __init pte_swap_tests(unsigned long pfn, pgprot_t prot)
> +{
> +	swp_entry_t swp;
> +	pte_t pte;
> +
> +	pte = pfn_pte(pfn, prot);
> +	swp = __pte_to_swp_entry(pte);
> +	WARN_ON(!pte_same(pte, __swp_entry_to_pte(swp)));
> +}
> +
> +#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
> +static void __init pmd_swap_tests(unsigned long pfn, pgprot_t prot)
> +{
> +	swp_entry_t swp;
> +	pmd_t pmd;
> +
> +	pmd = pfn_pmd(pfn, prot);
> +	swp = __pmd_to_swp_entry(pmd);
> +	WARN_ON(!pmd_same(pmd, __swp_entry_to_pmd(swp)));
> +}
> +#else
> +static void __init pmd_swap_tests(unsigned long pfn, pgprot_t prot) { }
> +#endif
> +
> +static void __init swap_migration_tests(void)
> +{
> +	struct page *page;
> +	swp_entry_t swp;
> +
> +	if (!IS_ENABLED(CONFIG_MIGRATION))
> +		return;
> +	/*
> +	 * swap_migration_tests() requires a dedicated page as it needs to
> +	 * be locked before creating a migration entry from it. Locking the
> +	 * page that actually maps kernel text ('start_kernel') can be real
> +	 * problematic. Lets allocate a dedicated page explicitly for this
> +	 * purpose that will be freed subsequently.
> +	 */
> +	page = alloc_page(GFP_KERNEL);
> +	if (!page) {
> +		pr_err("page allocation failed\n");
> +		return;
> +	}
> +
> +	/*
> +	 * make_migration_entry() expects given page to be
> +	 * locked, otherwise it stumbles upon a BUG_ON().
> +	 */
> +	__SetPageLocked(page);
> +	swp = make_migration_entry(page, 1);
> +	WARN_ON(!is_migration_entry(swp));
> +	WARN_ON(!is_write_migration_entry(swp));
> +
> +	make_migration_entry_read(&swp);
> +	WARN_ON(!is_migration_entry(swp));
> +	WARN_ON(is_write_migration_entry(swp));
> +
> +	swp = make_migration_entry(page, 0);
> +	WARN_ON(!is_migration_entry(swp));
> +	WARN_ON(is_write_migration_entry(swp));
> +	__ClearPageLocked(page);
> +	__free_page(page);
> +}
> +
> +#ifdef CONFIG_HUGETLB_PAGE
> +static void __init hugetlb_basic_tests(unsigned long pfn, pgprot_t prot)
> +{
> +	struct page *page;
> +	pte_t pte;
> +
> +	/*
> +	 * Accessing the page associated with the pfn is safe here,
> +	 * as it was previously derived from a real kernel symbol.
> +	 */
> +	page = pfn_to_page(pfn);
> +	pte = mk_huge_pte(page, prot);
> +
> +	WARN_ON(!huge_pte_dirty(huge_pte_mkdirty(pte)));
> +	WARN_ON(!huge_pte_write(huge_pte_mkwrite(huge_pte_wrprotect(pte))));
> +	WARN_ON(huge_pte_write(huge_pte_wrprotect(huge_pte_mkwrite(pte))));
> +
> +#ifdef CONFIG_ARCH_WANT_GENERAL_HUGETLB
> +	pte = pfn_pte(pfn, prot);
> +
> +	WARN_ON(!pte_huge(pte_mkhuge(pte)));
> +#endif
> +}
> +#else
> +static void __init hugetlb_basic_tests(unsigned long pfn, pgprot_t prot) { }
> +#endif
> +
> +#ifdef CONFIG_TRANSPARENT_HUGEPAGE
> +static void __init pmd_thp_tests(unsigned long pfn, pgprot_t prot)
> +{
> +	pmd_t pmd;
> +
> +	/*
> +	 * pmd_trans_huge() and pmd_present() must return positive
> +	 * after MMU invalidation with pmd_mknotpresent().
> +	 */
> +	pmd = pfn_pmd(pfn, prot);
> +	WARN_ON(!pmd_trans_huge(pmd_mkhuge(pmd)));
> +
> +#ifndef __HAVE_ARCH_PMDP_INVALIDATE
> +	WARN_ON(!pmd_trans_huge(pmd_mknotpresent(pmd_mkhuge(pmd))));
> +	WARN_ON(!pmd_present(pmd_mknotpresent(pmd_mkhuge(pmd))));
> +#endif

I think we need a better comment here, because requiring pmd_trans_huge()
and pmd_present() to return true after pmd_mknotpresent() is not
straightforward.

According to Andrea Arcangeli's email
(https://lore.kernel.org/linux-mm/20181017020930.GN30832@redhat.com/),
this behavior is an optimization for transparent huge pages.
pmd_trans_huge() must be true if pmd_page() returns a valid THP, to avoid
taking the pmd_lock when others walk over non-transhuge pmds (i.e. when no
THPs are allocated). In particular, when we split a THP and clear the
present bit from the pmd, pmd_trans_huge() still needs to return true, and
pmd_present() should be true whenever pmd_trans_huge() returns true.

I think it is also worth putting either Andrea's email or a link to it in
the rst file in your 3rd patch. It is good documentation for this special
case.

--
Best Regards,
Yan Zi
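
P.S. To make the suggestion concrete, here is a rough sketch of the kind of
comment I have in mind for that block. The wording is mine, paraphrasing
Andrea's explanation above, so please adjust it as you see fit; the WARN_ON()
lines are unchanged from your patch:

	/*
	 * pmd_mknotpresent() clears the present bit, as is done when a
	 * THP is split. pmd_trans_huge() must keep returning true for
	 * such a pmd, so that page table walkers checking it without the
	 * pmd_lock still recognise the entry as a huge mapping rather
	 * than a regular pmd, and pmd_present() must stay true whenever
	 * pmd_trans_huge() is true. See Andrea's explanation:
	 * https://lore.kernel.org/linux-mm/20181017020930.GN30832@redhat.com/
	 */
#ifndef __HAVE_ARCH_PMDP_INVALIDATE
	WARN_ON(!pmd_trans_huge(pmd_mknotpresent(pmd_mkhuge(pmd))));
	WARN_ON(!pmd_present(pmd_mknotpresent(pmd_mkhuge(pmd))));
#endif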