From: Michal Hocko <mhocko@kernel.org>
To: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Cc: linux-mm@kvack.org,
"Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>,
Hugh Dickins <hughd@google.com>,
Andrew Morton <akpm@linux-foundation.org>,
Dave Hansen <dave.hansen@intel.com>,
Andrea Arcangeli <aarcange@redhat.com>,
Mel Gorman <mgorman@techsingularity.net>,
Vlastimil Babka <vbabka@suse.cz>,
Pavel Emelyanov <xemul@parallels.com>,
Zi Yan <zi.yan@cs.rutgers.edu>,
Balbir Singh <bsingharora@gmail.com>,
linux-kernel@vger.kernel.org,
Naoya Horiguchi <nao.horiguchi@gmail.com>
Subject: Re: [PATCH v2 05/12] mm: thp: add core routines for thp/pmd migration
Date: Mon, 28 Nov 2016 15:31:32 +0100 [thread overview]
Message-ID: <20161128143132.GN14788@dhcp22.suse.cz> (raw)
In-Reply-To: <1478561517-4317-6-git-send-email-n-horiguchi@ah.jp.nec.com>
On Tue 08-11-16 08:31:50, Naoya Horiguchi wrote:
> This patch prepares thp migration's core code. This code will be enabled when
> unmap_and_move() stops unconditionally splitting thp and get_new_page() starts
> to allocate destination thps.
This description is underdocumented, to say the least. Could you
please provide high-level documentation here?
> Signed-off-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
> ---
> ChangeLog v1 -> v2:
> - support pte-mapped thp, doubly-mapped thp
> ---
> arch/x86/include/asm/pgtable_64.h | 2 +
> include/linux/swapops.h | 61 +++++++++++++++
> mm/huge_memory.c | 154 ++++++++++++++++++++++++++++++++++++++
> mm/migrate.c | 44 ++++++++++-
> mm/pgtable-generic.c | 3 +-
> 5 files changed, 262 insertions(+), 2 deletions(-)
>
> diff --git v4.9-rc2-mmotm-2016-10-27-18-27/arch/x86/include/asm/pgtable_64.h v4.9-rc2-mmotm-2016-10-27-18-27_patched/arch/x86/include/asm/pgtable_64.h
> index 1cc82ec..3a1b48e 100644
> --- v4.9-rc2-mmotm-2016-10-27-18-27/arch/x86/include/asm/pgtable_64.h
> +++ v4.9-rc2-mmotm-2016-10-27-18-27_patched/arch/x86/include/asm/pgtable_64.h
> @@ -167,7 +167,9 @@ static inline int pgd_large(pgd_t pgd) { return 0; }
> ((type) << (SWP_TYPE_FIRST_BIT)) \
> | ((offset) << SWP_OFFSET_FIRST_BIT) })
> #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val((pte)) })
> +#define __pmd_to_swp_entry(pte) ((swp_entry_t) { pmd_val((pmd)) })
> #define __swp_entry_to_pte(x) ((pte_t) { .pte = (x).val })
> +#define __swp_entry_to_pmd(x) ((pmd_t) { .pmd = (x).val })
>
> extern int kern_addr_valid(unsigned long addr);
> extern void cleanup_highmap(void);
> diff --git v4.9-rc2-mmotm-2016-10-27-18-27/include/linux/swapops.h v4.9-rc2-mmotm-2016-10-27-18-27_patched/include/linux/swapops.h
> index 5c3a5f3..b6b22a2 100644
> --- v4.9-rc2-mmotm-2016-10-27-18-27/include/linux/swapops.h
> +++ v4.9-rc2-mmotm-2016-10-27-18-27_patched/include/linux/swapops.h
> @@ -163,6 +163,67 @@ static inline int is_write_migration_entry(swp_entry_t entry)
>
> #endif
>
> +#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
> +extern void set_pmd_migration_entry(struct page *page,
> + struct vm_area_struct *vma, unsigned long address);
> +
> +extern int remove_migration_pmd(struct page *new, pmd_t *pmd,
> + struct vm_area_struct *vma, unsigned long addr, void *old);
> +
> +extern void pmd_migration_entry_wait(struct mm_struct *mm, pmd_t *pmd);
> +
> +static inline swp_entry_t pmd_to_swp_entry(pmd_t pmd)
> +{
> + swp_entry_t arch_entry;
> +
> + arch_entry = __pmd_to_swp_entry(pmd);
> + return swp_entry(__swp_type(arch_entry), __swp_offset(arch_entry));
> +}
> +
> +static inline pmd_t swp_entry_to_pmd(swp_entry_t entry)
> +{
> + swp_entry_t arch_entry;
> +
> + arch_entry = __swp_entry(swp_type(entry), swp_offset(entry));
> + return __swp_entry_to_pmd(arch_entry);
> +}
> +
> +static inline int is_pmd_migration_entry(pmd_t pmd)
> +{
> + return !pmd_present(pmd) && is_migration_entry(pmd_to_swp_entry(pmd));
> +}
> +#else
> +static inline void set_pmd_migration_entry(struct page *page,
> + struct vm_area_struct *vma, unsigned long address)
> +{
> +}
> +
> +static inline int remove_migration_pmd(struct page *new, pmd_t *pmd,
> + struct vm_area_struct *vma, unsigned long addr, void *old)
> +{
> + return 0;
> +}
> +
> +static inline void pmd_migration_entry_wait(struct mm_struct *m, pmd_t *p) { }
> +
> +static inline swp_entry_t pmd_to_swp_entry(pmd_t pmd)
> +{
> + return swp_entry(0, 0);
> +}
> +
> +static inline pmd_t swp_entry_to_pmd(swp_entry_t entry)
> +{
> + pmd_t pmd = {};
> +
> + return pmd;
> +}
> +
> +static inline int is_pmd_migration_entry(pmd_t pmd)
> +{
> + return 0;
> +}
> +#endif
> +
> #ifdef CONFIG_MEMORY_FAILURE
>
> extern atomic_long_t num_poisoned_pages __read_mostly;
> diff --git v4.9-rc2-mmotm-2016-10-27-18-27/mm/huge_memory.c v4.9-rc2-mmotm-2016-10-27-18-27_patched/mm/huge_memory.c
> index 0509d17..b3022b3 100644
> --- v4.9-rc2-mmotm-2016-10-27-18-27/mm/huge_memory.c
> +++ v4.9-rc2-mmotm-2016-10-27-18-27_patched/mm/huge_memory.c
> @@ -2310,3 +2310,157 @@ static int __init split_huge_pages_debugfs(void)
> }
> late_initcall(split_huge_pages_debugfs);
> #endif
> +
> +#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
> +void set_pmd_migration_entry(struct page *page, struct vm_area_struct *vma,
> + unsigned long addr)
> +{
> + struct mm_struct *mm = vma->vm_mm;
> + pgd_t *pgd;
> + pud_t *pud;
> + pmd_t *pmd;
> + pmd_t pmdval;
> + swp_entry_t entry;
> + spinlock_t *ptl;
> +
> + pgd = pgd_offset(mm, addr);
> + if (!pgd_present(*pgd))
> + return;
> + pud = pud_offset(pgd, addr);
> + if (!pud_present(*pud))
> + return;
> + pmd = pmd_offset(pud, addr);
> + pmdval = *pmd;
> + barrier();
> + if (!pmd_present(pmdval))
> + return;
> +
> + mmu_notifier_invalidate_range_start(mm, addr, addr + HPAGE_PMD_SIZE);
> + if (pmd_trans_huge(pmdval)) {
> + pmd_t pmdswp;
> +
> + ptl = pmd_lock(mm, pmd);
> + if (!pmd_present(*pmd))
> + goto unlock_pmd;
> + if (unlikely(!pmd_trans_huge(*pmd)))
> + goto unlock_pmd;
> + if (pmd_page(*pmd) != page)
> + goto unlock_pmd;
> +
> + pmdval = pmdp_huge_get_and_clear(mm, addr, pmd);
> + if (pmd_dirty(pmdval))
> + set_page_dirty(page);
> + entry = make_migration_entry(page, pmd_write(pmdval));
> + pmdswp = swp_entry_to_pmd(entry);
> + pmdswp = pmd_mkhuge(pmdswp);
> + set_pmd_at(mm, addr, pmd, pmdswp);
> + page_remove_rmap(page, true);
> + put_page(page);
> +unlock_pmd:
> + spin_unlock(ptl);
> + } else { /* pte-mapped thp */
> + pte_t *pte;
> + pte_t pteval;
> + struct page *tmp = compound_head(page);
> + unsigned long address = addr & HPAGE_PMD_MASK;
> + pte_t swp_pte;
> + int i;
> +
> + pte = pte_offset_map(pmd, address);
> + ptl = pte_lockptr(mm, pmd);
> + spin_lock(ptl);
> + for (i = 0; i < HPAGE_PMD_NR; i++, pte++, tmp++) {
> + if (!(pte_present(*pte) &&
> + page_to_pfn(tmp) == pte_pfn(*pte)))
> + continue;
> + pteval = ptep_clear_flush(vma, address, pte);
> + if (pte_dirty(pteval))
> + set_page_dirty(tmp);
> + entry = make_migration_entry(tmp, pte_write(pteval));
> + swp_pte = swp_entry_to_pte(entry);
> + set_pte_at(mm, address, pte, swp_pte);
> + page_remove_rmap(tmp, false);
> + put_page(tmp);
> + }
> + pte_unmap_unlock(pte, ptl);
> + }
> + mmu_notifier_invalidate_range_end(mm, addr, addr + HPAGE_PMD_SIZE);
> + return;
> +}
> +
> +int remove_migration_pmd(struct page *new, pmd_t *pmd,
> + struct vm_area_struct *vma, unsigned long addr, void *old)
> +{
> + struct mm_struct *mm = vma->vm_mm;
> + spinlock_t *ptl;
> + pmd_t pmde;
> + swp_entry_t entry;
> +
> + pmde = *pmd;
> + barrier();
> +
> + if (!pmd_present(pmde)) {
> + if (is_migration_entry(pmd_to_swp_entry(pmde))) {
> + unsigned long mmun_start = addr & HPAGE_PMD_MASK;
> + unsigned long mmun_end = mmun_start + HPAGE_PMD_SIZE;
> +
> + ptl = pmd_lock(mm, pmd);
> + entry = pmd_to_swp_entry(*pmd);
> + if (migration_entry_to_page(entry) != old)
> + goto unlock_ptl;
> + get_page(new);
> + pmde = pmd_mkold(mk_huge_pmd(new, vma->vm_page_prot));
> + if (is_write_migration_entry(entry))
> + pmde = maybe_pmd_mkwrite(pmde, vma);
> + flush_cache_range(vma, mmun_start, mmun_end);
> + page_add_anon_rmap(new, vma, mmun_start, true);
> + pmdp_huge_clear_flush_notify(vma, mmun_start, pmd);
> + set_pmd_at(mm, mmun_start, pmd, pmde);
> + flush_tlb_range(vma, mmun_start, mmun_end);
> + if (vma->vm_flags & VM_LOCKED)
> + mlock_vma_page(new);
> + update_mmu_cache_pmd(vma, addr, pmd);
> +unlock_ptl:
> + spin_unlock(ptl);
> + }
> + } else { /* pte-mapped thp */
> + pte_t *ptep;
> + pte_t pte;
> + int i;
> + struct page *tmpnew = compound_head(new);
> + struct page *tmpold = compound_head((struct page *)old);
> + unsigned long address = addr & HPAGE_PMD_MASK;
> +
> + ptep = pte_offset_map(pmd, addr);
> + ptl = pte_lockptr(mm, pmd);
> + spin_lock(ptl);
> +
> + for (i = 0; i < HPAGE_PMD_NR;
> + i++, ptep++, tmpnew++, tmpold++, address += PAGE_SIZE) {
> + pte = *ptep;
> + if (!is_swap_pte(pte))
> + continue;
> + entry = pte_to_swp_entry(pte);
> + if (!is_migration_entry(entry) ||
> + migration_entry_to_page(entry) != tmpold)
> + continue;
> + get_page(tmpnew);
> + pte = pte_mkold(mk_pte(tmpnew,
> + READ_ONCE(vma->vm_page_prot)));
> + if (pte_swp_soft_dirty(*ptep))
> + pte = pte_mksoft_dirty(pte);
> + if (is_write_migration_entry(entry))
> + pte = maybe_mkwrite(pte, vma);
> + flush_dcache_page(tmpnew);
> + set_pte_at(mm, address, ptep, pte);
> + if (PageAnon(new))
> + page_add_anon_rmap(tmpnew, vma, address, false);
> + else
> + page_add_file_rmap(tmpnew, false);
> + update_mmu_cache(vma, address, ptep);
> + }
> + pte_unmap_unlock(ptep, ptl);
> + }
> + return SWAP_AGAIN;
> +}
> +#endif
> diff --git v4.9-rc2-mmotm-2016-10-27-18-27/mm/migrate.c v4.9-rc2-mmotm-2016-10-27-18-27_patched/mm/migrate.c
> index 66ce6b4..54f2eb6 100644
> --- v4.9-rc2-mmotm-2016-10-27-18-27/mm/migrate.c
> +++ v4.9-rc2-mmotm-2016-10-27-18-27_patched/mm/migrate.c
> @@ -198,6 +198,8 @@ static int remove_migration_pte(struct page *new, struct vm_area_struct *vma,
> {
> struct mm_struct *mm = vma->vm_mm;
> swp_entry_t entry;
> + pgd_t *pgd;
> + pud_t *pud;
> pmd_t *pmd;
> pte_t *ptep, pte;
> spinlock_t *ptl;
> @@ -208,10 +210,29 @@ static int remove_migration_pte(struct page *new, struct vm_area_struct *vma,
> goto out;
> ptl = huge_pte_lockptr(hstate_vma(vma), mm, ptep);
> } else {
> - pmd = mm_find_pmd(mm, addr);
> + pmd_t pmde;
> +
> + pgd = pgd_offset(mm, addr);
> + if (!pgd_present(*pgd))
> + goto out;
> + pud = pud_offset(pgd, addr);
> + if (!pud_present(*pud))
> + goto out;
> + pmd = pmd_offset(pud, addr);
> if (!pmd)
> goto out;
>
> + if (PageTransCompound(new)) {
> + remove_migration_pmd(new, pmd, vma, addr, old);
> + goto out;
> + }
> +
> + pmde = *pmd;
> + barrier();
> +
> + if (!pmd_present(pmde) || pmd_trans_huge(pmde))
> + goto out;
> +
> ptep = pte_offset_map(pmd, addr);
>
> /*
> @@ -344,6 +365,27 @@ void migration_entry_wait_huge(struct vm_area_struct *vma,
> __migration_entry_wait(mm, pte, ptl);
> }
>
> +#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
> +void pmd_migration_entry_wait(struct mm_struct *mm, pmd_t *pmd)
> +{
> + spinlock_t *ptl;
> + struct page *page;
> +
> + ptl = pmd_lock(mm, pmd);
> + if (!is_pmd_migration_entry(*pmd))
> + goto unlock;
> + page = migration_entry_to_page(pmd_to_swp_entry(*pmd));
> + if (!get_page_unless_zero(page))
> + goto unlock;
> + spin_unlock(ptl);
> + wait_on_page_locked(page);
> + put_page(page);
> + return;
> +unlock:
> + spin_unlock(ptl);
> +}
> +#endif
> +
> #ifdef CONFIG_BLOCK
> /* Returns true if all buffers are successfully locked */
> static bool buffer_migrate_lock_buffers(struct buffer_head *head,
> diff --git v4.9-rc2-mmotm-2016-10-27-18-27/mm/pgtable-generic.c v4.9-rc2-mmotm-2016-10-27-18-27_patched/mm/pgtable-generic.c
> index 71c5f91..6012343 100644
> --- v4.9-rc2-mmotm-2016-10-27-18-27/mm/pgtable-generic.c
> +++ v4.9-rc2-mmotm-2016-10-27-18-27_patched/mm/pgtable-generic.c
> @@ -118,7 +118,8 @@ pmd_t pmdp_huge_clear_flush(struct vm_area_struct *vma, unsigned long address,
> {
> pmd_t pmd;
> VM_BUG_ON(address & ~HPAGE_PMD_MASK);
> - VM_BUG_ON(!pmd_trans_huge(*pmdp) && !pmd_devmap(*pmdp));
> + VM_BUG_ON(pmd_present(*pmdp) && !pmd_trans_huge(*pmdp) &&
> + !pmd_devmap(*pmdp));
> pmd = pmdp_huge_get_and_clear(vma->vm_mm, address, pmdp);
> flush_pmd_tlb_range(vma, address, address + HPAGE_PMD_SIZE);
> return pmd;
> --
> 2.7.0
--
Michal Hocko
SUSE Labs
next prev parent reply other threads:[~2016-11-28 14:32 UTC|newest]
Thread overview: 71+ messages / expand[flat|nested] mbox.gz Atom feed top
2016-11-07 23:31 [PATCH v2 00/12] mm: page migration enhancement for thp Naoya Horiguchi
2016-11-07 23:31 ` [PATCH v2 01/12] mm: x86: move _PAGE_SWP_SOFT_DIRTY from bit 7 to bit 6 Naoya Horiguchi
2016-11-10 23:29 ` Dave Hansen
2016-11-11 1:08 ` Naoya Horiguchi
2016-11-11 11:12 ` Kirill A. Shutemov
2016-11-07 23:31 ` [PATCH v2 02/12] mm: mempolicy: add queue_pages_node_check() Naoya Horiguchi
2016-11-07 23:31 ` [PATCH v2 03/12] mm: thp: introduce separate TTU flag for thp freezing Naoya Horiguchi
2016-11-10 8:31 ` Anshuman Khandual
2016-11-10 9:09 ` Naoya Horiguchi
2016-11-11 3:18 ` Anshuman Khandual
2016-11-11 11:16 ` Kirill A. Shutemov
2016-11-07 23:31 ` [PATCH v2 04/12] mm: thp: introduce CONFIG_ARCH_ENABLE_THP_MIGRATION Naoya Horiguchi
2016-11-11 11:18 ` Kirill A. Shutemov
2016-11-15 2:05 ` Naoya Horiguchi
2016-11-28 14:21 ` Michal Hocko
2016-11-29 7:50 ` Naoya Horiguchi
2016-11-29 8:45 ` Michal Hocko
2016-11-07 23:31 ` [PATCH v2 05/12] mm: thp: add core routines for thp/pmd migration Naoya Horiguchi
2016-11-08 3:05 ` kbuild test robot
2016-11-08 6:46 ` Naoya Horiguchi
2016-11-08 8:13 ` Anshuman Khandual
2016-11-08 8:16 ` Naoya Horiguchi
2016-11-10 8:29 ` Anshuman Khandual
2016-11-10 9:43 ` Naoya Horiguchi
2016-11-14 11:45 ` Kirill A. Shutemov
2016-11-15 4:57 ` Naoya Horiguchi
2016-11-28 14:31 ` Michal Hocko [this message]
2016-11-29 7:57 ` Naoya Horiguchi
2016-11-07 23:31 ` [PATCH v2 06/12] mm: thp: enable thp migration in generic path Naoya Horiguchi
2016-11-28 14:33 ` Michal Hocko
2016-11-29 8:16 ` Naoya Horiguchi
2016-11-07 23:31 ` [PATCH v2 07/12] mm: thp: check pmd migration entry in common path Naoya Horiguchi
2016-11-08 0:23 ` kbuild test robot
2016-11-08 1:22 ` Naoya Horiguchi
2016-11-10 8:36 ` Anshuman Khandual
2016-11-10 9:12 ` Naoya Horiguchi
2016-11-10 9:08 ` Hillf Danton
2016-11-10 9:21 ` Naoya Horiguchi
2016-11-10 9:28 ` Hillf Danton
2016-11-10 9:34 ` Naoya Horiguchi
2016-11-17 23:56 ` Kirill A. Shutemov
2016-11-29 6:46 ` Naoya Horiguchi
2016-11-28 14:35 ` Michal Hocko
2016-11-07 23:31 ` [PATCH v2 08/12] mm: soft-dirty: keep soft-dirty bits over thp migration Naoya Horiguchi
2016-11-10 8:38 ` Anshuman Khandual
2016-11-10 9:13 ` Naoya Horiguchi
2016-11-07 23:31 ` [PATCH v2 09/12] mm: hwpoison: soft offline supports " Naoya Horiguchi
2016-11-10 10:31 ` Balbir Singh
2016-11-10 23:58 ` Naoya Horiguchi
2016-11-14 23:22 ` Balbir Singh
2016-11-07 23:31 ` [PATCH v2 10/12] mm: mempolicy: mbind and migrate_pages support " Naoya Horiguchi
2016-11-25 12:27 ` Anshuman Khandual
2016-11-29 7:07 ` Naoya Horiguchi
2016-11-07 23:31 ` [PATCH v2 11/12] mm: migrate: move_pages() supports " Naoya Horiguchi
2016-11-18 0:01 ` Kirill A. Shutemov
2016-11-07 23:31 ` [PATCH v2 12/12] mm: memory_hotplug: memory hotremove " Naoya Horiguchi
2016-11-08 0:29 ` kbuild test robot
2016-11-08 0:30 ` kbuild test robot
2016-11-08 1:36 ` Naoya Horiguchi
2016-11-08 1:41 ` [PATCH] mm: fix unused variable warning Naoya Horiguchi
2016-11-09 2:32 ` [PATCH v2 00/12] mm: page migration enhancement for thp Balbir Singh
2016-11-09 4:59 ` Naoya Horiguchi
2016-11-09 21:28 ` Balbir Singh
2016-11-09 10:33 ` Anshuman Khandual
2016-11-09 23:52 ` Naoya Horiguchi
2016-11-10 14:01 ` Zi Yan
2016-11-11 3:48 ` Anshuman Khandual
2017-01-30 10:47 ` Anshuman Khandual
2017-01-31 19:54 ` Zi Yan
2017-02-03 3:12 ` Hillf Danton
2017-02-03 3:24 ` Zi Yan
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20161128143132.GN14788@dhcp22.suse.cz \
--to=mhocko@kernel.org \
--cc=aarcange@redhat.com \
--cc=akpm@linux-foundation.org \
--cc=bsingharora@gmail.com \
--cc=dave.hansen@intel.com \
--cc=hughd@google.com \
--cc=kirill.shutemov@linux.intel.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=mgorman@techsingularity.net \
--cc=n-horiguchi@ah.jp.nec.com \
--cc=nao.horiguchi@gmail.com \
--cc=vbabka@suse.cz \
--cc=xemul@parallels.com \
--cc=zi.yan@cs.rutgers.edu \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).