From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1752652AbdC0KdM (ORCPT ); Mon, 27 Mar 2017 06:33:12 -0400 Received: from terminus.zytor.com ([65.50.211.136]:46527 "EHLO terminus.zytor.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1752267AbdC0KdD (ORCPT ); Mon, 27 Mar 2017 06:33:03 -0400 Date: Mon, 27 Mar 2017 03:25:10 -0700 From: tip-bot for Xiong Zhang Message-ID: Cc: luto@amacapital.net, dave.hansen@intel.com, jpoimboe@redhat.com, luto@kernel.org, tglx@linutronix.de, xiong.y.zhang@intel.com, brgerst@gmail.com, hpa@zytor.com, akpm@linux-foundation.org, mingo@kernel.org, bp@alien8.de, arnd@arndb.de, peterz@infradead.org, linux-kernel@vger.kernel.org, mhocko@suse.com, kirill.shutemov@linux.intel.com, dvlasenk@redhat.com, torvalds@linux-foundation.org Reply-To: tglx@linutronix.de, luto@kernel.org, xiong.y.zhang@intel.com, jpoimboe@redhat.com, dave.hansen@intel.com, luto@amacapital.net, bp@alien8.de, akpm@linux-foundation.org, hpa@zytor.com, brgerst@gmail.com, mingo@kernel.org, peterz@infradead.org, linux-kernel@vger.kernel.org, arnd@arndb.de, torvalds@linux-foundation.org, kirill.shutemov@linux.intel.com, mhocko@suse.com, dvlasenk@redhat.com In-Reply-To: <20170317185515.8636-6-kirill.shutemov@linux.intel.com> References: <20170317185515.8636-6-kirill.shutemov@linux.intel.com> To: linux-tip-commits@vger.kernel.org Subject: [tip:x86/mm] x86/xen: Change __xen_pgd_walk() and xen_cleanmfnmap() to support p4d Git-Commit-ID: 907cd439029091bcbd67f03cbe45a4c124347731 X-Mailer: tip-git-log-daemon Robot-ID: Robot-Unsubscribe: Contact to get blacklisted from these emails MIME-Version: 1.0 Content-Transfer-Encoding: 8bit Content-Type: text/plain; charset=UTF-8 Content-Disposition: inline Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Commit-ID: 907cd439029091bcbd67f03cbe45a4c124347731 Gitweb: http://git.kernel.org/tip/907cd439029091bcbd67f03cbe45a4c124347731 Author: Xiong Zhang AuthorDate: Fri, 17 Mar 2017 21:55:14 +0300 Committer: Ingo Molnar CommitDate: Mon, 27 Mar 2017 08:56:49 +0200 x86/xen: Change __xen_pgd_walk() and xen_cleanmfnmap() to support p4d Split these helpers into a couple of per-level functions and add support for an additional page table level. Signed-off-by: Xiong Zhang [ Split off into separate patch ] Signed-off-by: Kirill A. Shutemov Acked-by: Thomas Gleixner Cc: Andrew Morton Cc: Andy Lutomirski Cc: Andy Lutomirski Cc: Arnd Bergmann Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Michal Hocko Cc: Peter Zijlstra Cc: linux-arch@vger.kernel.org Cc: linux-mm@kvack.org Link: http://lkml.kernel.org/r/20170317185515.8636-6-kirill.shutemov@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/xen/mmu.c | 245 ++++++++++++++++++++++++++++++++--------------------- arch/x86/xen/mmu.h | 1 + 2 files changed, 150 insertions(+), 96 deletions(-) diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index ebbfe00..e6adebb 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -593,6 +593,64 @@ static void xen_set_pgd(pgd_t *ptr, pgd_t val) } #endif /* CONFIG_PGTABLE_LEVELS == 4 */ +static int xen_pmd_walk(struct mm_struct *mm, pmd_t *pmd, + int (*func)(struct mm_struct *mm, struct page *, enum pt_level), + bool last, unsigned long limit) +{ + int i, nr, flush = 0; + + nr = last ? pmd_index(limit) + 1 : PTRS_PER_PMD; + for (i = 0; i < nr; i++) { + if (!pmd_none(pmd[i])) + flush |= (*func)(mm, pmd_page(pmd[i]), PT_PTE); + } + return flush; +} + +static int xen_pud_walk(struct mm_struct *mm, pud_t *pud, + int (*func)(struct mm_struct *mm, struct page *, enum pt_level), + bool last, unsigned long limit) +{ + int i, nr, flush = 0; + + nr = last ? pud_index(limit) + 1 : PTRS_PER_PUD; + for (i = 0; i < nr; i++) { + pmd_t *pmd; + + if (pud_none(pud[i])) + continue; + + pmd = pmd_offset(&pud[i], 0); + if (PTRS_PER_PMD > 1) + flush |= (*func)(mm, virt_to_page(pmd), PT_PMD); + flush |= xen_pmd_walk(mm, pmd, func, + last && i == nr - 1, limit); + } + return flush; +} + +static int xen_p4d_walk(struct mm_struct *mm, p4d_t *p4d, + int (*func)(struct mm_struct *mm, struct page *, enum pt_level), + bool last, unsigned long limit) +{ + int i, nr, flush = 0; + + nr = last ? p4d_index(limit) + 1 : PTRS_PER_P4D; + for (i = 0; i < nr; i++) { + pud_t *pud; + + if (p4d_none(p4d[i])) + continue; + + pud = pud_offset(&p4d[i], 0); + if (PTRS_PER_PUD > 1) + flush |= (*func)(mm, virt_to_page(pud), PT_PUD); + flush |= xen_pud_walk(mm, pud, func, + last && i == nr - 1, limit); + } + return flush; +} + /* * (Yet another) pagetable walker. This one is intended for pinning a * pagetable. This means that it walks a pagetable and calls the @@ -613,10 +671,8 @@ static int __xen_pgd_walk(struct mm_struct *mm, pgd_t *pgd, enum pt_level), unsigned long limit) { - int flush = 0; + int i, nr, flush = 0; unsigned hole_low, hole_high; - unsigned pgdidx_limit, pudidx_limit, pmdidx_limit; - unsigned pgdidx, pudidx, pmdidx; /* The limit is the last byte to be touched */ limit--; @@ -633,65 +689,22 @@ static int __xen_pgd_walk(struct mm_struct *mm, pgd_t *pgd, hole_low = pgd_index(USER_LIMIT); hole_high = pgd_index(PAGE_OFFSET); - pgdidx_limit = pgd_index(limit); -#if PTRS_PER_PUD > 1 - pudidx_limit = pud_index(limit); -#else - pudidx_limit = 0; -#endif -#if PTRS_PER_PMD > 1 - pmdidx_limit = pmd_index(limit); -#else - pmdidx_limit = 0; -#endif - - for (pgdidx = 0; pgdidx <= pgdidx_limit; pgdidx++) { - pud_t *pud; + nr = pgd_index(limit) + 1; + for (i = 0; i < nr; i++) { + p4d_t *p4d; - if (pgdidx >= hole_low && pgdidx < hole_high) + if (i >= hole_low && i < hole_high) continue; - if (!pgd_val(pgd[pgdidx])) + if (pgd_none(pgd[i])) continue; - pud = pud_offset(&pgd[pgdidx], 0); - - if (PTRS_PER_PUD > 1) /* not folded */ - flush |= (*func)(mm, virt_to_page(pud), PT_PUD); - - for (pudidx = 0; pudidx < PTRS_PER_PUD; pudidx++) { - pmd_t *pmd; - - if (pgdidx == pgdidx_limit && - pudidx > pudidx_limit) - goto out; - - if (pud_none(pud[pudidx])) - continue; - - pmd = pmd_offset(&pud[pudidx], 0); - - if (PTRS_PER_PMD > 1) /* not folded */ - flush |= (*func)(mm, virt_to_page(pmd), PT_PMD); - - for (pmdidx = 0; pmdidx < PTRS_PER_PMD; pmdidx++) { - struct page *pte; - - if (pgdidx == pgdidx_limit && - pudidx == pudidx_limit && - pmdidx > pmdidx_limit) - goto out; - - if (pmd_none(pmd[pmdidx])) - continue; - - pte = pmd_page(pmd[pmdidx]); - flush |= (*func)(mm, pte, PT_PTE); - } - } + p4d = p4d_offset(&pgd[i], 0); + if (PTRS_PER_P4D > 1) + flush |= (*func)(mm, virt_to_page(p4d), PT_P4D); + flush |= xen_p4d_walk(mm, p4d, func, i == nr - 1, limit); } -out: /* Do the top level last, so that the callbacks can use it as a cue to do final things like tlb flushes. */ flush |= (*func)(mm, virt_to_page(pgd), PT_PGD); @@ -1150,57 +1163,97 @@ static void __init xen_cleanmfnmap_free_pgtbl(void *pgtbl, bool unpin) xen_free_ro_pages(pa, PAGE_SIZE); } +static void __init xen_cleanmfnmap_pmd(pmd_t *pmd, bool unpin) +{ + unsigned long pa; + pte_t *pte_tbl; + int i; + + if (pmd_large(*pmd)) { + pa = pmd_val(*pmd) & PHYSICAL_PAGE_MASK; + xen_free_ro_pages(pa, PMD_SIZE); + return; + } + + pte_tbl = pte_offset_kernel(pmd, 0); + for (i = 0; i < PTRS_PER_PTE; i++) { + if (pte_none(pte_tbl[i])) + continue; + pa = pte_pfn(pte_tbl[i]) << PAGE_SHIFT; + xen_free_ro_pages(pa, PAGE_SIZE); + } + set_pmd(pmd, __pmd(0)); + xen_cleanmfnmap_free_pgtbl(pte_tbl, unpin); +} + +static void __init xen_cleanmfnmap_pud(pud_t *pud, bool unpin) +{ + unsigned long pa; + pmd_t *pmd_tbl; + int i; + + if (pud_large(*pud)) { + pa = pud_val(*pud) & PHYSICAL_PAGE_MASK; + xen_free_ro_pages(pa, PUD_SIZE); + return; + } + + pmd_tbl = pmd_offset(pud, 0); + for (i = 0; i < PTRS_PER_PMD; i++) { + if (pmd_none(pmd_tbl[i])) + continue; + xen_cleanmfnmap_pmd(pmd_tbl + i, unpin); + } + set_pud(pud, __pud(0)); + xen_cleanmfnmap_free_pgtbl(pmd_tbl, unpin); +} + +static void __init xen_cleanmfnmap_p4d(p4d_t *p4d, bool unpin) +{ + unsigned long pa; + pud_t *pud_tbl; + int i; + + if (p4d_large(*p4d)) { + pa = p4d_val(*p4d) & PHYSICAL_PAGE_MASK; + xen_free_ro_pages(pa, P4D_SIZE); + return; + } + + pud_tbl = pud_offset(p4d, 0); + for (i = 0; i < PTRS_PER_PUD; i++) { + if (pud_none(pud_tbl[i])) + continue; + xen_cleanmfnmap_pud(pud_tbl + i, unpin); + } + set_p4d(p4d, __p4d(0)); + xen_cleanmfnmap_free_pgtbl(pud_tbl, unpin); +} + /* * Since it is well isolated we can (and since it is perhaps large we should) * also free the page tables mapping the initial P->M table. */ static void __init xen_cleanmfnmap(unsigned long vaddr) { - unsigned long va = vaddr & PMD_MASK; - unsigned long pa; - pgd_t *pgd = pgd_offset_k(va); - pud_t *pud_page = pud_offset(pgd, 0); - pud_t *pud; - pmd_t *pmd; - pte_t *pte; + pgd_t *pgd; + p4d_t *p4d; unsigned int i; bool unpin; unpin = (vaddr == 2 * PGDIR_SIZE); - set_pgd(pgd, __pgd(0)); - do { - pud = pud_page + pud_index(va); - if (pud_none(*pud)) { - va += PUD_SIZE; - } else if (pud_large(*pud)) { - pa = pud_val(*pud) & PHYSICAL_PAGE_MASK; - xen_free_ro_pages(pa, PUD_SIZE); - va += PUD_SIZE; - } else { - pmd = pmd_offset(pud, va); - if (pmd_large(*pmd)) { - pa = pmd_val(*pmd) & PHYSICAL_PAGE_MASK; - xen_free_ro_pages(pa, PMD_SIZE); - } else if (!pmd_none(*pmd)) { - pte = pte_offset_kernel(pmd, va); - set_pmd(pmd, __pmd(0)); - for (i = 0; i < PTRS_PER_PTE; ++i) { - if (pte_none(pte[i])) - break; - pa = pte_pfn(pte[i]) << PAGE_SHIFT; - xen_free_ro_pages(pa, PAGE_SIZE); - } - xen_cleanmfnmap_free_pgtbl(pte, unpin); - } - va += PMD_SIZE; - if (pmd_index(va)) - continue; - set_pud(pud, __pud(0)); - xen_cleanmfnmap_free_pgtbl(pmd, unpin); - } - - } while (pud_index(va) || pmd_index(va)); - xen_cleanmfnmap_free_pgtbl(pud_page, unpin); + vaddr &= PMD_MASK; + pgd = pgd_offset_k(vaddr); + p4d = p4d_offset(pgd, 0); + for (i = 0; i < PTRS_PER_P4D; i++) { + if (p4d_none(p4d[i])) + continue; + xen_cleanmfnmap_p4d(p4d + i, unpin); + } + if (IS_ENABLED(CONFIG_X86_5LEVEL)) { + set_pgd(pgd, __pgd(0)); + xen_cleanmfnmap_free_pgtbl(p4d, unpin); + } } static void __init xen_pagetable_p2m_free(void) diff --git a/arch/x86/xen/mmu.h b/arch/x86/xen/mmu.h index 73809bb..3fe2b32 100644 --- a/arch/x86/xen/mmu.h +++ b/arch/x86/xen/mmu.h @@ -5,6 +5,7 @@ enum pt_level { PT_PGD, + PT_P4D, PT_PUD, PT_PMD, PT_PTE