From: Matthew Wilcox <willy@infradead.org>
To: Peter Zijlstra <peterz@infradead.org>
Cc: kan.liang@linux.intel.com, mingo@kernel.org, acme@kernel.org,
mark.rutland@arm.com, alexander.shishkin@linux.intel.com,
jolsa@redhat.com, eranian@google.com,
christophe.leroy@csgroup.eu, npiggin@gmail.com,
linuxppc-dev@lists.ozlabs.org, mpe@ellerman.id.au,
will@kernel.org, aneesh.kumar@linux.ibm.com,
sparclinux@vger.kernel.org, davem@davemloft.net,
catalin.marinas@arm.com, linux-arch@vger.kernel.org,
linux-kernel@vger.kernel.org, ak@linux.intel.com,
dave.hansen@intel.com, kirill.shutemov@linux.intel.com
Subject: Re: [PATCH v2 3/6] perf/core: Fix arch_perf_get_page_size()
Date: Thu, 26 Nov 2020 12:34:58 +0000 [thread overview]
Message-ID: <20201126123458.GO4327@casper.infradead.org> (raw)
In-Reply-To: <20201126121121.164675154@infradead.org>
On Thu, Nov 26, 2020 at 01:01:17PM +0100, Peter Zijlstra wrote:
> The (new) page-table walker in arch_perf_get_page_size() is broken in
> various ways. Specifically, while it is used in a lockless manner, it
> neither depends on CONFIG_HAVE_FAST_GUP nor uses the proper _lockless
> offset methods, nor is it careful to read each entry only once.
>
> Also the hugetlb support is broken due to calling pte_page() without
> first checking pte_special().
>
> Rewrite the whole thing to be a proper lockless page-table walker and
> employ the new pXX_leaf_size() pgtable functions to determine the
> pagetable size without looking at the page-frames.
>
> Fixes: 51b646b2d9f8 ("perf,mm: Handle non-page-table-aligned hugetlbfs")
> Fixes: 8d97e71811aa ("perf/core: Add PERF_SAMPLE_DATA_PAGE_SIZE")
> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
> Tested-by: Kan Liang <kan.liang@linux.intel.com>
> ---
> arch/arm64/include/asm/pgtable.h | 3 +
> arch/sparc/include/asm/pgtable_64.h | 13 ++++
> arch/sparc/mm/hugetlbpage.c | 19 ++++--
> include/linux/pgtable.h | 16 +++++
> kernel/events/core.c | 102 +++++++++++++-----------------------
> 5 files changed, 82 insertions(+), 71 deletions(-)
This diffstat doesn't match the patch in this email ...
> --- a/kernel/events/core.c
> +++ b/kernel/events/core.c
> @@ -52,6 +52,7 @@
> #include <linux/mount.h>
> #include <linux/min_heap.h>
> #include <linux/highmem.h>
> +#include <linux/pgtable.h>
>
> #include "internal.h"
>
> @@ -7001,90 +7001,62 @@ static u64 perf_virt_to_phys(u64 virt)
> return phys_addr;
> }
>
> -#ifdef CONFIG_MMU
> -
> /*
> - * Return the MMU page size of a given virtual address.
> - *
> - * This generic implementation handles page-table aligned huge pages, as well
> - * as non-page-table aligned hugetlbfs compound pages.
> - *
> - * If an architecture supports and uses non-page-table aligned pages in their
> - * kernel mapping it will need to provide it's own implementation of this
> - * function.
> + * Return the pagetable size of a given virtual address.
> */
> -__weak u64 arch_perf_get_page_size(struct mm_struct *mm, unsigned long addr)
> +static u64 perf_get_pgtable_size(struct mm_struct *mm, unsigned long addr)
> {
> - struct page *page;
> - pgd_t *pgd;
> - p4d_t *p4d;
> - pud_t *pud;
> - pmd_t *pmd;
> - pte_t *pte;
> + u64 size = 0;
>
> - pgd = pgd_offset(mm, addr);
> - if (pgd_none(*pgd))
> - return 0;
> +#ifdef CONFIG_HAVE_FAST_GUP
> + pgd_t *pgdp, pgd;
> + p4d_t *p4dp, p4d;
> + pud_t *pudp, pud;
> + pmd_t *pmdp, pmd;
> + pte_t *ptep, pte;
>
> - p4d = p4d_offset(pgd, addr);
> - if (!p4d_present(*p4d))
> + pgdp = pgd_offset(mm, addr);
> + pgd = READ_ONCE(*pgdp);
> + if (pgd_none(pgd))
> return 0;
>
> - if (p4d_leaf(*p4d))
> - return 1ULL << P4D_SHIFT;
> + if (pgd_leaf(pgd))
> + return pgd_leaf_size(pgd);
>
> - pud = pud_offset(p4d, addr);
> - if (!pud_present(*pud))
> + p4dp = p4d_offset_lockless(pgdp, pgd, addr);
> + p4d = READ_ONCE(*p4dp);
> + if (!p4d_present(p4d))
> return 0;
>
> - if (pud_leaf(*pud)) {
> -#ifdef pud_page
> - page = pud_page(*pud);
> - if (PageHuge(page))
> - return page_size(compound_head(page));
> -#endif
> - return 1ULL << PUD_SHIFT;
> - }
> + if (p4d_leaf(p4d))
> + return p4d_leaf_size(p4d);
>
> - pmd = pmd_offset(pud, addr);
> - if (!pmd_present(*pmd))
> + pudp = pud_offset_lockless(p4dp, p4d, addr);
> + pud = READ_ONCE(*pudp);
> + if (!pud_present(pud))
> return 0;
>
> - if (pmd_leaf(*pmd)) {
> -#ifdef pmd_page
> - page = pmd_page(*pmd);
> - if (PageHuge(page))
> - return page_size(compound_head(page));
> -#endif
> - return 1ULL << PMD_SHIFT;
> - }
> + if (pud_leaf(pud))
> + return pud_leaf_size(pud);
>
> - pte = pte_offset_map(pmd, addr);
> - if (!pte_present(*pte)) {
> - pte_unmap(pte);
> + pmdp = pmd_offset_lockless(pudp, pud, addr);
> + pmd = READ_ONCE(*pmdp);
> + if (!pmd_present(pmd))
> return 0;
> - }
>
> - page = pte_page(*pte);
> - if (PageHuge(page)) {
> - u64 size = page_size(compound_head(page));
> - pte_unmap(pte);
> - return size;
> - }
> -
> - pte_unmap(pte);
> - return PAGE_SIZE;
> -}
> + if (pmd_leaf(pmd))
> + return pmd_leaf_size(pmd);
>
> -#else
> + ptep = pte_offset_map(&pmd, addr);
> + pte = ptep_get_lockless(ptep);
> + if (pte_present(pte))
> + size = pte_leaf_size(pte);
> + pte_unmap(ptep);
> +#endif /* CONFIG_HAVE_FAST_GUP */
>
> -static u64 arch_perf_get_page_size(struct mm_struct *mm, unsigned long addr)
> -{
> - return 0;
> + return size;
> }
>
> -#endif
> -
> static u64 perf_get_page_size(unsigned long addr)
> {
> struct mm_struct *mm;
> @@ -7109,7 +7081,7 @@ static u64 perf_get_page_size(unsigned l
> mm = &init_mm;
> }
>
> - size = arch_perf_get_page_size(mm, addr);
> + size = perf_get_pgtable_size(mm, addr);
>
> local_irq_restore(flags);
>
>
>
next prev parent reply other threads:[~2020-11-26 12:35 UTC|newest]
Thread overview: 31+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-11-26 12:01 [PATCH v2 0/6] perf/mm: Fix PERF_SAMPLE_*_PAGE_SIZE Peter Zijlstra
2020-11-26 12:01 ` [PATCH v2 1/6] mm/gup: Provide gup_get_pte() more generic Peter Zijlstra
2020-11-26 12:43 ` Matthew Wilcox
2020-11-26 13:02 ` Peter Zijlstra
2020-12-03 9:07 ` [tip: perf/core] " tip-bot2 for Peter Zijlstra
2020-12-03 9:24 ` tip-bot2 for Peter Zijlstra
2020-11-26 12:01 ` [PATCH v2 2/6] mm: Introduce pXX_leaf_size() Peter Zijlstra
2020-11-26 12:43 ` Matthew Wilcox
2020-12-03 9:07 ` [tip: perf/core] " tip-bot2 for Peter Zijlstra
2020-12-03 9:24 ` tip-bot2 for Peter Zijlstra
2020-11-26 12:01 ` [PATCH v2 3/6] perf/core: Fix arch_perf_get_page_size() Peter Zijlstra
2020-11-26 12:34 ` Matthew Wilcox [this message]
2020-11-26 12:42 ` Peter Zijlstra
2020-11-26 12:56 ` Matthew Wilcox
2020-11-26 13:06 ` Peter Zijlstra
2020-11-26 13:27 ` Matthew Wilcox
2020-12-03 9:07 ` [tip: perf/core] " tip-bot2 for Peter Zijlstra
2020-12-03 9:24 ` tip-bot2 for Peter Zijlstra
2020-11-26 12:01 ` [PATCH v2 4/6] arm64/mm: Implement pXX_leaf_size() support Peter Zijlstra
2020-11-26 12:57 ` Peter Zijlstra
2020-11-26 14:32 ` Will Deacon
2020-12-03 9:07 ` [tip: perf/core] " tip-bot2 for Peter Zijlstra
2020-12-03 9:24 ` tip-bot2 for Peter Zijlstra
2020-11-26 12:01 ` [PATCH v2 5/6] sparc64/mm: " Peter Zijlstra
2020-12-03 9:07 ` [tip: perf/core] " tip-bot2 for Peter Zijlstra
2020-12-03 9:24 ` tip-bot2 for Peter Zijlstra
2020-12-09 18:44 ` tip-bot2 for Peter Zijlstra
2020-11-26 12:01 ` [PATCH v2 6/6] powerpc/8xx: " Peter Zijlstra
2020-12-03 9:07 ` [tip: perf/core] " tip-bot2 for Peter Zijlstra
2020-12-03 9:24 ` tip-bot2 for Peter Zijlstra
2020-12-09 18:44 ` tip-bot2 for Peter Zijlstra
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20201126123458.GO4327@casper.infradead.org \
--to=willy@infradead.org \
--cc=acme@kernel.org \
--cc=ak@linux.intel.com \
--cc=alexander.shishkin@linux.intel.com \
--cc=aneesh.kumar@linux.ibm.com \
--cc=catalin.marinas@arm.com \
--cc=christophe.leroy@csgroup.eu \
--cc=dave.hansen@intel.com \
--cc=davem@davemloft.net \
--cc=eranian@google.com \
--cc=jolsa@redhat.com \
--cc=kan.liang@linux.intel.com \
--cc=kirill.shutemov@linux.intel.com \
--cc=linux-arch@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linuxppc-dev@lists.ozlabs.org \
--cc=mark.rutland@arm.com \
--cc=mingo@kernel.org \
--cc=mpe@ellerman.id.au \
--cc=npiggin@gmail.com \
--cc=peterz@infradead.org \
--cc=sparclinux@vger.kernel.org \
--cc=will@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for NNTP newsgroup(s).