linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Peter Zijlstra <peterz@infradead.org>
To: Matthew Wilcox <willy@infradead.org>
Cc: "Liang, Kan" <kan.liang@linux.intel.com>,
	Will Deacon <will@kernel.org>,
	Michael Ellerman <mpe@ellerman.id.au>,
	mingo@redhat.com, acme@kernel.org, linux-kernel@vger.kernel.org,
	mark.rutland@arm.com, alexander.shishkin@linux.intel.com,
	jolsa@redhat.com, eranian@google.com, ak@linux.intel.com,
	dave.hansen@intel.com, kirill.shutemov@linux.intel.com,
	benh@kernel.crashing.org, paulus@samba.org,
	David Miller <davem@davemloft.net>,
	vbabka@suse.cz
Subject: Re: [PATCH V9 1/4] perf/core: Add PERF_SAMPLE_DATA_PAGE_SIZE
Date: Thu, 12 Nov 2020 12:36:45 +0100	[thread overview]
Message-ID: <20201112113645.GT2651@hirez.programming.kicks-ass.net> (raw)
In-Reply-To: <20201112095358.GU2628@hirez.programming.kicks-ass.net>

On Thu, Nov 12, 2020 at 10:53:58AM +0100, Peter Zijlstra wrote:
> You just don't like it because you want me to be purely page-table
> based.

How's something like this then? I failed to untangle Power's many MMUs
though :/

---
 arch/arm64/include/asm/pgtable.h    |   3 ++
 arch/sparc/include/asm/pgtable_64.h |  14 +++++
 arch/sparc/mm/hugetlbpage.c         |  19 ++++---
 include/linux/pgtable.h             |  16 ++++++
 kernel/events/core.c                | 100 +++++++++++++-----------------------
 5 files changed, 82 insertions(+), 70 deletions(-)

diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 4ff12a7adcfd..1cd2d986b0ca 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -503,6 +503,9 @@ extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
 				 PMD_TYPE_SECT)
 #define pmd_leaf(pmd)		pmd_sect(pmd)
 
+#define pmd_leaf_size(pmd)	(pmd_cont(pmd) ? CONT_PMD_SIZE : PMD_SIZE)
+#define pte_leaf_size(pte)	(pte_cont(pte) ? CONT_PTE_SIZE : PAGE_SIZE)
+
 #if defined(CONFIG_ARM64_64K_PAGES) || CONFIG_PGTABLE_LEVELS < 3
 static inline bool pud_sect(pud_t pud) { return false; }
 static inline bool pud_table(pud_t pud) { return true; }
diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h
index 7ef6affa105e..1723e18ba89f 100644
--- a/arch/sparc/include/asm/pgtable_64.h
+++ b/arch/sparc/include/asm/pgtable_64.h
@@ -1121,6 +1121,20 @@ extern unsigned long cmdline_memory_size;
 
 asmlinkage void do_sparc64_fault(struct pt_regs *regs);
 
+#ifdef CONFIG_HUGETLB_PAGE
+
+#define pud_leaf_size pud_leaf_size
+extern unsigned long pud_leaf_size(pud_t pud);
+
+#define pmd_leaf_size pmd_leaf_size
+extern unsigned long pmd_leaf_size(pmd_t pmd);
+
+#define pte_leaf_size pte_leaf_size
+extern unsigned long pte_leaf_size(pte_t pte);
+#endif
+
+#endif /* CONFIG_HUGETLB_PAGE */
+
 #endif /* !(__ASSEMBLY__) */
 
 #endif /* !(_SPARC64_PGTABLE_H) */
diff --git a/arch/sparc/mm/hugetlbpage.c b/arch/sparc/mm/hugetlbpage.c
index ec423b5f17dd..3e806b87ec19 100644
--- a/arch/sparc/mm/hugetlbpage.c
+++ b/arch/sparc/mm/hugetlbpage.c
@@ -247,14 +247,17 @@ static unsigned int sun4u_huge_tte_to_shift(pte_t entry)
 	return shift;
 }
 
-static unsigned int huge_tte_to_shift(pte_t entry)
+static unsigned long __tte_to_shift(pte_t entry)
 {
-	unsigned long shift;
-
 	if (tlb_type == hypervisor)
-		shift = sun4v_huge_tte_to_shift(entry);
-	else
-		shift = sun4u_huge_tte_to_shift(entry);
+		return sun4v_huge_tte_to_shift(entry);
+
+	return sun4u_huge_tte_to_shift(entry);
+}
+
+static unsigned int huge_tte_to_shift(pte_t entry)
+{
+	unsigned long shift = __tte_to_shift(entry);
 
 	if (shift == PAGE_SHIFT)
 		WARN_ONCE(1, "tto_to_shift: invalid hugepage tte=0x%lx\n",
@@ -272,6 +275,10 @@ static unsigned long huge_tte_to_size(pte_t pte)
 	return size;
 }
 
+unsigned long pud_leaf_size(pud_t pud) { return 1UL << __tte_to_shift((pte_t)pud); }
+unsigned long pmd_leaf_size(pmd_t pmd) { return 1UL << __tte_to_shift((pte_t)pmd); }
+unsigned long pte_leaf_size(pte_t pte) { return 1UL << __tte_to_shift((pte_t)pte); }
+
 pte_t *huge_pte_alloc(struct mm_struct *mm,
 			unsigned long addr, unsigned long sz)
 {
diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h
index 71125a4676c4..35b9da397e55 100644
--- a/include/linux/pgtable.h
+++ b/include/linux/pgtable.h
@@ -1481,4 +1481,20 @@ typedef unsigned int pgtbl_mod_mask;
 #define pmd_leaf(x)	0
 #endif
 
+#ifndef pgd_leaf_size(x)
+#define pgd_leaf_size(x) PGD_SIZE
+#endif
+#ifndef p4d_leaf_size(x)
+#define p4d_leaf_size(x) P4D_SIZE
+#endif
+#ifndef pud_leaf_size(x)
+#define pud_leaf_size(x) PUD_SIZE
+#endif
+#ifndef pmd_leaf_size(x)
+#define pmd_leaf_size(x) PMD_SIZE
+#endif
+#ifndef pte_leaf_size(x)
+#define pte_leaf_size(x) PAGE_SIZE
+#endif
+
 #endif /* _LINUX_PGTABLE_H */
diff --git a/kernel/events/core.c b/kernel/events/core.c
index d2f3ca792936..fca04fcfc9ea 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -7001,90 +7001,62 @@ static u64 perf_virt_to_phys(u64 virt)
 	return phys_addr;
 }
 
-#ifdef CONFIG_MMU
-
 /*
  * Return the MMU page size of a given virtual address.
- *
- * This generic implementation handles page-table aligned huge pages, as well
- * as non-page-table aligned hugetlbfs compound pages.
- *
- * If an architecture supports and uses non-page-table aligned pages in their
- * kernel mapping it will need to provide it's own implementation of this
- * function.
  */
-__weak u64 arch_perf_get_page_size(struct mm_struct *mm, unsigned long addr)
+static u64 arch_perf_get_page_size(struct mm_struct *mm, unsigned long addr)
 {
+	u64 size = 0;
+#ifdef CONFIG_HAVE_FAST_GUP
 	struct page *page;
-	pgd_t *pgd;
-	p4d_t *p4d;
-	pud_t *pud;
-	pmd_t *pmd;
-	pte_t *pte;
-
-	pgd = pgd_offset(mm, addr);
-	if (pgd_none(*pgd))
+	pgd_t *pgdp, pgd;
+	p4d_t *p4dp, p4d;
+	pud_t *pudp, pud;
+	pmd_t *pmdp, pmd;
+	pte_t *ptep, pte;
+
+	pgdp = pgd_offset(mm, addr);
+	pgd = READ_ONCE(*pgdp);
+	if (pgd_none(pgd))
 		return 0;
 
-	p4d = p4d_offset(pgd, addr);
-	if (!p4d_present(*p4d))
-		return 0;
+	if (pgd_leaf(pgd))
+		return pgd_leaf_size(pgd);
 
-	if (p4d_leaf(*p4d))
-		return 1ULL << P4D_SHIFT;
-
-	pud = pud_offset(p4d, addr);
-	if (!pud_present(*pud))
+	p4dp = p4d_offset_lockless(pgdp, pgd, addr);
+	p4d = READ_ONCE(*p4dp);
+	if (!p4d_present(p4d))
 		return 0;
 
-	if (pud_leaf(*pud)) {
-#ifdef pud_page
-		page = pud_page(*pud);
-		if (PageHuge(page))
-			return page_size(compound_head(page));
-#endif
-		return 1ULL << PUD_SHIFT;
-	}
+	if (p4d_leaf(p4d))
+		return p4d_leaf_size(p4d);
 
-	pmd = pmd_offset(pud, addr);
-	if (!pmd_present(*pmd))
+	pudp = pud_offset_lockless(p4dp, p4d, addr);
+	pud = READ_ONCE(*pudp);
+	if (!pud_present(pud))
 		return 0;
 
-	if (pmd_leaf(*pmd)) {
-#ifdef pmd_page
-		page = pmd_page(*pmd);
-		if (PageHuge(page))
-			return page_size(compound_head(page));
-#endif
-		return 1ULL << PMD_SHIFT;
-	}
+	if (pud_leaf(pud))
+		return pud_leaf_size(pud);
 
-	pte = pte_offset_map(pmd, addr);
-	if (!pte_present(*pte)) {
-		pte_unmap(pte);
+	pmdp = pmd_offset_lockless(pudp, pud, addr);
+	pmd = READ_ONCE(*pmdp);
+	if (!pmd_present(pmd))
 		return 0;
-	}
-
-	page = pte_page(*pte);
-	if (PageHuge(page)) {
-		u64 size = page_size(compound_head(page));
-		pte_unmap(pte);
-		return size;
-	}
 
-	pte_unmap(pte);
-	return PAGE_SIZE;
-}
+	if (pmd_leaf(pmd))
+		return pmd_leaf_size(pmd);
 
-#else
+	ptep = pte_offset_map(&pmd, addr);
+	pte = READ_ONCE(*ptep); // gup_get_pte()
+	if (pte_present(pte))
+		size = pte_leaf_size(pte);
+	pte_unmap(ptep);
 
-static u64 arch_perf_get_page_size(struct mm_struct *mm, unsigned long addr)
-{
-	return 0;
+#endif /* CONFIG_HAVE_FAST_GUP */
+	return size;
 }
 
-#endif
-
 static u64 perf_get_page_size(unsigned long addr)
 {
 	struct mm_struct *mm;

  reply	other threads:[~2020-11-12 11:37 UTC|newest]

Thread overview: 39+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-10-01 13:57 [PATCH V9 0/4] Add the page size in the perf record (kernel) kan.liang
2020-10-01 13:57 ` [PATCH V9 1/4] perf/core: Add PERF_SAMPLE_DATA_PAGE_SIZE kan.liang
2020-10-09  9:09   ` Peter Zijlstra
2020-10-09  9:16     ` Peter Zijlstra
2020-10-09  9:37     ` Will Deacon
2020-10-09  9:53       ` Peter Zijlstra
2020-10-20  2:49         ` Leo Yan
2020-10-20  7:19           ` Peter Zijlstra
2020-10-20  8:16             ` Leo Yan
2020-10-09 12:29     ` Liang, Kan
2020-10-09 12:57       ` Peter Zijlstra
2020-10-09 13:28     ` Michael Ellerman
2020-10-12  8:48       ` Will Deacon
2020-10-13 14:57         ` Liang, Kan
2020-10-13 15:46           ` Peter Zijlstra
2020-10-13 16:34             ` Peter Zijlstra
2020-11-04 17:11               ` Liang, Kan
2020-11-10 15:20                 ` Liang, Kan
2020-11-11  9:57                 ` Peter Zijlstra
2020-11-11 11:22                   ` Peter Zijlstra
2020-11-11 12:43                     ` Peter Zijlstra
2020-11-11 15:30                       ` Matthew Wilcox
2020-11-11 15:52                         ` Peter Zijlstra
2020-11-11 15:57                         ` Peter Zijlstra
2020-11-11 16:38                           ` Matthew Wilcox
2020-11-11 17:22                             ` Peter Zijlstra
2020-11-11 18:26                               ` Matthew Wilcox
2020-11-11 20:00                                 ` Peter Zijlstra
2020-11-11 22:33                                   ` Matthew Wilcox
2020-11-12  9:53                                     ` Peter Zijlstra
2020-11-12 11:36                                       ` Peter Zijlstra [this message]
2020-11-12 14:01                                         ` Matthew Wilcox
2020-10-29 10:51   ` [tip: perf/core] " tip-bot2 for Kan Liang
2020-10-01 13:57 ` [PATCH V9 2/4] perf/x86/intel: Support PERF_SAMPLE_DATA_PAGE_SIZE kan.liang
2020-10-29 10:51   ` [tip: perf/core] " tip-bot2 for Kan Liang
2020-10-01 13:57 ` [PATCH V9 3/4] powerpc/perf: " kan.liang
2020-10-29 10:51   ` [tip: perf/core] " tip-bot2 for Kan Liang
2020-10-01 13:57 ` [PATCH V9 4/4] perf/core: Add support for PERF_SAMPLE_CODE_PAGE_SIZE kan.liang
2020-10-29 10:51   ` [tip: perf/core] " tip-bot2 for Stephane Eranian

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20201112113645.GT2651@hirez.programming.kicks-ass.net \
    --to=peterz@infradead.org \
    --cc=acme@kernel.org \
    --cc=ak@linux.intel.com \
    --cc=alexander.shishkin@linux.intel.com \
    --cc=benh@kernel.crashing.org \
    --cc=dave.hansen@intel.com \
    --cc=davem@davemloft.net \
    --cc=eranian@google.com \
    --cc=jolsa@redhat.com \
    --cc=kan.liang@linux.intel.com \
    --cc=kirill.shutemov@linux.intel.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mark.rutland@arm.com \
    --cc=mingo@redhat.com \
    --cc=mpe@ellerman.id.au \
    --cc=paulus@samba.org \
    --cc=vbabka@suse.cz \
    --cc=will@kernel.org \
    --cc=willy@infradead.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).