linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Steven Price <steven.price@arm.com>
To: linux-mm@kvack.org
Cc: "Steven Price" <steven.price@arm.com>,
	"Andy Lutomirski" <luto@kernel.org>,
	"Ard Biesheuvel" <ard.biesheuvel@linaro.org>,
	"Arnd Bergmann" <arnd@arndb.de>, "Borislav Petkov" <bp@alien8.de>,
	"Catalin Marinas" <catalin.marinas@arm.com>,
	"Dave Hansen" <dave.hansen@linux.intel.com>,
	"Ingo Molnar" <mingo@redhat.com>,
	"James Morse" <james.morse@arm.com>,
	"Jérôme Glisse" <jglisse@redhat.com>,
	"Peter Zijlstra" <peterz@infradead.org>,
	"Thomas Gleixner" <tglx@linutronix.de>,
	"Will Deacon" <will.deacon@arm.com>,
	x86@kernel.org, "H. Peter Anvin" <hpa@zytor.com>,
	linux-arm-kernel@lists.infradead.org,
	linux-kernel@vger.kernel.org,
	"Mark Rutland" <Mark.Rutland@arm.com>,
	"Liang, Kan" <kan.liang@linux.intel.com>
Subject: [PATCH v3 27/34] mm: pagewalk: Add 'depth' parameter to pte_hole
Date: Wed, 27 Feb 2019 17:06:01 +0000	[thread overview]
Message-ID: <20190227170608.27963-28-steven.price@arm.com> (raw)
In-Reply-To: <20190227170608.27963-1-steven.price@arm.com>

The pte_hole() callback is called at multiple levels of the page tables.
Code dumping the kernel page tables needs to know at what depth
the missing entry is. Add this as an extra parameter to pte_hole().
When the depth isn't known (e.g. processing a vma) then -1 is passed.

The depth that is reported is the actual level where the entry is
missing (ignoring any folding that is in place), i.e. any levels where
PTRS_PER_P?D is set to 1 are ignored.

Note that depth starts at 0 for a PGD so that PUD/PMD/PTE retain their
natural numbers as levels 2/3/4.

Signed-off-by: Steven Price <steven.price@arm.com>
---
 fs/proc/task_mmu.c |  4 ++--
 include/linux/mm.h |  6 ++++--
 mm/hmm.c           |  2 +-
 mm/migrate.c       |  1 +
 mm/mincore.c       |  1 +
 mm/pagewalk.c      | 31 +++++++++++++++++++++++++------
 6 files changed, 34 insertions(+), 11 deletions(-)

diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index f0ec9edab2f3..91131cd4e9e0 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -474,7 +474,7 @@ static void smaps_account(struct mem_size_stats *mss, struct page *page,
 
 #ifdef CONFIG_SHMEM
 static int smaps_pte_hole(unsigned long addr, unsigned long end,
-		struct mm_walk *walk)
+			  __always_unused int depth, struct mm_walk *walk)
 {
 	struct mem_size_stats *mss = walk->private;
 
@@ -1203,7 +1203,7 @@ static int add_to_pagemap(unsigned long addr, pagemap_entry_t *pme,
 }
 
 static int pagemap_pte_hole(unsigned long start, unsigned long end,
-				struct mm_walk *walk)
+			    __always_unused int depth, struct mm_walk *walk)
 {
 	struct pagemapread *pm = walk->private;
 	unsigned long addr = start;
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 1a4b1615d012..4ae3634a9118 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1420,7 +1420,9 @@ void unmap_vmas(struct mmu_gather *tlb, struct vm_area_struct *start_vma,
  *	       pmd_trans_huge() pmds.  They may simply choose to
  *	       split_huge_page() instead of handling it explicitly.
  * @pte_entry: if set, called for each non-empty PTE (4th-level) entry
- * @pte_hole: if set, called for each hole at all levels
+ * @pte_hole: if set, called for each hole at all levels,
+ *            depth is -1 if not known, 0:PGD, 1:P4D, 2:PUD, 3:PMD, 4:PTE
+ *            any depths where PTRS_PER_P?D is equal to 1 are skipped
  * @hugetlb_entry: if set, called for each hugetlb entry
  * @test_walk: caller specific callback function to determine whether
  *             we walk over the current vma or not. Returning 0
@@ -1445,7 +1447,7 @@ struct mm_walk {
 	int (*pte_entry)(pte_t *pte, unsigned long addr,
 			 unsigned long next, struct mm_walk *walk);
 	int (*pte_hole)(unsigned long addr, unsigned long next,
-			struct mm_walk *walk);
+			int depth, struct mm_walk *walk);
 	int (*hugetlb_entry)(pte_t *pte, unsigned long hmask,
 			     unsigned long addr, unsigned long next,
 			     struct mm_walk *walk);
diff --git a/mm/hmm.c b/mm/hmm.c
index a04e4b810610..e3e6b8fda437 100644
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -440,7 +440,7 @@ static void hmm_range_need_fault(const struct hmm_vma_walk *hmm_vma_walk,
 }
 
 static int hmm_vma_walk_hole(unsigned long addr, unsigned long end,
-			     struct mm_walk *walk)
+			     __always_unused int depth, struct mm_walk *walk)
 {
 	struct hmm_vma_walk *hmm_vma_walk = walk->private;
 	struct hmm_range *range = hmm_vma_walk->range;
diff --git a/mm/migrate.c b/mm/migrate.c
index d4fd680be3b0..8b62a9fecb5c 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -2121,6 +2121,7 @@ struct migrate_vma {
 
 static int migrate_vma_collect_hole(unsigned long start,
 				    unsigned long end,
+				    __always_unused int depth,
 				    struct mm_walk *walk)
 {
 	struct migrate_vma *migrate = walk->private;
diff --git a/mm/mincore.c b/mm/mincore.c
index 218099b5ed31..c4edbc688241 100644
--- a/mm/mincore.c
+++ b/mm/mincore.c
@@ -104,6 +104,7 @@ static int __mincore_unmapped_range(unsigned long addr, unsigned long end,
 }
 
 static int mincore_unmapped_range(unsigned long addr, unsigned long end,
+				   __always_unused int depth,
 				   struct mm_walk *walk)
 {
 	walk->private += __mincore_unmapped_range(addr, end,
diff --git a/mm/pagewalk.c b/mm/pagewalk.c
index dac0c848b458..57946bcd810c 100644
--- a/mm/pagewalk.c
+++ b/mm/pagewalk.c
@@ -4,6 +4,22 @@
 #include <linux/sched.h>
 #include <linux/hugetlb.h>
 
+/*
+ * We want to know the real level where an entry is located ignoring any
+ * folding of levels which may be happening. For example if p4d is folded then
+ * a missing entry found at level 1 (p4d) is actually at level 0 (pgd).
+ */
+static int real_depth(int depth)
+{
+	if (depth == 3 && PTRS_PER_PMD == 1)
+		depth = 2;
+	if (depth == 2 && PTRS_PER_PUD == 1)
+		depth = 1;
+	if (depth == 1 && PTRS_PER_P4D == 1)
+		depth = 0;
+	return depth;
+}
+
 static int walk_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
 			  struct mm_walk *walk)
 {
@@ -31,6 +47,7 @@ static int walk_pmd_range(pud_t *pud, unsigned long addr, unsigned long end,
 	pmd_t *pmd;
 	unsigned long next;
 	int err = 0;
+	int depth = real_depth(3);
 
 	pmd = pmd_offset(pud, addr);
 	do {
@@ -38,7 +55,7 @@ static int walk_pmd_range(pud_t *pud, unsigned long addr, unsigned long end,
 		next = pmd_addr_end(addr, end);
 		if (pmd_none(*pmd)) {
 			if (walk->pte_hole)
-				err = walk->pte_hole(addr, next, walk);
+				err = walk->pte_hole(addr, next, depth, walk);
 			if (err)
 				break;
 			continue;
@@ -81,6 +98,7 @@ static int walk_pud_range(p4d_t *p4d, unsigned long addr, unsigned long end,
 	pud_t *pud;
 	unsigned long next;
 	int err = 0;
+	int depth = real_depth(2);
 
 	pud = pud_offset(p4d, addr);
 	do {
@@ -88,7 +106,7 @@ static int walk_pud_range(p4d_t *p4d, unsigned long addr, unsigned long end,
 		next = pud_addr_end(addr, end);
 		if (pud_none(*pud)) {
 			if (walk->pte_hole)
-				err = walk->pte_hole(addr, next, walk);
+				err = walk->pte_hole(addr, next, depth, walk);
 			if (err)
 				break;
 			continue;
@@ -123,13 +141,14 @@ static int walk_p4d_range(pgd_t *pgd, unsigned long addr, unsigned long end,
 	p4d_t *p4d;
 	unsigned long next;
 	int err = 0;
+	int depth = real_depth(1);
 
 	p4d = p4d_offset(pgd, addr);
 	do {
 		next = p4d_addr_end(addr, end);
 		if (p4d_none_or_clear_bad(p4d)) {
 			if (walk->pte_hole)
-				err = walk->pte_hole(addr, next, walk);
+				err = walk->pte_hole(addr, next, depth, walk);
 			if (err)
 				break;
 			continue;
@@ -160,7 +179,7 @@ static int walk_pgd_range(unsigned long addr, unsigned long end,
 		next = pgd_addr_end(addr, end);
 		if (pgd_none_or_clear_bad(pgd)) {
 			if (walk->pte_hole)
-				err = walk->pte_hole(addr, next, walk);
+				err = walk->pte_hole(addr, next, 0, walk);
 			if (err)
 				break;
 			continue;
@@ -206,7 +225,7 @@ static int walk_hugetlb_range(unsigned long addr, unsigned long end,
 		if (pte)
 			err = walk->hugetlb_entry(pte, hmask, addr, next, walk);
 		else if (walk->pte_hole)
-			err = walk->pte_hole(addr, next, walk);
+			err = walk->pte_hole(addr, next, -1, walk);
 
 		if (err)
 			break;
@@ -249,7 +268,7 @@ static int walk_page_test(unsigned long start, unsigned long end,
 	if (vma->vm_flags & VM_PFNMAP) {
 		int err = 1;
 		if (walk->pte_hole)
-			err = walk->pte_hole(start, end, walk);
+			err = walk->pte_hole(start, end, -1, walk);
 		return err ? err : 1;
 	}
 	return 0;
-- 
2.20.1


  parent reply	other threads:[~2019-02-27 17:08 UTC|newest]

Thread overview: 68+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-02-27 17:05 [PATCH v3 00/34] Convert x86 & arm64 to use generic page walk Steven Price
2019-02-27 17:05 ` [PATCH v3 01/34] alpha: mm: Add p?d_large() definitions Steven Price
2019-02-27 17:05 ` [PATCH v3 02/34] arc: " Steven Price
2019-02-27 18:18   ` Vineet Gupta
2019-02-27 17:05 ` [PATCH v3 03/34] arm: " Steven Price
2019-03-01 21:47   ` Kirill A. Shutemov
2019-03-04 11:56     ` Steven Price
2019-03-04 13:10       ` Kirill A. Shutemov
2019-02-27 17:05 ` [PATCH v3 04/34] arm64: " Steven Price
2019-02-27 17:05 ` [PATCH v3 05/34] c6x: " Steven Price
2019-03-01 21:48   ` Kirill A. Shutemov
2019-03-04 12:01     ` Steven Price
2019-03-04 13:11       ` Kirill A. Shutemov
2019-02-27 17:05 ` [PATCH v3 06/34] csky: " Steven Price
2019-03-01 21:51   ` Kirill A. Shutemov
2019-02-27 17:05 ` [PATCH v3 07/34] hexagon: " Steven Price
2019-02-27 17:05 ` [PATCH v3 08/34] ia64: " Steven Price
2019-03-01 21:57   ` Kirill A. Shutemov
2019-03-04 13:16     ` Steven Price
2019-03-04 19:06       ` Luck, Tony
2019-03-06 13:45         ` Steven Price
2019-02-27 17:05 ` [PATCH v3 09/34] m68k: " Steven Price
2019-02-27 19:27   ` Geert Uytterhoeven
2019-02-28 11:36     ` Mike Rapoport
2019-02-28 11:53       ` Geert Uytterhoeven
2019-02-28 12:04         ` Steven Price
2019-03-01 11:45           ` Mike Rapoport
2019-02-27 17:05 ` [PATCH v3 10/34] microblaze: " Steven Price
2019-02-27 17:05 ` [PATCH v3 11/34] mips: " Steven Price
2019-02-28  2:15   ` Paul Burton
2019-02-28 12:11     ` Steven Price
2019-02-28 18:55       ` Paul Burton
2019-03-01 11:02         ` Steven Price
2019-02-27 17:05 ` [PATCH v3 12/34] nds32: " Steven Price
2019-02-27 17:05 ` [PATCH v3 13/34] nios2: " Steven Price
2019-02-27 17:05 ` [PATCH v3 14/34] openrisc: " Steven Price
2019-02-27 17:05 ` [PATCH v3 15/34] parisc: " Steven Price
2019-02-27 18:54   ` Helge Deller
2019-03-01 22:12     ` Kirill A. Shutemov
2019-03-05 21:45       ` Helge Deller
2019-02-27 17:05 ` [PATCH v3 16/34] powerpc: " Steven Price
2019-02-27 17:05 ` [PATCH v3 17/34] riscv: " Steven Price
2019-02-27 17:05 ` [PATCH v3 18/34] s390: " Steven Price
2019-02-27 17:40   ` Martin Schwidefsky
2019-02-28 11:43     ` Steven Price
2019-02-27 17:05 ` [PATCH v3 19/34] sh: " Steven Price
2019-02-27 17:05 ` [PATCH v3 20/34] sparc: " Steven Price
2019-02-27 18:38   ` David Miller
2019-02-28 11:49     ` Steven Price
2019-02-27 17:05 ` [PATCH v3 21/34] um: " Steven Price
2019-02-27 17:05 ` [PATCH v3 22/34] unicore32: " Steven Price
2019-02-27 17:05 ` [PATCH v3 23/34] xtensa: " Steven Price
2019-02-27 17:29   ` Max Filippov
2019-02-27 17:05 ` [PATCH v3 24/34] mm: Add generic p?d_large() macros Steven Price
2019-02-27 17:05 ` [PATCH v3 25/34] mm: pagewalk: Add p4d_entry() and pgd_entry() Steven Price
2019-02-27 17:06 ` [PATCH v3 26/34] mm: pagewalk: Allow walking without vma Steven Price
2019-02-27 17:06 ` Steven Price [this message]
2019-02-27 17:38   ` [PATCH v3 27/34] mm: pagewalk: Add 'depth' parameter to pte_hole Dave Hansen
2019-02-28 11:28     ` Steven Price
2019-02-28 19:00       ` Dave Hansen
2019-03-01 11:24         ` Steven Price
2019-02-27 17:06 ` [PATCH v3 28/34] mm: pagewalk: Add test_p?d callbacks Steven Price
2019-02-27 17:06 ` [PATCH v3 29/34] arm64: mm: Convert mm/dump.c to use walk_page_range() Steven Price
2019-02-27 17:06 ` [PATCH v3 30/34] x86/mm: Point to struct seq_file from struct pg_state Steven Price
2019-02-27 17:06 ` [PATCH v3 31/34] x86/mm+efi: Convert ptdump_walk_pgd_level() to take a mm_struct Steven Price
2019-02-27 17:06 ` [PATCH v3 32/34] x86/mm: Convert ptdump_walk_pgd_level_debugfs() to take an mm_struct Steven Price
2019-02-27 17:06 ` [PATCH v3 33/34] x86/mm: Convert ptdump_walk_pgd_level_core() " Steven Price
2019-02-27 17:06 ` [PATCH v3 34/34] x86: mm: Convert dump_pagetables to use walk_page_range Steven Price

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190227170608.27963-28-steven.price@arm.com \
    --to=steven.price@arm.com \
    --cc=Mark.Rutland@arm.com \
    --cc=ard.biesheuvel@linaro.org \
    --cc=arnd@arndb.de \
    --cc=bp@alien8.de \
    --cc=catalin.marinas@arm.com \
    --cc=dave.hansen@linux.intel.com \
    --cc=hpa@zytor.com \
    --cc=james.morse@arm.com \
    --cc=jglisse@redhat.com \
    --cc=kan.liang@linux.intel.com \
    --cc=linux-arm-kernel@lists.infradead.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=luto@kernel.org \
    --cc=mingo@redhat.com \
    --cc=peterz@infradead.org \
    --cc=tglx@linutronix.de \
    --cc=will.deacon@arm.com \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).