Linux-mm Archive on lore.kernel.org
 help / color / Atom feed
From: Steven Price <steven.price@arm.com>
To: Andrew Morton <akpm@linux-foundation.org>, linux-mm@kvack.org
Cc: "Steven Price" <steven.price@arm.com>,
	"Andy Lutomirski" <luto@kernel.org>,
	"Ard Biesheuvel" <ard.biesheuvel@linaro.org>,
	"Arnd Bergmann" <arnd@arndb.de>, "Borislav Petkov" <bp@alien8.de>,
	"Catalin Marinas" <catalin.marinas@arm.com>,
	"Dave Hansen" <dave.hansen@linux.intel.com>,
	"Ingo Molnar" <mingo@redhat.com>,
	"James Morse" <james.morse@arm.com>,
	"Jérôme Glisse" <jglisse@redhat.com>,
	"Peter Zijlstra" <peterz@infradead.org>,
	"Thomas Gleixner" <tglx@linutronix.de>,
	"Will Deacon" <will@kernel.org>,
	x86@kernel.org, "H. Peter Anvin" <hpa@zytor.com>,
	linux-arm-kernel@lists.infradead.org,
	linux-kernel@vger.kernel.org,
	"Mark Rutland" <Mark.Rutland@arm.com>,
	"Liang, Kan" <kan.liang@linux.intel.com>,
	"Zong Li" <zong.li@sifive.com>
Subject: [PATCH v15 14/23] mm: pagewalk: Add 'depth' parameter to pte_hole
Date: Fri,  1 Nov 2019 14:09:33 +0000
Message-ID: <20191101140942.51554-15-steven.price@arm.com> (raw)
In-Reply-To: <20191101140942.51554-1-steven.price@arm.com>

The pte_hole() callback is called at multiple levels of the page tables.
Code dumping the kernel page tables needs to know at what depth
the missing entry is. Add this as an extra parameter to pte_hole().
When the depth isn't known (e.g. processing a vma) then -1 is passed.

The depth that is reported is the actual level where the entry is
missing (ignoring any folding that is in place), i.e. any levels where
PTRS_PER_P?D is set to 1 are ignored.

Note that depth starts at 0 for a PGD so that PUD/PMD/PTE retain their
natural numbers as levels 2/3/4.

Tested-by: Zong Li <zong.li@sifive.com>
Signed-off-by: Steven Price <steven.price@arm.com>
---
 fs/proc/task_mmu.c       |  4 ++--
 include/linux/pagewalk.h |  7 +++++--
 mm/hmm.c                 |  8 ++++----
 mm/migrate.c             |  5 +++--
 mm/mincore.c             |  1 +
 mm/pagewalk.c            | 31 +++++++++++++++++++++++++------
 6 files changed, 40 insertions(+), 16 deletions(-)

diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 9442631fd4af..3ba9ae83bff5 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -505,7 +505,7 @@ static void smaps_account(struct mem_size_stats *mss, struct page *page,
 
 #ifdef CONFIG_SHMEM
 static int smaps_pte_hole(unsigned long addr, unsigned long end,
-		struct mm_walk *walk)
+			  __always_unused int depth, struct mm_walk *walk)
 {
 	struct mem_size_stats *mss = walk->private;
 
@@ -1282,7 +1282,7 @@ static int add_to_pagemap(unsigned long addr, pagemap_entry_t *pme,
 }
 
 static int pagemap_pte_hole(unsigned long start, unsigned long end,
-				struct mm_walk *walk)
+			    __always_unused int depth, struct mm_walk *walk)
 {
 	struct pagemapread *pm = walk->private;
 	unsigned long addr = start;
diff --git a/include/linux/pagewalk.h b/include/linux/pagewalk.h
index fe61448c5900..94c9ad171f1c 100644
--- a/include/linux/pagewalk.h
+++ b/include/linux/pagewalk.h
@@ -17,7 +17,10 @@ struct mm_walk;
  *			split_huge_page() instead of handling it explicitly.
  * @pte_entry:		if set, called for each non-empty PTE (lowest-level)
  *			entry
- * @pte_hole:		if set, called for each hole at all levels
+ * @pte_hole:		if set, called for each hole at all levels,
+ *			depth is -1 if not known, 0:PGD, 1:P4D, 2:PUD, 3:PMD,
+ *			4:PTE. Any folded depths (where PTRS_PER_P?D is equal
+ *			to 1) are skipped.
  * @hugetlb_entry:	if set, called for each hugetlb entry
  * @test_walk:		caller specific callback function to determine whether
  *			we walk over the current vma or not. Returning 0 means
@@ -45,7 +48,7 @@ struct mm_walk_ops {
 	int (*pte_entry)(pte_t *pte, unsigned long addr,
 			 unsigned long next, struct mm_walk *walk);
 	int (*pte_hole)(unsigned long addr, unsigned long next,
-			struct mm_walk *walk);
+			int depth, struct mm_walk *walk);
 	int (*hugetlb_entry)(pte_t *pte, unsigned long hmask,
 			     unsigned long addr, unsigned long next,
 			     struct mm_walk *walk);
diff --git a/mm/hmm.c b/mm/hmm.c
index 902f5fa6bf93..df3d531c8f2d 100644
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -376,7 +376,7 @@ static void hmm_range_need_fault(const struct hmm_vma_walk *hmm_vma_walk,
 }
 
 static int hmm_vma_walk_hole(unsigned long addr, unsigned long end,
-			     struct mm_walk *walk)
+			     __always_unused int depth, struct mm_walk *walk)
 {
 	struct hmm_vma_walk *hmm_vma_walk = walk->private;
 	struct hmm_range *range = hmm_vma_walk->range;
@@ -564,7 +564,7 @@ static int hmm_vma_walk_pmd(pmd_t *pmdp,
 again:
 	pmd = READ_ONCE(*pmdp);
 	if (pmd_none(pmd))
-		return hmm_vma_walk_hole(start, end, walk);
+		return hmm_vma_walk_hole(start, end, -1, walk);
 
 	if (thp_migration_supported() && is_pmd_migration_entry(pmd)) {
 		bool fault, write_fault;
@@ -666,7 +666,7 @@ static int hmm_vma_walk_pud(pud_t *pudp, unsigned long start, unsigned long end,
 again:
 	pud = READ_ONCE(*pudp);
 	if (pud_none(pud))
-		return hmm_vma_walk_hole(start, end, walk);
+		return hmm_vma_walk_hole(start, end, -1, walk);
 
 	if (pud_huge(pud) && pud_devmap(pud)) {
 		unsigned long i, npages, pfn;
@@ -674,7 +674,7 @@ static int hmm_vma_walk_pud(pud_t *pudp, unsigned long start, unsigned long end,
 		bool fault, write_fault;
 
 		if (!pud_present(pud))
-			return hmm_vma_walk_hole(start, end, walk);
+			return hmm_vma_walk_hole(start, end, -1, walk);
 
 		i = (addr - range->start) >> PAGE_SHIFT;
 		npages = (end - addr) >> PAGE_SHIFT;
diff --git a/mm/migrate.c b/mm/migrate.c
index 4fe45d1428c8..435258df9a36 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -2123,6 +2123,7 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
 #ifdef CONFIG_DEVICE_PRIVATE
 static int migrate_vma_collect_hole(unsigned long start,
 				    unsigned long end,
+				    __always_unused int depth,
 				    struct mm_walk *walk)
 {
 	struct migrate_vma *migrate = walk->private;
@@ -2167,7 +2168,7 @@ static int migrate_vma_collect_pmd(pmd_t *pmdp,
 
 again:
 	if (pmd_none(*pmdp))
-		return migrate_vma_collect_hole(start, end, walk);
+		return migrate_vma_collect_hole(start, end, -1, walk);
 
 	if (pmd_trans_huge(*pmdp)) {
 		struct page *page;
@@ -2200,7 +2201,7 @@ static int migrate_vma_collect_pmd(pmd_t *pmdp,
 				return migrate_vma_collect_skip(start, end,
 								walk);
 			if (pmd_none(*pmdp))
-				return migrate_vma_collect_hole(start, end,
+				return migrate_vma_collect_hole(start, end, -1,
 								walk);
 		}
 	}
diff --git a/mm/mincore.c b/mm/mincore.c
index 49b6fa2f6aa1..0e6dd9948f1a 100644
--- a/mm/mincore.c
+++ b/mm/mincore.c
@@ -112,6 +112,7 @@ static int __mincore_unmapped_range(unsigned long addr, unsigned long end,
 }
 
 static int mincore_unmapped_range(unsigned long addr, unsigned long end,
+				   __always_unused int depth,
 				   struct mm_walk *walk)
 {
 	walk->private += __mincore_unmapped_range(addr, end,
diff --git a/mm/pagewalk.c b/mm/pagewalk.c
index 4616281f5b69..c7529dc4f82b 100644
--- a/mm/pagewalk.c
+++ b/mm/pagewalk.c
@@ -4,6 +4,22 @@
 #include <linux/sched.h>
 #include <linux/hugetlb.h>
 
+/*
+ * We want to know the real level where an entry is located ignoring any
+ * folding of levels which may be happening. For example if p4d is folded then
+ * a missing entry found at level 1 (p4d) is actually at level 0 (pgd).
+ */
+static int real_depth(int depth)
+{
+	if (depth == 3 && PTRS_PER_PMD == 1)
+		depth = 2;
+	if (depth == 2 && PTRS_PER_PUD == 1)
+		depth = 1;
+	if (depth == 1 && PTRS_PER_P4D == 1)
+		depth = 0;
+	return depth;
+}
+
 static int walk_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
 			  struct mm_walk *walk)
 {
@@ -33,6 +49,7 @@ static int walk_pmd_range(pud_t *pud, unsigned long addr, unsigned long end,
 	unsigned long next;
 	const struct mm_walk_ops *ops = walk->ops;
 	int err = 0;
+	int depth = real_depth(3);
 
 	if (ops->test_pmd) {
 		err = ops->test_pmd(addr, end, pmd_offset(pud, 0UL), walk);
@@ -48,7 +65,7 @@ static int walk_pmd_range(pud_t *pud, unsigned long addr, unsigned long end,
 		next = pmd_addr_end(addr, end);
 		if (pmd_none(*pmd) || (!walk->vma && !walk->no_vma)) {
 			if (ops->pte_hole)
-				err = ops->pte_hole(addr, next, walk);
+				err = ops->pte_hole(addr, next, depth, walk);
 			if (err)
 				break;
 			continue;
@@ -92,6 +109,7 @@ static int walk_pud_range(p4d_t *p4d, unsigned long addr, unsigned long end,
 	unsigned long next;
 	const struct mm_walk_ops *ops = walk->ops;
 	int err = 0;
+	int depth = real_depth(2);
 
 	if (ops->test_pud) {
 		err = ops->test_pud(addr, end, pud_offset(p4d, 0UL), walk);
@@ -107,7 +125,7 @@ static int walk_pud_range(p4d_t *p4d, unsigned long addr, unsigned long end,
 		next = pud_addr_end(addr, end);
 		if (pud_none(*pud) || (!walk->vma && !walk->no_vma)) {
 			if (ops->pte_hole)
-				err = ops->pte_hole(addr, next, walk);
+				err = ops->pte_hole(addr, next, depth, walk);
 			if (err)
 				break;
 			continue;
@@ -143,6 +161,7 @@ static int walk_p4d_range(pgd_t *pgd, unsigned long addr, unsigned long end,
 	unsigned long next;
 	const struct mm_walk_ops *ops = walk->ops;
 	int err = 0;
+	int depth = real_depth(1);
 
 	if (ops->test_p4d) {
 		err = ops->test_p4d(addr, end, p4d_offset(pgd, 0UL), walk);
@@ -157,7 +176,7 @@ static int walk_p4d_range(pgd_t *pgd, unsigned long addr, unsigned long end,
 		next = p4d_addr_end(addr, end);
 		if (p4d_none_or_clear_bad(p4d)) {
 			if (ops->pte_hole)
-				err = ops->pte_hole(addr, next, walk);
+				err = ops->pte_hole(addr, next, depth, walk);
 			if (err)
 				break;
 			continue;
@@ -189,7 +208,7 @@ static int walk_pgd_range(unsigned long addr, unsigned long end,
 		next = pgd_addr_end(addr, end);
 		if (pgd_none_or_clear_bad(pgd)) {
 			if (ops->pte_hole)
-				err = ops->pte_hole(addr, next, walk);
+				err = ops->pte_hole(addr, next, 0, walk);
 			if (err)
 				break;
 			continue;
@@ -236,7 +255,7 @@ static int walk_hugetlb_range(unsigned long addr, unsigned long end,
 		if (pte)
 			err = ops->hugetlb_entry(pte, hmask, addr, next, walk);
 		else if (ops->pte_hole)
-			err = ops->pte_hole(addr, next, walk);
+			err = ops->pte_hole(addr, next, -1, walk);
 
 		if (err)
 			break;
@@ -280,7 +299,7 @@ static int walk_page_test(unsigned long start, unsigned long end,
 	if (vma->vm_flags & VM_PFNMAP) {
 		int err = 1;
 		if (ops->pte_hole)
-			err = ops->pte_hole(start, end, walk);
+			err = ops->pte_hole(start, end, -1, walk);
 		return err ? err : 1;
 	}
 	return 0;
-- 
2.20.1



  parent reply index

Thread overview: 35+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-11-01 14:09 [PATCH v15 00/23] Generic page walk and ptdump Steven Price
2019-11-01 14:09 ` [PATCH v15 01/23] mm: Add generic p?d_leaf() macros Steven Price
2019-11-01 14:09 ` [PATCH v15 02/23] arc: mm: Add p?d_leaf() definitions Steven Price
2019-11-01 14:09 ` [PATCH v15 03/23] arm: " Steven Price
2019-11-01 14:09 ` [PATCH v15 04/23] arm64: " Steven Price
2019-11-01 14:09 ` [PATCH v15 05/23] mips: " Steven Price
2019-11-01 14:09 ` [PATCH v15 06/23] powerpc: " Steven Price
2019-11-01 14:09 ` [PATCH v15 07/23] riscv: " Steven Price
2019-11-01 14:09 ` [PATCH v15 08/23] s390: " Steven Price
2019-11-01 14:09 ` [PATCH v15 09/23] sparc: " Steven Price
2019-11-01 14:09 ` [PATCH v15 10/23] x86: " Steven Price
2019-11-01 14:09 ` [PATCH v15 11/23] mm: pagewalk: Add p4d_entry() and pgd_entry() Steven Price
2019-11-01 14:09 ` [PATCH v15 12/23] mm: pagewalk: Allow walking without vma Steven Price
2019-11-01 14:09 ` [PATCH v15 13/23] mm: pagewalk: Add test_p?d callbacks Steven Price
2019-11-01 14:09 ` Steven Price [this message]
2019-11-01 14:09 ` [PATCH v15 15/23] x86: mm: Point to struct seq_file from struct pg_state Steven Price
2019-11-01 14:09 ` [PATCH v15 16/23] x86: mm+efi: Convert ptdump_walk_pgd_level() to take a mm_struct Steven Price
2019-11-01 14:09 ` [PATCH v15 17/23] x86: mm: Convert ptdump_walk_pgd_level_debugfs() to take an mm_struct Steven Price
2019-11-01 14:09 ` [PATCH v15 18/23] x86: mm: Convert ptdump_walk_pgd_level_core() " Steven Price
2019-11-01 14:09 ` [PATCH v15 19/23] mm: Add generic ptdump Steven Price
2019-11-01 14:09 ` [PATCH v15 20/23] x86: mm: Convert dump_pagetables to use walk_page_range Steven Price
2019-11-01 14:09 ` [PATCH v15 21/23] arm64: mm: Convert mm/dump.c to use walk_page_range() Steven Price
2019-11-01 14:09 ` [PATCH v15 22/23] arm64: mm: Display non-present entries in ptdump Steven Price
2019-11-01 14:09 ` [PATCH v15 23/23] mm: ptdump: Reduce level numbers by 1 in note_page() Steven Price
2019-11-04 19:35 ` [PATCH v15 00/23] Generic page walk and ptdump Qian Cai
2019-11-06 13:31   ` Qian Cai
2019-11-06 15:05     ` Steven Price
2019-12-03 11:02       ` David Hildenbrand
2019-12-04 14:54         ` Qian Cai
2019-12-04 14:56           ` David Hildenbrand
2019-12-04 16:32             ` Steven Price
2019-12-04 17:51               ` Thomas Hellstrom
2019-12-05 13:15               ` Qian Cai
2019-12-05 14:32                 ` Thomas Hellstrom
2019-12-05 14:38                   ` Qian Cai

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20191101140942.51554-15-steven.price@arm.com \
    --to=steven.price@arm.com \
    --cc=Mark.Rutland@arm.com \
    --cc=akpm@linux-foundation.org \
    --cc=ard.biesheuvel@linaro.org \
    --cc=arnd@arndb.de \
    --cc=bp@alien8.de \
    --cc=catalin.marinas@arm.com \
    --cc=dave.hansen@linux.intel.com \
    --cc=hpa@zytor.com \
    --cc=james.morse@arm.com \
    --cc=jglisse@redhat.com \
    --cc=kan.liang@linux.intel.com \
    --cc=linux-arm-kernel@lists.infradead.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=luto@kernel.org \
    --cc=mingo@redhat.com \
    --cc=peterz@infradead.org \
    --cc=tglx@linutronix.de \
    --cc=will@kernel.org \
    --cc=x86@kernel.org \
    --cc=zong.li@sifive.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Linux-mm Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/linux-mm/0 linux-mm/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 linux-mm linux-mm/ https://lore.kernel.org/linux-mm \
		linux-mm@kvack.org
	public-inbox-index linux-mm

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kvack.linux-mm


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git