All of lore.kernel.org
 help / color / mirror / Atom feed
From: Steven Price <steven.price@arm.com>
To: Dave Hansen <dave.hansen@intel.com>,
	linux-mm@kvack.org,
	Linus Torvalds <torvalds@linux-foundation.org>
Cc: "Mark Rutland" <Mark.Rutland@arm.com>,
	"Arnd Bergmann" <arnd@arndb.de>,
	"Ard Biesheuvel" <ard.biesheuvel@linaro.org>,
	"Peter Zijlstra" <peterz@infradead.org>,
	"Catalin Marinas" <catalin.marinas@arm.com>,
	x86@kernel.org, "Will Deacon" <will.deacon@arm.com>,
	linux-kernel@vger.kernel.org,
	"Jérôme Glisse" <jglisse@redhat.com>,
	"Ingo Molnar" <mingo@redhat.com>,
	"Borislav Petkov" <bp@alien8.de>,
	"Andy Lutomirski" <luto@kernel.org>,
	"H. Peter Anvin" <hpa@zytor.com>,
	"James Morse" <james.morse@arm.com>,
	"Thomas Gleixner" <tglx@linutronix.de>,
	"Andrew Morton" <akpm@linux-foundation.org>,
	linux-arm-kernel@lists.infradead.org, "Liang,
	Kan" <kan.liang@linux.intel.com>,
	"Steven Price" <steven.price@arm.com>
Subject: [RFC PATCH 3/3] x86: mm: Switch to using generic pt_dump
Date: Wed, 17 Apr 2019 15:34:23 +0100	[thread overview]
Message-ID: <20190417143423.26665-3-steven.price@arm.com> (raw)
In-Reply-To: <20190417143423.26665-1-steven.price@arm.com>

Instead of providing our own callbacks for walking the page tables,
switch to using the generic version.

Signed-off-by: Steven Price <steven.price@arm.com>
---
 arch/x86/Kconfig              |   1 +
 arch/x86/Kconfig.debug        |  20 +--
 arch/x86/mm/Makefile          |   4 +-
 arch/x86/mm/dump_pagetables.c | 297 +++++++---------------------------
 4 files changed, 62 insertions(+), 260 deletions(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index c1f9b3cf437c..122c24055f02 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -106,6 +106,7 @@ config X86
 	select GENERIC_IRQ_RESERVATION_MODE
 	select GENERIC_IRQ_SHOW
 	select GENERIC_PENDING_IRQ		if SMP
+	select GENERIC_PTDUMP
 	select GENERIC_SMP_IDLE_THREAD
 	select GENERIC_STRNCPY_FROM_USER
 	select GENERIC_STRNLEN_USER
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index 15d0fbe27872..dc1dfe213657 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -62,26 +62,10 @@ config EARLY_PRINTK_USB_XDBC
 config MCSAFE_TEST
 	def_bool n
 
-config X86_PTDUMP_CORE
-	def_bool n
-
-config X86_PTDUMP
-	tristate "Export kernel pagetable layout to userspace via debugfs"
-	depends on DEBUG_KERNEL
-	select DEBUG_FS
-	select X86_PTDUMP_CORE
-	---help---
-	  Say Y here if you want to show the kernel pagetable layout in a
-	  debugfs file. This information is only useful for kernel developers
-	  who are working in architecture specific areas of the kernel.
-	  It is probably not a good idea to enable this feature in a production
-	  kernel.
-	  If in doubt, say "N"
-
 config EFI_PGT_DUMP
 	bool "Dump the EFI pagetable"
 	depends on EFI
-	select X86_PTDUMP_CORE
+	select PTDUMP_CORE
 	---help---
 	  Enable this if you want to dump the EFI page table before
 	  enabling virtual mode. This can be used to debug miscellaneous
@@ -90,7 +74,7 @@ config EFI_PGT_DUMP
 
 config DEBUG_WX
 	bool "Warn on W+X mappings at boot"
-	select X86_PTDUMP_CORE
+	select PTDUMP_CORE
 	---help---
 	  Generate a warning if any W+X mappings are found at boot.
 
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index 4b101dd6e52f..5233190fc6bf 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -28,8 +28,8 @@ obj-$(CONFIG_X86_PAT)		+= pat_rbtree.o
 obj-$(CONFIG_X86_32)		+= pgtable_32.o iomap_32.o
 
 obj-$(CONFIG_HUGETLB_PAGE)	+= hugetlbpage.o
-obj-$(CONFIG_X86_PTDUMP_CORE)	+= dump_pagetables.o
-obj-$(CONFIG_X86_PTDUMP)	+= debug_pagetables.o
+obj-$(CONFIG_PTDUMP_CORE)	+= dump_pagetables.o
+obj-$(CONFIG_PTDUMP_DEBUGFS)	+= debug_pagetables.o
 
 obj-$(CONFIG_HIGHMEM)		+= highmem_32.o
 
diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c
index f6b814aaddf7..955824c7cddb 100644
--- a/arch/x86/mm/dump_pagetables.c
+++ b/arch/x86/mm/dump_pagetables.c
@@ -20,6 +20,7 @@
 #include <linux/seq_file.h>
 #include <linux/highmem.h>
 #include <linux/pci.h>
+#include <linux/ptdump.h>
 
 #include <asm/e820/types.h>
 #include <asm/pgtable.h>
@@ -30,15 +31,12 @@
  * when a "break" in the continuity is found.
  */
 struct pg_state {
+	struct ptdump_state ptdump;
 	int level;
-	pgprot_t current_prot;
+	pgprotval_t current_prot;
 	pgprotval_t effective_prot;
-	pgprotval_t effective_prot_pgd;
-	pgprotval_t effective_prot_p4d;
-	pgprotval_t effective_prot_pud;
-	pgprotval_t effective_prot_pmd;
+	pgprotval_t prot_levels[5];
 	unsigned long start_address;
-	unsigned long current_address;
 	const struct addr_marker *marker;
 	unsigned long lines;
 	bool to_dmesg;
@@ -179,9 +177,8 @@ static struct addr_marker address_markers[] = {
 /*
  * Print a readable form of a pgprot_t to the seq_file
  */
-static void printk_prot(struct seq_file *m, pgprot_t prot, int level, bool dmsg)
+static void printk_prot(struct seq_file *m, pgprotval_t pr, int level, bool dmsg)
 {
-	pgprotval_t pr = pgprot_val(prot);
 	static const char * const level_name[] =
 		{ "cr3", "pgd", "p4d", "pud", "pmd", "pte" };
 
@@ -228,24 +225,11 @@ static void printk_prot(struct seq_file *m, pgprot_t prot, int level, bool dmsg)
 	pt_dump_cont_printf(m, dmsg, "%s\n", level_name[level]);
 }
 
-/*
- * On 64 bits, sign-extend the 48 bit address to 64 bit
- */
-static unsigned long normalize_addr(unsigned long u)
-{
-	int shift;
-	if (!IS_ENABLED(CONFIG_X86_64))
-		return u;
-
-	shift = 64 - (__VIRTUAL_MASK_SHIFT + 1);
-	return (signed long)(u << shift) >> shift;
-}
-
-static void note_wx(struct pg_state *st)
+static void note_wx(struct pg_state *st, unsigned long addr)
 {
 	unsigned long npages;
 
-	npages = (st->current_address - st->start_address) / PAGE_SIZE;
+	npages = (addr - st->start_address) / PAGE_SIZE;
 
 #ifdef CONFIG_PCI_BIOS
 	/*
@@ -253,7 +237,7 @@ static void note_wx(struct pg_state *st)
 	 * Inform about it, but avoid the warning.
 	 */
 	if (pcibios_enabled && st->start_address >= PAGE_OFFSET + BIOS_BEGIN &&
-	    st->current_address <= PAGE_OFFSET + BIOS_END) {
+	    addr <= PAGE_OFFSET + BIOS_END) {
 		pr_warn_once("x86/mm: PCI BIOS W+X mapping %lu pages\n", npages);
 		return;
 	}
@@ -264,25 +248,44 @@ static void note_wx(struct pg_state *st)
 		  (void *)st->start_address);
 }
 
+static inline pgprotval_t effective_prot(pgprotval_t prot1, pgprotval_t prot2)
+{
+	return (prot1 & prot2 & (_PAGE_USER | _PAGE_RW)) |
+	       ((prot1 | prot2) & _PAGE_NX);
+}
+
 /*
  * This function gets called on a break in a continuous series
  * of PTE entries; the next one is different so we need to
  * print what we collected so far.
  */
-static void note_page(struct pg_state *st, pgprot_t new_prot,
-		      pgprotval_t new_eff, int level)
+static void note_page(struct ptdump_state *pt_st, unsigned long addr, int level,
+		      unsigned long val)
 {
-	pgprotval_t prot, cur, eff;
+	struct pg_state *st = container_of(pt_st, struct pg_state, ptdump);
+	pgprotval_t new_prot, new_eff;
+	pgprotval_t cur, eff;
 	static const char units[] = "BKMGTPE";
 	struct seq_file *m = st->seq;
 
+	new_prot = val & PTE_FLAGS_MASK;
+
+	if (level > 1) {
+		new_eff = effective_prot(st->prot_levels[level - 2],
+					 new_prot);
+	} else {
+		new_eff = new_prot;
+	}
+
+	if (level > 0)
+		st->prot_levels[level-1] = new_eff;
+
 	/*
 	 * If we have a "break" in the series, we need to flush the state that
 	 * we have now. "break" is either changing perms, levels or
 	 * address space marker.
 	 */
-	prot = pgprot_val(new_prot);
-	cur = pgprot_val(st->current_prot);
+	cur = st->current_prot;
 	eff = st->effective_prot;
 
 	if (!st->level) {
@@ -294,14 +297,14 @@ static void note_page(struct pg_state *st, pgprot_t new_prot,
 		st->lines = 0;
 		pt_dump_seq_printf(m, st->to_dmesg, "---[ %s ]---\n",
 				   st->marker->name);
-	} else if (prot != cur || new_eff != eff || level != st->level ||
-		   st->current_address >= st->marker[1].start_address) {
+	} else if (new_prot != cur || new_eff != eff || level != st->level ||
+		   addr >= st->marker[1].start_address) {
 		const char *unit = units;
 		unsigned long delta;
 		int width = sizeof(unsigned long) * 2;
 
 		if (st->check_wx && (eff & _PAGE_RW) && !(eff & _PAGE_NX))
-			note_wx(st);
+			note_wx(st, addr);
 
 		/*
 		 * Now print the actual finished series
@@ -311,9 +314,9 @@ static void note_page(struct pg_state *st, pgprot_t new_prot,
 			pt_dump_seq_printf(m, st->to_dmesg,
 					   "0x%0*lx-0x%0*lx   ",
 					   width, st->start_address,
-					   width, st->current_address);
+					   width, addr);
 
-			delta = st->current_address - st->start_address;
+			delta = addr - st->start_address;
 			while (!(delta & 1023) && unit[1]) {
 				delta >>= 10;
 				unit++;
@@ -331,7 +334,7 @@ static void note_page(struct pg_state *st, pgprot_t new_prot,
 		 * such as the start of vmalloc space etc.
 		 * This helps in the interpretation.
 		 */
-		if (st->current_address >= st->marker[1].start_address) {
+		if (addr >= st->marker[1].start_address) {
 			if (st->marker->max_lines &&
 			    st->lines > st->marker->max_lines) {
 				unsigned long nskip =
@@ -347,228 +350,42 @@ static void note_page(struct pg_state *st, pgprot_t new_prot,
 					   st->marker->name);
 		}
 
-		st->start_address = st->current_address;
+		st->start_address = addr;
 		st->current_prot = new_prot;
 		st->effective_prot = new_eff;
 		st->level = level;
 	}
 }
 
-static inline pgprotval_t effective_prot(pgprotval_t prot1, pgprotval_t prot2)
-{
-	return (prot1 & prot2 & (_PAGE_USER | _PAGE_RW)) |
-	       ((prot1 | prot2) & _PAGE_NX);
-}
-
-static int ptdump_pte_entry(pte_t *pte, unsigned long addr,
-			    unsigned long next, struct mm_walk *walk)
-{
-	struct pg_state *st = walk->private;
-	pgprotval_t eff, prot;
-
-	st->current_address = normalize_addr(addr);
-
-	prot = pte_flags(*pte);
-	eff = effective_prot(st->effective_prot_pmd, prot);
-	note_page(st, __pgprot(prot), eff, 5);
-
-	return 0;
-}
-
-#ifdef CONFIG_KASAN
-
-/*
- * This is an optimization for KASAN=y case. Since all kasan page tables
- * eventually point to the kasan_early_shadow_page we could call note_page()
- * right away without walking through lower level page tables. This saves
- * us dozens of seconds (minutes for 5-level config) while checking for
- * W+X mapping or reading kernel_page_tables debugfs file.
- */
-static inline bool kasan_page_table(struct pg_state *st, void *pt)
-{
-	if (__pa(pt) == __pa(kasan_early_shadow_pmd) ||
-	    (pgtable_l5_enabled() &&
-			__pa(pt) == __pa(kasan_early_shadow_p4d)) ||
-	    __pa(pt) == __pa(kasan_early_shadow_pud)) {
-		pgprotval_t prot = pte_flags(kasan_early_shadow_pte[0]);
-		note_page(st, __pgprot(prot), 0, 5);
-		return true;
-	}
-	return false;
-}
-#else
-static inline bool kasan_page_table(struct pg_state *st, void *pt)
-{
-	return false;
-}
-#endif
-
-static int ptdump_test_pmd(unsigned long addr, unsigned long next,
-			   pmd_t *pmd, struct mm_walk *walk)
-{
-	struct pg_state *st = walk->private;
-
-	st->current_address = normalize_addr(addr);
-
-	if (kasan_page_table(st, pmd))
-		return 1;
-	return 0;
-}
-
-static int ptdump_pmd_entry(pmd_t *pmd, unsigned long addr,
-			    unsigned long next, struct mm_walk *walk)
-{
-	struct pg_state *st = walk->private;
-	pgprotval_t eff, prot;
-
-	prot = pmd_flags(*pmd);
-	eff = effective_prot(st->effective_prot_pud, prot);
-
-	st->current_address = normalize_addr(addr);
-
-	if (pmd_large(*pmd))
-		note_page(st, __pgprot(prot), eff, 4);
-
-	st->effective_prot_pmd = eff;
-
-	return 0;
-}
-
-static int ptdump_test_pud(unsigned long addr, unsigned long next,
-			   pud_t *pud, struct mm_walk *walk)
-{
-	struct pg_state *st = walk->private;
-
-	st->current_address = normalize_addr(addr);
-
-	if (kasan_page_table(st, pud))
-		return 1;
-	return 0;
-}
-
-static int ptdump_pud_entry(pud_t *pud, unsigned long addr,
-			    unsigned long next, struct mm_walk *walk)
-{
-	struct pg_state *st = walk->private;
-	pgprotval_t eff, prot;
-
-	prot = pud_flags(*pud);
-	eff = effective_prot(st->effective_prot_p4d, prot);
-
-	st->current_address = normalize_addr(addr);
-
-	if (pud_large(*pud))
-		note_page(st, __pgprot(prot), eff, 3);
-
-	st->effective_prot_pud = eff;
-
-	return 0;
-}
-
-static int ptdump_test_p4d(unsigned long addr, unsigned long next,
-			   p4d_t *p4d, struct mm_walk *walk)
-{
-	struct pg_state *st = walk->private;
-
-	st->current_address = normalize_addr(addr);
-
-	if (kasan_page_table(st, p4d))
-		return 1;
-	return 0;
-}
-
-static int ptdump_p4d_entry(p4d_t *p4d, unsigned long addr,
-			    unsigned long next, struct mm_walk *walk)
-{
-	struct pg_state *st = walk->private;
-	pgprotval_t eff, prot;
-
-	prot = p4d_flags(*p4d);
-	eff = effective_prot(st->effective_prot_pgd, prot);
-
-	st->current_address = normalize_addr(addr);
-
-	if (p4d_large(*p4d))
-		note_page(st, __pgprot(prot), eff, 2);
-
-	st->effective_prot_p4d = eff;
-
-	return 0;
-}
-
-static int ptdump_pgd_entry(pgd_t *pgd, unsigned long addr,
-			    unsigned long next, struct mm_walk *walk)
-{
-	struct pg_state *st = walk->private;
-	pgprotval_t eff, prot;
+static const struct ptdump_range ptdump_ranges[] = {
+#ifdef CONFIG_X86_64
 
-	prot = pgd_flags(*pgd);
+#define normalize_addr_shift (64 - (__VIRTUAL_MASK_SHIFT + 1))
+#define normalize_addr(u) ((signed long)(u << normalize_addr_shift) >> normalize_addr_shift)
 
-#ifdef CONFIG_X86_PAE
-	eff = _PAGE_USER | _PAGE_RW;
+	{0, PTRS_PER_PGD * PGD_LEVEL_MULT / 2},
+	{normalize_addr(PTRS_PER_PGD * PGD_LEVEL_MULT / 2), ~0UL},
 #else
-	eff = prot;
+	{0, ~0UL},
 #endif
-
-	st->current_address = normalize_addr(addr);
-
-	if (pgd_large(*pgd))
-		note_page(st, __pgprot(prot), eff, 1);
-
-	st->effective_prot_pgd = eff;
-
-	return 0;
-}
-
-static int ptdump_hole(unsigned long addr, unsigned long next,
-		       struct mm_walk *walk)
-{
-	struct pg_state *st = walk->private;
-
-	st->current_address = normalize_addr(addr);
-
-	note_page(st, __pgprot(0), 0, -1);
-
-	return 0;
-}
+	{0, 0}
+};
 
 static void ptdump_walk_pgd_level_core(struct seq_file *m, struct mm_struct *mm,
 				       bool checkwx, bool dmesg)
 {
-	struct pg_state st = {};
-	struct mm_walk walk = {
-		.mm		= mm,
-		.pgd_entry	= ptdump_pgd_entry,
-		.p4d_entry	= ptdump_p4d_entry,
-		.pud_entry	= ptdump_pud_entry,
-		.pmd_entry	= ptdump_pmd_entry,
-		.pte_entry	= ptdump_pte_entry,
-		.test_p4d	= ptdump_test_p4d,
-		.test_pud	= ptdump_test_pud,
-		.test_pmd	= ptdump_test_pmd,
-		.pte_hole	= ptdump_hole,
-		.private	= &st
+	struct pg_state st = {
+		.ptdump = {
+			.note_page	= note_page,
+			.range		= ptdump_ranges
+		},
+		.to_dmesg	= dmesg,
+		.check_wx	= checkwx,
+		.seq		= m
 	};
 
-	st.to_dmesg = dmesg;
-	st.check_wx = checkwx;
-	st.seq = m;
-	if (checkwx)
-		st.wx_pages = 0;
-
-	down_read(&mm->mmap_sem);
-#ifdef CONFIG_X86_64
-	walk_page_range(0, PTRS_PER_PGD*PGD_LEVEL_MULT/2, &walk);
-	walk_page_range(normalize_addr(PTRS_PER_PGD*PGD_LEVEL_MULT/2), ~0,
-			&walk);
-#else
-	walk_page_range(0, ~0, &walk);
-#endif
-	up_read(&mm->mmap_sem);
+	ptdump_walk_pgd(&st.ptdump, mm);
 
-	/* Flush out the last page */
-	st.current_address = normalize_addr(PTRS_PER_PGD*PGD_LEVEL_MULT);
-	note_page(&st, __pgprot(0), 0, 0);
 	if (!checkwx)
 		return;
 	if (st.wx_pages)
-- 
2.20.1


WARNING: multiple messages have this Message-ID (diff)
From: Steven Price <steven.price@arm.com>
To: Dave Hansen <dave.hansen@intel.com>,
	linux-mm@kvack.org,
	Linus Torvalds <torvalds@linux-foundation.org>
Cc: "Mark Rutland" <Mark.Rutland@arm.com>,
	"Arnd Bergmann" <arnd@arndb.de>,
	"Ard Biesheuvel" <ard.biesheuvel@linaro.org>,
	"Peter Zijlstra" <peterz@infradead.org>,
	"Catalin Marinas" <catalin.marinas@arm.com>,
	x86@kernel.org, "Will Deacon" <will.deacon@arm.com>,
	linux-kernel@vger.kernel.org,
	"Steven Price" <steven.price@arm.com>,
	"Jérôme Glisse" <jglisse@redhat.com>,
	"Ingo Molnar" <mingo@redhat.com>,
	"Borislav Petkov" <bp@alien8.de>,
	"Andy Lutomirski" <luto@kernel.org>,
	"H. Peter Anvin" <hpa@zytor.com>,
	"James Morse" <james.morse@arm.com>,
	"Thomas Gleixner" <tglx@linutronix.de>,
	"Andrew Morton" <akpm@linux-foundation.org>,
	linux-arm-kernel@lists.infradead.org, "Liang,
	Kan" <kan.liang@linux.intel.com>
Subject: [RFC PATCH 3/3] x86: mm: Switch to using generic pt_dump
Date: Wed, 17 Apr 2019 15:34:23 +0100	[thread overview]
Message-ID: <20190417143423.26665-3-steven.price@arm.com> (raw)
In-Reply-To: <20190417143423.26665-1-steven.price@arm.com>

Instead of providing our own callbacks for walking the page tables,
switch to using the generic version.

Signed-off-by: Steven Price <steven.price@arm.com>
---
 arch/x86/Kconfig              |   1 +
 arch/x86/Kconfig.debug        |  20 +--
 arch/x86/mm/Makefile          |   4 +-
 arch/x86/mm/dump_pagetables.c | 297 +++++++---------------------------
 4 files changed, 62 insertions(+), 260 deletions(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index c1f9b3cf437c..122c24055f02 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -106,6 +106,7 @@ config X86
 	select GENERIC_IRQ_RESERVATION_MODE
 	select GENERIC_IRQ_SHOW
 	select GENERIC_PENDING_IRQ		if SMP
+	select GENERIC_PTDUMP
 	select GENERIC_SMP_IDLE_THREAD
 	select GENERIC_STRNCPY_FROM_USER
 	select GENERIC_STRNLEN_USER
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index 15d0fbe27872..dc1dfe213657 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -62,26 +62,10 @@ config EARLY_PRINTK_USB_XDBC
 config MCSAFE_TEST
 	def_bool n
 
-config X86_PTDUMP_CORE
-	def_bool n
-
-config X86_PTDUMP
-	tristate "Export kernel pagetable layout to userspace via debugfs"
-	depends on DEBUG_KERNEL
-	select DEBUG_FS
-	select X86_PTDUMP_CORE
-	---help---
-	  Say Y here if you want to show the kernel pagetable layout in a
-	  debugfs file. This information is only useful for kernel developers
-	  who are working in architecture specific areas of the kernel.
-	  It is probably not a good idea to enable this feature in a production
-	  kernel.
-	  If in doubt, say "N"
-
 config EFI_PGT_DUMP
 	bool "Dump the EFI pagetable"
 	depends on EFI
-	select X86_PTDUMP_CORE
+	select PTDUMP_CORE
 	---help---
 	  Enable this if you want to dump the EFI page table before
 	  enabling virtual mode. This can be used to debug miscellaneous
@@ -90,7 +74,7 @@ config EFI_PGT_DUMP
 
 config DEBUG_WX
 	bool "Warn on W+X mappings at boot"
-	select X86_PTDUMP_CORE
+	select PTDUMP_CORE
 	---help---
 	  Generate a warning if any W+X mappings are found at boot.
 
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index 4b101dd6e52f..5233190fc6bf 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -28,8 +28,8 @@ obj-$(CONFIG_X86_PAT)		+= pat_rbtree.o
 obj-$(CONFIG_X86_32)		+= pgtable_32.o iomap_32.o
 
 obj-$(CONFIG_HUGETLB_PAGE)	+= hugetlbpage.o
-obj-$(CONFIG_X86_PTDUMP_CORE)	+= dump_pagetables.o
-obj-$(CONFIG_X86_PTDUMP)	+= debug_pagetables.o
+obj-$(CONFIG_PTDUMP_CORE)	+= dump_pagetables.o
+obj-$(CONFIG_PTDUMP_DEBUGFS)	+= debug_pagetables.o
 
 obj-$(CONFIG_HIGHMEM)		+= highmem_32.o
 
diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c
index f6b814aaddf7..955824c7cddb 100644
--- a/arch/x86/mm/dump_pagetables.c
+++ b/arch/x86/mm/dump_pagetables.c
@@ -20,6 +20,7 @@
 #include <linux/seq_file.h>
 #include <linux/highmem.h>
 #include <linux/pci.h>
+#include <linux/ptdump.h>
 
 #include <asm/e820/types.h>
 #include <asm/pgtable.h>
@@ -30,15 +31,12 @@
  * when a "break" in the continuity is found.
  */
 struct pg_state {
+	struct ptdump_state ptdump;
 	int level;
-	pgprot_t current_prot;
+	pgprotval_t current_prot;
 	pgprotval_t effective_prot;
-	pgprotval_t effective_prot_pgd;
-	pgprotval_t effective_prot_p4d;
-	pgprotval_t effective_prot_pud;
-	pgprotval_t effective_prot_pmd;
+	pgprotval_t prot_levels[5];
 	unsigned long start_address;
-	unsigned long current_address;
 	const struct addr_marker *marker;
 	unsigned long lines;
 	bool to_dmesg;
@@ -179,9 +177,8 @@ static struct addr_marker address_markers[] = {
 /*
  * Print a readable form of a pgprot_t to the seq_file
  */
-static void printk_prot(struct seq_file *m, pgprot_t prot, int level, bool dmsg)
+static void printk_prot(struct seq_file *m, pgprotval_t pr, int level, bool dmsg)
 {
-	pgprotval_t pr = pgprot_val(prot);
 	static const char * const level_name[] =
 		{ "cr3", "pgd", "p4d", "pud", "pmd", "pte" };
 
@@ -228,24 +225,11 @@ static void printk_prot(struct seq_file *m, pgprot_t prot, int level, bool dmsg)
 	pt_dump_cont_printf(m, dmsg, "%s\n", level_name[level]);
 }
 
-/*
- * On 64 bits, sign-extend the 48 bit address to 64 bit
- */
-static unsigned long normalize_addr(unsigned long u)
-{
-	int shift;
-	if (!IS_ENABLED(CONFIG_X86_64))
-		return u;
-
-	shift = 64 - (__VIRTUAL_MASK_SHIFT + 1);
-	return (signed long)(u << shift) >> shift;
-}
-
-static void note_wx(struct pg_state *st)
+static void note_wx(struct pg_state *st, unsigned long addr)
 {
 	unsigned long npages;
 
-	npages = (st->current_address - st->start_address) / PAGE_SIZE;
+	npages = (addr - st->start_address) / PAGE_SIZE;
 
 #ifdef CONFIG_PCI_BIOS
 	/*
@@ -253,7 +237,7 @@ static void note_wx(struct pg_state *st)
 	 * Inform about it, but avoid the warning.
 	 */
 	if (pcibios_enabled && st->start_address >= PAGE_OFFSET + BIOS_BEGIN &&
-	    st->current_address <= PAGE_OFFSET + BIOS_END) {
+	    addr <= PAGE_OFFSET + BIOS_END) {
 		pr_warn_once("x86/mm: PCI BIOS W+X mapping %lu pages\n", npages);
 		return;
 	}
@@ -264,25 +248,44 @@ static void note_wx(struct pg_state *st)
 		  (void *)st->start_address);
 }
 
+static inline pgprotval_t effective_prot(pgprotval_t prot1, pgprotval_t prot2)
+{
+	return (prot1 & prot2 & (_PAGE_USER | _PAGE_RW)) |
+	       ((prot1 | prot2) & _PAGE_NX);
+}
+
 /*
  * This function gets called on a break in a continuous series
  * of PTE entries; the next one is different so we need to
  * print what we collected so far.
  */
-static void note_page(struct pg_state *st, pgprot_t new_prot,
-		      pgprotval_t new_eff, int level)
+static void note_page(struct ptdump_state *pt_st, unsigned long addr, int level,
+		      unsigned long val)
 {
-	pgprotval_t prot, cur, eff;
+	struct pg_state *st = container_of(pt_st, struct pg_state, ptdump);
+	pgprotval_t new_prot, new_eff;
+	pgprotval_t cur, eff;
 	static const char units[] = "BKMGTPE";
 	struct seq_file *m = st->seq;
 
+	new_prot = val & PTE_FLAGS_MASK;
+
+	if (level > 1) {
+		new_eff = effective_prot(st->prot_levels[level - 2],
+					 new_prot);
+	} else {
+		new_eff = new_prot;
+	}
+
+	if (level > 0)
+		st->prot_levels[level-1] = new_eff;
+
 	/*
 	 * If we have a "break" in the series, we need to flush the state that
 	 * we have now. "break" is either changing perms, levels or
 	 * address space marker.
 	 */
-	prot = pgprot_val(new_prot);
-	cur = pgprot_val(st->current_prot);
+	cur = st->current_prot;
 	eff = st->effective_prot;
 
 	if (!st->level) {
@@ -294,14 +297,14 @@ static void note_page(struct pg_state *st, pgprot_t new_prot,
 		st->lines = 0;
 		pt_dump_seq_printf(m, st->to_dmesg, "---[ %s ]---\n",
 				   st->marker->name);
-	} else if (prot != cur || new_eff != eff || level != st->level ||
-		   st->current_address >= st->marker[1].start_address) {
+	} else if (new_prot != cur || new_eff != eff || level != st->level ||
+		   addr >= st->marker[1].start_address) {
 		const char *unit = units;
 		unsigned long delta;
 		int width = sizeof(unsigned long) * 2;
 
 		if (st->check_wx && (eff & _PAGE_RW) && !(eff & _PAGE_NX))
-			note_wx(st);
+			note_wx(st, addr);
 
 		/*
 		 * Now print the actual finished series
@@ -311,9 +314,9 @@ static void note_page(struct pg_state *st, pgprot_t new_prot,
 			pt_dump_seq_printf(m, st->to_dmesg,
 					   "0x%0*lx-0x%0*lx   ",
 					   width, st->start_address,
-					   width, st->current_address);
+					   width, addr);
 
-			delta = st->current_address - st->start_address;
+			delta = addr - st->start_address;
 			while (!(delta & 1023) && unit[1]) {
 				delta >>= 10;
 				unit++;
@@ -331,7 +334,7 @@ static void note_page(struct pg_state *st, pgprot_t new_prot,
 		 * such as the start of vmalloc space etc.
 		 * This helps in the interpretation.
 		 */
-		if (st->current_address >= st->marker[1].start_address) {
+		if (addr >= st->marker[1].start_address) {
 			if (st->marker->max_lines &&
 			    st->lines > st->marker->max_lines) {
 				unsigned long nskip =
@@ -347,228 +350,42 @@ static void note_page(struct pg_state *st, pgprot_t new_prot,
 					   st->marker->name);
 		}
 
-		st->start_address = st->current_address;
+		st->start_address = addr;
 		st->current_prot = new_prot;
 		st->effective_prot = new_eff;
 		st->level = level;
 	}
 }
 
-static inline pgprotval_t effective_prot(pgprotval_t prot1, pgprotval_t prot2)
-{
-	return (prot1 & prot2 & (_PAGE_USER | _PAGE_RW)) |
-	       ((prot1 | prot2) & _PAGE_NX);
-}
-
-static int ptdump_pte_entry(pte_t *pte, unsigned long addr,
-			    unsigned long next, struct mm_walk *walk)
-{
-	struct pg_state *st = walk->private;
-	pgprotval_t eff, prot;
-
-	st->current_address = normalize_addr(addr);
-
-	prot = pte_flags(*pte);
-	eff = effective_prot(st->effective_prot_pmd, prot);
-	note_page(st, __pgprot(prot), eff, 5);
-
-	return 0;
-}
-
-#ifdef CONFIG_KASAN
-
-/*
- * This is an optimization for KASAN=y case. Since all kasan page tables
- * eventually point to the kasan_early_shadow_page we could call note_page()
- * right away without walking through lower level page tables. This saves
- * us dozens of seconds (minutes for 5-level config) while checking for
- * W+X mapping or reading kernel_page_tables debugfs file.
- */
-static inline bool kasan_page_table(struct pg_state *st, void *pt)
-{
-	if (__pa(pt) == __pa(kasan_early_shadow_pmd) ||
-	    (pgtable_l5_enabled() &&
-			__pa(pt) == __pa(kasan_early_shadow_p4d)) ||
-	    __pa(pt) == __pa(kasan_early_shadow_pud)) {
-		pgprotval_t prot = pte_flags(kasan_early_shadow_pte[0]);
-		note_page(st, __pgprot(prot), 0, 5);
-		return true;
-	}
-	return false;
-}
-#else
-static inline bool kasan_page_table(struct pg_state *st, void *pt)
-{
-	return false;
-}
-#endif
-
-static int ptdump_test_pmd(unsigned long addr, unsigned long next,
-			   pmd_t *pmd, struct mm_walk *walk)
-{
-	struct pg_state *st = walk->private;
-
-	st->current_address = normalize_addr(addr);
-
-	if (kasan_page_table(st, pmd))
-		return 1;
-	return 0;
-}
-
-static int ptdump_pmd_entry(pmd_t *pmd, unsigned long addr,
-			    unsigned long next, struct mm_walk *walk)
-{
-	struct pg_state *st = walk->private;
-	pgprotval_t eff, prot;
-
-	prot = pmd_flags(*pmd);
-	eff = effective_prot(st->effective_prot_pud, prot);
-
-	st->current_address = normalize_addr(addr);
-
-	if (pmd_large(*pmd))
-		note_page(st, __pgprot(prot), eff, 4);
-
-	st->effective_prot_pmd = eff;
-
-	return 0;
-}
-
-static int ptdump_test_pud(unsigned long addr, unsigned long next,
-			   pud_t *pud, struct mm_walk *walk)
-{
-	struct pg_state *st = walk->private;
-
-	st->current_address = normalize_addr(addr);
-
-	if (kasan_page_table(st, pud))
-		return 1;
-	return 0;
-}
-
-static int ptdump_pud_entry(pud_t *pud, unsigned long addr,
-			    unsigned long next, struct mm_walk *walk)
-{
-	struct pg_state *st = walk->private;
-	pgprotval_t eff, prot;
-
-	prot = pud_flags(*pud);
-	eff = effective_prot(st->effective_prot_p4d, prot);
-
-	st->current_address = normalize_addr(addr);
-
-	if (pud_large(*pud))
-		note_page(st, __pgprot(prot), eff, 3);
-
-	st->effective_prot_pud = eff;
-
-	return 0;
-}
-
-static int ptdump_test_p4d(unsigned long addr, unsigned long next,
-			   p4d_t *p4d, struct mm_walk *walk)
-{
-	struct pg_state *st = walk->private;
-
-	st->current_address = normalize_addr(addr);
-
-	if (kasan_page_table(st, p4d))
-		return 1;
-	return 0;
-}
-
-static int ptdump_p4d_entry(p4d_t *p4d, unsigned long addr,
-			    unsigned long next, struct mm_walk *walk)
-{
-	struct pg_state *st = walk->private;
-	pgprotval_t eff, prot;
-
-	prot = p4d_flags(*p4d);
-	eff = effective_prot(st->effective_prot_pgd, prot);
-
-	st->current_address = normalize_addr(addr);
-
-	if (p4d_large(*p4d))
-		note_page(st, __pgprot(prot), eff, 2);
-
-	st->effective_prot_p4d = eff;
-
-	return 0;
-}
-
-static int ptdump_pgd_entry(pgd_t *pgd, unsigned long addr,
-			    unsigned long next, struct mm_walk *walk)
-{
-	struct pg_state *st = walk->private;
-	pgprotval_t eff, prot;
+static const struct ptdump_range ptdump_ranges[] = {
+#ifdef CONFIG_X86_64
 
-	prot = pgd_flags(*pgd);
+#define normalize_addr_shift (64 - (__VIRTUAL_MASK_SHIFT + 1))
+#define normalize_addr(u) ((signed long)(u << normalize_addr_shift) >> normalize_addr_shift)
 
-#ifdef CONFIG_X86_PAE
-	eff = _PAGE_USER | _PAGE_RW;
+	{0, PTRS_PER_PGD * PGD_LEVEL_MULT / 2},
+	{normalize_addr(PTRS_PER_PGD * PGD_LEVEL_MULT / 2), ~0UL},
 #else
-	eff = prot;
+	{0, ~0UL},
 #endif
-
-	st->current_address = normalize_addr(addr);
-
-	if (pgd_large(*pgd))
-		note_page(st, __pgprot(prot), eff, 1);
-
-	st->effective_prot_pgd = eff;
-
-	return 0;
-}
-
-static int ptdump_hole(unsigned long addr, unsigned long next,
-		       struct mm_walk *walk)
-{
-	struct pg_state *st = walk->private;
-
-	st->current_address = normalize_addr(addr);
-
-	note_page(st, __pgprot(0), 0, -1);
-
-	return 0;
-}
+	{0, 0}
+};
 
 static void ptdump_walk_pgd_level_core(struct seq_file *m, struct mm_struct *mm,
 				       bool checkwx, bool dmesg)
 {
-	struct pg_state st = {};
-	struct mm_walk walk = {
-		.mm		= mm,
-		.pgd_entry	= ptdump_pgd_entry,
-		.p4d_entry	= ptdump_p4d_entry,
-		.pud_entry	= ptdump_pud_entry,
-		.pmd_entry	= ptdump_pmd_entry,
-		.pte_entry	= ptdump_pte_entry,
-		.test_p4d	= ptdump_test_p4d,
-		.test_pud	= ptdump_test_pud,
-		.test_pmd	= ptdump_test_pmd,
-		.pte_hole	= ptdump_hole,
-		.private	= &st
+	struct pg_state st = {
+		.ptdump = {
+			.note_page	= note_page,
+			.range		= ptdump_ranges
+		},
+		.to_dmesg	= dmesg,
+		.check_wx	= checkwx,
+		.seq		= m
 	};
 
-	st.to_dmesg = dmesg;
-	st.check_wx = checkwx;
-	st.seq = m;
-	if (checkwx)
-		st.wx_pages = 0;
-
-	down_read(&mm->mmap_sem);
-#ifdef CONFIG_X86_64
-	walk_page_range(0, PTRS_PER_PGD*PGD_LEVEL_MULT/2, &walk);
-	walk_page_range(normalize_addr(PTRS_PER_PGD*PGD_LEVEL_MULT/2), ~0,
-			&walk);
-#else
-	walk_page_range(0, ~0, &walk);
-#endif
-	up_read(&mm->mmap_sem);
+	ptdump_walk_pgd(&st.ptdump, mm);
 
-	/* Flush out the last page */
-	st.current_address = normalize_addr(PTRS_PER_PGD*PGD_LEVEL_MULT);
-	note_page(&st, __pgprot(0), 0, 0);
 	if (!checkwx)
 		return;
 	if (st.wx_pages)
-- 
2.20.1


_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

  parent reply	other threads:[~2019-04-17 14:34 UTC|newest]

Thread overview: 74+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-04-03 14:16 [PATCH v8 00/20] Convert x86 & arm64 to use generic page walk Steven Price
2019-04-03 14:16 ` Steven Price
2019-04-03 14:16 ` [PATCH v8 01/20] arc: mm: Add p?d_large() definitions Steven Price
2019-04-03 14:16   ` Steven Price
2019-04-03 14:16   ` Steven Price
2019-04-03 14:16 ` [PATCH v8 02/20] arm64: " Steven Price
2019-04-03 14:16   ` Steven Price
2019-06-11 15:36   ` Will Deacon
2019-06-11 15:36     ` Will Deacon
2019-04-03 14:16 ` [PATCH v8 03/20] mips: " Steven Price
2019-04-03 14:16   ` Steven Price
2019-04-03 14:16 ` [PATCH v8 04/20] powerpc: " Steven Price
2019-04-03 14:16   ` Steven Price
2019-04-03 14:16   ` Steven Price
2019-04-03 14:16   ` Steven Price
2019-04-03 14:16 ` [PATCH v8 05/20] KVM: PPC: Book3S HV: Remove pmd_is_leaf() Steven Price
2019-04-03 14:16   ` Steven Price
2019-04-03 14:16   ` Steven Price
2019-04-03 14:16   ` Steven Price
2019-04-29  2:05   ` Paul Mackerras
2019-04-29  2:05     ` Paul Mackerras
2019-04-29  2:05     ` Paul Mackerras
2019-04-29  2:05     ` Paul Mackerras
2019-05-09 15:03     ` Steven Price
2019-05-09 15:03       ` Steven Price
2019-05-09 15:03       ` Steven Price
2019-05-09 15:03       ` Steven Price
2019-04-03 14:16 ` [PATCH v8 06/20] riscv: mm: Add p?d_large() definitions Steven Price
2019-04-03 14:16   ` Steven Price
2019-04-03 14:16   ` Steven Price
2019-04-05  4:14   ` Anup Patel
2019-04-05  4:14     ` Anup Patel
2019-04-05  4:14     ` Anup Patel
2019-04-03 14:16 ` [PATCH v8 07/20] s390: " Steven Price
2019-04-03 14:16   ` Steven Price
2019-04-03 14:16 ` [PATCH v8 08/20] sparc: " Steven Price
2019-04-03 14:16   ` Steven Price
2019-04-03 14:16   ` Steven Price
2019-04-03 14:16 ` [PATCH v8 09/20] x86: " Steven Price
2019-04-03 14:16   ` Steven Price
2019-04-03 14:16 ` [PATCH v8 10/20] mm: Add generic p?d_large() macros Steven Price
2019-04-03 14:16   ` Steven Price
2019-04-03 14:16 ` [PATCH v8 11/20] mm: pagewalk: Add p4d_entry() and pgd_entry() Steven Price
2019-04-03 14:16   ` Steven Price
2019-04-03 14:16 ` [PATCH v8 12/20] mm: pagewalk: Allow walking without vma Steven Price
2019-04-03 14:16   ` Steven Price
2019-04-03 14:16 ` [PATCH v8 13/20] mm: pagewalk: Add test_p?d callbacks Steven Price
2019-04-03 14:16   ` Steven Price
2019-04-03 14:16 ` [PATCH v8 14/20] arm64: mm: Convert mm/dump.c to use walk_page_range() Steven Price
2019-04-03 14:16   ` Steven Price
2019-04-03 14:16 ` [PATCH v8 15/20] x86: mm: Don't display pages which aren't present in debugfs Steven Price
2019-04-03 14:16   ` Steven Price
2019-04-03 14:16 ` [PATCH v8 16/20] x86: mm: Point to struct seq_file from struct pg_state Steven Price
2019-04-03 14:16   ` Steven Price
2019-04-03 14:16 ` [PATCH v8 17/20] x86: mm+efi: Convert ptdump_walk_pgd_level() to take a mm_struct Steven Price
2019-04-03 14:16   ` Steven Price
2019-04-03 14:16 ` [PATCH v8 18/20] x86: mm: Convert ptdump_walk_pgd_level_debugfs() to take an mm_struct Steven Price
2019-04-03 14:16   ` Steven Price
2019-04-03 14:16 ` [PATCH v8 19/20] x86: mm: Convert ptdump_walk_pgd_level_core() " Steven Price
2019-04-03 14:16   ` Steven Price
2019-04-03 14:16 ` [PATCH v8 20/20] x86: mm: Convert dump_pagetables to use walk_page_range Steven Price
2019-04-03 14:16   ` Steven Price
2019-04-10 14:56 ` [PATCH v8 00/20] Convert x86 & arm64 to use generic page walk Steven Price
2019-04-10 14:56   ` Steven Price
2019-04-12 14:44   ` Dave Hansen
2019-04-12 14:44     ` Dave Hansen
2019-04-17 14:28     ` Steven Price
2019-04-17 14:28       ` Steven Price
2019-04-17 14:34       ` [RFC PATCH 1/3] mm: Add generic ptdump Steven Price
2019-04-17 14:34         ` Steven Price
2019-04-17 14:34         ` [RFC PATCH 2/3] arm64: mm: Switch to using generic pt_dump Steven Price
2019-04-17 14:34           ` Steven Price
2019-04-17 14:34         ` Steven Price [this message]
2019-04-17 14:34           ` [RFC PATCH 3/3] x86: " Steven Price

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190417143423.26665-3-steven.price@arm.com \
    --to=steven.price@arm.com \
    --cc=Mark.Rutland@arm.com \
    --cc=akpm@linux-foundation.org \
    --cc=ard.biesheuvel@linaro.org \
    --cc=arnd@arndb.de \
    --cc=bp@alien8.de \
    --cc=catalin.marinas@arm.com \
    --cc=dave.hansen@intel.com \
    --cc=hpa@zytor.com \
    --cc=james.morse@arm.com \
    --cc=jglisse@redhat.com \
    --cc=kan.liang@linux.intel.com \
    --cc=linux-arm-kernel@lists.infradead.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=luto@kernel.org \
    --cc=mingo@redhat.com \
    --cc=peterz@infradead.org \
    --cc=tglx@linutronix.de \
    --cc=torvalds@linux-foundation.org \
    --cc=will.deacon@arm.com \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.