From: Steven Price <steven.price@arm.com> To: Dave Hansen <dave.hansen@intel.com>, linux-mm@kvack.org, Linus Torvalds <torvalds@linux-foundation.org> Cc: "Mark Rutland" <Mark.Rutland@arm.com>, "Arnd Bergmann" <arnd@arndb.de>, "Ard Biesheuvel" <ard.biesheuvel@linaro.org>, "Peter Zijlstra" <peterz@infradead.org>, "Catalin Marinas" <catalin.marinas@arm.com>, x86@kernel.org, "Will Deacon" <will.deacon@arm.com>, linux-kernel@vger.kernel.org, "Jérôme Glisse" <jglisse@redhat.com>, "Ingo Molnar" <mingo@redhat.com>, "Borislav Petkov" <bp@alien8.de>, "Andy Lutomirski" <luto@kernel.org>, "H. Peter Anvin" <hpa@zytor.com>, "James Morse" <james.morse@arm.com>, "Thomas Gleixner" <tglx@linutronix.de>, "Andrew Morton" <akpm@linux-foundation.org>, linux-arm-kernel@lists.infradead.org, "Liang, Kan" <kan.liang@linux.intel.com>, "Steven Price" <steven.price@arm.com> Subject: [RFC PATCH 3/3] x86: mm: Switch to using generic pt_dump Date: Wed, 17 Apr 2019 15:34:23 +0100 [thread overview] Message-ID: <20190417143423.26665-3-steven.price@arm.com> (raw) In-Reply-To: <20190417143423.26665-1-steven.price@arm.com> Instead of providing our own callbacks for walking the page tables, switch to using the generic version instead. 
Signed-off-by: Steven Price <steven.price@arm.com> --- arch/x86/Kconfig | 1 + arch/x86/Kconfig.debug | 20 +-- arch/x86/mm/Makefile | 4 +- arch/x86/mm/dump_pagetables.c | 297 +++++++--------------------------- 4 files changed, 62 insertions(+), 260 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index c1f9b3cf437c..122c24055f02 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -106,6 +106,7 @@ config X86 select GENERIC_IRQ_RESERVATION_MODE select GENERIC_IRQ_SHOW select GENERIC_PENDING_IRQ if SMP + select GENERIC_PTDUMP select GENERIC_SMP_IDLE_THREAD select GENERIC_STRNCPY_FROM_USER select GENERIC_STRNLEN_USER diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index 15d0fbe27872..dc1dfe213657 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -62,26 +62,10 @@ config EARLY_PRINTK_USB_XDBC config MCSAFE_TEST def_bool n -config X86_PTDUMP_CORE - def_bool n - -config X86_PTDUMP - tristate "Export kernel pagetable layout to userspace via debugfs" - depends on DEBUG_KERNEL - select DEBUG_FS - select X86_PTDUMP_CORE - ---help--- - Say Y here if you want to show the kernel pagetable layout in a - debugfs file. This information is only useful for kernel developers - who are working in architecture specific areas of the kernel. - It is probably not a good idea to enable this feature in a production - kernel. - If in doubt, say "N" - config EFI_PGT_DUMP bool "Dump the EFI pagetable" depends on EFI - select X86_PTDUMP_CORE + select PTDUMP_CORE ---help--- Enable this if you want to dump the EFI page table before enabling virtual mode. This can be used to debug miscellaneous @@ -90,7 +74,7 @@ config EFI_PGT_DUMP config DEBUG_WX bool "Warn on W+X mappings at boot" - select X86_PTDUMP_CORE + select PTDUMP_CORE ---help--- Generate a warning if any W+X mappings are found at boot. 
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile index 4b101dd6e52f..5233190fc6bf 100644 --- a/arch/x86/mm/Makefile +++ b/arch/x86/mm/Makefile @@ -28,8 +28,8 @@ obj-$(CONFIG_X86_PAT) += pat_rbtree.o obj-$(CONFIG_X86_32) += pgtable_32.o iomap_32.o obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o -obj-$(CONFIG_X86_PTDUMP_CORE) += dump_pagetables.o -obj-$(CONFIG_X86_PTDUMP) += debug_pagetables.o +obj-$(CONFIG_PTDUMP_CORE) += dump_pagetables.o +obj-$(CONFIG_PTDUMP_DEBUGFS) += debug_pagetables.o obj-$(CONFIG_HIGHMEM) += highmem_32.o diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c index f6b814aaddf7..955824c7cddb 100644 --- a/arch/x86/mm/dump_pagetables.c +++ b/arch/x86/mm/dump_pagetables.c @@ -20,6 +20,7 @@ #include <linux/seq_file.h> #include <linux/highmem.h> #include <linux/pci.h> +#include <linux/ptdump.h> #include <asm/e820/types.h> #include <asm/pgtable.h> @@ -30,15 +31,12 @@ * when a "break" in the continuity is found. */ struct pg_state { + struct ptdump_state ptdump; int level; - pgprot_t current_prot; + pgprotval_t current_prot; pgprotval_t effective_prot; - pgprotval_t effective_prot_pgd; - pgprotval_t effective_prot_p4d; - pgprotval_t effective_prot_pud; - pgprotval_t effective_prot_pmd; + pgprotval_t prot_levels[5]; unsigned long start_address; - unsigned long current_address; const struct addr_marker *marker; unsigned long lines; bool to_dmesg; @@ -179,9 +177,8 @@ static struct addr_marker address_markers[] = { /* * Print a readable form of a pgprot_t to the seq_file */ -static void printk_prot(struct seq_file *m, pgprot_t prot, int level, bool dmsg) +static void printk_prot(struct seq_file *m, pgprotval_t pr, int level, bool dmsg) { - pgprotval_t pr = pgprot_val(prot); static const char * const level_name[] = { "cr3", "pgd", "p4d", "pud", "pmd", "pte" }; @@ -228,24 +225,11 @@ static void printk_prot(struct seq_file *m, pgprot_t prot, int level, bool dmsg) pt_dump_cont_printf(m, dmsg, "%s\n", level_name[level]); } -/* - * On 
64 bits, sign-extend the 48 bit address to 64 bit - */ -static unsigned long normalize_addr(unsigned long u) -{ - int shift; - if (!IS_ENABLED(CONFIG_X86_64)) - return u; - - shift = 64 - (__VIRTUAL_MASK_SHIFT + 1); - return (signed long)(u << shift) >> shift; -} - -static void note_wx(struct pg_state *st) +static void note_wx(struct pg_state *st, unsigned long addr) { unsigned long npages; - npages = (st->current_address - st->start_address) / PAGE_SIZE; + npages = (addr - st->start_address) / PAGE_SIZE; #ifdef CONFIG_PCI_BIOS /* @@ -253,7 +237,7 @@ static void note_wx(struct pg_state *st) * Inform about it, but avoid the warning. */ if (pcibios_enabled && st->start_address >= PAGE_OFFSET + BIOS_BEGIN && - st->current_address <= PAGE_OFFSET + BIOS_END) { + addr <= PAGE_OFFSET + BIOS_END) { pr_warn_once("x86/mm: PCI BIOS W+X mapping %lu pages\n", npages); return; } @@ -264,25 +248,44 @@ static void note_wx(struct pg_state *st) (void *)st->start_address); } +static inline pgprotval_t effective_prot(pgprotval_t prot1, pgprotval_t prot2) +{ + return (prot1 & prot2 & (_PAGE_USER | _PAGE_RW)) | + ((prot1 | prot2) & _PAGE_NX); +} + /* * This function gets called on a break in a continuous series * of PTE entries; the next one is different so we need to * print what we collected so far. 
*/ -static void note_page(struct pg_state *st, pgprot_t new_prot, - pgprotval_t new_eff, int level) +static void note_page(struct ptdump_state *pt_st, unsigned long addr, int level, + unsigned long val) { - pgprotval_t prot, cur, eff; + struct pg_state *st = container_of(pt_st, struct pg_state, ptdump); + pgprotval_t new_prot, new_eff; + pgprotval_t cur, eff; static const char units[] = "BKMGTPE"; struct seq_file *m = st->seq; + new_prot = val & PTE_FLAGS_MASK; + + if (level > 1) { + new_eff = effective_prot(st->prot_levels[level - 2], + new_prot); + } else { + new_eff = new_prot; + } + + if (level > 0) + st->prot_levels[level-1] = new_eff; + /* * If we have a "break" in the series, we need to flush the state that * we have now. "break" is either changing perms, levels or * address space marker. */ - prot = pgprot_val(new_prot); - cur = pgprot_val(st->current_prot); + cur = st->current_prot; eff = st->effective_prot; if (!st->level) { @@ -294,14 +297,14 @@ static void note_page(struct pg_state *st, pgprot_t new_prot, st->lines = 0; pt_dump_seq_printf(m, st->to_dmesg, "---[ %s ]---\n", st->marker->name); - } else if (prot != cur || new_eff != eff || level != st->level || - st->current_address >= st->marker[1].start_address) { + } else if (new_prot != cur || new_eff != eff || level != st->level || + addr >= st->marker[1].start_address) { const char *unit = units; unsigned long delta; int width = sizeof(unsigned long) * 2; if (st->check_wx && (eff & _PAGE_RW) && !(eff & _PAGE_NX)) - note_wx(st); + note_wx(st, addr); /* * Now print the actual finished series @@ -311,9 +314,9 @@ static void note_page(struct pg_state *st, pgprot_t new_prot, pt_dump_seq_printf(m, st->to_dmesg, "0x%0*lx-0x%0*lx ", width, st->start_address, - width, st->current_address); + width, addr); - delta = st->current_address - st->start_address; + delta = addr - st->start_address; while (!(delta & 1023) && unit[1]) { delta >>= 10; unit++; @@ -331,7 +334,7 @@ static void note_page(struct pg_state 
*st, pgprot_t new_prot, * such as the start of vmalloc space etc. * This helps in the interpretation. */ - if (st->current_address >= st->marker[1].start_address) { + if (addr >= st->marker[1].start_address) { if (st->marker->max_lines && st->lines > st->marker->max_lines) { unsigned long nskip = @@ -347,228 +350,42 @@ static void note_page(struct pg_state *st, pgprot_t new_prot, st->marker->name); } - st->start_address = st->current_address; + st->start_address = addr; st->current_prot = new_prot; st->effective_prot = new_eff; st->level = level; } } -static inline pgprotval_t effective_prot(pgprotval_t prot1, pgprotval_t prot2) -{ - return (prot1 & prot2 & (_PAGE_USER | _PAGE_RW)) | - ((prot1 | prot2) & _PAGE_NX); -} - -static int ptdump_pte_entry(pte_t *pte, unsigned long addr, - unsigned long next, struct mm_walk *walk) -{ - struct pg_state *st = walk->private; - pgprotval_t eff, prot; - - st->current_address = normalize_addr(addr); - - prot = pte_flags(*pte); - eff = effective_prot(st->effective_prot_pmd, prot); - note_page(st, __pgprot(prot), eff, 5); - - return 0; -} - -#ifdef CONFIG_KASAN - -/* - * This is an optimization for KASAN=y case. Since all kasan page tables - * eventually point to the kasan_early_shadow_page we could call note_page() - * right away without walking through lower level page tables. This saves - * us dozens of seconds (minutes for 5-level config) while checking for - * W+X mapping or reading kernel_page_tables debugfs file. 
- */ -static inline bool kasan_page_table(struct pg_state *st, void *pt) -{ - if (__pa(pt) == __pa(kasan_early_shadow_pmd) || - (pgtable_l5_enabled() && - __pa(pt) == __pa(kasan_early_shadow_p4d)) || - __pa(pt) == __pa(kasan_early_shadow_pud)) { - pgprotval_t prot = pte_flags(kasan_early_shadow_pte[0]); - note_page(st, __pgprot(prot), 0, 5); - return true; - } - return false; -} -#else -static inline bool kasan_page_table(struct pg_state *st, void *pt) -{ - return false; -} -#endif - -static int ptdump_test_pmd(unsigned long addr, unsigned long next, - pmd_t *pmd, struct mm_walk *walk) -{ - struct pg_state *st = walk->private; - - st->current_address = normalize_addr(addr); - - if (kasan_page_table(st, pmd)) - return 1; - return 0; -} - -static int ptdump_pmd_entry(pmd_t *pmd, unsigned long addr, - unsigned long next, struct mm_walk *walk) -{ - struct pg_state *st = walk->private; - pgprotval_t eff, prot; - - prot = pmd_flags(*pmd); - eff = effective_prot(st->effective_prot_pud, prot); - - st->current_address = normalize_addr(addr); - - if (pmd_large(*pmd)) - note_page(st, __pgprot(prot), eff, 4); - - st->effective_prot_pmd = eff; - - return 0; -} - -static int ptdump_test_pud(unsigned long addr, unsigned long next, - pud_t *pud, struct mm_walk *walk) -{ - struct pg_state *st = walk->private; - - st->current_address = normalize_addr(addr); - - if (kasan_page_table(st, pud)) - return 1; - return 0; -} - -static int ptdump_pud_entry(pud_t *pud, unsigned long addr, - unsigned long next, struct mm_walk *walk) -{ - struct pg_state *st = walk->private; - pgprotval_t eff, prot; - - prot = pud_flags(*pud); - eff = effective_prot(st->effective_prot_p4d, prot); - - st->current_address = normalize_addr(addr); - - if (pud_large(*pud)) - note_page(st, __pgprot(prot), eff, 3); - - st->effective_prot_pud = eff; - - return 0; -} - -static int ptdump_test_p4d(unsigned long addr, unsigned long next, - p4d_t *p4d, struct mm_walk *walk) -{ - struct pg_state *st = walk->private; - - 
st->current_address = normalize_addr(addr); - - if (kasan_page_table(st, p4d)) - return 1; - return 0; -} - -static int ptdump_p4d_entry(p4d_t *p4d, unsigned long addr, - unsigned long next, struct mm_walk *walk) -{ - struct pg_state *st = walk->private; - pgprotval_t eff, prot; - - prot = p4d_flags(*p4d); - eff = effective_prot(st->effective_prot_pgd, prot); - - st->current_address = normalize_addr(addr); - - if (p4d_large(*p4d)) - note_page(st, __pgprot(prot), eff, 2); - - st->effective_prot_p4d = eff; - - return 0; -} - -static int ptdump_pgd_entry(pgd_t *pgd, unsigned long addr, - unsigned long next, struct mm_walk *walk) -{ - struct pg_state *st = walk->private; - pgprotval_t eff, prot; +static const struct ptdump_range ptdump_ranges[] = { +#ifdef CONFIG_X86_64 - prot = pgd_flags(*pgd); +#define normalize_addr_shift (64 - (__VIRTUAL_MASK_SHIFT + 1)) +#define normalize_addr(u) ((signed long)(u << normalize_addr_shift) >> normalize_addr_shift) -#ifdef CONFIG_X86_PAE - eff = _PAGE_USER | _PAGE_RW; + {0, PTRS_PER_PGD * PGD_LEVEL_MULT / 2}, + {normalize_addr(PTRS_PER_PGD * PGD_LEVEL_MULT / 2), ~0UL}, #else - eff = prot; + {0, ~0UL}, #endif - - st->current_address = normalize_addr(addr); - - if (pgd_large(*pgd)) - note_page(st, __pgprot(prot), eff, 1); - - st->effective_prot_pgd = eff; - - return 0; -} - -static int ptdump_hole(unsigned long addr, unsigned long next, - struct mm_walk *walk) -{ - struct pg_state *st = walk->private; - - st->current_address = normalize_addr(addr); - - note_page(st, __pgprot(0), 0, -1); - - return 0; -} + {0, 0} +}; static void ptdump_walk_pgd_level_core(struct seq_file *m, struct mm_struct *mm, bool checkwx, bool dmesg) { - struct pg_state st = {}; - struct mm_walk walk = { - .mm = mm, - .pgd_entry = ptdump_pgd_entry, - .p4d_entry = ptdump_p4d_entry, - .pud_entry = ptdump_pud_entry, - .pmd_entry = ptdump_pmd_entry, - .pte_entry = ptdump_pte_entry, - .test_p4d = ptdump_test_p4d, - .test_pud = ptdump_test_pud, - .test_pmd = 
ptdump_test_pmd, - .pte_hole = ptdump_hole, - .private = &st + struct pg_state st = { + .ptdump = { + .note_page = note_page, + .range = ptdump_ranges + }, + .to_dmesg = dmesg, + .check_wx = checkwx, + .seq = m }; - st.to_dmesg = dmesg; - st.check_wx = checkwx; - st.seq = m; - if (checkwx) - st.wx_pages = 0; - - down_read(&mm->mmap_sem); -#ifdef CONFIG_X86_64 - walk_page_range(0, PTRS_PER_PGD*PGD_LEVEL_MULT/2, &walk); - walk_page_range(normalize_addr(PTRS_PER_PGD*PGD_LEVEL_MULT/2), ~0, - &walk); -#else - walk_page_range(0, ~0, &walk); -#endif - up_read(&mm->mmap_sem); + ptdump_walk_pgd(&st.ptdump, mm); - /* Flush out the last page */ - st.current_address = normalize_addr(PTRS_PER_PGD*PGD_LEVEL_MULT); - note_page(&st, __pgprot(0), 0, 0); if (!checkwx) return; if (st.wx_pages) -- 2.20.1
WARNING: multiple messages have this Message-ID (diff)
From: Steven Price <steven.price@arm.com> To: Dave Hansen <dave.hansen@intel.com>, linux-mm@kvack.org, Linus Torvalds <torvalds@linux-foundation.org> Cc: "Mark Rutland" <Mark.Rutland@arm.com>, "Arnd Bergmann" <arnd@arndb.de>, "Ard Biesheuvel" <ard.biesheuvel@linaro.org>, "Peter Zijlstra" <peterz@infradead.org>, "Catalin Marinas" <catalin.marinas@arm.com>, x86@kernel.org, "Will Deacon" <will.deacon@arm.com>, linux-kernel@vger.kernel.org, "Steven Price" <steven.price@arm.com>, "Jérôme Glisse" <jglisse@redhat.com>, "Ingo Molnar" <mingo@redhat.com>, "Borislav Petkov" <bp@alien8.de>, "Andy Lutomirski" <luto@kernel.org>, "H. Peter Anvin" <hpa@zytor.com>, "James Morse" <james.morse@arm.com>, "Thomas Gleixner" <tglx@linutronix.de>, "Andrew Morton" <akpm@linux-foundation.org>, linux-arm-kernel@lists.infradead.org, "Liang, Kan" <kan.liang@linux.intel.com> Subject: [RFC PATCH 3/3] x86: mm: Switch to using generic pt_dump Date: Wed, 17 Apr 2019 15:34:23 +0100 [thread overview] Message-ID: <20190417143423.26665-3-steven.price@arm.com> (raw) In-Reply-To: <20190417143423.26665-1-steven.price@arm.com> Instead of providing our own callbacks for walking the page tables, switch to using the generic version instead. 
Signed-off-by: Steven Price <steven.price@arm.com> --- arch/x86/Kconfig | 1 + arch/x86/Kconfig.debug | 20 +-- arch/x86/mm/Makefile | 4 +- arch/x86/mm/dump_pagetables.c | 297 +++++++--------------------------- 4 files changed, 62 insertions(+), 260 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index c1f9b3cf437c..122c24055f02 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -106,6 +106,7 @@ config X86 select GENERIC_IRQ_RESERVATION_MODE select GENERIC_IRQ_SHOW select GENERIC_PENDING_IRQ if SMP + select GENERIC_PTDUMP select GENERIC_SMP_IDLE_THREAD select GENERIC_STRNCPY_FROM_USER select GENERIC_STRNLEN_USER diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index 15d0fbe27872..dc1dfe213657 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -62,26 +62,10 @@ config EARLY_PRINTK_USB_XDBC config MCSAFE_TEST def_bool n -config X86_PTDUMP_CORE - def_bool n - -config X86_PTDUMP - tristate "Export kernel pagetable layout to userspace via debugfs" - depends on DEBUG_KERNEL - select DEBUG_FS - select X86_PTDUMP_CORE - ---help--- - Say Y here if you want to show the kernel pagetable layout in a - debugfs file. This information is only useful for kernel developers - who are working in architecture specific areas of the kernel. - It is probably not a good idea to enable this feature in a production - kernel. - If in doubt, say "N" - config EFI_PGT_DUMP bool "Dump the EFI pagetable" depends on EFI - select X86_PTDUMP_CORE + select PTDUMP_CORE ---help--- Enable this if you want to dump the EFI page table before enabling virtual mode. This can be used to debug miscellaneous @@ -90,7 +74,7 @@ config EFI_PGT_DUMP config DEBUG_WX bool "Warn on W+X mappings at boot" - select X86_PTDUMP_CORE + select PTDUMP_CORE ---help--- Generate a warning if any W+X mappings are found at boot. 
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile index 4b101dd6e52f..5233190fc6bf 100644 --- a/arch/x86/mm/Makefile +++ b/arch/x86/mm/Makefile @@ -28,8 +28,8 @@ obj-$(CONFIG_X86_PAT) += pat_rbtree.o obj-$(CONFIG_X86_32) += pgtable_32.o iomap_32.o obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o -obj-$(CONFIG_X86_PTDUMP_CORE) += dump_pagetables.o -obj-$(CONFIG_X86_PTDUMP) += debug_pagetables.o +obj-$(CONFIG_PTDUMP_CORE) += dump_pagetables.o +obj-$(CONFIG_PTDUMP_DEBUGFS) += debug_pagetables.o obj-$(CONFIG_HIGHMEM) += highmem_32.o diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c index f6b814aaddf7..955824c7cddb 100644 --- a/arch/x86/mm/dump_pagetables.c +++ b/arch/x86/mm/dump_pagetables.c @@ -20,6 +20,7 @@ #include <linux/seq_file.h> #include <linux/highmem.h> #include <linux/pci.h> +#include <linux/ptdump.h> #include <asm/e820/types.h> #include <asm/pgtable.h> @@ -30,15 +31,12 @@ * when a "break" in the continuity is found. */ struct pg_state { + struct ptdump_state ptdump; int level; - pgprot_t current_prot; + pgprotval_t current_prot; pgprotval_t effective_prot; - pgprotval_t effective_prot_pgd; - pgprotval_t effective_prot_p4d; - pgprotval_t effective_prot_pud; - pgprotval_t effective_prot_pmd; + pgprotval_t prot_levels[5]; unsigned long start_address; - unsigned long current_address; const struct addr_marker *marker; unsigned long lines; bool to_dmesg; @@ -179,9 +177,8 @@ static struct addr_marker address_markers[] = { /* * Print a readable form of a pgprot_t to the seq_file */ -static void printk_prot(struct seq_file *m, pgprot_t prot, int level, bool dmsg) +static void printk_prot(struct seq_file *m, pgprotval_t pr, int level, bool dmsg) { - pgprotval_t pr = pgprot_val(prot); static const char * const level_name[] = { "cr3", "pgd", "p4d", "pud", "pmd", "pte" }; @@ -228,24 +225,11 @@ static void printk_prot(struct seq_file *m, pgprot_t prot, int level, bool dmsg) pt_dump_cont_printf(m, dmsg, "%s\n", level_name[level]); } -/* - * On 
64 bits, sign-extend the 48 bit address to 64 bit - */ -static unsigned long normalize_addr(unsigned long u) -{ - int shift; - if (!IS_ENABLED(CONFIG_X86_64)) - return u; - - shift = 64 - (__VIRTUAL_MASK_SHIFT + 1); - return (signed long)(u << shift) >> shift; -} - -static void note_wx(struct pg_state *st) +static void note_wx(struct pg_state *st, unsigned long addr) { unsigned long npages; - npages = (st->current_address - st->start_address) / PAGE_SIZE; + npages = (addr - st->start_address) / PAGE_SIZE; #ifdef CONFIG_PCI_BIOS /* @@ -253,7 +237,7 @@ static void note_wx(struct pg_state *st) * Inform about it, but avoid the warning. */ if (pcibios_enabled && st->start_address >= PAGE_OFFSET + BIOS_BEGIN && - st->current_address <= PAGE_OFFSET + BIOS_END) { + addr <= PAGE_OFFSET + BIOS_END) { pr_warn_once("x86/mm: PCI BIOS W+X mapping %lu pages\n", npages); return; } @@ -264,25 +248,44 @@ static void note_wx(struct pg_state *st) (void *)st->start_address); } +static inline pgprotval_t effective_prot(pgprotval_t prot1, pgprotval_t prot2) +{ + return (prot1 & prot2 & (_PAGE_USER | _PAGE_RW)) | + ((prot1 | prot2) & _PAGE_NX); +} + /* * This function gets called on a break in a continuous series * of PTE entries; the next one is different so we need to * print what we collected so far. 
*/ -static void note_page(struct pg_state *st, pgprot_t new_prot, - pgprotval_t new_eff, int level) +static void note_page(struct ptdump_state *pt_st, unsigned long addr, int level, + unsigned long val) { - pgprotval_t prot, cur, eff; + struct pg_state *st = container_of(pt_st, struct pg_state, ptdump); + pgprotval_t new_prot, new_eff; + pgprotval_t cur, eff; static const char units[] = "BKMGTPE"; struct seq_file *m = st->seq; + new_prot = val & PTE_FLAGS_MASK; + + if (level > 1) { + new_eff = effective_prot(st->prot_levels[level - 2], + new_prot); + } else { + new_eff = new_prot; + } + + if (level > 0) + st->prot_levels[level-1] = new_eff; + /* * If we have a "break" in the series, we need to flush the state that * we have now. "break" is either changing perms, levels or * address space marker. */ - prot = pgprot_val(new_prot); - cur = pgprot_val(st->current_prot); + cur = st->current_prot; eff = st->effective_prot; if (!st->level) { @@ -294,14 +297,14 @@ static void note_page(struct pg_state *st, pgprot_t new_prot, st->lines = 0; pt_dump_seq_printf(m, st->to_dmesg, "---[ %s ]---\n", st->marker->name); - } else if (prot != cur || new_eff != eff || level != st->level || - st->current_address >= st->marker[1].start_address) { + } else if (new_prot != cur || new_eff != eff || level != st->level || + addr >= st->marker[1].start_address) { const char *unit = units; unsigned long delta; int width = sizeof(unsigned long) * 2; if (st->check_wx && (eff & _PAGE_RW) && !(eff & _PAGE_NX)) - note_wx(st); + note_wx(st, addr); /* * Now print the actual finished series @@ -311,9 +314,9 @@ static void note_page(struct pg_state *st, pgprot_t new_prot, pt_dump_seq_printf(m, st->to_dmesg, "0x%0*lx-0x%0*lx ", width, st->start_address, - width, st->current_address); + width, addr); - delta = st->current_address - st->start_address; + delta = addr - st->start_address; while (!(delta & 1023) && unit[1]) { delta >>= 10; unit++; @@ -331,7 +334,7 @@ static void note_page(struct pg_state 
*st, pgprot_t new_prot, * such as the start of vmalloc space etc. * This helps in the interpretation. */ - if (st->current_address >= st->marker[1].start_address) { + if (addr >= st->marker[1].start_address) { if (st->marker->max_lines && st->lines > st->marker->max_lines) { unsigned long nskip = @@ -347,228 +350,42 @@ static void note_page(struct pg_state *st, pgprot_t new_prot, st->marker->name); } - st->start_address = st->current_address; + st->start_address = addr; st->current_prot = new_prot; st->effective_prot = new_eff; st->level = level; } } -static inline pgprotval_t effective_prot(pgprotval_t prot1, pgprotval_t prot2) -{ - return (prot1 & prot2 & (_PAGE_USER | _PAGE_RW)) | - ((prot1 | prot2) & _PAGE_NX); -} - -static int ptdump_pte_entry(pte_t *pte, unsigned long addr, - unsigned long next, struct mm_walk *walk) -{ - struct pg_state *st = walk->private; - pgprotval_t eff, prot; - - st->current_address = normalize_addr(addr); - - prot = pte_flags(*pte); - eff = effective_prot(st->effective_prot_pmd, prot); - note_page(st, __pgprot(prot), eff, 5); - - return 0; -} - -#ifdef CONFIG_KASAN - -/* - * This is an optimization for KASAN=y case. Since all kasan page tables - * eventually point to the kasan_early_shadow_page we could call note_page() - * right away without walking through lower level page tables. This saves - * us dozens of seconds (minutes for 5-level config) while checking for - * W+X mapping or reading kernel_page_tables debugfs file. 
- */ -static inline bool kasan_page_table(struct pg_state *st, void *pt) -{ - if (__pa(pt) == __pa(kasan_early_shadow_pmd) || - (pgtable_l5_enabled() && - __pa(pt) == __pa(kasan_early_shadow_p4d)) || - __pa(pt) == __pa(kasan_early_shadow_pud)) { - pgprotval_t prot = pte_flags(kasan_early_shadow_pte[0]); - note_page(st, __pgprot(prot), 0, 5); - return true; - } - return false; -} -#else -static inline bool kasan_page_table(struct pg_state *st, void *pt) -{ - return false; -} -#endif - -static int ptdump_test_pmd(unsigned long addr, unsigned long next, - pmd_t *pmd, struct mm_walk *walk) -{ - struct pg_state *st = walk->private; - - st->current_address = normalize_addr(addr); - - if (kasan_page_table(st, pmd)) - return 1; - return 0; -} - -static int ptdump_pmd_entry(pmd_t *pmd, unsigned long addr, - unsigned long next, struct mm_walk *walk) -{ - struct pg_state *st = walk->private; - pgprotval_t eff, prot; - - prot = pmd_flags(*pmd); - eff = effective_prot(st->effective_prot_pud, prot); - - st->current_address = normalize_addr(addr); - - if (pmd_large(*pmd)) - note_page(st, __pgprot(prot), eff, 4); - - st->effective_prot_pmd = eff; - - return 0; -} - -static int ptdump_test_pud(unsigned long addr, unsigned long next, - pud_t *pud, struct mm_walk *walk) -{ - struct pg_state *st = walk->private; - - st->current_address = normalize_addr(addr); - - if (kasan_page_table(st, pud)) - return 1; - return 0; -} - -static int ptdump_pud_entry(pud_t *pud, unsigned long addr, - unsigned long next, struct mm_walk *walk) -{ - struct pg_state *st = walk->private; - pgprotval_t eff, prot; - - prot = pud_flags(*pud); - eff = effective_prot(st->effective_prot_p4d, prot); - - st->current_address = normalize_addr(addr); - - if (pud_large(*pud)) - note_page(st, __pgprot(prot), eff, 3); - - st->effective_prot_pud = eff; - - return 0; -} - -static int ptdump_test_p4d(unsigned long addr, unsigned long next, - p4d_t *p4d, struct mm_walk *walk) -{ - struct pg_state *st = walk->private; - - 
st->current_address = normalize_addr(addr); - - if (kasan_page_table(st, p4d)) - return 1; - return 0; -} - -static int ptdump_p4d_entry(p4d_t *p4d, unsigned long addr, - unsigned long next, struct mm_walk *walk) -{ - struct pg_state *st = walk->private; - pgprotval_t eff, prot; - - prot = p4d_flags(*p4d); - eff = effective_prot(st->effective_prot_pgd, prot); - - st->current_address = normalize_addr(addr); - - if (p4d_large(*p4d)) - note_page(st, __pgprot(prot), eff, 2); - - st->effective_prot_p4d = eff; - - return 0; -} - -static int ptdump_pgd_entry(pgd_t *pgd, unsigned long addr, - unsigned long next, struct mm_walk *walk) -{ - struct pg_state *st = walk->private; - pgprotval_t eff, prot; +static const struct ptdump_range ptdump_ranges[] = { +#ifdef CONFIG_X86_64 - prot = pgd_flags(*pgd); +#define normalize_addr_shift (64 - (__VIRTUAL_MASK_SHIFT + 1)) +#define normalize_addr(u) ((signed long)(u << normalize_addr_shift) >> normalize_addr_shift) -#ifdef CONFIG_X86_PAE - eff = _PAGE_USER | _PAGE_RW; + {0, PTRS_PER_PGD * PGD_LEVEL_MULT / 2}, + {normalize_addr(PTRS_PER_PGD * PGD_LEVEL_MULT / 2), ~0UL}, #else - eff = prot; + {0, ~0UL}, #endif - - st->current_address = normalize_addr(addr); - - if (pgd_large(*pgd)) - note_page(st, __pgprot(prot), eff, 1); - - st->effective_prot_pgd = eff; - - return 0; -} - -static int ptdump_hole(unsigned long addr, unsigned long next, - struct mm_walk *walk) -{ - struct pg_state *st = walk->private; - - st->current_address = normalize_addr(addr); - - note_page(st, __pgprot(0), 0, -1); - - return 0; -} + {0, 0} +}; static void ptdump_walk_pgd_level_core(struct seq_file *m, struct mm_struct *mm, bool checkwx, bool dmesg) { - struct pg_state st = {}; - struct mm_walk walk = { - .mm = mm, - .pgd_entry = ptdump_pgd_entry, - .p4d_entry = ptdump_p4d_entry, - .pud_entry = ptdump_pud_entry, - .pmd_entry = ptdump_pmd_entry, - .pte_entry = ptdump_pte_entry, - .test_p4d = ptdump_test_p4d, - .test_pud = ptdump_test_pud, - .test_pmd = 
ptdump_test_pmd, - .pte_hole = ptdump_hole, - .private = &st + struct pg_state st = { + .ptdump = { + .note_page = note_page, + .range = ptdump_ranges + }, + .to_dmesg = dmesg, + .check_wx = checkwx, + .seq = m }; - st.to_dmesg = dmesg; - st.check_wx = checkwx; - st.seq = m; - if (checkwx) - st.wx_pages = 0; - - down_read(&mm->mmap_sem); -#ifdef CONFIG_X86_64 - walk_page_range(0, PTRS_PER_PGD*PGD_LEVEL_MULT/2, &walk); - walk_page_range(normalize_addr(PTRS_PER_PGD*PGD_LEVEL_MULT/2), ~0, - &walk); -#else - walk_page_range(0, ~0, &walk); -#endif - up_read(&mm->mmap_sem); + ptdump_walk_pgd(&st.ptdump, mm); - /* Flush out the last page */ - st.current_address = normalize_addr(PTRS_PER_PGD*PGD_LEVEL_MULT); - note_page(&st, __pgprot(0), 0, 0); if (!checkwx) return; if (st.wx_pages) -- 2.20.1 _______________________________________________ linux-arm-kernel mailing list linux-arm-kernel@lists.infradead.org http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
next prev parent reply other threads:[~2019-04-17 14:34 UTC|newest] Thread overview: 74+ messages / expand[flat|nested] mbox.gz Atom feed top 2019-04-03 14:16 [PATCH v8 00/20] Convert x86 & arm64 to use generic page walk Steven Price 2019-04-03 14:16 ` Steven Price 2019-04-03 14:16 ` [PATCH v8 01/20] arc: mm: Add p?d_large() definitions Steven Price 2019-04-03 14:16 ` Steven Price 2019-04-03 14:16 ` Steven Price 2019-04-03 14:16 ` [PATCH v8 02/20] arm64: " Steven Price 2019-04-03 14:16 ` Steven Price 2019-06-11 15:36 ` Will Deacon 2019-06-11 15:36 ` Will Deacon 2019-04-03 14:16 ` [PATCH v8 03/20] mips: " Steven Price 2019-04-03 14:16 ` Steven Price 2019-04-03 14:16 ` [PATCH v8 04/20] powerpc: " Steven Price 2019-04-03 14:16 ` Steven Price 2019-04-03 14:16 ` Steven Price 2019-04-03 14:16 ` Steven Price 2019-04-03 14:16 ` [PATCH v8 05/20] KVM: PPC: Book3S HV: Remove pmd_is_leaf() Steven Price 2019-04-03 14:16 ` Steven Price 2019-04-03 14:16 ` Steven Price 2019-04-03 14:16 ` Steven Price 2019-04-29 2:05 ` Paul Mackerras 2019-04-29 2:05 ` Paul Mackerras 2019-04-29 2:05 ` Paul Mackerras 2019-04-29 2:05 ` Paul Mackerras 2019-05-09 15:03 ` Steven Price 2019-05-09 15:03 ` Steven Price 2019-05-09 15:03 ` Steven Price 2019-05-09 15:03 ` Steven Price 2019-04-03 14:16 ` [PATCH v8 06/20] riscv: mm: Add p?d_large() definitions Steven Price 2019-04-03 14:16 ` Steven Price 2019-04-03 14:16 ` Steven Price 2019-04-05 4:14 ` Anup Patel 2019-04-05 4:14 ` Anup Patel 2019-04-05 4:14 ` Anup Patel 2019-04-03 14:16 ` [PATCH v8 07/20] s390: " Steven Price 2019-04-03 14:16 ` Steven Price 2019-04-03 14:16 ` [PATCH v8 08/20] sparc: " Steven Price 2019-04-03 14:16 ` Steven Price 2019-04-03 14:16 ` Steven Price 2019-04-03 14:16 ` [PATCH v8 09/20] x86: " Steven Price 2019-04-03 14:16 ` Steven Price 2019-04-03 14:16 ` [PATCH v8 10/20] mm: Add generic p?d_large() macros Steven Price 2019-04-03 14:16 ` Steven Price 2019-04-03 14:16 ` [PATCH v8 11/20] mm: pagewalk: Add p4d_entry() and pgd_entry() 
Steven Price 2019-04-03 14:16 ` Steven Price 2019-04-03 14:16 ` [PATCH v8 12/20] mm: pagewalk: Allow walking without vma Steven Price 2019-04-03 14:16 ` Steven Price 2019-04-03 14:16 ` [PATCH v8 13/20] mm: pagewalk: Add test_p?d callbacks Steven Price 2019-04-03 14:16 ` Steven Price 2019-04-03 14:16 ` [PATCH v8 14/20] arm64: mm: Convert mm/dump.c to use walk_page_range() Steven Price 2019-04-03 14:16 ` Steven Price 2019-04-03 14:16 ` [PATCH v8 15/20] x86: mm: Don't display pages which aren't present in debugfs Steven Price 2019-04-03 14:16 ` Steven Price 2019-04-03 14:16 ` [PATCH v8 16/20] x86: mm: Point to struct seq_file from struct pg_state Steven Price 2019-04-03 14:16 ` Steven Price 2019-04-03 14:16 ` [PATCH v8 17/20] x86: mm+efi: Convert ptdump_walk_pgd_level() to take a mm_struct Steven Price 2019-04-03 14:16 ` Steven Price 2019-04-03 14:16 ` [PATCH v8 18/20] x86: mm: Convert ptdump_walk_pgd_level_debugfs() to take an mm_struct Steven Price 2019-04-03 14:16 ` Steven Price 2019-04-03 14:16 ` [PATCH v8 19/20] x86: mm: Convert ptdump_walk_pgd_level_core() " Steven Price 2019-04-03 14:16 ` Steven Price 2019-04-03 14:16 ` [PATCH v8 20/20] x86: mm: Convert dump_pagetables to use walk_page_range Steven Price 2019-04-03 14:16 ` Steven Price 2019-04-10 14:56 ` [PATCH v8 00/20] Convert x86 & arm64 to use generic page walk Steven Price 2019-04-10 14:56 ` Steven Price 2019-04-12 14:44 ` Dave Hansen 2019-04-12 14:44 ` Dave Hansen 2019-04-17 14:28 ` Steven Price 2019-04-17 14:28 ` Steven Price 2019-04-17 14:34 ` [RFC PATCH 1/3] mm: Add generic ptdump Steven Price 2019-04-17 14:34 ` Steven Price 2019-04-17 14:34 ` [RFC PATCH 2/3] arm64: mm: Switch to using generic pt_dump Steven Price 2019-04-17 14:34 ` Steven Price 2019-04-17 14:34 ` Steven Price [this message] 2019-04-17 14:34 ` [RFC PATCH 3/3] x86: " Steven Price
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=20190417143423.26665-3-steven.price@arm.com \ --to=steven.price@arm.com \ --cc=Mark.Rutland@arm.com \ --cc=akpm@linux-foundation.org \ --cc=ard.biesheuvel@linaro.org \ --cc=arnd@arndb.de \ --cc=bp@alien8.de \ --cc=catalin.marinas@arm.com \ --cc=dave.hansen@intel.com \ --cc=hpa@zytor.com \ --cc=james.morse@arm.com \ --cc=jglisse@redhat.com \ --cc=kan.liang@linux.intel.com \ --cc=linux-arm-kernel@lists.infradead.org \ --cc=linux-kernel@vger.kernel.org \ --cc=linux-mm@kvack.org \ --cc=luto@kernel.org \ --cc=mingo@redhat.com \ --cc=peterz@infradead.org \ --cc=tglx@linutronix.de \ --cc=torvalds@linux-foundation.org \ --cc=will.deacon@arm.com \ --cc=x86@kernel.org \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: link. Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes; see the mirroring instructions for how to clone and mirror all data and code used by this external index.