All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v4 1/4] powerpc/ptdump: Use DEFINE_SHOW_ATTRIBUTE()
@ 2021-07-08 16:49 ` Christophe Leroy
  0 siblings, 0 replies; 25+ messages in thread
From: Christophe Leroy @ 2021-07-08 16:49 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman
  Cc: linux-kernel, linuxppc-dev

Use DEFINE_SHOW_ATTRIBUTE() instead of open coding
open() and fops.

Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
---
 arch/powerpc/mm/ptdump/bats.c          | 14 ++------------
 arch/powerpc/mm/ptdump/hashpagetable.c | 12 +-----------
 arch/powerpc/mm/ptdump/ptdump.c        | 13 +------------
 arch/powerpc/mm/ptdump/segment_regs.c  | 12 +-----------
 4 files changed, 5 insertions(+), 46 deletions(-)

diff --git a/arch/powerpc/mm/ptdump/bats.c b/arch/powerpc/mm/ptdump/bats.c
index c4c628b03cf8..4ed3418f07d9 100644
--- a/arch/powerpc/mm/ptdump/bats.c
+++ b/arch/powerpc/mm/ptdump/bats.c
@@ -57,7 +57,7 @@ static void bat_show_603(struct seq_file *m, int idx, u32 lower, u32 upper, bool
 
 #define BAT_SHOW_603(_m, _n, _l, _u, _d) bat_show_603(_m, _n, mfspr(_l), mfspr(_u), _d)
 
-static int bats_show_603(struct seq_file *m, void *v)
+static int bats_show(struct seq_file *m, void *v)
 {
 	seq_puts(m, "---[ Instruction Block Address Translation ]---\n");
 
@@ -88,17 +88,7 @@ static int bats_show_603(struct seq_file *m, void *v)
 	return 0;
 }
 
-static int bats_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, bats_show_603, NULL);
-}
-
-static const struct file_operations bats_fops = {
-	.open		= bats_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
+DEFINE_SHOW_ATTRIBUTE(bats);
 
 static int __init bats_init(void)
 {
diff --git a/arch/powerpc/mm/ptdump/hashpagetable.c b/arch/powerpc/mm/ptdump/hashpagetable.c
index ad6df9a2e7c8..c7f824d294b2 100644
--- a/arch/powerpc/mm/ptdump/hashpagetable.c
+++ b/arch/powerpc/mm/ptdump/hashpagetable.c
@@ -526,17 +526,7 @@ static int ptdump_show(struct seq_file *m, void *v)
 	return 0;
 }
 
-static int ptdump_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, ptdump_show, NULL);
-}
-
-static const struct file_operations ptdump_fops = {
-	.open		= ptdump_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
+DEFINE_SHOW_ATTRIBUTE(ptdump);
 
 static int ptdump_init(void)
 {
diff --git a/arch/powerpc/mm/ptdump/ptdump.c b/arch/powerpc/mm/ptdump/ptdump.c
index 5062c58b1e5b..349fd8fe173f 100644
--- a/arch/powerpc/mm/ptdump/ptdump.c
+++ b/arch/powerpc/mm/ptdump/ptdump.c
@@ -397,18 +397,7 @@ static int ptdump_show(struct seq_file *m, void *v)
 	return 0;
 }
 
-
-static int ptdump_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, ptdump_show, NULL);
-}
-
-static const struct file_operations ptdump_fops = {
-	.open		= ptdump_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
+DEFINE_SHOW_ATTRIBUTE(ptdump);
 
 static void build_pgtable_complete_mask(void)
 {
diff --git a/arch/powerpc/mm/ptdump/segment_regs.c b/arch/powerpc/mm/ptdump/segment_regs.c
index 565048a0c9be..3054944d3d7e 100644
--- a/arch/powerpc/mm/ptdump/segment_regs.c
+++ b/arch/powerpc/mm/ptdump/segment_regs.c
@@ -41,17 +41,7 @@ static int sr_show(struct seq_file *m, void *v)
 	return 0;
 }
 
-static int sr_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, sr_show, NULL);
-}
-
-static const struct file_operations sr_fops = {
-	.open		= sr_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
+DEFINE_SHOW_ATTRIBUTE(sr);
 
 static int __init sr_init(void)
 {
-- 
2.25.0


^ permalink raw reply	[flat|nested] 25+ messages in thread

* [PATCH v4 1/4] powerpc/ptdump: Use DEFINE_SHOW_ATTRIBUTE()
@ 2021-07-08 16:49 ` Christophe Leroy
  0 siblings, 0 replies; 25+ messages in thread
From: Christophe Leroy @ 2021-07-08 16:49 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman
  Cc: linuxppc-dev, linux-kernel

Use DEFINE_SHOW_ATTRIBUTE() instead of open coding
open() and fops.

Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
---
 arch/powerpc/mm/ptdump/bats.c          | 14 ++------------
 arch/powerpc/mm/ptdump/hashpagetable.c | 12 +-----------
 arch/powerpc/mm/ptdump/ptdump.c        | 13 +------------
 arch/powerpc/mm/ptdump/segment_regs.c  | 12 +-----------
 4 files changed, 5 insertions(+), 46 deletions(-)

diff --git a/arch/powerpc/mm/ptdump/bats.c b/arch/powerpc/mm/ptdump/bats.c
index c4c628b03cf8..4ed3418f07d9 100644
--- a/arch/powerpc/mm/ptdump/bats.c
+++ b/arch/powerpc/mm/ptdump/bats.c
@@ -57,7 +57,7 @@ static void bat_show_603(struct seq_file *m, int idx, u32 lower, u32 upper, bool
 
 #define BAT_SHOW_603(_m, _n, _l, _u, _d) bat_show_603(_m, _n, mfspr(_l), mfspr(_u), _d)
 
-static int bats_show_603(struct seq_file *m, void *v)
+static int bats_show(struct seq_file *m, void *v)
 {
 	seq_puts(m, "---[ Instruction Block Address Translation ]---\n");
 
@@ -88,17 +88,7 @@ static int bats_show_603(struct seq_file *m, void *v)
 	return 0;
 }
 
-static int bats_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, bats_show_603, NULL);
-}
-
-static const struct file_operations bats_fops = {
-	.open		= bats_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
+DEFINE_SHOW_ATTRIBUTE(bats);
 
 static int __init bats_init(void)
 {
diff --git a/arch/powerpc/mm/ptdump/hashpagetable.c b/arch/powerpc/mm/ptdump/hashpagetable.c
index ad6df9a2e7c8..c7f824d294b2 100644
--- a/arch/powerpc/mm/ptdump/hashpagetable.c
+++ b/arch/powerpc/mm/ptdump/hashpagetable.c
@@ -526,17 +526,7 @@ static int ptdump_show(struct seq_file *m, void *v)
 	return 0;
 }
 
-static int ptdump_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, ptdump_show, NULL);
-}
-
-static const struct file_operations ptdump_fops = {
-	.open		= ptdump_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
+DEFINE_SHOW_ATTRIBUTE(ptdump);
 
 static int ptdump_init(void)
 {
diff --git a/arch/powerpc/mm/ptdump/ptdump.c b/arch/powerpc/mm/ptdump/ptdump.c
index 5062c58b1e5b..349fd8fe173f 100644
--- a/arch/powerpc/mm/ptdump/ptdump.c
+++ b/arch/powerpc/mm/ptdump/ptdump.c
@@ -397,18 +397,7 @@ static int ptdump_show(struct seq_file *m, void *v)
 	return 0;
 }
 
-
-static int ptdump_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, ptdump_show, NULL);
-}
-
-static const struct file_operations ptdump_fops = {
-	.open		= ptdump_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
+DEFINE_SHOW_ATTRIBUTE(ptdump);
 
 static void build_pgtable_complete_mask(void)
 {
diff --git a/arch/powerpc/mm/ptdump/segment_regs.c b/arch/powerpc/mm/ptdump/segment_regs.c
index 565048a0c9be..3054944d3d7e 100644
--- a/arch/powerpc/mm/ptdump/segment_regs.c
+++ b/arch/powerpc/mm/ptdump/segment_regs.c
@@ -41,17 +41,7 @@ static int sr_show(struct seq_file *m, void *v)
 	return 0;
 }
 
-static int sr_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, sr_show, NULL);
-}
-
-static const struct file_operations sr_fops = {
-	.open		= sr_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
+DEFINE_SHOW_ATTRIBUTE(sr);
 
 static int __init sr_init(void)
 {
-- 
2.25.0


^ permalink raw reply	[flat|nested] 25+ messages in thread

* [PATCH v4 2/4] powerpc/ptdump: Remove unused 'page_size' parameter
  2021-07-08 16:49 ` Christophe Leroy
@ 2021-07-08 16:49   ` Christophe Leroy
  -1 siblings, 0 replies; 25+ messages in thread
From: Christophe Leroy @ 2021-07-08 16:49 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman
  Cc: linux-kernel, linuxppc-dev

note_page_update_state() doesn't use page_size. Remove it.

Could also be removed to note_page() but as a following patch
will remove all current users of note_page(), just leave it as
is for now.

Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
---
 arch/powerpc/mm/ptdump/ptdump.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/mm/ptdump/ptdump.c b/arch/powerpc/mm/ptdump/ptdump.c
index 349fd8fe173f..3eb8732641da 100644
--- a/arch/powerpc/mm/ptdump/ptdump.c
+++ b/arch/powerpc/mm/ptdump/ptdump.c
@@ -189,7 +189,7 @@ static void note_prot_wx(struct pg_state *st, unsigned long addr)
 }
 
 static void note_page_update_state(struct pg_state *st, unsigned long addr,
-				   unsigned int level, u64 val, unsigned long page_size)
+				   unsigned int level, u64 val)
 {
 	u64 flag = val & pg_level[level].mask;
 	u64 pa = val & PTE_RPN_MASK;
@@ -213,7 +213,7 @@ static void note_page(struct pg_state *st, unsigned long addr,
 	/* At first no level is set */
 	if (!st->level) {
 		pt_dump_seq_printf(st->seq, "---[ %s ]---\n", st->marker->name);
-		note_page_update_state(st, addr, level, val, page_size);
+		note_page_update_state(st, addr, level, val);
 	/*
 	 * Dump the section of virtual memory when:
 	 *   - the PTE flags from one entry to the next differs.
@@ -242,7 +242,7 @@ static void note_page(struct pg_state *st, unsigned long addr,
 		 * Address indicates we have passed the end of the
 		 * current section of virtual memory
 		 */
-		note_page_update_state(st, addr, level, val, page_size);
+		note_page_update_state(st, addr, level, val);
 	}
 }
 
-- 
2.25.0


^ permalink raw reply	[flat|nested] 25+ messages in thread

* [PATCH v4 2/4] powerpc/ptdump: Remove unused 'page_size' parameter
@ 2021-07-08 16:49   ` Christophe Leroy
  0 siblings, 0 replies; 25+ messages in thread
From: Christophe Leroy @ 2021-07-08 16:49 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman
  Cc: linuxppc-dev, linux-kernel

note_page_update_state() doesn't use page_size. Remove it.

Could also be removed to note_page() but as a following patch
will remove all current users of note_page(), just leave it as
is for now.

Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
---
 arch/powerpc/mm/ptdump/ptdump.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/mm/ptdump/ptdump.c b/arch/powerpc/mm/ptdump/ptdump.c
index 349fd8fe173f..3eb8732641da 100644
--- a/arch/powerpc/mm/ptdump/ptdump.c
+++ b/arch/powerpc/mm/ptdump/ptdump.c
@@ -189,7 +189,7 @@ static void note_prot_wx(struct pg_state *st, unsigned long addr)
 }
 
 static void note_page_update_state(struct pg_state *st, unsigned long addr,
-				   unsigned int level, u64 val, unsigned long page_size)
+				   unsigned int level, u64 val)
 {
 	u64 flag = val & pg_level[level].mask;
 	u64 pa = val & PTE_RPN_MASK;
@@ -213,7 +213,7 @@ static void note_page(struct pg_state *st, unsigned long addr,
 	/* At first no level is set */
 	if (!st->level) {
 		pt_dump_seq_printf(st->seq, "---[ %s ]---\n", st->marker->name);
-		note_page_update_state(st, addr, level, val, page_size);
+		note_page_update_state(st, addr, level, val);
 	/*
 	 * Dump the section of virtual memory when:
 	 *   - the PTE flags from one entry to the next differs.
@@ -242,7 +242,7 @@ static void note_page(struct pg_state *st, unsigned long addr,
 		 * Address indicates we have passed the end of the
 		 * current section of virtual memory
 		 */
-		note_page_update_state(st, addr, level, val, page_size);
+		note_page_update_state(st, addr, level, val);
 	}
 }
 
-- 
2.25.0


^ permalink raw reply	[flat|nested] 25+ messages in thread

* [PATCH v4 3/4] powerpc/ptdump: Reduce level numbers by 1 in note_page() and add p4d level
  2021-07-08 16:49 ` Christophe Leroy
@ 2021-07-08 16:49   ` Christophe Leroy
  -1 siblings, 0 replies; 25+ messages in thread
From: Christophe Leroy @ 2021-07-08 16:49 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman
  Cc: linux-kernel, linuxppc-dev

Do the same as commit f8f0d0b6fa20 ("mm: ptdump: reduce level numbers
by 1 in note_page()") and add missing p4d level.

This will align powerpc to the users of generic ptdump.

Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
---
 arch/powerpc/mm/ptdump/8xx.c      |  6 ++++--
 arch/powerpc/mm/ptdump/book3s64.c |  6 ++++--
 arch/powerpc/mm/ptdump/ptdump.c   | 17 +++++++++--------
 arch/powerpc/mm/ptdump/shared.c   |  6 ++++--
 4 files changed, 21 insertions(+), 14 deletions(-)

diff --git a/arch/powerpc/mm/ptdump/8xx.c b/arch/powerpc/mm/ptdump/8xx.c
index 86da2a669680..fac932eb8f9a 100644
--- a/arch/powerpc/mm/ptdump/8xx.c
+++ b/arch/powerpc/mm/ptdump/8xx.c
@@ -75,8 +75,10 @@ static const struct flag_info flag_array[] = {
 };
 
 struct pgtable_level pg_level[5] = {
-	{
-	}, { /* pgd */
+	{ /* pgd */
+		.flag	= flag_array,
+		.num	= ARRAY_SIZE(flag_array),
+	}, { /* p4d */
 		.flag	= flag_array,
 		.num	= ARRAY_SIZE(flag_array),
 	}, { /* pud */
diff --git a/arch/powerpc/mm/ptdump/book3s64.c b/arch/powerpc/mm/ptdump/book3s64.c
index 14f73868db66..5ad92d9dc5d1 100644
--- a/arch/powerpc/mm/ptdump/book3s64.c
+++ b/arch/powerpc/mm/ptdump/book3s64.c
@@ -103,8 +103,10 @@ static const struct flag_info flag_array[] = {
 };
 
 struct pgtable_level pg_level[5] = {
-	{
-	}, { /* pgd */
+	{ /* pgd */
+		.flag	= flag_array,
+		.num	= ARRAY_SIZE(flag_array),
+	}, { /* p4d */
 		.flag	= flag_array,
 		.num	= ARRAY_SIZE(flag_array),
 	}, { /* pud */
diff --git a/arch/powerpc/mm/ptdump/ptdump.c b/arch/powerpc/mm/ptdump/ptdump.c
index 3eb8732641da..fb531bc64fc5 100644
--- a/arch/powerpc/mm/ptdump/ptdump.c
+++ b/arch/powerpc/mm/ptdump/ptdump.c
@@ -58,7 +58,7 @@ struct pg_state {
 	const struct addr_marker *marker;
 	unsigned long start_address;
 	unsigned long start_pa;
-	unsigned int level;
+	int level;
 	u64 current_flags;
 	bool check_wx;
 	unsigned long wx_pages;
@@ -188,10 +188,9 @@ static void note_prot_wx(struct pg_state *st, unsigned long addr)
 	st->wx_pages += (addr - st->start_address) / PAGE_SIZE;
 }
 
-static void note_page_update_state(struct pg_state *st, unsigned long addr,
-				   unsigned int level, u64 val)
+static void note_page_update_state(struct pg_state *st, unsigned long addr, int level, u64 val)
 {
-	u64 flag = val & pg_level[level].mask;
+	u64 flag = level >= 0 ? val & pg_level[level].mask : 0;
 	u64 pa = val & PTE_RPN_MASK;
 
 	st->level = level;
@@ -206,12 +205,12 @@ static void note_page_update_state(struct pg_state *st, unsigned long addr,
 }
 
 static void note_page(struct pg_state *st, unsigned long addr,
-	       unsigned int level, u64 val, unsigned long page_size)
+		      int level, u64 val, unsigned long page_size)
 {
-	u64 flag = val & pg_level[level].mask;
+	u64 flag = level >= 0 ? val & pg_level[level].mask : 0;
 
 	/* At first no level is set */
-	if (!st->level) {
+	if (st->level == -1) {
 		pt_dump_seq_printf(st->seq, "---[ %s ]---\n", st->marker->name);
 		note_page_update_state(st, addr, level, val);
 	/*
@@ -383,6 +382,7 @@ static int ptdump_show(struct seq_file *m, void *v)
 	struct pg_state st = {
 		.seq = m,
 		.marker = address_markers,
+		.level = -1,
 		.start_address = IS_ENABLED(CONFIG_PPC64) ? PAGE_OFFSET : TASK_SIZE,
 	};
 
@@ -393,7 +393,7 @@ static int ptdump_show(struct seq_file *m, void *v)
 
 	/* Traverse kernel page tables */
 	walk_pagetables(&st);
-	note_page(&st, 0, 0, 0, 0);
+	note_page(&st, 0, -1, 0, 0);
 	return 0;
 }
 
@@ -415,6 +415,7 @@ void ptdump_check_wx(void)
 	struct pg_state st = {
 		.seq = NULL,
 		.marker = address_markers,
+		.level = -1,
 		.check_wx = true,
 		.start_address = IS_ENABLED(CONFIG_PPC64) ? PAGE_OFFSET : TASK_SIZE,
 	};
diff --git a/arch/powerpc/mm/ptdump/shared.c b/arch/powerpc/mm/ptdump/shared.c
index c005fe041c18..03607ab90c66 100644
--- a/arch/powerpc/mm/ptdump/shared.c
+++ b/arch/powerpc/mm/ptdump/shared.c
@@ -68,8 +68,10 @@ static const struct flag_info flag_array[] = {
 };
 
 struct pgtable_level pg_level[5] = {
-	{
-	}, { /* pgd */
+	{ /* pgd */
+		.flag	= flag_array,
+		.num	= ARRAY_SIZE(flag_array),
+	}, { /* p4d */
 		.flag	= flag_array,
 		.num	= ARRAY_SIZE(flag_array),
 	}, { /* pud */
-- 
2.25.0


^ permalink raw reply	[flat|nested] 25+ messages in thread

* [PATCH v4 3/4] powerpc/ptdump: Reduce level numbers by 1 in note_page() and add p4d level
@ 2021-07-08 16:49   ` Christophe Leroy
  0 siblings, 0 replies; 25+ messages in thread
From: Christophe Leroy @ 2021-07-08 16:49 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman
  Cc: linuxppc-dev, linux-kernel

Do the same as commit f8f0d0b6fa20 ("mm: ptdump: reduce level numbers
by 1 in note_page()") and add missing p4d level.

This will align powerpc to the users of generic ptdump.

Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
---
 arch/powerpc/mm/ptdump/8xx.c      |  6 ++++--
 arch/powerpc/mm/ptdump/book3s64.c |  6 ++++--
 arch/powerpc/mm/ptdump/ptdump.c   | 17 +++++++++--------
 arch/powerpc/mm/ptdump/shared.c   |  6 ++++--
 4 files changed, 21 insertions(+), 14 deletions(-)

diff --git a/arch/powerpc/mm/ptdump/8xx.c b/arch/powerpc/mm/ptdump/8xx.c
index 86da2a669680..fac932eb8f9a 100644
--- a/arch/powerpc/mm/ptdump/8xx.c
+++ b/arch/powerpc/mm/ptdump/8xx.c
@@ -75,8 +75,10 @@ static const struct flag_info flag_array[] = {
 };
 
 struct pgtable_level pg_level[5] = {
-	{
-	}, { /* pgd */
+	{ /* pgd */
+		.flag	= flag_array,
+		.num	= ARRAY_SIZE(flag_array),
+	}, { /* p4d */
 		.flag	= flag_array,
 		.num	= ARRAY_SIZE(flag_array),
 	}, { /* pud */
diff --git a/arch/powerpc/mm/ptdump/book3s64.c b/arch/powerpc/mm/ptdump/book3s64.c
index 14f73868db66..5ad92d9dc5d1 100644
--- a/arch/powerpc/mm/ptdump/book3s64.c
+++ b/arch/powerpc/mm/ptdump/book3s64.c
@@ -103,8 +103,10 @@ static const struct flag_info flag_array[] = {
 };
 
 struct pgtable_level pg_level[5] = {
-	{
-	}, { /* pgd */
+	{ /* pgd */
+		.flag	= flag_array,
+		.num	= ARRAY_SIZE(flag_array),
+	}, { /* p4d */
 		.flag	= flag_array,
 		.num	= ARRAY_SIZE(flag_array),
 	}, { /* pud */
diff --git a/arch/powerpc/mm/ptdump/ptdump.c b/arch/powerpc/mm/ptdump/ptdump.c
index 3eb8732641da..fb531bc64fc5 100644
--- a/arch/powerpc/mm/ptdump/ptdump.c
+++ b/arch/powerpc/mm/ptdump/ptdump.c
@@ -58,7 +58,7 @@ struct pg_state {
 	const struct addr_marker *marker;
 	unsigned long start_address;
 	unsigned long start_pa;
-	unsigned int level;
+	int level;
 	u64 current_flags;
 	bool check_wx;
 	unsigned long wx_pages;
@@ -188,10 +188,9 @@ static void note_prot_wx(struct pg_state *st, unsigned long addr)
 	st->wx_pages += (addr - st->start_address) / PAGE_SIZE;
 }
 
-static void note_page_update_state(struct pg_state *st, unsigned long addr,
-				   unsigned int level, u64 val)
+static void note_page_update_state(struct pg_state *st, unsigned long addr, int level, u64 val)
 {
-	u64 flag = val & pg_level[level].mask;
+	u64 flag = level >= 0 ? val & pg_level[level].mask : 0;
 	u64 pa = val & PTE_RPN_MASK;
 
 	st->level = level;
@@ -206,12 +205,12 @@ static void note_page_update_state(struct pg_state *st, unsigned long addr,
 }
 
 static void note_page(struct pg_state *st, unsigned long addr,
-	       unsigned int level, u64 val, unsigned long page_size)
+		      int level, u64 val, unsigned long page_size)
 {
-	u64 flag = val & pg_level[level].mask;
+	u64 flag = level >= 0 ? val & pg_level[level].mask : 0;
 
 	/* At first no level is set */
-	if (!st->level) {
+	if (st->level == -1) {
 		pt_dump_seq_printf(st->seq, "---[ %s ]---\n", st->marker->name);
 		note_page_update_state(st, addr, level, val);
 	/*
@@ -383,6 +382,7 @@ static int ptdump_show(struct seq_file *m, void *v)
 	struct pg_state st = {
 		.seq = m,
 		.marker = address_markers,
+		.level = -1,
 		.start_address = IS_ENABLED(CONFIG_PPC64) ? PAGE_OFFSET : TASK_SIZE,
 	};
 
@@ -393,7 +393,7 @@ static int ptdump_show(struct seq_file *m, void *v)
 
 	/* Traverse kernel page tables */
 	walk_pagetables(&st);
-	note_page(&st, 0, 0, 0, 0);
+	note_page(&st, 0, -1, 0, 0);
 	return 0;
 }
 
@@ -415,6 +415,7 @@ void ptdump_check_wx(void)
 	struct pg_state st = {
 		.seq = NULL,
 		.marker = address_markers,
+		.level = -1,
 		.check_wx = true,
 		.start_address = IS_ENABLED(CONFIG_PPC64) ? PAGE_OFFSET : TASK_SIZE,
 	};
diff --git a/arch/powerpc/mm/ptdump/shared.c b/arch/powerpc/mm/ptdump/shared.c
index c005fe041c18..03607ab90c66 100644
--- a/arch/powerpc/mm/ptdump/shared.c
+++ b/arch/powerpc/mm/ptdump/shared.c
@@ -68,8 +68,10 @@ static const struct flag_info flag_array[] = {
 };
 
 struct pgtable_level pg_level[5] = {
-	{
-	}, { /* pgd */
+	{ /* pgd */
+		.flag	= flag_array,
+		.num	= ARRAY_SIZE(flag_array),
+	}, { /* p4d */
 		.flag	= flag_array,
 		.num	= ARRAY_SIZE(flag_array),
 	}, { /* pud */
-- 
2.25.0


^ permalink raw reply	[flat|nested] 25+ messages in thread

* [PATCH v4 4/4] powerpc/ptdump: Convert powerpc to GENERIC_PTDUMP
  2021-07-08 16:49 ` Christophe Leroy
@ 2021-07-08 16:49   ` Christophe Leroy
  -1 siblings, 0 replies; 25+ messages in thread
From: Christophe Leroy @ 2021-07-08 16:49 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman
  Cc: linux-kernel, linuxppc-dev

This patch converts powerpc to the generic PTDUMP implementation.

Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
---
v4: Reworked init of ptdump range
---
 arch/powerpc/Kconfig            |   2 +
 arch/powerpc/Kconfig.debug      |  30 -------
 arch/powerpc/mm/Makefile        |   2 +-
 arch/powerpc/mm/mmu_decl.h      |   2 +-
 arch/powerpc/mm/ptdump/Makefile |   9 +-
 arch/powerpc/mm/ptdump/ptdump.c | 146 ++++++++------------------------
 6 files changed, 47 insertions(+), 144 deletions(-)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 0104345d0a65..dc1ab533a1cf 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -123,6 +123,7 @@ config PPC
 	select ARCH_HAS_COPY_MC			if PPC64
 	select ARCH_HAS_DEBUG_VIRTUAL
 	select ARCH_HAS_DEBUG_VM_PGTABLE
+	select ARCH_HAS_DEBUG_WX		if STRICT_KERNEL_RWX
 	select ARCH_HAS_DEVMEM_IS_ALLOWED
 	select ARCH_HAS_DMA_MAP_DIRECT 		if PPC_PSERIES
 	select ARCH_HAS_ELF_RANDOMIZE
@@ -182,6 +183,7 @@ config PPC
 	select GENERIC_IRQ_SHOW
 	select GENERIC_IRQ_SHOW_LEVEL
 	select GENERIC_PCI_IOMAP		if PCI
+	select GENERIC_PTDUMP
 	select GENERIC_SMP_IDLE_THREAD
 	select GENERIC_STRNCPY_FROM_USER
 	select GENERIC_STRNLEN_USER
diff --git a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug
index 205cd77f321f..192f0ed0097f 100644
--- a/arch/powerpc/Kconfig.debug
+++ b/arch/powerpc/Kconfig.debug
@@ -365,36 +365,6 @@ config FAIL_IOMMU
 
 	  If you are unsure, say N.
 
-config PPC_PTDUMP
-	bool "Export kernel pagetable layout to userspace via debugfs"
-	depends on DEBUG_KERNEL && DEBUG_FS
-	help
-	  This option exports the state of the kernel pagetables to a
-	  debugfs file. This is only useful for kernel developers who are
-	  working in architecture specific areas of the kernel - probably
-	  not a good idea to enable this feature in a production kernel.
-
-	  If you are unsure, say N.
-
-config PPC_DEBUG_WX
-	bool "Warn on W+X mappings at boot"
-	depends on PPC_PTDUMP && STRICT_KERNEL_RWX
-	help
-	  Generate a warning if any W+X mappings are found at boot.
-
-	  This is useful for discovering cases where the kernel is leaving
-	  W+X mappings after applying NX, as such mappings are a security risk.
-
-	  Note that even if the check fails, your kernel is possibly
-	  still fine, as W+X mappings are not a security hole in
-	  themselves, what they do is that they make the exploitation
-	  of other unfixed kernel bugs easier.
-
-	  There is no runtime or memory usage effect of this option
-	  once the kernel has booted up - it's a one time check.
-
-	  If in doubt, say "Y".
-
 config PPC_FAST_ENDIAN_SWITCH
 	bool "Deprecated fast endian-switch syscall"
 	depends on DEBUG_KERNEL && PPC_BOOK3S_64
diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile
index eae4ec2988fc..df8172da2301 100644
--- a/arch/powerpc/mm/Makefile
+++ b/arch/powerpc/mm/Makefile
@@ -18,5 +18,5 @@ obj-$(CONFIG_PPC_MM_SLICES)	+= slice.o
 obj-$(CONFIG_HUGETLB_PAGE)	+= hugetlbpage.o
 obj-$(CONFIG_NOT_COHERENT_CACHE) += dma-noncoherent.o
 obj-$(CONFIG_PPC_COPRO_BASE)	+= copro_fault.o
-obj-$(CONFIG_PPC_PTDUMP)	+= ptdump/
+obj-$(CONFIG_PTDUMP_CORE)	+= ptdump/
 obj-$(CONFIG_KASAN)		+= kasan/
diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h
index 7dac910c0b21..dd1cabc2ea0f 100644
--- a/arch/powerpc/mm/mmu_decl.h
+++ b/arch/powerpc/mm/mmu_decl.h
@@ -180,7 +180,7 @@ static inline void mmu_mark_rodata_ro(void) { }
 void __init mmu_mapin_immr(void);
 #endif
 
-#ifdef CONFIG_PPC_DEBUG_WX
+#ifdef CONFIG_DEBUG_WX
 void ptdump_check_wx(void);
 #else
 static inline void ptdump_check_wx(void) { }
diff --git a/arch/powerpc/mm/ptdump/Makefile b/arch/powerpc/mm/ptdump/Makefile
index 712762be3cb1..4050cbb55acf 100644
--- a/arch/powerpc/mm/ptdump/Makefile
+++ b/arch/powerpc/mm/ptdump/Makefile
@@ -5,5 +5,10 @@ obj-y	+= ptdump.o
 obj-$(CONFIG_4xx)		+= shared.o
 obj-$(CONFIG_PPC_8xx)		+= 8xx.o
 obj-$(CONFIG_PPC_BOOK3E_MMU)	+= shared.o
-obj-$(CONFIG_PPC_BOOK3S_32)	+= shared.o bats.o segment_regs.o
-obj-$(CONFIG_PPC_BOOK3S_64)	+= book3s64.o hashpagetable.o
+obj-$(CONFIG_PPC_BOOK3S_32)	+= shared.o
+obj-$(CONFIG_PPC_BOOK3S_64)	+= book3s64.o
+
+ifdef CONFIG_PTDUMP_DEBUGFS
+obj-$(CONFIG_PPC_BOOK3S_32)	+= bats.o segment_regs.o
+obj-$(CONFIG_PPC_BOOK3S_64)	+= hashpagetable.o
+endif
diff --git a/arch/powerpc/mm/ptdump/ptdump.c b/arch/powerpc/mm/ptdump/ptdump.c
index fb531bc64fc5..2d80d775d15e 100644
--- a/arch/powerpc/mm/ptdump/ptdump.c
+++ b/arch/powerpc/mm/ptdump/ptdump.c
@@ -16,6 +16,7 @@
 #include <linux/io.h>
 #include <linux/mm.h>
 #include <linux/highmem.h>
+#include <linux/ptdump.h>
 #include <linux/sched.h>
 #include <linux/seq_file.h>
 #include <asm/fixmap.h>
@@ -54,6 +55,7 @@
  *
  */
 struct pg_state {
+	struct ptdump_state ptdump;
 	struct seq_file *seq;
 	const struct addr_marker *marker;
 	unsigned long start_address;
@@ -102,6 +104,11 @@ static struct addr_marker address_markers[] = {
 	{ -1,	NULL },
 };
 
+static struct ptdump_range ptdump_range[] __ro_after_init = {
+	{TASK_SIZE_MAX, ~0UL},
+	{0, 0}
+};
+
 #define pt_dump_seq_printf(m, fmt, args...)	\
 ({						\
 	if (m)					\
@@ -204,10 +211,10 @@ static void note_page_update_state(struct pg_state *st, unsigned long addr, int
 	}
 }
 
-static void note_page(struct pg_state *st, unsigned long addr,
-		      int level, u64 val, unsigned long page_size)
+static void note_page(struct ptdump_state *pt_st, unsigned long addr, int level, u64 val)
 {
 	u64 flag = level >= 0 ? val & pg_level[level].mask : 0;
+	struct pg_state *st = container_of(pt_st, struct pg_state, ptdump);
 
 	/* At first no level is set */
 	if (st->level == -1) {
@@ -245,94 +252,6 @@ static void note_page(struct pg_state *st, unsigned long addr,
 	}
 }
 
-static void walk_pte(struct pg_state *st, pmd_t *pmd, unsigned long start)
-{
-	pte_t *pte = pte_offset_kernel(pmd, 0);
-	unsigned long addr;
-	unsigned int i;
-
-	for (i = 0; i < PTRS_PER_PTE; i++, pte++) {
-		addr = start + i * PAGE_SIZE;
-		note_page(st, addr, 4, pte_val(*pte), PAGE_SIZE);
-
-	}
-}
-
-static void walk_hugepd(struct pg_state *st, hugepd_t *phpd, unsigned long start,
-			int pdshift, int level)
-{
-#ifdef CONFIG_ARCH_HAS_HUGEPD
-	unsigned int i;
-	int shift = hugepd_shift(*phpd);
-	int ptrs_per_hpd = pdshift - shift > 0 ? 1 << (pdshift - shift) : 1;
-
-	if (start & ((1 << shift) - 1))
-		return;
-
-	for (i = 0; i < ptrs_per_hpd; i++) {
-		unsigned long addr = start + (i << shift);
-		pte_t *pte = hugepte_offset(*phpd, addr, pdshift);
-
-		note_page(st, addr, level + 1, pte_val(*pte), 1 << shift);
-	}
-#endif
-}
-
-static void walk_pmd(struct pg_state *st, pud_t *pud, unsigned long start)
-{
-	pmd_t *pmd = pmd_offset(pud, 0);
-	unsigned long addr;
-	unsigned int i;
-
-	for (i = 0; i < PTRS_PER_PMD; i++, pmd++) {
-		addr = start + i * PMD_SIZE;
-		if (!pmd_none(*pmd) && !pmd_is_leaf(*pmd))
-			/* pmd exists */
-			walk_pte(st, pmd, addr);
-		else
-			note_page(st, addr, 3, pmd_val(*pmd), PMD_SIZE);
-	}
-}
-
-static void walk_pud(struct pg_state *st, p4d_t *p4d, unsigned long start)
-{
-	pud_t *pud = pud_offset(p4d, 0);
-	unsigned long addr;
-	unsigned int i;
-
-	for (i = 0; i < PTRS_PER_PUD; i++, pud++) {
-		addr = start + i * PUD_SIZE;
-		if (!pud_none(*pud) && !pud_is_leaf(*pud))
-			/* pud exists */
-			walk_pmd(st, pud, addr);
-		else
-			note_page(st, addr, 2, pud_val(*pud), PUD_SIZE);
-	}
-}
-
-static void walk_pagetables(struct pg_state *st)
-{
-	unsigned int i;
-	unsigned long addr = st->start_address & PGDIR_MASK;
-	pgd_t *pgd = pgd_offset_k(addr);
-
-	/*
-	 * Traverse the linux pagetable structure and dump pages that are in
-	 * the hash pagetable.
-	 */
-	for (i = pgd_index(addr); i < PTRS_PER_PGD; i++, pgd++, addr += PGDIR_SIZE) {
-		p4d_t *p4d = p4d_offset(pgd, 0);
-
-		if (p4d_none(*p4d) || p4d_is_leaf(*p4d))
-			note_page(st, addr, 1, p4d_val(*p4d), PGDIR_SIZE);
-		else if (is_hugepd(__hugepd(p4d_val(*p4d))))
-			walk_hugepd(st, (hugepd_t *)p4d, addr, PGDIR_SHIFT, 1);
-		else
-			/* p4d exists */
-			walk_pud(st, p4d, addr);
-	}
-}
-
 static void populate_markers(void)
 {
 	int i = 0;
@@ -383,17 +302,14 @@ static int ptdump_show(struct seq_file *m, void *v)
 		.seq = m,
 		.marker = address_markers,
 		.level = -1,
-		.start_address = IS_ENABLED(CONFIG_PPC64) ? PAGE_OFFSET : TASK_SIZE,
+		.ptdump = {
+			.note_page = note_page,
+			.range = ptdump_range,
+		}
 	};
 
-#ifdef CONFIG_PPC64
-	if (!radix_enabled())
-		st.start_address = KERN_VIRT_START;
-#endif
-
 	/* Traverse kernel page tables */
-	walk_pagetables(&st);
-	note_page(&st, 0, -1, 0, 0);
+	ptdump_walk_pgd(&st.ptdump, &init_mm, NULL);
 	return 0;
 }
 
@@ -409,23 +325,24 @@ static void build_pgtable_complete_mask(void)
 				pg_level[i].mask |= pg_level[i].flag[j].mask;
 }
 
-#ifdef CONFIG_PPC_DEBUG_WX
+#ifdef CONFIG_DEBUG_WX
 void ptdump_check_wx(void)
 {
 	struct pg_state st = {
 		.seq = NULL,
-		.marker = address_markers,
+		.marker = (struct addr_marker[]) {
+			{ 0, NULL},
+			{ -1, NULL},
+		},
 		.level = -1,
 		.check_wx = true,
-		.start_address = IS_ENABLED(CONFIG_PPC64) ? PAGE_OFFSET : TASK_SIZE,
+		.ptdump = {
+			.note_page = note_page,
+			.range = ptdump_range,
+		}
 	};
 
-#ifdef CONFIG_PPC64
-	if (!radix_enabled())
-		st.start_address = KERN_VIRT_START;
-#endif
-
-	walk_pagetables(&st);
+	ptdump_walk_pgd(&st.ptdump, &init_mm, NULL);
 
 	if (st.wx_pages)
 		pr_warn("Checked W+X mappings: FAILED, %lu W+X pages found\n",
@@ -435,12 +352,21 @@ void ptdump_check_wx(void)
 }
 #endif
 
-static int ptdump_init(void)
+static int __init ptdump_init(void)
 {
+#ifdef CONFIG_PPC64
+	if (!radix_enabled())
+		ptdump_range[0].start = KERN_VIRT_START;
+	else
+		ptdump_range[0].start = PAGE_OFFSET;
+#endif
+
 	populate_markers();
 	build_pgtable_complete_mask();
-	debugfs_create_file("kernel_page_tables", 0400, NULL, NULL,
-			    &ptdump_fops);
+
+	if (IS_ENABLED(CONFIG_PTDUMP_DEBUGFS))
+		debugfs_create_file("kernel_page_tables", 0400, NULL, NULL, &ptdump_fops);
+
 	return 0;
 }
 device_initcall(ptdump_init);
-- 
2.25.0


^ permalink raw reply	[flat|nested] 25+ messages in thread

* [PATCH v4 4/4] powerpc/ptdump: Convert powerpc to GENERIC_PTDUMP
@ 2021-07-08 16:49   ` Christophe Leroy
  0 siblings, 0 replies; 25+ messages in thread
From: Christophe Leroy @ 2021-07-08 16:49 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman
  Cc: linuxppc-dev, linux-kernel

This patch converts powerpc to the generic PTDUMP implementation.

Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
---
v4: Reworked init of ptdump range
---
 arch/powerpc/Kconfig            |   2 +
 arch/powerpc/Kconfig.debug      |  30 -------
 arch/powerpc/mm/Makefile        |   2 +-
 arch/powerpc/mm/mmu_decl.h      |   2 +-
 arch/powerpc/mm/ptdump/Makefile |   9 +-
 arch/powerpc/mm/ptdump/ptdump.c | 146 ++++++++------------------------
 6 files changed, 47 insertions(+), 144 deletions(-)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 0104345d0a65..dc1ab533a1cf 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -123,6 +123,7 @@ config PPC
 	select ARCH_HAS_COPY_MC			if PPC64
 	select ARCH_HAS_DEBUG_VIRTUAL
 	select ARCH_HAS_DEBUG_VM_PGTABLE
+	select ARCH_HAS_DEBUG_WX		if STRICT_KERNEL_RWX
 	select ARCH_HAS_DEVMEM_IS_ALLOWED
 	select ARCH_HAS_DMA_MAP_DIRECT 		if PPC_PSERIES
 	select ARCH_HAS_ELF_RANDOMIZE
@@ -182,6 +183,7 @@ config PPC
 	select GENERIC_IRQ_SHOW
 	select GENERIC_IRQ_SHOW_LEVEL
 	select GENERIC_PCI_IOMAP		if PCI
+	select GENERIC_PTDUMP
 	select GENERIC_SMP_IDLE_THREAD
 	select GENERIC_STRNCPY_FROM_USER
 	select GENERIC_STRNLEN_USER
diff --git a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug
index 205cd77f321f..192f0ed0097f 100644
--- a/arch/powerpc/Kconfig.debug
+++ b/arch/powerpc/Kconfig.debug
@@ -365,36 +365,6 @@ config FAIL_IOMMU
 
 	  If you are unsure, say N.
 
-config PPC_PTDUMP
-	bool "Export kernel pagetable layout to userspace via debugfs"
-	depends on DEBUG_KERNEL && DEBUG_FS
-	help
-	  This option exports the state of the kernel pagetables to a
-	  debugfs file. This is only useful for kernel developers who are
-	  working in architecture specific areas of the kernel - probably
-	  not a good idea to enable this feature in a production kernel.
-
-	  If you are unsure, say N.
-
-config PPC_DEBUG_WX
-	bool "Warn on W+X mappings at boot"
-	depends on PPC_PTDUMP && STRICT_KERNEL_RWX
-	help
-	  Generate a warning if any W+X mappings are found at boot.
-
-	  This is useful for discovering cases where the kernel is leaving
-	  W+X mappings after applying NX, as such mappings are a security risk.
-
-	  Note that even if the check fails, your kernel is possibly
-	  still fine, as W+X mappings are not a security hole in
-	  themselves, what they do is that they make the exploitation
-	  of other unfixed kernel bugs easier.
-
-	  There is no runtime or memory usage effect of this option
-	  once the kernel has booted up - it's a one time check.
-
-	  If in doubt, say "Y".
-
 config PPC_FAST_ENDIAN_SWITCH
 	bool "Deprecated fast endian-switch syscall"
 	depends on DEBUG_KERNEL && PPC_BOOK3S_64
diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile
index eae4ec2988fc..df8172da2301 100644
--- a/arch/powerpc/mm/Makefile
+++ b/arch/powerpc/mm/Makefile
@@ -18,5 +18,5 @@ obj-$(CONFIG_PPC_MM_SLICES)	+= slice.o
 obj-$(CONFIG_HUGETLB_PAGE)	+= hugetlbpage.o
 obj-$(CONFIG_NOT_COHERENT_CACHE) += dma-noncoherent.o
 obj-$(CONFIG_PPC_COPRO_BASE)	+= copro_fault.o
-obj-$(CONFIG_PPC_PTDUMP)	+= ptdump/
+obj-$(CONFIG_PTDUMP_CORE)	+= ptdump/
 obj-$(CONFIG_KASAN)		+= kasan/
diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h
index 7dac910c0b21..dd1cabc2ea0f 100644
--- a/arch/powerpc/mm/mmu_decl.h
+++ b/arch/powerpc/mm/mmu_decl.h
@@ -180,7 +180,7 @@ static inline void mmu_mark_rodata_ro(void) { }
 void __init mmu_mapin_immr(void);
 #endif
 
-#ifdef CONFIG_PPC_DEBUG_WX
+#ifdef CONFIG_DEBUG_WX
 void ptdump_check_wx(void);
 #else
 static inline void ptdump_check_wx(void) { }
diff --git a/arch/powerpc/mm/ptdump/Makefile b/arch/powerpc/mm/ptdump/Makefile
index 712762be3cb1..4050cbb55acf 100644
--- a/arch/powerpc/mm/ptdump/Makefile
+++ b/arch/powerpc/mm/ptdump/Makefile
@@ -5,5 +5,10 @@ obj-y	+= ptdump.o
 obj-$(CONFIG_4xx)		+= shared.o
 obj-$(CONFIG_PPC_8xx)		+= 8xx.o
 obj-$(CONFIG_PPC_BOOK3E_MMU)	+= shared.o
-obj-$(CONFIG_PPC_BOOK3S_32)	+= shared.o bats.o segment_regs.o
-obj-$(CONFIG_PPC_BOOK3S_64)	+= book3s64.o hashpagetable.o
+obj-$(CONFIG_PPC_BOOK3S_32)	+= shared.o
+obj-$(CONFIG_PPC_BOOK3S_64)	+= book3s64.o
+
+ifdef CONFIG_PTDUMP_DEBUGFS
+obj-$(CONFIG_PPC_BOOK3S_32)	+= bats.o segment_regs.o
+obj-$(CONFIG_PPC_BOOK3S_64)	+= hashpagetable.o
+endif
diff --git a/arch/powerpc/mm/ptdump/ptdump.c b/arch/powerpc/mm/ptdump/ptdump.c
index fb531bc64fc5..2d80d775d15e 100644
--- a/arch/powerpc/mm/ptdump/ptdump.c
+++ b/arch/powerpc/mm/ptdump/ptdump.c
@@ -16,6 +16,7 @@
 #include <linux/io.h>
 #include <linux/mm.h>
 #include <linux/highmem.h>
+#include <linux/ptdump.h>
 #include <linux/sched.h>
 #include <linux/seq_file.h>
 #include <asm/fixmap.h>
@@ -54,6 +55,7 @@
  *
  */
 struct pg_state {
+	struct ptdump_state ptdump;
 	struct seq_file *seq;
 	const struct addr_marker *marker;
 	unsigned long start_address;
@@ -102,6 +104,11 @@ static struct addr_marker address_markers[] = {
 	{ -1,	NULL },
 };
 
+static struct ptdump_range ptdump_range[] __ro_after_init = {
+	{TASK_SIZE_MAX, ~0UL},
+	{0, 0}
+};
+
 #define pt_dump_seq_printf(m, fmt, args...)	\
 ({						\
 	if (m)					\
@@ -204,10 +211,10 @@ static void note_page_update_state(struct pg_state *st, unsigned long addr, int
 	}
 }
 
-static void note_page(struct pg_state *st, unsigned long addr,
-		      int level, u64 val, unsigned long page_size)
+static void note_page(struct ptdump_state *pt_st, unsigned long addr, int level, u64 val)
 {
 	u64 flag = level >= 0 ? val & pg_level[level].mask : 0;
+	struct pg_state *st = container_of(pt_st, struct pg_state, ptdump);
 
 	/* At first no level is set */
 	if (st->level == -1) {
@@ -245,94 +252,6 @@ static void note_page(struct pg_state *st, unsigned long addr,
 	}
 }
 
-static void walk_pte(struct pg_state *st, pmd_t *pmd, unsigned long start)
-{
-	pte_t *pte = pte_offset_kernel(pmd, 0);
-	unsigned long addr;
-	unsigned int i;
-
-	for (i = 0; i < PTRS_PER_PTE; i++, pte++) {
-		addr = start + i * PAGE_SIZE;
-		note_page(st, addr, 4, pte_val(*pte), PAGE_SIZE);
-
-	}
-}
-
-static void walk_hugepd(struct pg_state *st, hugepd_t *phpd, unsigned long start,
-			int pdshift, int level)
-{
-#ifdef CONFIG_ARCH_HAS_HUGEPD
-	unsigned int i;
-	int shift = hugepd_shift(*phpd);
-	int ptrs_per_hpd = pdshift - shift > 0 ? 1 << (pdshift - shift) : 1;
-
-	if (start & ((1 << shift) - 1))
-		return;
-
-	for (i = 0; i < ptrs_per_hpd; i++) {
-		unsigned long addr = start + (i << shift);
-		pte_t *pte = hugepte_offset(*phpd, addr, pdshift);
-
-		note_page(st, addr, level + 1, pte_val(*pte), 1 << shift);
-	}
-#endif
-}
-
-static void walk_pmd(struct pg_state *st, pud_t *pud, unsigned long start)
-{
-	pmd_t *pmd = pmd_offset(pud, 0);
-	unsigned long addr;
-	unsigned int i;
-
-	for (i = 0; i < PTRS_PER_PMD; i++, pmd++) {
-		addr = start + i * PMD_SIZE;
-		if (!pmd_none(*pmd) && !pmd_is_leaf(*pmd))
-			/* pmd exists */
-			walk_pte(st, pmd, addr);
-		else
-			note_page(st, addr, 3, pmd_val(*pmd), PMD_SIZE);
-	}
-}
-
-static void walk_pud(struct pg_state *st, p4d_t *p4d, unsigned long start)
-{
-	pud_t *pud = pud_offset(p4d, 0);
-	unsigned long addr;
-	unsigned int i;
-
-	for (i = 0; i < PTRS_PER_PUD; i++, pud++) {
-		addr = start + i * PUD_SIZE;
-		if (!pud_none(*pud) && !pud_is_leaf(*pud))
-			/* pud exists */
-			walk_pmd(st, pud, addr);
-		else
-			note_page(st, addr, 2, pud_val(*pud), PUD_SIZE);
-	}
-}
-
-static void walk_pagetables(struct pg_state *st)
-{
-	unsigned int i;
-	unsigned long addr = st->start_address & PGDIR_MASK;
-	pgd_t *pgd = pgd_offset_k(addr);
-
-	/*
-	 * Traverse the linux pagetable structure and dump pages that are in
-	 * the hash pagetable.
-	 */
-	for (i = pgd_index(addr); i < PTRS_PER_PGD; i++, pgd++, addr += PGDIR_SIZE) {
-		p4d_t *p4d = p4d_offset(pgd, 0);
-
-		if (p4d_none(*p4d) || p4d_is_leaf(*p4d))
-			note_page(st, addr, 1, p4d_val(*p4d), PGDIR_SIZE);
-		else if (is_hugepd(__hugepd(p4d_val(*p4d))))
-			walk_hugepd(st, (hugepd_t *)p4d, addr, PGDIR_SHIFT, 1);
-		else
-			/* p4d exists */
-			walk_pud(st, p4d, addr);
-	}
-}
-
 static void populate_markers(void)
 {
 	int i = 0;
@@ -383,17 +302,14 @@ static int ptdump_show(struct seq_file *m, void *v)
 		.seq = m,
 		.marker = address_markers,
 		.level = -1,
-		.start_address = IS_ENABLED(CONFIG_PPC64) ? PAGE_OFFSET : TASK_SIZE,
+		.ptdump = {
+			.note_page = note_page,
+			.range = ptdump_range,
+		}
 	};
 
-#ifdef CONFIG_PPC64
-	if (!radix_enabled())
-		st.start_address = KERN_VIRT_START;
-#endif
-
 	/* Traverse kernel page tables */
-	walk_pagetables(&st);
-	note_page(&st, 0, -1, 0, 0);
+	ptdump_walk_pgd(&st.ptdump, &init_mm, NULL);
 	return 0;
 }
 
@@ -409,23 +325,24 @@ static void build_pgtable_complete_mask(void)
 				pg_level[i].mask |= pg_level[i].flag[j].mask;
 }
 
-#ifdef CONFIG_PPC_DEBUG_WX
+#ifdef CONFIG_DEBUG_WX
 void ptdump_check_wx(void)
 {
 	struct pg_state st = {
 		.seq = NULL,
-		.marker = address_markers,
+		.marker = (struct addr_marker[]) {
+			{ 0, NULL},
+			{ -1, NULL},
+		},
 		.level = -1,
 		.check_wx = true,
-		.start_address = IS_ENABLED(CONFIG_PPC64) ? PAGE_OFFSET : TASK_SIZE,
+		.ptdump = {
+			.note_page = note_page,
+			.range = ptdump_range,
+		}
 	};
 
-#ifdef CONFIG_PPC64
-	if (!radix_enabled())
-		st.start_address = KERN_VIRT_START;
-#endif
-
-	walk_pagetables(&st);
+	ptdump_walk_pgd(&st.ptdump, &init_mm, NULL);
 
 	if (st.wx_pages)
 		pr_warn("Checked W+X mappings: FAILED, %lu W+X pages found\n",
@@ -435,12 +352,21 @@ void ptdump_check_wx(void)
 }
 #endif
 
-static int ptdump_init(void)
+static int __init ptdump_init(void)
 {
+#ifdef CONFIG_PPC64
+	if (!radix_enabled())
+		ptdump_range[0].start = KERN_VIRT_START;
+	else
+		ptdump_range[0].start = PAGE_OFFSET;
+#endif
+
 	populate_markers();
 	build_pgtable_complete_mask();
-	debugfs_create_file("kernel_page_tables", 0400, NULL, NULL,
-			    &ptdump_fops);
+
+	if (IS_ENABLED(CONFIG_PTDUMP_DEBUGFS))
+		debugfs_create_file("kernel_page_tables", 0400, NULL, NULL, &ptdump_fops);
+
 	return 0;
 }
 device_initcall(ptdump_init);
-- 
2.25.0


^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH v4 1/4] powerpc/ptdump: Use DEFINE_SHOW_ATTRIBUTE()
  2021-07-08 16:49 ` Christophe Leroy
                   ` (3 preceding siblings ...)
  (?)
@ 2021-08-27 13:16 ` Michael Ellerman
  -1 siblings, 0 replies; 25+ messages in thread
From: Michael Ellerman @ 2021-08-27 13:16 UTC (permalink / raw)
  To: Michael Ellerman, Christophe Leroy, Paul Mackerras,
	Benjamin Herrenschmidt
  Cc: linuxppc-dev, linux-kernel

On Thu, 8 Jul 2021 16:49:40 +0000 (UTC), Christophe Leroy wrote:
> Use DEFINE_SHOW_ATTRIBUTE() instead of open coding
> open() and fops.
> 
> 
> 
> 

Applied to powerpc/next.

[1/4] powerpc/ptdump: Use DEFINE_SHOW_ATTRIBUTE()
      https://git.kernel.org/powerpc/c/11f27a7fa4ca27935de74e3eb052bdc430d5f8d8
[2/4] powerpc/ptdump: Remove unused 'page_size' parameter
      https://git.kernel.org/powerpc/c/64b87b0c70e0fd28352895cba3c0a9631e0072dd
[3/4] powerpc/ptdump: Reduce level numbers by 1 in note_page() and add p4d level
      https://git.kernel.org/powerpc/c/cf98d2b6eea6a1b2c43f85680ad58fcc3ea9496b
[4/4] powerpc/ptdump: Convert powerpc to GENERIC_PTDUMP
      https://git.kernel.org/powerpc/c/e084728393a58e7fdafeee2e6b20e0faff09b557

cheers

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH v4 4/4] powerpc/ptdump: Convert powerpc to GENERIC_PTDUMP
  2021-07-08 16:49   ` Christophe Leroy
@ 2021-08-29 18:55     ` Nathan Chancellor
  -1 siblings, 0 replies; 25+ messages in thread
From: Nathan Chancellor @ 2021-08-29 18:55 UTC (permalink / raw)
  To: Christophe Leroy
  Cc: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman,
	linux-kernel, linuxppc-dev

Hi Christophe,

On Thu, Jul 08, 2021 at 04:49:43PM +0000, Christophe Leroy wrote:
> This patch converts powerpc to the generic PTDUMP implementation.
> 
> Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>

This patch as commit e084728393a5 ("powerpc/ptdump: Convert powerpc to
GENERIC_PTDUMP") in powerpc/next causes a panic with Fedora's ppc64le
config [1] when booting up in QEMU with [2]:

[    1.621864] BUG: Unable to handle kernel data access on read at 0xc0eeff7f00000000
[    1.623058] Faulting instruction address: 0xc00000000045e5fc
[    1.623832] Oops: Kernel access of bad area, sig: 11 [#1]
[    1.624318] LE PAGE_SIZE=64K MMU=Hash SMP NR_CPUS=2048 NUMA PowerNV
[    1.625015] Modules linked in:
[    1.625463] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.14.0-rc7-next-20210827 #16
[    1.626237] NIP:  c00000000045e5fc LR: c00000000045e580 CTR: c000000000518220
[    1.626839] REGS: c00000000752b820 TRAP: 0380   Not tainted  (5.14.0-rc7-next-20210827)
[    1.627528] MSR:  9000000002009033 <SF,HV,VEC,EE,ME,IR,DR,RI,LE>  CR: 84002482  XER: 20000000
[    1.628449] CFAR: c000000000518300 IRQMASK: 0
[    1.628449] GPR00: c00000000045e580 c00000000752bac0 c0000000028a9300 0000000000000000
[    1.628449] GPR04: c200800000000000 ffffffffffffffff 000000000000000a 0000000000000001
[    1.628449] GPR08: c0eeff7f00000000 0000000000000012 0000000000000000 0000000000000000
[    1.628449] GPR12: 0000000000000000 c000000002b20000 fffffffffffffffe c000000002971a70
[    1.628449] GPR16: c000000002960040 c0000000011a8f98 c00000000752bbf0 ffffffffffffffff
[    1.628449] GPR20: c2008fffffffffff c0eeff7f00000000 c000000002971a68 c00a0003ff000000
[    1.628449] GPR24: c000000002971a78 0000000000000002 0000000000000001 c0000000011a8f98
[    1.628449] GPR28: c0000000011a8f98 c0000000028daef8 c200800000000000 c200900000000000
[    1.634090] NIP [c00000000045e5fc] __walk_page_range+0x2bc/0xce0
[    1.635117] LR [c00000000045e580] __walk_page_range+0x240/0xce0
[    1.635755] Call Trace:
[    1.636018] [c00000000752bac0] [c00000000045e580] __walk_page_range+0x240/0xce0 (unreliable)
[    1.636811] [c00000000752bbd0] [c00000000045f234] walk_page_range_novma+0x74/0xb0
[    1.637459] [c00000000752bc20] [c000000000518448] ptdump_walk_pgd+0x98/0x170
[    1.638138] [c00000000752bc70] [c0000000000aa988] ptdump_check_wx+0x88/0xd0
[    1.638738] [c00000000752bd50] [c00000000008d6d8] mark_rodata_ro+0x48/0x80
[    1.639299] [c00000000752bdb0] [c000000000012a34] kernel_init+0x74/0x1a0
[    1.639842] [c00000000752be10] [c00000000000cfd4] ret_from_kernel_thread+0x5c/0x64
[    1.640597] Instruction dump:
[    1.641021] 38e7ffff 39490010 7ce707b4 7fca5436 79081564 7d4a3838 7908f082 794a1f24
[    1.641740] 78a8f00e 30e6ffff 7ea85214 7ce73110 <7d48502a> 78f90fa4 2c2a0000 39290010
[    1.642771] ---[ end trace 6cf72b085097ad52 ]---
[    1.643220]
[    2.644228] Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b
[    2.645523] ---[ end Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b ]---

This is not compiler specific, I can reproduce it with GCC 11.2.0 and
binutils 2.37. If there is any additional information I can provide,
please let me know.

[1]: https://src.fedoraproject.org/rpms/kernel/raw/rawhide/f/kernel-ppc64le-fedora.config
[2]: https://github.com/ClangBuiltLinux/boot-utils

Cheers,
Nathan

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH v4 4/4] powerpc/ptdump: Convert powerpc to GENERIC_PTDUMP
@ 2021-08-29 18:55     ` Nathan Chancellor
  0 siblings, 0 replies; 25+ messages in thread
From: Nathan Chancellor @ 2021-08-29 18:55 UTC (permalink / raw)
  To: Christophe Leroy; +Cc: linuxppc-dev, Paul Mackerras, linux-kernel

Hi Christophe,

On Thu, Jul 08, 2021 at 04:49:43PM +0000, Christophe Leroy wrote:
> This patch converts powerpc to the generic PTDUMP implementation.
> 
> Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>

This patch as commit e084728393a5 ("powerpc/ptdump: Convert powerpc to
GENERIC_PTDUMP") in powerpc/next causes a panic with Fedora's ppc64le
config [1] when booting up in QEMU with [2]:

[    1.621864] BUG: Unable to handle kernel data access on read at 0xc0eeff7f00000000
[    1.623058] Faulting instruction address: 0xc00000000045e5fc
[    1.623832] Oops: Kernel access of bad area, sig: 11 [#1]
[    1.624318] LE PAGE_SIZE=64K MMU=Hash SMP NR_CPUS=2048 NUMA PowerNV
[    1.625015] Modules linked in:
[    1.625463] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.14.0-rc7-next-20210827 #16
[    1.626237] NIP:  c00000000045e5fc LR: c00000000045e580 CTR: c000000000518220
[    1.626839] REGS: c00000000752b820 TRAP: 0380   Not tainted  (5.14.0-rc7-next-20210827)
[    1.627528] MSR:  9000000002009033 <SF,HV,VEC,EE,ME,IR,DR,RI,LE>  CR: 84002482  XER: 20000000
[    1.628449] CFAR: c000000000518300 IRQMASK: 0
[    1.628449] GPR00: c00000000045e580 c00000000752bac0 c0000000028a9300 0000000000000000
[    1.628449] GPR04: c200800000000000 ffffffffffffffff 000000000000000a 0000000000000001
[    1.628449] GPR08: c0eeff7f00000000 0000000000000012 0000000000000000 0000000000000000
[    1.628449] GPR12: 0000000000000000 c000000002b20000 fffffffffffffffe c000000002971a70
[    1.628449] GPR16: c000000002960040 c0000000011a8f98 c00000000752bbf0 ffffffffffffffff
[    1.628449] GPR20: c2008fffffffffff c0eeff7f00000000 c000000002971a68 c00a0003ff000000
[    1.628449] GPR24: c000000002971a78 0000000000000002 0000000000000001 c0000000011a8f98
[    1.628449] GPR28: c0000000011a8f98 c0000000028daef8 c200800000000000 c200900000000000
[    1.634090] NIP [c00000000045e5fc] __walk_page_range+0x2bc/0xce0
[    1.635117] LR [c00000000045e580] __walk_page_range+0x240/0xce0
[    1.635755] Call Trace:
[    1.636018] [c00000000752bac0] [c00000000045e580] __walk_page_range+0x240/0xce0 (unreliable)
[    1.636811] [c00000000752bbd0] [c00000000045f234] walk_page_range_novma+0x74/0xb0
[    1.637459] [c00000000752bc20] [c000000000518448] ptdump_walk_pgd+0x98/0x170
[    1.638138] [c00000000752bc70] [c0000000000aa988] ptdump_check_wx+0x88/0xd0
[    1.638738] [c00000000752bd50] [c00000000008d6d8] mark_rodata_ro+0x48/0x80
[    1.639299] [c00000000752bdb0] [c000000000012a34] kernel_init+0x74/0x1a0
[    1.639842] [c00000000752be10] [c00000000000cfd4] ret_from_kernel_thread+0x5c/0x64
[    1.640597] Instruction dump:
[    1.641021] 38e7ffff 39490010 7ce707b4 7fca5436 79081564 7d4a3838 7908f082 794a1f24
[    1.641740] 78a8f00e 30e6ffff 7ea85214 7ce73110 <7d48502a> 78f90fa4 2c2a0000 39290010
[    1.642771] ---[ end trace 6cf72b085097ad52 ]---
[    1.643220]
[    2.644228] Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b
[    2.645523] ---[ end Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b ]---

This is not compiler specific, I can reproduce it with GCC 11.2.0 and
binutils 2.37. If there is any additional information I can provide,
please let me know.

[1]: https://src.fedoraproject.org/rpms/kernel/raw/rawhide/f/kernel-ppc64le-fedora.config
[2]: https://github.com/ClangBuiltLinux/boot-utils

Cheers,
Nathan

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH v4 4/4] powerpc/ptdump: Convert powerpc to GENERIC_PTDUMP
  2021-08-29 18:55     ` Nathan Chancellor
@ 2021-08-29 19:11       ` Christophe Leroy
  -1 siblings, 0 replies; 25+ messages in thread
From: Christophe Leroy @ 2021-08-29 19:11 UTC (permalink / raw)
  To: Nathan Chancellor
  Cc: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman,
	linux-kernel, linuxppc-dev

Hi Nathan,

Le 29/08/2021 à 20:55, Nathan Chancellor a écrit :
> Hi Christophe,
> 
> On Thu, Jul 08, 2021 at 04:49:43PM +0000, Christophe Leroy wrote:
>> This patch converts powerpc to the generic PTDUMP implementation.
>>
>> Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
> 
> This patch as commit e084728393a5 ("powerpc/ptdump: Convert powerpc to
> GENERIC_PTDUMP") in powerpc/next causes a panic with Fedora's ppc64le
> config [1] when booting up in QEMU with [2]:
> 
> [    1.621864] BUG: Unable to handle kernel data access on read at 0xc0eeff7f00000000
> [    1.623058] Faulting instruction address: 0xc00000000045e5fc
> [    1.623832] Oops: Kernel access of bad area, sig: 11 [#1]
> [    1.624318] LE PAGE_SIZE=64K MMU=Hash SMP NR_CPUS=2048 NUMA PowerNV
> [    1.625015] Modules linked in:
> [    1.625463] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.14.0-rc7-next-20210827 #16
> [    1.626237] NIP:  c00000000045e5fc LR: c00000000045e580 CTR: c000000000518220
> [    1.626839] REGS: c00000000752b820 TRAP: 0380   Not tainted  (5.14.0-rc7-next-20210827)
> [    1.627528] MSR:  9000000002009033 <SF,HV,VEC,EE,ME,IR,DR,RI,LE>  CR: 84002482  XER: 20000000
> [    1.628449] CFAR: c000000000518300 IRQMASK: 0
> [    1.628449] GPR00: c00000000045e580 c00000000752bac0 c0000000028a9300 0000000000000000
> [    1.628449] GPR04: c200800000000000 ffffffffffffffff 000000000000000a 0000000000000001
> [    1.628449] GPR08: c0eeff7f00000000 0000000000000012 0000000000000000 0000000000000000
> [    1.628449] GPR12: 0000000000000000 c000000002b20000 fffffffffffffffe c000000002971a70
> [    1.628449] GPR16: c000000002960040 c0000000011a8f98 c00000000752bbf0 ffffffffffffffff
> [    1.628449] GPR20: c2008fffffffffff c0eeff7f00000000 c000000002971a68 c00a0003ff000000
> [    1.628449] GPR24: c000000002971a78 0000000000000002 0000000000000001 c0000000011a8f98
> [    1.628449] GPR28: c0000000011a8f98 c0000000028daef8 c200800000000000 c200900000000000
> [    1.634090] NIP [c00000000045e5fc] __walk_page_range+0x2bc/0xce0
> [    1.635117] LR [c00000000045e580] __walk_page_range+0x240/0xce0
> [    1.635755] Call Trace:
> [    1.636018] [c00000000752bac0] [c00000000045e580] __walk_page_range+0x240/0xce0 (unreliable)
> [    1.636811] [c00000000752bbd0] [c00000000045f234] walk_page_range_novma+0x74/0xb0
> [    1.637459] [c00000000752bc20] [c000000000518448] ptdump_walk_pgd+0x98/0x170
> [    1.638138] [c00000000752bc70] [c0000000000aa988] ptdump_check_wx+0x88/0xd0
> [    1.638738] [c00000000752bd50] [c00000000008d6d8] mark_rodata_ro+0x48/0x80
> [    1.639299] [c00000000752bdb0] [c000000000012a34] kernel_init+0x74/0x1a0
> [    1.639842] [c00000000752be10] [c00000000000cfd4] ret_from_kernel_thread+0x5c/0x64
> [    1.640597] Instruction dump:
> [    1.641021] 38e7ffff 39490010 7ce707b4 7fca5436 79081564 7d4a3838 7908f082 794a1f24
> [    1.641740] 78a8f00e 30e6ffff 7ea85214 7ce73110 <7d48502a> 78f90fa4 2c2a0000 39290010
> [    1.642771] ---[ end trace 6cf72b085097ad52 ]---
> [    1.643220]
> [    2.644228] Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b
> [    2.645523] ---[ end Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b ]---
> 
> This is not compiler specific, I can reproduce it with GCC 11.2.0 and
> binutils 2.37. If there is any additional information I can provide,
> please let me know.

Can you provide a dissassembly of __walk_page_range() ? Or provide your vmlinux binary.

Thanks
Christophe


> 
> [1]: https://src.fedoraproject.org/rpms/kernel/raw/rawhide/f/kernel-ppc64le-fedora.config
> [2]: https://github.com/ClangBuiltLinux/boot-utils
> 
> Cheers,
> Nathan
> 

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH v4 4/4] powerpc/ptdump: Convert powerpc to GENERIC_PTDUMP
@ 2021-08-29 19:11       ` Christophe Leroy
  0 siblings, 0 replies; 25+ messages in thread
From: Christophe Leroy @ 2021-08-29 19:11 UTC (permalink / raw)
  To: Nathan Chancellor; +Cc: linuxppc-dev, Paul Mackerras, linux-kernel

Hi Nathan,

Le 29/08/2021 à 20:55, Nathan Chancellor a écrit :
> Hi Christophe,
> 
> On Thu, Jul 08, 2021 at 04:49:43PM +0000, Christophe Leroy wrote:
>> This patch converts powerpc to the generic PTDUMP implementation.
>>
>> Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
> 
> This patch as commit e084728393a5 ("powerpc/ptdump: Convert powerpc to
> GENERIC_PTDUMP") in powerpc/next causes a panic with Fedora's ppc64le
> config [1] when booting up in QEMU with [2]:
> 
> [    1.621864] BUG: Unable to handle kernel data access on read at 0xc0eeff7f00000000
> [    1.623058] Faulting instruction address: 0xc00000000045e5fc
> [    1.623832] Oops: Kernel access of bad area, sig: 11 [#1]
> [    1.624318] LE PAGE_SIZE=64K MMU=Hash SMP NR_CPUS=2048 NUMA PowerNV
> [    1.625015] Modules linked in:
> [    1.625463] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.14.0-rc7-next-20210827 #16
> [    1.626237] NIP:  c00000000045e5fc LR: c00000000045e580 CTR: c000000000518220
> [    1.626839] REGS: c00000000752b820 TRAP: 0380   Not tainted  (5.14.0-rc7-next-20210827)
> [    1.627528] MSR:  9000000002009033 <SF,HV,VEC,EE,ME,IR,DR,RI,LE>  CR: 84002482  XER: 20000000
> [    1.628449] CFAR: c000000000518300 IRQMASK: 0
> [    1.628449] GPR00: c00000000045e580 c00000000752bac0 c0000000028a9300 0000000000000000
> [    1.628449] GPR04: c200800000000000 ffffffffffffffff 000000000000000a 0000000000000001
> [    1.628449] GPR08: c0eeff7f00000000 0000000000000012 0000000000000000 0000000000000000
> [    1.628449] GPR12: 0000000000000000 c000000002b20000 fffffffffffffffe c000000002971a70
> [    1.628449] GPR16: c000000002960040 c0000000011a8f98 c00000000752bbf0 ffffffffffffffff
> [    1.628449] GPR20: c2008fffffffffff c0eeff7f00000000 c000000002971a68 c00a0003ff000000
> [    1.628449] GPR24: c000000002971a78 0000000000000002 0000000000000001 c0000000011a8f98
> [    1.628449] GPR28: c0000000011a8f98 c0000000028daef8 c200800000000000 c200900000000000
> [    1.634090] NIP [c00000000045e5fc] __walk_page_range+0x2bc/0xce0
> [    1.635117] LR [c00000000045e580] __walk_page_range+0x240/0xce0
> [    1.635755] Call Trace:
> [    1.636018] [c00000000752bac0] [c00000000045e580] __walk_page_range+0x240/0xce0 (unreliable)
> [    1.636811] [c00000000752bbd0] [c00000000045f234] walk_page_range_novma+0x74/0xb0
> [    1.637459] [c00000000752bc20] [c000000000518448] ptdump_walk_pgd+0x98/0x170
> [    1.638138] [c00000000752bc70] [c0000000000aa988] ptdump_check_wx+0x88/0xd0
> [    1.638738] [c00000000752bd50] [c00000000008d6d8] mark_rodata_ro+0x48/0x80
> [    1.639299] [c00000000752bdb0] [c000000000012a34] kernel_init+0x74/0x1a0
> [    1.639842] [c00000000752be10] [c00000000000cfd4] ret_from_kernel_thread+0x5c/0x64
> [    1.640597] Instruction dump:
> [    1.641021] 38e7ffff 39490010 7ce707b4 7fca5436 79081564 7d4a3838 7908f082 794a1f24
> [    1.641740] 78a8f00e 30e6ffff 7ea85214 7ce73110 <7d48502a> 78f90fa4 2c2a0000 39290010
> [    1.642771] ---[ end trace 6cf72b085097ad52 ]---
> [    1.643220]
> [    2.644228] Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b
> [    2.645523] ---[ end Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b ]---
> 
> This is not compiler specific, I can reproduce it with GCC 11.2.0 and
> binutils 2.37. If there is any additional information I can provide,
> please let me know.

Can you provide a dissassembly of __walk_page_range() ? Or provide your vmlinux binary.

Thanks
Christophe


> 
> [1]: https://src.fedoraproject.org/rpms/kernel/raw/rawhide/f/kernel-ppc64le-fedora.config
> [2]: https://github.com/ClangBuiltLinux/boot-utils
> 
> Cheers,
> Nathan
> 

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH v4 4/4] powerpc/ptdump: Convert powerpc to GENERIC_PTDUMP
  2021-08-29 19:11       ` Christophe Leroy
@ 2021-08-29 21:39         ` Nathan Chancellor
  -1 siblings, 0 replies; 25+ messages in thread
From: Nathan Chancellor @ 2021-08-29 21:39 UTC (permalink / raw)
  To: Christophe Leroy
  Cc: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman,
	linux-kernel, linuxppc-dev

On Sun, Aug 29, 2021 at 09:11:47PM +0200, Christophe Leroy wrote:
> Hi Nathan,
> 
> Le 29/08/2021 à 20:55, Nathan Chancellor a écrit :
> > Hi Christophe,
> > 
> > On Thu, Jul 08, 2021 at 04:49:43PM +0000, Christophe Leroy wrote:
> > > This patch converts powerpc to the generic PTDUMP implementation.
> > > 
> > > Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
> > 
> > This patch as commit e084728393a5 ("powerpc/ptdump: Convert powerpc to
> > GENERIC_PTDUMP") in powerpc/next causes a panic with Fedora's ppc64le
> > config [1] when booting up in QEMU with [2]:
> > 
> > [    1.621864] BUG: Unable to handle kernel data access on read at 0xc0eeff7f00000000
> > [    1.623058] Faulting instruction address: 0xc00000000045e5fc
> > [    1.623832] Oops: Kernel access of bad area, sig: 11 [#1]
> > [    1.624318] LE PAGE_SIZE=64K MMU=Hash SMP NR_CPUS=2048 NUMA PowerNV
> > [    1.625015] Modules linked in:
> > [    1.625463] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.14.0-rc7-next-20210827 #16
> > [    1.626237] NIP:  c00000000045e5fc LR: c00000000045e580 CTR: c000000000518220
> > [    1.626839] REGS: c00000000752b820 TRAP: 0380   Not tainted  (5.14.0-rc7-next-20210827)
> > [    1.627528] MSR:  9000000002009033 <SF,HV,VEC,EE,ME,IR,DR,RI,LE>  CR: 84002482  XER: 20000000
> > [    1.628449] CFAR: c000000000518300 IRQMASK: 0
> > [    1.628449] GPR00: c00000000045e580 c00000000752bac0 c0000000028a9300 0000000000000000
> > [    1.628449] GPR04: c200800000000000 ffffffffffffffff 000000000000000a 0000000000000001
> > [    1.628449] GPR08: c0eeff7f00000000 0000000000000012 0000000000000000 0000000000000000
> > [    1.628449] GPR12: 0000000000000000 c000000002b20000 fffffffffffffffe c000000002971a70
> > [    1.628449] GPR16: c000000002960040 c0000000011a8f98 c00000000752bbf0 ffffffffffffffff
> > [    1.628449] GPR20: c2008fffffffffff c0eeff7f00000000 c000000002971a68 c00a0003ff000000
> > [    1.628449] GPR24: c000000002971a78 0000000000000002 0000000000000001 c0000000011a8f98
> > [    1.628449] GPR28: c0000000011a8f98 c0000000028daef8 c200800000000000 c200900000000000
> > [    1.634090] NIP [c00000000045e5fc] __walk_page_range+0x2bc/0xce0
> > [    1.635117] LR [c00000000045e580] __walk_page_range+0x240/0xce0
> > [    1.635755] Call Trace:
> > [    1.636018] [c00000000752bac0] [c00000000045e580] __walk_page_range+0x240/0xce0 (unreliable)
> > [    1.636811] [c00000000752bbd0] [c00000000045f234] walk_page_range_novma+0x74/0xb0
> > [    1.637459] [c00000000752bc20] [c000000000518448] ptdump_walk_pgd+0x98/0x170
> > [    1.638138] [c00000000752bc70] [c0000000000aa988] ptdump_check_wx+0x88/0xd0
> > [    1.638738] [c00000000752bd50] [c00000000008d6d8] mark_rodata_ro+0x48/0x80
> > [    1.639299] [c00000000752bdb0] [c000000000012a34] kernel_init+0x74/0x1a0
> > [    1.639842] [c00000000752be10] [c00000000000cfd4] ret_from_kernel_thread+0x5c/0x64
> > [    1.640597] Instruction dump:
> > [    1.641021] 38e7ffff 39490010 7ce707b4 7fca5436 79081564 7d4a3838 7908f082 794a1f24
> > [    1.641740] 78a8f00e 30e6ffff 7ea85214 7ce73110 <7d48502a> 78f90fa4 2c2a0000 39290010
> > [    1.642771] ---[ end trace 6cf72b085097ad52 ]---
> > [    1.643220]
> > [    2.644228] Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b
> > [    2.645523] ---[ end Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b ]---
> > 
> > This is not compiler specific, I can reproduce it with GCC 11.2.0 and
> > binutils 2.37. If there is any additional information I can provide,
> > please let me know.
> 
> Can you provide a dissassembly of __walk_page_range() ? Or provide your vmlinux binary.

Sure thing!

Disassembly of mm/pagewalk.o: https://gist.github.com/2cc2cadc598fe55b0f5cea0d75e89186

vmlinux binary (zstd compressed, 123MB): https://1drv.ms/u/s!AsQNYeB-IEbqjjai5EiHUBiPYzI3?e=kqUwpN

Cheers,
Nathan

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH v4 4/4] powerpc/ptdump: Convert powerpc to GENERIC_PTDUMP
@ 2021-08-29 21:39         ` Nathan Chancellor
  0 siblings, 0 replies; 25+ messages in thread
From: Nathan Chancellor @ 2021-08-29 21:39 UTC (permalink / raw)
  To: Christophe Leroy; +Cc: linuxppc-dev, Paul Mackerras, linux-kernel

On Sun, Aug 29, 2021 at 09:11:47PM +0200, Christophe Leroy wrote:
> Hi Nathan,
> 
> Le 29/08/2021 à 20:55, Nathan Chancellor a écrit :
> > Hi Christophe,
> > 
> > On Thu, Jul 08, 2021 at 04:49:43PM +0000, Christophe Leroy wrote:
> > > This patch converts powerpc to the generic PTDUMP implementation.
> > > 
> > > Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
> > 
> > This patch as commit e084728393a5 ("powerpc/ptdump: Convert powerpc to
> > GENERIC_PTDUMP") in powerpc/next causes a panic with Fedora's ppc64le
> > config [1] when booting up in QEMU with [2]:
> > 
> > [    1.621864] BUG: Unable to handle kernel data access on read at 0xc0eeff7f00000000
> > [    1.623058] Faulting instruction address: 0xc00000000045e5fc
> > [    1.623832] Oops: Kernel access of bad area, sig: 11 [#1]
> > [    1.624318] LE PAGE_SIZE=64K MMU=Hash SMP NR_CPUS=2048 NUMA PowerNV
> > [    1.625015] Modules linked in:
> > [    1.625463] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.14.0-rc7-next-20210827 #16
> > [    1.626237] NIP:  c00000000045e5fc LR: c00000000045e580 CTR: c000000000518220
> > [    1.626839] REGS: c00000000752b820 TRAP: 0380   Not tainted  (5.14.0-rc7-next-20210827)
> > [    1.627528] MSR:  9000000002009033 <SF,HV,VEC,EE,ME,IR,DR,RI,LE>  CR: 84002482  XER: 20000000
> > [    1.628449] CFAR: c000000000518300 IRQMASK: 0
> > [    1.628449] GPR00: c00000000045e580 c00000000752bac0 c0000000028a9300 0000000000000000
> > [    1.628449] GPR04: c200800000000000 ffffffffffffffff 000000000000000a 0000000000000001
> > [    1.628449] GPR08: c0eeff7f00000000 0000000000000012 0000000000000000 0000000000000000
> > [    1.628449] GPR12: 0000000000000000 c000000002b20000 fffffffffffffffe c000000002971a70
> > [    1.628449] GPR16: c000000002960040 c0000000011a8f98 c00000000752bbf0 ffffffffffffffff
> > [    1.628449] GPR20: c2008fffffffffff c0eeff7f00000000 c000000002971a68 c00a0003ff000000
> > [    1.628449] GPR24: c000000002971a78 0000000000000002 0000000000000001 c0000000011a8f98
> > [    1.628449] GPR28: c0000000011a8f98 c0000000028daef8 c200800000000000 c200900000000000
> > [    1.634090] NIP [c00000000045e5fc] __walk_page_range+0x2bc/0xce0
> > [    1.635117] LR [c00000000045e580] __walk_page_range+0x240/0xce0
> > [    1.635755] Call Trace:
> > [    1.636018] [c00000000752bac0] [c00000000045e580] __walk_page_range+0x240/0xce0 (unreliable)
> > [    1.636811] [c00000000752bbd0] [c00000000045f234] walk_page_range_novma+0x74/0xb0
> > [    1.637459] [c00000000752bc20] [c000000000518448] ptdump_walk_pgd+0x98/0x170
> > [    1.638138] [c00000000752bc70] [c0000000000aa988] ptdump_check_wx+0x88/0xd0
> > [    1.638738] [c00000000752bd50] [c00000000008d6d8] mark_rodata_ro+0x48/0x80
> > [    1.639299] [c00000000752bdb0] [c000000000012a34] kernel_init+0x74/0x1a0
> > [    1.639842] [c00000000752be10] [c00000000000cfd4] ret_from_kernel_thread+0x5c/0x64
> > [    1.640597] Instruction dump:
> > [    1.641021] 38e7ffff 39490010 7ce707b4 7fca5436 79081564 7d4a3838 7908f082 794a1f24
> > [    1.641740] 78a8f00e 30e6ffff 7ea85214 7ce73110 <7d48502a> 78f90fa4 2c2a0000 39290010
> > [    1.642771] ---[ end trace 6cf72b085097ad52 ]---
> > [    1.643220]
> > [    2.644228] Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b
> > [    2.645523] ---[ end Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b ]---
> > 
> > This is not compiler specific, I can reproduce it with GCC 11.2.0 and
> > binutils 2.37. If there is any additional information I can provide,
> > please let me know.
> 
> Can you provide a dissassembly of __walk_page_range() ? Or provide your vmlinux binary.

Sure thing!

Disassembly of mm/pagewalk.o: https://gist.github.com/2cc2cadc598fe55b0f5cea0d75e89186

vmlinux binary (zstd compressed, 123MB): https://1drv.ms/u/s!AsQNYeB-IEbqjjai5EiHUBiPYzI3?e=kqUwpN

Cheers,
Nathan

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH v4 4/4] powerpc/ptdump: Convert powerpc to GENERIC_PTDUMP
  2021-08-29 19:11       ` Christophe Leroy
@ 2021-08-30  7:52         ` Michael Ellerman
  -1 siblings, 0 replies; 25+ messages in thread
From: Michael Ellerman @ 2021-08-30  7:52 UTC (permalink / raw)
  To: Christophe Leroy, Nathan Chancellor
  Cc: Benjamin Herrenschmidt, Paul Mackerras, linux-kernel, linuxppc-dev

Christophe Leroy <christophe.leroy@csgroup.eu> writes:
> Hi Nathan,
>
> Le 29/08/2021 à 20:55, Nathan Chancellor a écrit :
>> Hi Christophe,
>> 
>> On Thu, Jul 08, 2021 at 04:49:43PM +0000, Christophe Leroy wrote:
>>> This patch converts powerpc to the generic PTDUMP implementation.
>>>
>>> Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
>> 
>> This patch as commit e084728393a5 ("powerpc/ptdump: Convert powerpc to
>> GENERIC_PTDUMP") in powerpc/next causes a panic with Fedora's ppc64le
>> config [1] when booting up in QEMU with [2]:
>> 
>> [    1.621864] BUG: Unable to handle kernel data access on read at 0xc0eeff7f00000000
>> [    1.623058] Faulting instruction address: 0xc00000000045e5fc
>> [    1.623832] Oops: Kernel access of bad area, sig: 11 [#1]
>> [    1.624318] LE PAGE_SIZE=64K MMU=Hash SMP NR_CPUS=2048 NUMA PowerNV
>> [    1.625015] Modules linked in:
>> [    1.625463] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.14.0-rc7-next-20210827 #16
>> [    1.626237] NIP:  c00000000045e5fc LR: c00000000045e580 CTR: c000000000518220
>> [    1.626839] REGS: c00000000752b820 TRAP: 0380   Not tainted  (5.14.0-rc7-next-20210827)
>> [    1.627528] MSR:  9000000002009033 <SF,HV,VEC,EE,ME,IR,DR,RI,LE>  CR: 84002482  XER: 20000000
>> [    1.628449] CFAR: c000000000518300 IRQMASK: 0
>> [    1.628449] GPR00: c00000000045e580 c00000000752bac0 c0000000028a9300 0000000000000000
>> [    1.628449] GPR04: c200800000000000 ffffffffffffffff 000000000000000a 0000000000000001
>> [    1.628449] GPR08: c0eeff7f00000000 0000000000000012 0000000000000000 0000000000000000
>> [    1.628449] GPR12: 0000000000000000 c000000002b20000 fffffffffffffffe c000000002971a70
>> [    1.628449] GPR16: c000000002960040 c0000000011a8f98 c00000000752bbf0 ffffffffffffffff
>> [    1.628449] GPR20: c2008fffffffffff c0eeff7f00000000 c000000002971a68 c00a0003ff000000
>> [    1.628449] GPR24: c000000002971a78 0000000000000002 0000000000000001 c0000000011a8f98
>> [    1.628449] GPR28: c0000000011a8f98 c0000000028daef8 c200800000000000 c200900000000000
>> [    1.634090] NIP [c00000000045e5fc] __walk_page_range+0x2bc/0xce0
>> [    1.635117] LR [c00000000045e580] __walk_page_range+0x240/0xce0
>> [    1.635755] Call Trace:
>> [    1.636018] [c00000000752bac0] [c00000000045e580] __walk_page_range+0x240/0xce0 (unreliable)
>> [    1.636811] [c00000000752bbd0] [c00000000045f234] walk_page_range_novma+0x74/0xb0
>> [    1.637459] [c00000000752bc20] [c000000000518448] ptdump_walk_pgd+0x98/0x170
>> [    1.638138] [c00000000752bc70] [c0000000000aa988] ptdump_check_wx+0x88/0xd0
>> [    1.638738] [c00000000752bd50] [c00000000008d6d8] mark_rodata_ro+0x48/0x80
>> [    1.639299] [c00000000752bdb0] [c000000000012a34] kernel_init+0x74/0x1a0
>> [    1.639842] [c00000000752be10] [c00000000000cfd4] ret_from_kernel_thread+0x5c/0x64
>> [    1.640597] Instruction dump:
>> [    1.641021] 38e7ffff 39490010 7ce707b4 7fca5436 79081564 7d4a3838 7908f082 794a1f24
>> [    1.641740] 78a8f00e 30e6ffff 7ea85214 7ce73110 <7d48502a> 78f90fa4 2c2a0000 39290010
>> [    1.642771] ---[ end trace 6cf72b085097ad52 ]---
>> [    1.643220]
>> [    2.644228] Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b
>> [    2.645523] ---[ end Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b ]---
>> 
>> This is not compiler specific, I can reproduce it with GCC 11.2.0 and
>> binutils 2.37. If there is any additional information I can provide,
>> please let me know.
>
> Can you provide a dissassembly of __walk_page_range() ? Or provide your vmlinux binary.

It seems to be walking of the end of the pgd.

[    3.373800] walk_p4d_range: addr c00fff0000000000 end c00fff8000000000
[    3.373852] walk_p4d_range: addr c00fff8000000000 end c010000000000000	<- end of pgd at PAGE_OFFSET + 4PB
[    3.373905] walk_p4d_range: addr c010000000000000 end c010008000000000
[    3.373957] walk_p4d_range: addr c010008000000000 end c010010000000000
[    3.374009] walk_p4d_range: addr c010010000000000 end c010018000000000
[    3.374060] walk_p4d_range: addr c010018000000000 end c010020000000000
[    3.376727] walk_p4d_range: addr c010020000000000 end c010028000000000
[    3.376780] walk_p4d_range: addr c010028000000000 end c010030000000000
[    3.376831] walk_p4d_range: addr c010030000000000 end c010038000000000
[    3.376883] walk_p4d_range: addr c010038000000000 end c010040000000000
[    3.376935] walk_p4d_range: addr c010040000000000 end c010048000000000
[    3.376988] walk_p4d_range: addr c010048000000000 end c010050000000000
[    3.377039] walk_p4d_range: addr c010050000000000 end c010058000000000
[    3.377091] walk_p4d_range: addr c010058000000000 end c010060000000000
[    3.377143] walk_p4d_range: addr c010060000000000 end c010068000000000
[    3.377244] walk_pud_range: addr c010060000000000 end c010068000000000
[    3.377374] walk_pmd_range: addr c010060100000000 end c010060140000000
[    3.377817] BUG: Unable to handle kernel data access on read at 0xf906a038d8ba8400
[    3.378247] Faulting instruction address: 0xc00000000045b4a4
[    3.378725] Oops: Kernel access of bad area, sig: 11 [#1]
[    3.378843] LE PAGE_SIZE=64K MMU=Radix SMP NR_CPUS=2048 NUMA pSeries
[    3.379118] Modules linked in:
[    3.379422] CPU: 1 PID: 1 Comm: swapper/0 Not tainted 5.14.0-rc2+ #75
[    3.379751] NIP:  c00000000045b4a4 LR: c00000000045b430 CTR: c000000000b4b580
[    3.379833] REGS: c0000000085637c0 TRAP: 0300   Not tainted  (5.14.0-rc2+)
[    3.379940] MSR:  8000000002009033 <SF,VEC,EE,ME,IR,DR,RI,LE>  CR: 8800228f  XER: 20040000
[    3.380284] CFAR: c0000000001f5744 DAR: f906a038d8ba8400 DSISR: 40000000 IRQMASK: 0
[    3.380284] GPR00: c00000000045b430 c000000008563a60 c0000000028a7d00 000000000000003a
[    3.380284] GPR04: 00000000ffffe14d c000000008563748 ffffffffffffffff 00000000000001ff
[    3.380284] GPR08: f906a038d8ba8400 c0100601001fffff c01006013fffffff fffffffffffc51e0
[    3.380284] GPR12: 0000000000002000 c0000000ffffee80 0000000000000001 c000000008563be0
[    3.380284] GPR16: c000000001198118 c0000000028daef8 c000000002971a60 c0000000014bc868
[    3.380284] GPR20: c010060100200000 c000000002971a58 c000000002971a68 c010060100000000
[    3.380284] GPR24: 0000000000000003 c000000001198118 c000000001198118 c000000001000020
[    3.380284] GPR28: 0000000000000000 c010060140000000 c010068000000000 f906a038d8ba8400
[    3.381235] NIP [c00000000045b4a4] __walk_page_range+0x7f4/0xbd0
[    3.381906] LR [c00000000045b430] __walk_page_range+0x780/0xbd0
[    3.382120] Call Trace:
[    3.382240] [c000000008563a60] [c00000000045b430] __walk_page_range+0x780/0xbd0 (unreliable)
[    3.382445] [c000000008563bc0] [c00000000045ba94] walk_page_range_novma+0x74/0xb0
[    3.382548] [c000000008563c10] [c000000000514cd8] ptdump_walk_pgd+0x98/0x170
[    3.382630] [c000000008563c60] [c0000000000aaf70] ptdump_check_wx+0xb0/0x100
[    3.382774] [c000000008563d40] [c00000000008db18] mark_rodata_ro+0x48/0x80
[    3.382849] [c000000008563da0] [c000000000012a18] kernel_init+0x78/0x1c0
[    3.382926] [c000000008563e10] [c00000000000cfd4] ret_from_kernel_thread+0x5c/0x64
[    3.383092] Instruction dump:
[    3.383341] 78c8f00e 7fe84a14 e9360000 e94100a8 39290010 7dc94836 7e89ba14 7d2900d0
[    3.383516] 7e944838 3934ffff 7c295040 40800098 <e93f0000> 2c290000 40820094 e9990028
[    3.384126] ---[ end trace d8e6479034d7a9d1 ]---

cheers

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH v4 4/4] powerpc/ptdump: Convert powerpc to GENERIC_PTDUMP
@ 2021-08-30  7:52         ` Michael Ellerman
  0 siblings, 0 replies; 25+ messages in thread
From: Michael Ellerman @ 2021-08-30  7:52 UTC (permalink / raw)
  To: Christophe Leroy, Nathan Chancellor
  Cc: linuxppc-dev, Paul Mackerras, linux-kernel

Christophe Leroy <christophe.leroy@csgroup.eu> writes:
> Hi Nathan,
>
> Le 29/08/2021 à 20:55, Nathan Chancellor a écrit :
>> Hi Christophe,
>> 
>> On Thu, Jul 08, 2021 at 04:49:43PM +0000, Christophe Leroy wrote:
>>> This patch converts powerpc to the generic PTDUMP implementation.
>>>
>>> Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
>> 
>> This patch as commit e084728393a5 ("powerpc/ptdump: Convert powerpc to
>> GENERIC_PTDUMP") in powerpc/next causes a panic with Fedora's ppc64le
>> config [1] when booting up in QEMU with [2]:
>> 
>> [    1.621864] BUG: Unable to handle kernel data access on read at 0xc0eeff7f00000000
>> [    1.623058] Faulting instruction address: 0xc00000000045e5fc
>> [    1.623832] Oops: Kernel access of bad area, sig: 11 [#1]
>> [    1.624318] LE PAGE_SIZE=64K MMU=Hash SMP NR_CPUS=2048 NUMA PowerNV
>> [    1.625015] Modules linked in:
>> [    1.625463] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.14.0-rc7-next-20210827 #16
>> [    1.626237] NIP:  c00000000045e5fc LR: c00000000045e580 CTR: c000000000518220
>> [    1.626839] REGS: c00000000752b820 TRAP: 0380   Not tainted  (5.14.0-rc7-next-20210827)
>> [    1.627528] MSR:  9000000002009033 <SF,HV,VEC,EE,ME,IR,DR,RI,LE>  CR: 84002482  XER: 20000000
>> [    1.628449] CFAR: c000000000518300 IRQMASK: 0
>> [    1.628449] GPR00: c00000000045e580 c00000000752bac0 c0000000028a9300 0000000000000000
>> [    1.628449] GPR04: c200800000000000 ffffffffffffffff 000000000000000a 0000000000000001
>> [    1.628449] GPR08: c0eeff7f00000000 0000000000000012 0000000000000000 0000000000000000
>> [    1.628449] GPR12: 0000000000000000 c000000002b20000 fffffffffffffffe c000000002971a70
>> [    1.628449] GPR16: c000000002960040 c0000000011a8f98 c00000000752bbf0 ffffffffffffffff
>> [    1.628449] GPR20: c2008fffffffffff c0eeff7f00000000 c000000002971a68 c00a0003ff000000
>> [    1.628449] GPR24: c000000002971a78 0000000000000002 0000000000000001 c0000000011a8f98
>> [    1.628449] GPR28: c0000000011a8f98 c0000000028daef8 c200800000000000 c200900000000000
>> [    1.634090] NIP [c00000000045e5fc] __walk_page_range+0x2bc/0xce0
>> [    1.635117] LR [c00000000045e580] __walk_page_range+0x240/0xce0
>> [    1.635755] Call Trace:
>> [    1.636018] [c00000000752bac0] [c00000000045e580] __walk_page_range+0x240/0xce0 (unreliable)
>> [    1.636811] [c00000000752bbd0] [c00000000045f234] walk_page_range_novma+0x74/0xb0
>> [    1.637459] [c00000000752bc20] [c000000000518448] ptdump_walk_pgd+0x98/0x170
>> [    1.638138] [c00000000752bc70] [c0000000000aa988] ptdump_check_wx+0x88/0xd0
>> [    1.638738] [c00000000752bd50] [c00000000008d6d8] mark_rodata_ro+0x48/0x80
>> [    1.639299] [c00000000752bdb0] [c000000000012a34] kernel_init+0x74/0x1a0
>> [    1.639842] [c00000000752be10] [c00000000000cfd4] ret_from_kernel_thread+0x5c/0x64
>> [    1.640597] Instruction dump:
>> [    1.641021] 38e7ffff 39490010 7ce707b4 7fca5436 79081564 7d4a3838 7908f082 794a1f24
>> [    1.641740] 78a8f00e 30e6ffff 7ea85214 7ce73110 <7d48502a> 78f90fa4 2c2a0000 39290010
>> [    1.642771] ---[ end trace 6cf72b085097ad52 ]---
>> [    1.643220]
>> [    2.644228] Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b
>> [    2.645523] ---[ end Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b ]---
>> 
>> This is not compiler specific, I can reproduce it with GCC 11.2.0 and
>> binutils 2.37. If there is any additional information I can provide,
>> please let me know.
>
> Can you provide a dissassembly of __walk_page_range() ? Or provide your vmlinux binary.

It seems to be walking of the end of the pgd.

[    3.373800] walk_p4d_range: addr c00fff0000000000 end c00fff8000000000
[    3.373852] walk_p4d_range: addr c00fff8000000000 end c010000000000000	<- end of pgd at PAGE_OFFSET + 4PB
[    3.373905] walk_p4d_range: addr c010000000000000 end c010008000000000
[    3.373957] walk_p4d_range: addr c010008000000000 end c010010000000000
[    3.374009] walk_p4d_range: addr c010010000000000 end c010018000000000
[    3.374060] walk_p4d_range: addr c010018000000000 end c010020000000000
[    3.376727] walk_p4d_range: addr c010020000000000 end c010028000000000
[    3.376780] walk_p4d_range: addr c010028000000000 end c010030000000000
[    3.376831] walk_p4d_range: addr c010030000000000 end c010038000000000
[    3.376883] walk_p4d_range: addr c010038000000000 end c010040000000000
[    3.376935] walk_p4d_range: addr c010040000000000 end c010048000000000
[    3.376988] walk_p4d_range: addr c010048000000000 end c010050000000000
[    3.377039] walk_p4d_range: addr c010050000000000 end c010058000000000
[    3.377091] walk_p4d_range: addr c010058000000000 end c010060000000000
[    3.377143] walk_p4d_range: addr c010060000000000 end c010068000000000
[    3.377244] walk_pud_range: addr c010060000000000 end c010068000000000
[    3.377374] walk_pmd_range: addr c010060100000000 end c010060140000000
[    3.377817] BUG: Unable to handle kernel data access on read at 0xf906a038d8ba8400
[    3.378247] Faulting instruction address: 0xc00000000045b4a4
[    3.378725] Oops: Kernel access of bad area, sig: 11 [#1]
[    3.378843] LE PAGE_SIZE=64K MMU=Radix SMP NR_CPUS=2048 NUMA pSeries
[    3.379118] Modules linked in:
[    3.379422] CPU: 1 PID: 1 Comm: swapper/0 Not tainted 5.14.0-rc2+ #75
[    3.379751] NIP:  c00000000045b4a4 LR: c00000000045b430 CTR: c000000000b4b580
[    3.379833] REGS: c0000000085637c0 TRAP: 0300   Not tainted  (5.14.0-rc2+)
[    3.379940] MSR:  8000000002009033 <SF,VEC,EE,ME,IR,DR,RI,LE>  CR: 8800228f  XER: 20040000
[    3.380284] CFAR: c0000000001f5744 DAR: f906a038d8ba8400 DSISR: 40000000 IRQMASK: 0
[    3.380284] GPR00: c00000000045b430 c000000008563a60 c0000000028a7d00 000000000000003a
[    3.380284] GPR04: 00000000ffffe14d c000000008563748 ffffffffffffffff 00000000000001ff
[    3.380284] GPR08: f906a038d8ba8400 c0100601001fffff c01006013fffffff fffffffffffc51e0
[    3.380284] GPR12: 0000000000002000 c0000000ffffee80 0000000000000001 c000000008563be0
[    3.380284] GPR16: c000000001198118 c0000000028daef8 c000000002971a60 c0000000014bc868
[    3.380284] GPR20: c010060100200000 c000000002971a58 c000000002971a68 c010060100000000
[    3.380284] GPR24: 0000000000000003 c000000001198118 c000000001198118 c000000001000020
[    3.380284] GPR28: 0000000000000000 c010060140000000 c010068000000000 f906a038d8ba8400
[    3.381235] NIP [c00000000045b4a4] __walk_page_range+0x7f4/0xbd0
[    3.381906] LR [c00000000045b430] __walk_page_range+0x780/0xbd0
[    3.382120] Call Trace:
[    3.382240] [c000000008563a60] [c00000000045b430] __walk_page_range+0x780/0xbd0 (unreliable)
[    3.382445] [c000000008563bc0] [c00000000045ba94] walk_page_range_novma+0x74/0xb0
[    3.382548] [c000000008563c10] [c000000000514cd8] ptdump_walk_pgd+0x98/0x170
[    3.382630] [c000000008563c60] [c0000000000aaf70] ptdump_check_wx+0xb0/0x100
[    3.382774] [c000000008563d40] [c00000000008db18] mark_rodata_ro+0x48/0x80
[    3.382849] [c000000008563da0] [c000000000012a18] kernel_init+0x78/0x1c0
[    3.382926] [c000000008563e10] [c00000000000cfd4] ret_from_kernel_thread+0x5c/0x64
[    3.383092] Instruction dump:
[    3.383341] 78c8f00e 7fe84a14 e9360000 e94100a8 39290010 7dc94836 7e89ba14 7d2900d0
[    3.383516] 7e944838 3934ffff 7c295040 40800098 <e93f0000> 2c290000 40820094 e9990028
[    3.384126] ---[ end trace d8e6479034d7a9d1 ]---

cheers

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH v4 4/4] powerpc/ptdump: Convert powerpc to GENERIC_PTDUMP
  2021-08-30  7:52         ` Michael Ellerman
@ 2021-08-30  8:16           ` Christophe Leroy
  -1 siblings, 0 replies; 25+ messages in thread
From: Christophe Leroy @ 2021-08-30  8:16 UTC (permalink / raw)
  To: Michael Ellerman, Nathan Chancellor
  Cc: Benjamin Herrenschmidt, Paul Mackerras, linux-kernel, linuxppc-dev



Le 30/08/2021 à 09:52, Michael Ellerman a écrit :
> Christophe Leroy <christophe.leroy@csgroup.eu> writes:
>> Hi Nathan,
>>
>> Le 29/08/2021 à 20:55, Nathan Chancellor a écrit :
>>> Hi Christophe,
>>>
>>> On Thu, Jul 08, 2021 at 04:49:43PM +0000, Christophe Leroy wrote:
>>>> This patch converts powerpc to the generic PTDUMP implementation.
>>>>
>>>> Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
>>>
>>> This patch as commit e084728393a5 ("powerpc/ptdump: Convert powerpc to
>>> GENERIC_PTDUMP") in powerpc/next causes a panic with Fedora's ppc64le
>>> config [1] when booting up in QEMU with [2]:
>>>
>>> [    1.621864] BUG: Unable to handle kernel data access on read at 0xc0eeff7f00000000
>>> [    1.623058] Faulting instruction address: 0xc00000000045e5fc
>>> [    1.623832] Oops: Kernel access of bad area, sig: 11 [#1]
>>> [    1.624318] LE PAGE_SIZE=64K MMU=Hash SMP NR_CPUS=2048 NUMA PowerNV
>>> [    1.625015] Modules linked in:
>>> [    1.625463] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.14.0-rc7-next-20210827 #16
>>> [    1.626237] NIP:  c00000000045e5fc LR: c00000000045e580 CTR: c000000000518220
>>> [    1.626839] REGS: c00000000752b820 TRAP: 0380   Not tainted  (5.14.0-rc7-next-20210827)
>>> [    1.627528] MSR:  9000000002009033 <SF,HV,VEC,EE,ME,IR,DR,RI,LE>  CR: 84002482  XER: 20000000
>>> [    1.628449] CFAR: c000000000518300 IRQMASK: 0
>>> [    1.628449] GPR00: c00000000045e580 c00000000752bac0 c0000000028a9300 0000000000000000
>>> [    1.628449] GPR04: c200800000000000 ffffffffffffffff 000000000000000a 0000000000000001
>>> [    1.628449] GPR08: c0eeff7f00000000 0000000000000012 0000000000000000 0000000000000000
>>> [    1.628449] GPR12: 0000000000000000 c000000002b20000 fffffffffffffffe c000000002971a70
>>> [    1.628449] GPR16: c000000002960040 c0000000011a8f98 c00000000752bbf0 ffffffffffffffff
>>> [    1.628449] GPR20: c2008fffffffffff c0eeff7f00000000 c000000002971a68 c00a0003ff000000
>>> [    1.628449] GPR24: c000000002971a78 0000000000000002 0000000000000001 c0000000011a8f98
>>> [    1.628449] GPR28: c0000000011a8f98 c0000000028daef8 c200800000000000 c200900000000000
>>> [    1.634090] NIP [c00000000045e5fc] __walk_page_range+0x2bc/0xce0
>>> [    1.635117] LR [c00000000045e580] __walk_page_range+0x240/0xce0
>>> [    1.635755] Call Trace:
>>> [    1.636018] [c00000000752bac0] [c00000000045e580] __walk_page_range+0x240/0xce0 (unreliable)
>>> [    1.636811] [c00000000752bbd0] [c00000000045f234] walk_page_range_novma+0x74/0xb0
>>> [    1.637459] [c00000000752bc20] [c000000000518448] ptdump_walk_pgd+0x98/0x170
>>> [    1.638138] [c00000000752bc70] [c0000000000aa988] ptdump_check_wx+0x88/0xd0
>>> [    1.638738] [c00000000752bd50] [c00000000008d6d8] mark_rodata_ro+0x48/0x80
>>> [    1.639299] [c00000000752bdb0] [c000000000012a34] kernel_init+0x74/0x1a0
>>> [    1.639842] [c00000000752be10] [c00000000000cfd4] ret_from_kernel_thread+0x5c/0x64
>>> [    1.640597] Instruction dump:
>>> [    1.641021] 38e7ffff 39490010 7ce707b4 7fca5436 79081564 7d4a3838 7908f082 794a1f24
>>> [    1.641740] 78a8f00e 30e6ffff 7ea85214 7ce73110 <7d48502a> 78f90fa4 2c2a0000 39290010
>>> [    1.642771] ---[ end trace 6cf72b085097ad52 ]---
>>> [    1.643220]
>>> [    2.644228] Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b
>>> [    2.645523] ---[ end Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b ]---
>>>
>>> This is not compiler specific, I can reproduce it with GCC 11.2.0 and
>>> binutils 2.37. If there is any additional information I can provide,
>>> please let me know.
>>
>> Can you provide a dissassembly of __walk_page_range() ? Or provide your vmlinux binary.
> 
> It seems to be walking of the end of the pgd.
> 
> [    3.373800] walk_p4d_range: addr c00fff0000000000 end c00fff8000000000
> [    3.373852] walk_p4d_range: addr c00fff8000000000 end c010000000000000	<- end of pgd at PAGE_OFFSET + 4PB
> [    3.373905] walk_p4d_range: addr c010000000000000 end c010008000000000

Yes, I want it to walk from TASK_SIZE_MAX up to 0xffffffffffffffff :)

static struct ptdump_range ptdump_range[] __ro_after_init = {
	{TASK_SIZE_MAX, ~0UL},
	{0, 0}
};


Ok, well, ppc32 go up to 0xffffffff

What's the top address to be used for ppc64 ?

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH v4 4/4] powerpc/ptdump: Convert powerpc to GENERIC_PTDUMP
@ 2021-08-30  8:16           ` Christophe Leroy
  0 siblings, 0 replies; 25+ messages in thread
From: Christophe Leroy @ 2021-08-30  8:16 UTC (permalink / raw)
  To: Michael Ellerman, Nathan Chancellor
  Cc: linuxppc-dev, Paul Mackerras, linux-kernel



Le 30/08/2021 à 09:52, Michael Ellerman a écrit :
> Christophe Leroy <christophe.leroy@csgroup.eu> writes:
>> Hi Nathan,
>>
>> Le 29/08/2021 à 20:55, Nathan Chancellor a écrit :
>>> Hi Christophe,
>>>
>>> On Thu, Jul 08, 2021 at 04:49:43PM +0000, Christophe Leroy wrote:
>>>> This patch converts powerpc to the generic PTDUMP implementation.
>>>>
>>>> Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
>>>
>>> This patch as commit e084728393a5 ("powerpc/ptdump: Convert powerpc to
>>> GENERIC_PTDUMP") in powerpc/next causes a panic with Fedora's ppc64le
>>> config [1] when booting up in QEMU with [2]:
>>>
>>> [    1.621864] BUG: Unable to handle kernel data access on read at 0xc0eeff7f00000000
>>> [    1.623058] Faulting instruction address: 0xc00000000045e5fc
>>> [    1.623832] Oops: Kernel access of bad area, sig: 11 [#1]
>>> [    1.624318] LE PAGE_SIZE=64K MMU=Hash SMP NR_CPUS=2048 NUMA PowerNV
>>> [    1.625015] Modules linked in:
>>> [    1.625463] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.14.0-rc7-next-20210827 #16
>>> [    1.626237] NIP:  c00000000045e5fc LR: c00000000045e580 CTR: c000000000518220
>>> [    1.626839] REGS: c00000000752b820 TRAP: 0380   Not tainted  (5.14.0-rc7-next-20210827)
>>> [    1.627528] MSR:  9000000002009033 <SF,HV,VEC,EE,ME,IR,DR,RI,LE>  CR: 84002482  XER: 20000000
>>> [    1.628449] CFAR: c000000000518300 IRQMASK: 0
>>> [    1.628449] GPR00: c00000000045e580 c00000000752bac0 c0000000028a9300 0000000000000000
>>> [    1.628449] GPR04: c200800000000000 ffffffffffffffff 000000000000000a 0000000000000001
>>> [    1.628449] GPR08: c0eeff7f00000000 0000000000000012 0000000000000000 0000000000000000
>>> [    1.628449] GPR12: 0000000000000000 c000000002b20000 fffffffffffffffe c000000002971a70
>>> [    1.628449] GPR16: c000000002960040 c0000000011a8f98 c00000000752bbf0 ffffffffffffffff
>>> [    1.628449] GPR20: c2008fffffffffff c0eeff7f00000000 c000000002971a68 c00a0003ff000000
>>> [    1.628449] GPR24: c000000002971a78 0000000000000002 0000000000000001 c0000000011a8f98
>>> [    1.628449] GPR28: c0000000011a8f98 c0000000028daef8 c200800000000000 c200900000000000
>>> [    1.634090] NIP [c00000000045e5fc] __walk_page_range+0x2bc/0xce0
>>> [    1.635117] LR [c00000000045e580] __walk_page_range+0x240/0xce0
>>> [    1.635755] Call Trace:
>>> [    1.636018] [c00000000752bac0] [c00000000045e580] __walk_page_range+0x240/0xce0 (unreliable)
>>> [    1.636811] [c00000000752bbd0] [c00000000045f234] walk_page_range_novma+0x74/0xb0
>>> [    1.637459] [c00000000752bc20] [c000000000518448] ptdump_walk_pgd+0x98/0x170
>>> [    1.638138] [c00000000752bc70] [c0000000000aa988] ptdump_check_wx+0x88/0xd0
>>> [    1.638738] [c00000000752bd50] [c00000000008d6d8] mark_rodata_ro+0x48/0x80
>>> [    1.639299] [c00000000752bdb0] [c000000000012a34] kernel_init+0x74/0x1a0
>>> [    1.639842] [c00000000752be10] [c00000000000cfd4] ret_from_kernel_thread+0x5c/0x64
>>> [    1.640597] Instruction dump:
>>> [    1.641021] 38e7ffff 39490010 7ce707b4 7fca5436 79081564 7d4a3838 7908f082 794a1f24
>>> [    1.641740] 78a8f00e 30e6ffff 7ea85214 7ce73110 <7d48502a> 78f90fa4 2c2a0000 39290010
>>> [    1.642771] ---[ end trace 6cf72b085097ad52 ]---
>>> [    1.643220]
>>> [    2.644228] Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b
>>> [    2.645523] ---[ end Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b ]---
>>>
>>> This is not compiler specific, I can reproduce it with GCC 11.2.0 and
>>> binutils 2.37. If there is any additional information I can provide,
>>> please let me know.
>>
>> Can you provide a dissassembly of __walk_page_range() ? Or provide your vmlinux binary.
> 
> It seems to be walking of the end of the pgd.
> 
> [    3.373800] walk_p4d_range: addr c00fff0000000000 end c00fff8000000000
> [    3.373852] walk_p4d_range: addr c00fff8000000000 end c010000000000000	<- end of pgd at PAGE_OFFSET + 4PB
> [    3.373905] walk_p4d_range: addr c010000000000000 end c010008000000000

Yes, I want it to walk from TASK_SIZE_MAX up to 0xffffffffffffffff :)

static struct ptdump_range ptdump_range[] __ro_after_init = {
	{TASK_SIZE_MAX, ~0UL},
	{0, 0}
};


Ok, well, ppc32 go up to 0xffffffff

What's the top address to be used for ppc64 ?

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH v4 4/4] powerpc/ptdump: Convert powerpc to GENERIC_PTDUMP
  2021-08-30  8:16           ` Christophe Leroy
@ 2021-08-30 11:55             ` Michael Ellerman
  -1 siblings, 0 replies; 25+ messages in thread
From: Michael Ellerman @ 2021-08-30 11:55 UTC (permalink / raw)
  To: Christophe Leroy, Nathan Chancellor
  Cc: Benjamin Herrenschmidt, Paul Mackerras, linux-kernel, linuxppc-dev

Christophe Leroy <christophe.leroy@csgroup.eu> writes:
> Le 30/08/2021 à 09:52, Michael Ellerman a écrit :
>> Christophe Leroy <christophe.leroy@csgroup.eu> writes:
>>> Le 29/08/2021 à 20:55, Nathan Chancellor a écrit :
>>>> On Thu, Jul 08, 2021 at 04:49:43PM +0000, Christophe Leroy wrote:
>>>>> This patch converts powerpc to the generic PTDUMP implementation.
>>>>>
>>>>
>>>> This patch as commit e084728393a5 ("powerpc/ptdump: Convert powerpc to
>>>> GENERIC_PTDUMP") in powerpc/next causes a panic with Fedora's ppc64le
>>>> config [1] when booting up in QEMU with [2]:
>>>>
>>>> [    1.621864] BUG: Unable to handle kernel data access on read at 0xc0eeff7f00000000
>>>> [    1.623058] Faulting instruction address: 0xc00000000045e5fc
>>>> [    1.623832] Oops: Kernel access of bad area, sig: 11 [#1]
>>>> [    1.624318] LE PAGE_SIZE=64K MMU=Hash SMP NR_CPUS=2048 NUMA PowerNV
>>>> [    1.625015] Modules linked in:
>>>> [    1.625463] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.14.0-rc7-next-20210827 #16
>>>> [    1.626237] NIP:  c00000000045e5fc LR: c00000000045e580 CTR: c000000000518220
>>>> [    1.626839] REGS: c00000000752b820 TRAP: 0380   Not tainted  (5.14.0-rc7-next-20210827)
>>>> [    1.627528] MSR:  9000000002009033 <SF,HV,VEC,EE,ME,IR,DR,RI,LE>  CR: 84002482  XER: 20000000
>>>> [    1.628449] CFAR: c000000000518300 IRQMASK: 0
>>>> [    1.628449] GPR00: c00000000045e580 c00000000752bac0 c0000000028a9300 0000000000000000
>>>> [    1.628449] GPR04: c200800000000000 ffffffffffffffff 000000000000000a 0000000000000001
>>>> [    1.628449] GPR08: c0eeff7f00000000 0000000000000012 0000000000000000 0000000000000000
>>>> [    1.628449] GPR12: 0000000000000000 c000000002b20000 fffffffffffffffe c000000002971a70
>>>> [    1.628449] GPR16: c000000002960040 c0000000011a8f98 c00000000752bbf0 ffffffffffffffff
>>>> [    1.628449] GPR20: c2008fffffffffff c0eeff7f00000000 c000000002971a68 c00a0003ff000000
>>>> [    1.628449] GPR24: c000000002971a78 0000000000000002 0000000000000001 c0000000011a8f98
>>>> [    1.628449] GPR28: c0000000011a8f98 c0000000028daef8 c200800000000000 c200900000000000
>>>> [    1.634090] NIP [c00000000045e5fc] __walk_page_range+0x2bc/0xce0
>>>> [    1.635117] LR [c00000000045e580] __walk_page_range+0x240/0xce0
>>>> [    1.635755] Call Trace:
>>>> [    1.636018] [c00000000752bac0] [c00000000045e580] __walk_page_range+0x240/0xce0 (unreliable)
>>>> [    1.636811] [c00000000752bbd0] [c00000000045f234] walk_page_range_novma+0x74/0xb0
>>>> [    1.637459] [c00000000752bc20] [c000000000518448] ptdump_walk_pgd+0x98/0x170
>>>> [    1.638138] [c00000000752bc70] [c0000000000aa988] ptdump_check_wx+0x88/0xd0
>>>> [    1.638738] [c00000000752bd50] [c00000000008d6d8] mark_rodata_ro+0x48/0x80
>>>> [    1.639299] [c00000000752bdb0] [c000000000012a34] kernel_init+0x74/0x1a0
>>>> [    1.639842] [c00000000752be10] [c00000000000cfd4] ret_from_kernel_thread+0x5c/0x64
>>>> [    1.640597] Instruction dump:
>>>> [    1.641021] 38e7ffff 39490010 7ce707b4 7fca5436 79081564 7d4a3838 7908f082 794a1f24
>>>> [    1.641740] 78a8f00e 30e6ffff 7ea85214 7ce73110 <7d48502a> 78f90fa4 2c2a0000 39290010
>>>> [    1.642771] ---[ end trace 6cf72b085097ad52 ]---
>>>> [    1.643220]
>>>> [    2.644228] Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b
>>>> [    2.645523] ---[ end Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b ]---
>>>>
>>>> This is not compiler specific, I can reproduce it with GCC 11.2.0 and
>>>> binutils 2.37. If there is any additional information I can provide,
>>>> please let me know.
>>>
>>> Can you provide a dissassembly of __walk_page_range() ? Or provide your vmlinux binary.
>> 
>> It seems to be walking of the end of the pgd.
>> 
>> [    3.373800] walk_p4d_range: addr c00fff0000000000 end c00fff8000000000
>> [    3.373852] walk_p4d_range: addr c00fff8000000000 end c010000000000000	<- end of pgd at PAGE_OFFSET + 4PB
>> [    3.373905] walk_p4d_range: addr c010000000000000 end c010008000000000
>
> Yes, I want it to walk from TASK_SIZE_MAX up to 0xffffffffffffffff :)

But the page table doesn't span that far? 0_o

> static struct ptdump_range ptdump_range[] __ro_after_init = {
> 	{TASK_SIZE_MAX, ~0UL},
> 	{0, 0}
> };
>
> Ok, well, ppc32 go up to 0xffffffff
>
> What's the top address to be used for ppc64 ?

It's different for (hash | radix) x page size.

The below works, and matches what we used to do.

Possibly we can come up with something cleaner, not sure.

cheers


diff --git a/arch/powerpc/mm/ptdump/ptdump.c b/arch/powerpc/mm/ptdump/ptdump.c
index 2d80d775d15e..3d3778a74969 100644
--- a/arch/powerpc/mm/ptdump/ptdump.c
+++ b/arch/powerpc/mm/ptdump/ptdump.c
@@ -359,6 +359,8 @@ static int __init ptdump_init(void)
 		ptdump_range[0].start = KERN_VIRT_START;
 	else
 		ptdump_range[0].start = PAGE_OFFSET;
+
+	ptdump_range[0].end = ptdump_range[0].start + (PGDIR_SIZE * PTRS_PER_PGD);
 #endif
 
 	populate_markers();

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH v4 4/4] powerpc/ptdump: Convert powerpc to GENERIC_PTDUMP
@ 2021-08-30 11:55             ` Michael Ellerman
  0 siblings, 0 replies; 25+ messages in thread
From: Michael Ellerman @ 2021-08-30 11:55 UTC (permalink / raw)
  To: Christophe Leroy, Nathan Chancellor
  Cc: linuxppc-dev, Paul Mackerras, linux-kernel

Christophe Leroy <christophe.leroy@csgroup.eu> writes:
> Le 30/08/2021 à 09:52, Michael Ellerman a écrit :
>> Christophe Leroy <christophe.leroy@csgroup.eu> writes:
>>> Le 29/08/2021 à 20:55, Nathan Chancellor a écrit :
>>>> On Thu, Jul 08, 2021 at 04:49:43PM +0000, Christophe Leroy wrote:
>>>>> This patch converts powerpc to the generic PTDUMP implementation.
>>>>>
>>>>
>>>> This patch as commit e084728393a5 ("powerpc/ptdump: Convert powerpc to
>>>> GENERIC_PTDUMP") in powerpc/next causes a panic with Fedora's ppc64le
>>>> config [1] when booting up in QEMU with [2]:
>>>>
>>>> [    1.621864] BUG: Unable to handle kernel data access on read at 0xc0eeff7f00000000
>>>> [    1.623058] Faulting instruction address: 0xc00000000045e5fc
>>>> [    1.623832] Oops: Kernel access of bad area, sig: 11 [#1]
>>>> [    1.624318] LE PAGE_SIZE=64K MMU=Hash SMP NR_CPUS=2048 NUMA PowerNV
>>>> [    1.625015] Modules linked in:
>>>> [    1.625463] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.14.0-rc7-next-20210827 #16
>>>> [    1.626237] NIP:  c00000000045e5fc LR: c00000000045e580 CTR: c000000000518220
>>>> [    1.626839] REGS: c00000000752b820 TRAP: 0380   Not tainted  (5.14.0-rc7-next-20210827)
>>>> [    1.627528] MSR:  9000000002009033 <SF,HV,VEC,EE,ME,IR,DR,RI,LE>  CR: 84002482  XER: 20000000
>>>> [    1.628449] CFAR: c000000000518300 IRQMASK: 0
>>>> [    1.628449] GPR00: c00000000045e580 c00000000752bac0 c0000000028a9300 0000000000000000
>>>> [    1.628449] GPR04: c200800000000000 ffffffffffffffff 000000000000000a 0000000000000001
>>>> [    1.628449] GPR08: c0eeff7f00000000 0000000000000012 0000000000000000 0000000000000000
>>>> [    1.628449] GPR12: 0000000000000000 c000000002b20000 fffffffffffffffe c000000002971a70
>>>> [    1.628449] GPR16: c000000002960040 c0000000011a8f98 c00000000752bbf0 ffffffffffffffff
>>>> [    1.628449] GPR20: c2008fffffffffff c0eeff7f00000000 c000000002971a68 c00a0003ff000000
>>>> [    1.628449] GPR24: c000000002971a78 0000000000000002 0000000000000001 c0000000011a8f98
>>>> [    1.628449] GPR28: c0000000011a8f98 c0000000028daef8 c200800000000000 c200900000000000
>>>> [    1.634090] NIP [c00000000045e5fc] __walk_page_range+0x2bc/0xce0
>>>> [    1.635117] LR [c00000000045e580] __walk_page_range+0x240/0xce0
>>>> [    1.635755] Call Trace:
>>>> [    1.636018] [c00000000752bac0] [c00000000045e580] __walk_page_range+0x240/0xce0 (unreliable)
>>>> [    1.636811] [c00000000752bbd0] [c00000000045f234] walk_page_range_novma+0x74/0xb0
>>>> [    1.637459] [c00000000752bc20] [c000000000518448] ptdump_walk_pgd+0x98/0x170
>>>> [    1.638138] [c00000000752bc70] [c0000000000aa988] ptdump_check_wx+0x88/0xd0
>>>> [    1.638738] [c00000000752bd50] [c00000000008d6d8] mark_rodata_ro+0x48/0x80
>>>> [    1.639299] [c00000000752bdb0] [c000000000012a34] kernel_init+0x74/0x1a0
>>>> [    1.639842] [c00000000752be10] [c00000000000cfd4] ret_from_kernel_thread+0x5c/0x64
>>>> [    1.640597] Instruction dump:
>>>> [    1.641021] 38e7ffff 39490010 7ce707b4 7fca5436 79081564 7d4a3838 7908f082 794a1f24
>>>> [    1.641740] 78a8f00e 30e6ffff 7ea85214 7ce73110 <7d48502a> 78f90fa4 2c2a0000 39290010
>>>> [    1.642771] ---[ end trace 6cf72b085097ad52 ]---
>>>> [    1.643220]
>>>> [    2.644228] Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b
>>>> [    2.645523] ---[ end Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b ]---
>>>>
>>>> This is not compiler specific, I can reproduce it with GCC 11.2.0 and
>>>> binutils 2.37. If there is any additional information I can provide,
>>>> please let me know.
>>>
>>> Can you provide a dissassembly of __walk_page_range() ? Or provide your vmlinux binary.
>> 
>> It seems to be walking of the end of the pgd.
>> 
>> [    3.373800] walk_p4d_range: addr c00fff0000000000 end c00fff8000000000
>> [    3.373852] walk_p4d_range: addr c00fff8000000000 end c010000000000000	<- end of pgd at PAGE_OFFSET + 4PB
>> [    3.373905] walk_p4d_range: addr c010000000000000 end c010008000000000
>
> Yes, I want it to walk from TASK_SIZE_MAX up to 0xffffffffffffffff :)

But the page table doesn't span that far? 0_o

> static struct ptdump_range ptdump_range[] __ro_after_init = {
> 	{TASK_SIZE_MAX, ~0UL},
> 	{0, 0}
> };
>
> Ok, well, ppc32 go up to 0xffffffff
>
> What's the top address to be used for ppc64 ?

It's different for (hash | radix) x page size.

The below works, and matches what we used to do.

Possibly we can come up with something cleaner, not sure.

cheers


diff --git a/arch/powerpc/mm/ptdump/ptdump.c b/arch/powerpc/mm/ptdump/ptdump.c
index 2d80d775d15e..3d3778a74969 100644
--- a/arch/powerpc/mm/ptdump/ptdump.c
+++ b/arch/powerpc/mm/ptdump/ptdump.c
@@ -359,6 +359,8 @@ static int __init ptdump_init(void)
 		ptdump_range[0].start = KERN_VIRT_START;
 	else
 		ptdump_range[0].start = PAGE_OFFSET;
+
+	ptdump_range[0].end = ptdump_range[0].start + (PGDIR_SIZE * PTRS_PER_PGD);
 #endif
 
 	populate_markers();

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH v4 4/4] powerpc/ptdump: Convert powerpc to GENERIC_PTDUMP
  2021-08-30 11:55             ` Michael Ellerman
@ 2021-08-30 13:16               ` Christophe Leroy
  -1 siblings, 0 replies; 25+ messages in thread
From: Christophe Leroy @ 2021-08-30 13:16 UTC (permalink / raw)
  To: Michael Ellerman, Nathan Chancellor
  Cc: Benjamin Herrenschmidt, Paul Mackerras, linux-kernel, linuxppc-dev



Le 30/08/2021 à 13:55, Michael Ellerman a écrit :
> Christophe Leroy <christophe.leroy@csgroup.eu> writes:
>> Le 30/08/2021 à 09:52, Michael Ellerman a écrit :
>>> Christophe Leroy <christophe.leroy@csgroup.eu> writes:
>>>> Le 29/08/2021 à 20:55, Nathan Chancellor a écrit :
>>>>> On Thu, Jul 08, 2021 at 04:49:43PM +0000, Christophe Leroy wrote:
>>>>>> This patch converts powerpc to the generic PTDUMP implementation.
>>>>>>
>>>>>
>>>>> This patch as commit e084728393a5 ("powerpc/ptdump: Convert powerpc to
>>>>> GENERIC_PTDUMP") in powerpc/next causes a panic with Fedora's ppc64le
>>>>> config [1] when booting up in QEMU with [2]:
>>>>>
>>>>> [    1.621864] BUG: Unable to handle kernel data access on read at 0xc0eeff7f00000000
>>>>> [    1.623058] Faulting instruction address: 0xc00000000045e5fc
>>>>> [    1.623832] Oops: Kernel access of bad area, sig: 11 [#1]
>>>>> [    1.624318] LE PAGE_SIZE=64K MMU=Hash SMP NR_CPUS=2048 NUMA PowerNV
>>>>> [    1.625015] Modules linked in:
>>>>> [    1.625463] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.14.0-rc7-next-20210827 #16
>>>>> [    1.626237] NIP:  c00000000045e5fc LR: c00000000045e580 CTR: c000000000518220
>>>>> [    1.626839] REGS: c00000000752b820 TRAP: 0380   Not tainted  (5.14.0-rc7-next-20210827)
>>>>> [    1.627528] MSR:  9000000002009033 <SF,HV,VEC,EE,ME,IR,DR,RI,LE>  CR: 84002482  XER: 20000000
>>>>> [    1.628449] CFAR: c000000000518300 IRQMASK: 0
>>>>> [    1.628449] GPR00: c00000000045e580 c00000000752bac0 c0000000028a9300 0000000000000000
>>>>> [    1.628449] GPR04: c200800000000000 ffffffffffffffff 000000000000000a 0000000000000001
>>>>> [    1.628449] GPR08: c0eeff7f00000000 0000000000000012 0000000000000000 0000000000000000
>>>>> [    1.628449] GPR12: 0000000000000000 c000000002b20000 fffffffffffffffe c000000002971a70
>>>>> [    1.628449] GPR16: c000000002960040 c0000000011a8f98 c00000000752bbf0 ffffffffffffffff
>>>>> [    1.628449] GPR20: c2008fffffffffff c0eeff7f00000000 c000000002971a68 c00a0003ff000000
>>>>> [    1.628449] GPR24: c000000002971a78 0000000000000002 0000000000000001 c0000000011a8f98
>>>>> [    1.628449] GPR28: c0000000011a8f98 c0000000028daef8 c200800000000000 c200900000000000
>>>>> [    1.634090] NIP [c00000000045e5fc] __walk_page_range+0x2bc/0xce0
>>>>> [    1.635117] LR [c00000000045e580] __walk_page_range+0x240/0xce0
>>>>> [    1.635755] Call Trace:
>>>>> [    1.636018] [c00000000752bac0] [c00000000045e580] __walk_page_range+0x240/0xce0 (unreliable)
>>>>> [    1.636811] [c00000000752bbd0] [c00000000045f234] walk_page_range_novma+0x74/0xb0
>>>>> [    1.637459] [c00000000752bc20] [c000000000518448] ptdump_walk_pgd+0x98/0x170
>>>>> [    1.638138] [c00000000752bc70] [c0000000000aa988] ptdump_check_wx+0x88/0xd0
>>>>> [    1.638738] [c00000000752bd50] [c00000000008d6d8] mark_rodata_ro+0x48/0x80
>>>>> [    1.639299] [c00000000752bdb0] [c000000000012a34] kernel_init+0x74/0x1a0
>>>>> [    1.639842] [c00000000752be10] [c00000000000cfd4] ret_from_kernel_thread+0x5c/0x64
>>>>> [    1.640597] Instruction dump:
>>>>> [    1.641021] 38e7ffff 39490010 7ce707b4 7fca5436 79081564 7d4a3838 7908f082 794a1f24
>>>>> [    1.641740] 78a8f00e 30e6ffff 7ea85214 7ce73110 <7d48502a> 78f90fa4 2c2a0000 39290010
>>>>> [    1.642771] ---[ end trace 6cf72b085097ad52 ]---
>>>>> [    1.643220]
>>>>> [    2.644228] Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b
>>>>> [    2.645523] ---[ end Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b ]---
>>>>>
>>>>> This is not compiler specific, I can reproduce it with GCC 11.2.0 and
>>>>> binutils 2.37. If there is any additional information I can provide,
>>>>> please let me know.
>>>>
>>>> Can you provide a dissassembly of __walk_page_range() ? Or provide your vmlinux binary.
>>>
>>> It seems to be walking of the end of the pgd.
>>>
>>> [    3.373800] walk_p4d_range: addr c00fff0000000000 end c00fff8000000000
>>> [    3.373852] walk_p4d_range: addr c00fff8000000000 end c010000000000000	<- end of pgd at PAGE_OFFSET + 4PB
>>> [    3.373905] walk_p4d_range: addr c010000000000000 end c010008000000000
>>
>> Yes, I want it to walk from TASK_SIZE_MAX up to 0xffffffffffffffff :)
> 
> But the page table doesn't span that far? 0_o
> 
>> static struct ptdump_range ptdump_range[] __ro_after_init = {
>> 	{TASK_SIZE_MAX, ~0UL},
>> 	{0, 0}
>> };
>>
>> Ok, well, ppc32 go up to 0xffffffff
>>
>> What's the top address to be used for ppc64 ?
> 
> It's different for (hash | radix) x page size.
> 
> The below works, and matches what we used to do.
> 
> Possibly we can come up with something cleaner, not sure.
> 
> cheers
> 
> 
> diff --git a/arch/powerpc/mm/ptdump/ptdump.c b/arch/powerpc/mm/ptdump/ptdump.c
> index 2d80d775d15e..3d3778a74969 100644
> --- a/arch/powerpc/mm/ptdump/ptdump.c
> +++ b/arch/powerpc/mm/ptdump/ptdump.c
> @@ -359,6 +359,8 @@ static int __init ptdump_init(void)
>   		ptdump_range[0].start = KERN_VIRT_START;
>   	else
>   		ptdump_range[0].start = PAGE_OFFSET;
> +
> +	ptdump_range[0].end = ptdump_range[0].start + (PGDIR_SIZE * PTRS_PER_PGD);

Hum ...

It was:

	for (i = pgd_index(addr); i < PTRS_PER_PGD; i++, pgd++, addr += PGDIR_SIZE) {

And there is

#define pgd_index(a)  (((a) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1))


Do we have the following ?

	pgd_index(KERN_VIRT_START) == 0


Shouldn't it be something like

	ptdump_range[0].end = PAGE_OFFSET + (PGDIR_SIZE * PTRS_PER_PGD);


Christophe

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH v4 4/4] powerpc/ptdump: Convert powerpc to GENERIC_PTDUMP
@ 2021-08-30 13:16               ` Christophe Leroy
  0 siblings, 0 replies; 25+ messages in thread
From: Christophe Leroy @ 2021-08-30 13:16 UTC (permalink / raw)
  To: Michael Ellerman, Nathan Chancellor
  Cc: linuxppc-dev, Paul Mackerras, linux-kernel



Le 30/08/2021 à 13:55, Michael Ellerman a écrit :
> Christophe Leroy <christophe.leroy@csgroup.eu> writes:
>> Le 30/08/2021 à 09:52, Michael Ellerman a écrit :
>>> Christophe Leroy <christophe.leroy@csgroup.eu> writes:
>>>> Le 29/08/2021 à 20:55, Nathan Chancellor a écrit :
>>>>> On Thu, Jul 08, 2021 at 04:49:43PM +0000, Christophe Leroy wrote:
>>>>>> This patch converts powerpc to the generic PTDUMP implementation.
>>>>>>
>>>>>
>>>>> This patch as commit e084728393a5 ("powerpc/ptdump: Convert powerpc to
>>>>> GENERIC_PTDUMP") in powerpc/next causes a panic with Fedora's ppc64le
>>>>> config [1] when booting up in QEMU with [2]:
>>>>>
>>>>> [    1.621864] BUG: Unable to handle kernel data access on read at 0xc0eeff7f00000000
>>>>> [    1.623058] Faulting instruction address: 0xc00000000045e5fc
>>>>> [    1.623832] Oops: Kernel access of bad area, sig: 11 [#1]
>>>>> [    1.624318] LE PAGE_SIZE=64K MMU=Hash SMP NR_CPUS=2048 NUMA PowerNV
>>>>> [    1.625015] Modules linked in:
>>>>> [    1.625463] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.14.0-rc7-next-20210827 #16
>>>>> [    1.626237] NIP:  c00000000045e5fc LR: c00000000045e580 CTR: c000000000518220
>>>>> [    1.626839] REGS: c00000000752b820 TRAP: 0380   Not tainted  (5.14.0-rc7-next-20210827)
>>>>> [    1.627528] MSR:  9000000002009033 <SF,HV,VEC,EE,ME,IR,DR,RI,LE>  CR: 84002482  XER: 20000000
>>>>> [    1.628449] CFAR: c000000000518300 IRQMASK: 0
>>>>> [    1.628449] GPR00: c00000000045e580 c00000000752bac0 c0000000028a9300 0000000000000000
>>>>> [    1.628449] GPR04: c200800000000000 ffffffffffffffff 000000000000000a 0000000000000001
>>>>> [    1.628449] GPR08: c0eeff7f00000000 0000000000000012 0000000000000000 0000000000000000
>>>>> [    1.628449] GPR12: 0000000000000000 c000000002b20000 fffffffffffffffe c000000002971a70
>>>>> [    1.628449] GPR16: c000000002960040 c0000000011a8f98 c00000000752bbf0 ffffffffffffffff
>>>>> [    1.628449] GPR20: c2008fffffffffff c0eeff7f00000000 c000000002971a68 c00a0003ff000000
>>>>> [    1.628449] GPR24: c000000002971a78 0000000000000002 0000000000000001 c0000000011a8f98
>>>>> [    1.628449] GPR28: c0000000011a8f98 c0000000028daef8 c200800000000000 c200900000000000
>>>>> [    1.634090] NIP [c00000000045e5fc] __walk_page_range+0x2bc/0xce0
>>>>> [    1.635117] LR [c00000000045e580] __walk_page_range+0x240/0xce0
>>>>> [    1.635755] Call Trace:
>>>>> [    1.636018] [c00000000752bac0] [c00000000045e580] __walk_page_range+0x240/0xce0 (unreliable)
>>>>> [    1.636811] [c00000000752bbd0] [c00000000045f234] walk_page_range_novma+0x74/0xb0
>>>>> [    1.637459] [c00000000752bc20] [c000000000518448] ptdump_walk_pgd+0x98/0x170
>>>>> [    1.638138] [c00000000752bc70] [c0000000000aa988] ptdump_check_wx+0x88/0xd0
>>>>> [    1.638738] [c00000000752bd50] [c00000000008d6d8] mark_rodata_ro+0x48/0x80
>>>>> [    1.639299] [c00000000752bdb0] [c000000000012a34] kernel_init+0x74/0x1a0
>>>>> [    1.639842] [c00000000752be10] [c00000000000cfd4] ret_from_kernel_thread+0x5c/0x64
>>>>> [    1.640597] Instruction dump:
>>>>> [    1.641021] 38e7ffff 39490010 7ce707b4 7fca5436 79081564 7d4a3838 7908f082 794a1f24
>>>>> [    1.641740] 78a8f00e 30e6ffff 7ea85214 7ce73110 <7d48502a> 78f90fa4 2c2a0000 39290010
>>>>> [    1.642771] ---[ end trace 6cf72b085097ad52 ]---
>>>>> [    1.643220]
>>>>> [    2.644228] Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b
>>>>> [    2.645523] ---[ end Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b ]---
>>>>>
>>>>> This is not compiler specific, I can reproduce it with GCC 11.2.0 and
>>>>> binutils 2.37. If there is any additional information I can provide,
>>>>> please let me know.
>>>>
>>>> Can you provide a dissassembly of __walk_page_range() ? Or provide your vmlinux binary.
>>>
>>> It seems to be walking of the end of the pgd.
>>>
>>> [    3.373800] walk_p4d_range: addr c00fff0000000000 end c00fff8000000000
>>> [    3.373852] walk_p4d_range: addr c00fff8000000000 end c010000000000000	<- end of pgd at PAGE_OFFSET + 4PB
>>> [    3.373905] walk_p4d_range: addr c010000000000000 end c010008000000000
>>
>> Yes, I want it to walk from TASK_SIZE_MAX up to 0xffffffffffffffff :)
> 
> But the page table doesn't span that far? 0_o
> 
>> static struct ptdump_range ptdump_range[] __ro_after_init = {
>> 	{TASK_SIZE_MAX, ~0UL},
>> 	{0, 0}
>> };
>>
>> Ok, well, ppc32 go up to 0xffffffff
>>
>> What's the top address to be used for ppc64 ?
> 
> It's different for (hash | radix) x page size.
> 
> The below works, and matches what we used to do.
> 
> Possibly we can come up with something cleaner, not sure.
> 
> cheers
> 
> 
> diff --git a/arch/powerpc/mm/ptdump/ptdump.c b/arch/powerpc/mm/ptdump/ptdump.c
> index 2d80d775d15e..3d3778a74969 100644
> --- a/arch/powerpc/mm/ptdump/ptdump.c
> +++ b/arch/powerpc/mm/ptdump/ptdump.c
> @@ -359,6 +359,8 @@ static int __init ptdump_init(void)
>   		ptdump_range[0].start = KERN_VIRT_START;
>   	else
>   		ptdump_range[0].start = PAGE_OFFSET;
> +
> +	ptdump_range[0].end = ptdump_range[0].start + (PGDIR_SIZE * PTRS_PER_PGD);

Hum ...

It was:

	for (i = pgd_index(addr); i < PTRS_PER_PGD; i++, pgd++, addr += PGDIR_SIZE) {

And there is

#define pgd_index(a)  (((a) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1))


Do we have the following ?

	pgd_index(KERN_VIRT_START) == 0


Shouldn't it be something like

	ptdump_range[0].end = PAGE_OFFSET + (PGDIR_SIZE * PTRS_PER_PGD);


Christophe

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH v4 4/4] powerpc/ptdump: Convert powerpc to GENERIC_PTDUMP
  2021-08-30 13:16               ` Christophe Leroy
@ 2021-08-31 13:48                 ` Michael Ellerman
  -1 siblings, 0 replies; 25+ messages in thread
From: Michael Ellerman @ 2021-08-31 13:48 UTC (permalink / raw)
  To: Christophe Leroy, Nathan Chancellor
  Cc: Benjamin Herrenschmidt, Paul Mackerras, linux-kernel, linuxppc-dev

Christophe Leroy <christophe.leroy@csgroup.eu> writes:
> Le 30/08/2021 à 13:55, Michael Ellerman a écrit :
>> Christophe Leroy <christophe.leroy@csgroup.eu> writes:
>>> Le 30/08/2021 à 09:52, Michael Ellerman a écrit :
>>>> Christophe Leroy <christophe.leroy@csgroup.eu> writes:
>>>>> Le 29/08/2021 à 20:55, Nathan Chancellor a écrit :
>>>>>> On Thu, Jul 08, 2021 at 04:49:43PM +0000, Christophe Leroy wrote:
>>>>>>> This patch converts powerpc to the generic PTDUMP implementation.
>>>>>>>
>>>>>>
>>>>>> This patch as commit e084728393a5 ("powerpc/ptdump: Convert powerpc to
>>>>>> GENERIC_PTDUMP") in powerpc/next causes a panic with Fedora's ppc64le
>>>>>> config [1] when booting up in QEMU with [2]:
>>>>>>
>>>>>> [    1.621864] BUG: Unable to handle kernel data access on read at 0xc0eeff7f00000000
>>>>>> [    1.623058] Faulting instruction address: 0xc00000000045e5fc
>>>>>> [    1.623832] Oops: Kernel access of bad area, sig: 11 [#1]
>>>>>> [    1.624318] LE PAGE_SIZE=64K MMU=Hash SMP NR_CPUS=2048 NUMA PowerNV
>>>>>> [    1.625015] Modules linked in:
>>>>>> [    1.625463] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.14.0-rc7-next-20210827 #16
>>>>>> [    1.626237] NIP:  c00000000045e5fc LR: c00000000045e580 CTR: c000000000518220
>>>>>> [    1.626839] REGS: c00000000752b820 TRAP: 0380   Not tainted  (5.14.0-rc7-next-20210827)
>>>>>> [    1.627528] MSR:  9000000002009033 <SF,HV,VEC,EE,ME,IR,DR,RI,LE>  CR: 84002482  XER: 20000000
>>>>>> [    1.628449] CFAR: c000000000518300 IRQMASK: 0
>>>>>> [    1.628449] GPR00: c00000000045e580 c00000000752bac0 c0000000028a9300 0000000000000000
>>>>>> [    1.628449] GPR04: c200800000000000 ffffffffffffffff 000000000000000a 0000000000000001
>>>>>> [    1.628449] GPR08: c0eeff7f00000000 0000000000000012 0000000000000000 0000000000000000
>>>>>> [    1.628449] GPR12: 0000000000000000 c000000002b20000 fffffffffffffffe c000000002971a70
>>>>>> [    1.628449] GPR16: c000000002960040 c0000000011a8f98 c00000000752bbf0 ffffffffffffffff
>>>>>> [    1.628449] GPR20: c2008fffffffffff c0eeff7f00000000 c000000002971a68 c00a0003ff000000
>>>>>> [    1.628449] GPR24: c000000002971a78 0000000000000002 0000000000000001 c0000000011a8f98
>>>>>> [    1.628449] GPR28: c0000000011a8f98 c0000000028daef8 c200800000000000 c200900000000000
>>>>>> [    1.634090] NIP [c00000000045e5fc] __walk_page_range+0x2bc/0xce0
>>>>>> [    1.635117] LR [c00000000045e580] __walk_page_range+0x240/0xce0
>>>>>> [    1.635755] Call Trace:
>>>>>> [    1.636018] [c00000000752bac0] [c00000000045e580] __walk_page_range+0x240/0xce0 (unreliable)
>>>>>> [    1.636811] [c00000000752bbd0] [c00000000045f234] walk_page_range_novma+0x74/0xb0
>>>>>> [    1.637459] [c00000000752bc20] [c000000000518448] ptdump_walk_pgd+0x98/0x170
>>>>>> [    1.638138] [c00000000752bc70] [c0000000000aa988] ptdump_check_wx+0x88/0xd0
>>>>>> [    1.638738] [c00000000752bd50] [c00000000008d6d8] mark_rodata_ro+0x48/0x80
>>>>>> [    1.639299] [c00000000752bdb0] [c000000000012a34] kernel_init+0x74/0x1a0
>>>>>> [    1.639842] [c00000000752be10] [c00000000000cfd4] ret_from_kernel_thread+0x5c/0x64
>>>>>> [    1.640597] Instruction dump:
>>>>>> [    1.641021] 38e7ffff 39490010 7ce707b4 7fca5436 79081564 7d4a3838 7908f082 794a1f24
>>>>>> [    1.641740] 78a8f00e 30e6ffff 7ea85214 7ce73110 <7d48502a> 78f90fa4 2c2a0000 39290010
>>>>>> [    1.642771] ---[ end trace 6cf72b085097ad52 ]---
>>>>>> [    1.643220]
>>>>>> [    2.644228] Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b
>>>>>> [    2.645523] ---[ end Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b ]---
>>>>>>
>>>>>> This is not compiler specific, I can reproduce it with GCC 11.2.0 and
>>>>>> binutils 2.37. If there is any additional information I can provide,
>>>>>> please let me know.
>>>>>
>>>>> Can you provide a dissassembly of __walk_page_range() ? Or provide your vmlinux binary.
>>>>
>>>> It seems to be walking of the end of the pgd.
>>>>
>>>> [    3.373800] walk_p4d_range: addr c00fff0000000000 end c00fff8000000000
>>>> [    3.373852] walk_p4d_range: addr c00fff8000000000 end c010000000000000	<- end of pgd at PAGE_OFFSET + 4PB
>>>> [    3.373905] walk_p4d_range: addr c010000000000000 end c010008000000000
>>>
>>> Yes, I want it to walk from TASK_SIZE_MAX up to 0xffffffffffffffff :)
>> 
>> But the page table doesn't span that far? 0_o
>> 
>>> static struct ptdump_range ptdump_range[] __ro_after_init = {
>>> 	{TASK_SIZE_MAX, ~0UL},
>>> 	{0, 0}
>>> };
>>>
>>> Ok, well, ppc32 go up to 0xffffffff
>>>
>>> What's the top address to be used for ppc64 ?
>> 
>> It's different for (hash | radix) x page size.
>> 
>> The below works, and matches what we used to do.
>> 
>> Possibly we can come up with something cleaner, not sure.
>> 
>> cheers
>> 
>> 
>> diff --git a/arch/powerpc/mm/ptdump/ptdump.c b/arch/powerpc/mm/ptdump/ptdump.c
>> index 2d80d775d15e..3d3778a74969 100644
>> --- a/arch/powerpc/mm/ptdump/ptdump.c
>> +++ b/arch/powerpc/mm/ptdump/ptdump.c
>> @@ -359,6 +359,8 @@ static int __init ptdump_init(void)
>>   		ptdump_range[0].start = KERN_VIRT_START;
>>   	else
>>   		ptdump_range[0].start = PAGE_OFFSET;
>> +
>> +	ptdump_range[0].end = ptdump_range[0].start + (PGDIR_SIZE * PTRS_PER_PGD);
>
> Hum ...
>
> It was:
>
> 	for (i = pgd_index(addr); i < PTRS_PER_PGD; i++, pgd++, addr += PGDIR_SIZE) {
>
> And there is
>
> #define pgd_index(a)  (((a) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1))

Yes you're right.

> Do we have the following ?
>
> 	pgd_index(KERN_VIRT_START) == 0

No.

Since 0034d395f89d ("powerpc/mm/hash64: Map all the kernel regions in the same 0xc range")

It's:

 	pgd_index(PAGE_OFFSET) == 0


> Shouldn't it be something like
>
> 	ptdump_range[0].end = PAGE_OFFSET + (PGDIR_SIZE * PTRS_PER_PGD);

Yep.

And we should also change the start address for hash to be PAGE_OFFSET.
Even though we don't expect anything in the page tables between
PAGE_OFFSET and KERN_VIRT_START, it's still good to check that range.

cheers

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH v4 4/4] powerpc/ptdump: Convert powerpc to GENERIC_PTDUMP
@ 2021-08-31 13:48                 ` Michael Ellerman
  0 siblings, 0 replies; 25+ messages in thread
From: Michael Ellerman @ 2021-08-31 13:48 UTC (permalink / raw)
  To: Christophe Leroy, Nathan Chancellor
  Cc: linuxppc-dev, Paul Mackerras, linux-kernel

Christophe Leroy <christophe.leroy@csgroup.eu> writes:
> Le 30/08/2021 à 13:55, Michael Ellerman a écrit :
>> Christophe Leroy <christophe.leroy@csgroup.eu> writes:
>>> Le 30/08/2021 à 09:52, Michael Ellerman a écrit :
>>>> Christophe Leroy <christophe.leroy@csgroup.eu> writes:
>>>>> Le 29/08/2021 à 20:55, Nathan Chancellor a écrit :
>>>>>> On Thu, Jul 08, 2021 at 04:49:43PM +0000, Christophe Leroy wrote:
>>>>>>> This patch converts powerpc to the generic PTDUMP implementation.
>>>>>>>
>>>>>>
>>>>>> This patch as commit e084728393a5 ("powerpc/ptdump: Convert powerpc to
>>>>>> GENERIC_PTDUMP") in powerpc/next causes a panic with Fedora's ppc64le
>>>>>> config [1] when booting up in QEMU with [2]:
>>>>>>
>>>>>> [    1.621864] BUG: Unable to handle kernel data access on read at 0xc0eeff7f00000000
>>>>>> [    1.623058] Faulting instruction address: 0xc00000000045e5fc
>>>>>> [    1.623832] Oops: Kernel access of bad area, sig: 11 [#1]
>>>>>> [    1.624318] LE PAGE_SIZE=64K MMU=Hash SMP NR_CPUS=2048 NUMA PowerNV
>>>>>> [    1.625015] Modules linked in:
>>>>>> [    1.625463] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.14.0-rc7-next-20210827 #16
>>>>>> [    1.626237] NIP:  c00000000045e5fc LR: c00000000045e580 CTR: c000000000518220
>>>>>> [    1.626839] REGS: c00000000752b820 TRAP: 0380   Not tainted  (5.14.0-rc7-next-20210827)
>>>>>> [    1.627528] MSR:  9000000002009033 <SF,HV,VEC,EE,ME,IR,DR,RI,LE>  CR: 84002482  XER: 20000000
>>>>>> [    1.628449] CFAR: c000000000518300 IRQMASK: 0
>>>>>> [    1.628449] GPR00: c00000000045e580 c00000000752bac0 c0000000028a9300 0000000000000000
>>>>>> [    1.628449] GPR04: c200800000000000 ffffffffffffffff 000000000000000a 0000000000000001
>>>>>> [    1.628449] GPR08: c0eeff7f00000000 0000000000000012 0000000000000000 0000000000000000
>>>>>> [    1.628449] GPR12: 0000000000000000 c000000002b20000 fffffffffffffffe c000000002971a70
>>>>>> [    1.628449] GPR16: c000000002960040 c0000000011a8f98 c00000000752bbf0 ffffffffffffffff
>>>>>> [    1.628449] GPR20: c2008fffffffffff c0eeff7f00000000 c000000002971a68 c00a0003ff000000
>>>>>> [    1.628449] GPR24: c000000002971a78 0000000000000002 0000000000000001 c0000000011a8f98
>>>>>> [    1.628449] GPR28: c0000000011a8f98 c0000000028daef8 c200800000000000 c200900000000000
>>>>>> [    1.634090] NIP [c00000000045e5fc] __walk_page_range+0x2bc/0xce0
>>>>>> [    1.635117] LR [c00000000045e580] __walk_page_range+0x240/0xce0
>>>>>> [    1.635755] Call Trace:
>>>>>> [    1.636018] [c00000000752bac0] [c00000000045e580] __walk_page_range+0x240/0xce0 (unreliable)
>>>>>> [    1.636811] [c00000000752bbd0] [c00000000045f234] walk_page_range_novma+0x74/0xb0
>>>>>> [    1.637459] [c00000000752bc20] [c000000000518448] ptdump_walk_pgd+0x98/0x170
>>>>>> [    1.638138] [c00000000752bc70] [c0000000000aa988] ptdump_check_wx+0x88/0xd0
>>>>>> [    1.638738] [c00000000752bd50] [c00000000008d6d8] mark_rodata_ro+0x48/0x80
>>>>>> [    1.639299] [c00000000752bdb0] [c000000000012a34] kernel_init+0x74/0x1a0
>>>>>> [    1.639842] [c00000000752be10] [c00000000000cfd4] ret_from_kernel_thread+0x5c/0x64
>>>>>> [    1.640597] Instruction dump:
>>>>>> [    1.641021] 38e7ffff 39490010 7ce707b4 7fca5436 79081564 7d4a3838 7908f082 794a1f24
>>>>>> [    1.641740] 78a8f00e 30e6ffff 7ea85214 7ce73110 <7d48502a> 78f90fa4 2c2a0000 39290010
>>>>>> [    1.642771] ---[ end trace 6cf72b085097ad52 ]---
>>>>>> [    1.643220]
>>>>>> [    2.644228] Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b
>>>>>> [    2.645523] ---[ end Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b ]---
>>>>>>
>>>>>> This is not compiler specific, I can reproduce it with GCC 11.2.0 and
>>>>>> binutils 2.37. If there is any additional information I can provide,
>>>>>> please let me know.
>>>>>
>>>>> Can you provide a dissassembly of __walk_page_range() ? Or provide your vmlinux binary.
>>>>
>>>> It seems to be walking of the end of the pgd.
>>>>
>>>> [    3.373800] walk_p4d_range: addr c00fff0000000000 end c00fff8000000000
>>>> [    3.373852] walk_p4d_range: addr c00fff8000000000 end c010000000000000	<- end of pgd at PAGE_OFFSET + 4PB
>>>> [    3.373905] walk_p4d_range: addr c010000000000000 end c010008000000000
>>>
>>> Yes, I want it to walk from TASK_SIZE_MAX up to 0xffffffffffffffff :)
>> 
>> But the page table doesn't span that far? 0_o
>> 
>>> static struct ptdump_range ptdump_range[] __ro_after_init = {
>>> 	{TASK_SIZE_MAX, ~0UL},
>>> 	{0, 0}
>>> };
>>>
>>> Ok, well, ppc32 go up to 0xffffffff
>>>
>>> What's the top address to be used for ppc64 ?
>> 
>> It's different for (hash | radix) x page size.
>> 
>> The below works, and matches what we used to do.
>> 
>> Possibly we can come up with something cleaner, not sure.
>> 
>> cheers
>> 
>> 
>> diff --git a/arch/powerpc/mm/ptdump/ptdump.c b/arch/powerpc/mm/ptdump/ptdump.c
>> index 2d80d775d15e..3d3778a74969 100644
>> --- a/arch/powerpc/mm/ptdump/ptdump.c
>> +++ b/arch/powerpc/mm/ptdump/ptdump.c
>> @@ -359,6 +359,8 @@ static int __init ptdump_init(void)
>>   		ptdump_range[0].start = KERN_VIRT_START;
>>   	else
>>   		ptdump_range[0].start = PAGE_OFFSET;
>> +
>> +	ptdump_range[0].end = ptdump_range[0].start + (PGDIR_SIZE * PTRS_PER_PGD);
>
> Hum ...
>
> It was:
>
> 	for (i = pgd_index(addr); i < PTRS_PER_PGD; i++, pgd++, addr += PGDIR_SIZE) {
>
> And there is
>
> #define pgd_index(a)  (((a) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1))

Yes you're right.

> Do we have the following ?
>
> 	pgd_index(KERN_VIRT_START) == 0

No.

Since 0034d395f89d ("powerpc/mm/hash64: Map all the kernel regions in the same 0xc range")

It's:

 	pgd_index(PAGE_OFFSET) == 0


> Shouldn't it be something like
>
> 	ptdump_range[0].end = PAGE_OFFSET + (PGDIR_SIZE * PTRS_PER_PGD);

Yep.

And we should also change the start address for hash to be PAGE_OFFSET.
Even though we don't expect anything in the page tables between
PAGE_OFFSET and KERN_VIRT_START, it's still good to check that range.

cheers

^ permalink raw reply	[flat|nested] 25+ messages in thread

end of thread, other threads:[~2021-08-31 13:49 UTC | newest]

Thread overview: 25+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-07-08 16:49 [PATCH v4 1/4] powerpc/ptdump: Use DEFINE_SHOW_ATTRIBUTE() Christophe Leroy
2021-07-08 16:49 ` Christophe Leroy
2021-07-08 16:49 ` [PATCH v4 2/4] powerpc/ptdump: Remove unused 'page_size' parameter Christophe Leroy
2021-07-08 16:49   ` Christophe Leroy
2021-07-08 16:49 ` [PATCH v4 3/4] powerpc/ptdump: Reduce level numbers by 1 in note_page() and add p4d level Christophe Leroy
2021-07-08 16:49   ` Christophe Leroy
2021-07-08 16:49 ` [PATCH v4 4/4] powerpc/ptdump: Convert powerpc to GENERIC_PTDUMP Christophe Leroy
2021-07-08 16:49   ` Christophe Leroy
2021-08-29 18:55   ` Nathan Chancellor
2021-08-29 18:55     ` Nathan Chancellor
2021-08-29 19:11     ` Christophe Leroy
2021-08-29 19:11       ` Christophe Leroy
2021-08-29 21:39       ` Nathan Chancellor
2021-08-29 21:39         ` Nathan Chancellor
2021-08-30  7:52       ` Michael Ellerman
2021-08-30  7:52         ` Michael Ellerman
2021-08-30  8:16         ` Christophe Leroy
2021-08-30  8:16           ` Christophe Leroy
2021-08-30 11:55           ` Michael Ellerman
2021-08-30 11:55             ` Michael Ellerman
2021-08-30 13:16             ` Christophe Leroy
2021-08-30 13:16               ` Christophe Leroy
2021-08-31 13:48               ` Michael Ellerman
2021-08-31 13:48                 ` Michael Ellerman
2021-08-27 13:16 ` [PATCH v4 1/4] powerpc/ptdump: Use DEFINE_SHOW_ATTRIBUTE() Michael Ellerman

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.