linuxppc-dev.lists.ozlabs.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 1/3] powerpc: Reimplement __get_SP() as a function not a define
@ 2014-10-13  8:41 Anton Blanchard
  2014-10-13  8:41 ` [PATCH 2/3] powerpc: Rename __get_SP() to current_stack_pointer() Anton Blanchard
  2014-10-13  8:41 ` [PATCH 3/3] powerpc/pseries: Use dump_stack instead of show_stack Anton Blanchard
  0 siblings, 2 replies; 3+ messages in thread
From: Anton Blanchard @ 2014-10-13  8:41 UTC (permalink / raw)
  To: benh, paulus, mpe, zhong; +Cc: linuxppc-dev

Li Zhong points out an issue with our current __get_SP()
implementation. If ftrace function tracing is enabled (ie -pg
profiling using _mcount) we spill a stack frame on 64bit all the
time.

If a function calls __get_SP() and later calls a function that is
tail call optimised, we will pop the stack frame and the value
returned by __get_SP() is no longer valid. An example from Li can
be found in save_stack_trace -> save_context_stack:

c0000000000432c0 <.save_stack_trace>:
c0000000000432c0:       mflr    r0
c0000000000432c4:       std     r0,16(r1)
c0000000000432c8:       stdu    r1,-128(r1) <-- stack frame for _mcount
c0000000000432cc:       std     r3,112(r1)
c0000000000432d0:       bl      <._mcount>
c0000000000432d4:       nop

c0000000000432d8:       mr      r4,r1 <-- __get_SP()

c0000000000432dc:       ld      r5,632(r13)
c0000000000432e0:       ld      r3,112(r1)
c0000000000432e4:       li      r6,1

c0000000000432e8:       addi    r1,r1,128 <-- pop stack frame

c0000000000432ec:       ld      r0,16(r1)
c0000000000432f0:       mtlr    r0
c0000000000432f4:       b       <.save_context_stack> <-- tail call optimized

save_context_stack ends up with a stack pointer below the current
one, and it is likely to be scribbled over.

Fix this by making __get_SP() a function which returns the
callers stack frame. Also replace inline assembly which grabs
the stack pointer in save_stack_trace and show_stack with
__get_SP().

This also fixes an issue with perf_arch_fetch_caller_regs().
It currently unwinds the stack once, which will skip a
valid stack frame on a leaf function. With the __get_SP() fixes
in this patch, we never need to unwind the stack frame to get
to the first interesting frame.

We have to export __get_SP() because perf_arch_fetch_caller_regs()
(which is used in modules) calls it from a header file.

Reported-by: Li Zhong <zhong@linux.vnet.ibm.com>
Signed-off-by: Anton Blanchard <anton@samba.org>
---
 arch/powerpc/include/asm/perf_event.h | 2 +-
 arch/powerpc/include/asm/reg.h        | 3 +--
 arch/powerpc/kernel/misc.S            | 4 ++++
 arch/powerpc/kernel/ppc_ksyms.c       | 2 ++
 arch/powerpc/kernel/process.c         | 2 +-
 arch/powerpc/kernel/stacktrace.c      | 2 +-
 6 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/include/asm/perf_event.h b/arch/powerpc/include/asm/perf_event.h
index 0bb2372..b058568 100644
--- a/arch/powerpc/include/asm/perf_event.h
+++ b/arch/powerpc/include/asm/perf_event.h
@@ -34,7 +34,7 @@
 	do {							\
 		(regs)->result = 0;				\
 		(regs)->nip = __ip;				\
-		(regs)->gpr[1] = *(unsigned long *)__get_SP();	\
+		(regs)->gpr[1] = __get_SP();			\
 		asm volatile("mfmsr %0" : "=r" ((regs)->msr));	\
 	} while (0)
 #endif
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index fe3f948..e539d7e 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -1265,8 +1265,7 @@ static inline unsigned long mfvtb (void)
 
 #define proc_trap()	asm volatile("trap")
 
-#define __get_SP()	({unsigned long sp; \
-			asm volatile("mr %0,1": "=r" (sp)); sp;})
+extern unsigned long __get_SP(void);
 
 extern unsigned long scom970_read(unsigned int address);
 extern void scom970_write(unsigned int address, unsigned long value);
diff --git a/arch/powerpc/kernel/misc.S b/arch/powerpc/kernel/misc.S
index 7ce26d4..120deb7 100644
--- a/arch/powerpc/kernel/misc.S
+++ b/arch/powerpc/kernel/misc.S
@@ -114,3 +114,7 @@ _GLOBAL(longjmp)
 	mtlr	r0
 	mr	r3,r4
 	blr
+
+_GLOBAL(__get_SP)
+	PPC_LL	r3,0(r1)
+	blr
diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c
index c4dfff6..9d84efb 100644
--- a/arch/powerpc/kernel/ppc_ksyms.c
+++ b/arch/powerpc/kernel/ppc_ksyms.c
@@ -41,3 +41,5 @@ EXPORT_SYMBOL(giveup_spe);
 #ifdef CONFIG_EPAPR_PARAVIRT
 EXPORT_SYMBOL(epapr_hypercall_start);
 #endif
+
+EXPORT_SYMBOL(__get_SP);
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index aa1df89..3cc6439 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -1545,7 +1545,7 @@ void show_stack(struct task_struct *tsk, unsigned long *stack)
 		tsk = current;
 	if (sp == 0) {
 		if (tsk == current)
-			asm("mr %0,1" : "=r" (sp));
+			sp = __get_SP();
 		else
 			sp = tsk->thread.ksp;
 	}
diff --git a/arch/powerpc/kernel/stacktrace.c b/arch/powerpc/kernel/stacktrace.c
index 3d30ef1..7f65bae 100644
--- a/arch/powerpc/kernel/stacktrace.c
+++ b/arch/powerpc/kernel/stacktrace.c
@@ -50,7 +50,7 @@ void save_stack_trace(struct stack_trace *trace)
 {
 	unsigned long sp;
 
-	asm("mr %0,1" : "=r" (sp));
+	sp = __get_SP();
 
 	save_context_stack(trace, sp, current, 1);
 }
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 3+ messages in thread

* [PATCH 2/3] powerpc: Rename __get_SP() to current_stack_pointer()
  2014-10-13  8:41 [PATCH 1/3] powerpc: Reimplement __get_SP() as a function not a define Anton Blanchard
@ 2014-10-13  8:41 ` Anton Blanchard
  2014-10-13  8:41 ` [PATCH 3/3] powerpc/pseries: Use dump_stack instead of show_stack Anton Blanchard
  1 sibling, 0 replies; 3+ messages in thread
From: Anton Blanchard @ 2014-10-13  8:41 UTC (permalink / raw)
  To: benh, paulus, mpe, zhong; +Cc: linuxppc-dev

Michael points out that __get_SP() is a pretty horrible
function name. Let's give it a better name.

Signed-off-by: Anton Blanchard <anton@samba.org>
---
 arch/powerpc/include/asm/perf_event.h | 2 +-
 arch/powerpc/include/asm/reg.h        | 2 +-
 arch/powerpc/kernel/irq.c             | 2 +-
 arch/powerpc/kernel/misc.S            | 2 +-
 arch/powerpc/kernel/ppc_ksyms.c       | 2 +-
 arch/powerpc/kernel/process.c         | 2 +-
 arch/powerpc/kernel/stacktrace.c      | 2 +-
 7 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/arch/powerpc/include/asm/perf_event.h b/arch/powerpc/include/asm/perf_event.h
index b058568..8bf1b63 100644
--- a/arch/powerpc/include/asm/perf_event.h
+++ b/arch/powerpc/include/asm/perf_event.h
@@ -34,7 +34,7 @@
 	do {							\
 		(regs)->result = 0;				\
 		(regs)->nip = __ip;				\
-		(regs)->gpr[1] = __get_SP();			\
+		(regs)->gpr[1] = current_stack_pointer();	\
 		asm volatile("mfmsr %0" : "=r" ((regs)->msr));	\
 	} while (0)
 #endif
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index e539d7e..c998279 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -1265,7 +1265,7 @@ static inline unsigned long mfvtb (void)
 
 #define proc_trap()	asm volatile("trap")
 
-extern unsigned long __get_SP(void);
+extern unsigned long current_stack_pointer(void);
 
 extern unsigned long scom970_read(unsigned int address);
 extern void scom970_write(unsigned int address, unsigned long value);
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index 8eb857f..c143835 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -466,7 +466,7 @@ static inline void check_stack_overflow(void)
 #ifdef CONFIG_DEBUG_STACKOVERFLOW
 	long sp;
 
-	sp = __get_SP() & (THREAD_SIZE-1);
+	sp = current_stack_pointer() & (THREAD_SIZE-1);
 
 	/* check for stack overflow: is there less than 2KB free? */
 	if (unlikely(sp < (sizeof(struct thread_info) + 2048))) {
diff --git a/arch/powerpc/kernel/misc.S b/arch/powerpc/kernel/misc.S
index 120deb7..0d43219 100644
--- a/arch/powerpc/kernel/misc.S
+++ b/arch/powerpc/kernel/misc.S
@@ -115,6 +115,6 @@ _GLOBAL(longjmp)
 	mr	r3,r4
 	blr
 
-_GLOBAL(__get_SP)
+_GLOBAL(current_stack_pointer)
 	PPC_LL	r3,0(r1)
 	blr
diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c
index 9d84efb..202963e 100644
--- a/arch/powerpc/kernel/ppc_ksyms.c
+++ b/arch/powerpc/kernel/ppc_ksyms.c
@@ -42,4 +42,4 @@ EXPORT_SYMBOL(giveup_spe);
 EXPORT_SYMBOL(epapr_hypercall_start);
 #endif
 
-EXPORT_SYMBOL(__get_SP);
+EXPORT_SYMBOL(current_stack_pointer);
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 3cc6439..923cd2d 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -1545,7 +1545,7 @@ void show_stack(struct task_struct *tsk, unsigned long *stack)
 		tsk = current;
 	if (sp == 0) {
 		if (tsk == current)
-			sp = __get_SP();
+			sp = current_stack_pointer();
 		else
 			sp = tsk->thread.ksp;
 	}
diff --git a/arch/powerpc/kernel/stacktrace.c b/arch/powerpc/kernel/stacktrace.c
index 7f65bae..ea43a34 100644
--- a/arch/powerpc/kernel/stacktrace.c
+++ b/arch/powerpc/kernel/stacktrace.c
@@ -50,7 +50,7 @@ void save_stack_trace(struct stack_trace *trace)
 {
 	unsigned long sp;
 
-	sp = __get_SP();
+	sp = current_stack_pointer();
 
 	save_context_stack(trace, sp, current, 1);
 }
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 3+ messages in thread

* [PATCH 3/3] powerpc/pseries: Use dump_stack instead of show_stack
  2014-10-13  8:41 [PATCH 1/3] powerpc: Reimplement __get_SP() as a function not a define Anton Blanchard
  2014-10-13  8:41 ` [PATCH 2/3] powerpc: Rename __get_SP() to current_stack_pointer() Anton Blanchard
@ 2014-10-13  8:41 ` Anton Blanchard
  1 sibling, 0 replies; 3+ messages in thread
From: Anton Blanchard @ 2014-10-13  8:41 UTC (permalink / raw)
  To: benh, paulus, mpe, zhong; +Cc: linuxppc-dev

We can use the simpler dump_stack() instead of
show_stack(current, __get_SP())

Signed-off-by: Anton Blanchard <anton@samba.org>
---
 arch/powerpc/platforms/pseries/iommu.c | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c
index de1ec54..e32e009 100644
--- a/arch/powerpc/platforms/pseries/iommu.c
+++ b/arch/powerpc/platforms/pseries/iommu.c
@@ -30,7 +30,6 @@
 #include <linux/mm.h>
 #include <linux/memblock.h>
 #include <linux/spinlock.h>
-#include <linux/sched.h>	/* for show_stack */
 #include <linux/string.h>
 #include <linux/pci.h>
 #include <linux/dma-mapping.h>
@@ -168,7 +167,7 @@ static int tce_build_pSeriesLP(struct iommu_table *tbl, long tcenum,
 			printk("\tindex   = 0x%llx\n", (u64)tbl->it_index);
 			printk("\ttcenum  = 0x%llx\n", (u64)tcenum);
 			printk("\ttce val = 0x%llx\n", tce );
-			show_stack(current, (unsigned long *)__get_SP());
+			dump_stack();
 		}
 
 		tcenum++;
@@ -257,7 +256,7 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum,
 		printk("\tindex   = 0x%llx\n", (u64)tbl->it_index);
 		printk("\tnpages  = 0x%llx\n", (u64)npages);
 		printk("\ttce[0] val = 0x%llx\n", tcep[0]);
-		show_stack(current, (unsigned long *)__get_SP());
+		dump_stack();
 	}
 	return ret;
 }
@@ -273,7 +272,7 @@ static void tce_free_pSeriesLP(struct iommu_table *tbl, long tcenum, long npages
 			printk("tce_free_pSeriesLP: plpar_tce_put failed. rc=%lld\n", rc);
 			printk("\tindex   = 0x%llx\n", (u64)tbl->it_index);
 			printk("\ttcenum  = 0x%llx\n", (u64)tcenum);
-			show_stack(current, (unsigned long *)__get_SP());
+			dump_stack();
 		}
 
 		tcenum++;
@@ -292,7 +291,7 @@ static void tce_freemulti_pSeriesLP(struct iommu_table *tbl, long tcenum, long n
 		printk("\trc      = %lld\n", rc);
 		printk("\tindex   = 0x%llx\n", (u64)tbl->it_index);
 		printk("\tnpages  = 0x%llx\n", (u64)npages);
-		show_stack(current, (unsigned long *)__get_SP());
+		dump_stack();
 	}
 }
 
@@ -307,7 +306,7 @@ static unsigned long tce_get_pSeriesLP(struct iommu_table *tbl, long tcenum)
 		printk("tce_get_pSeriesLP: plpar_tce_get failed. rc=%lld\n", rc);
 		printk("\tindex   = 0x%llx\n", (u64)tbl->it_index);
 		printk("\ttcenum  = 0x%llx\n", (u64)tcenum);
-		show_stack(current, (unsigned long *)__get_SP());
+		dump_stack();
 	}
 
 	return tce_ret;
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2014-10-13  8:41 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-10-13  8:41 [PATCH 1/3] powerpc: Reimplement __get_SP() as a function not a define Anton Blanchard
2014-10-13  8:41 ` [PATCH 2/3] powerpc: Rename __get_SP() to current_stack_pointer() Anton Blanchard
2014-10-13  8:41 ` [PATCH 3/3] powerpc/pseries: Use dump_stack instead of show_stack Anton Blanchard

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).