All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v2 00/13] Support hardware perf counters on xtensa
@ 2015-07-18  8:30 Max Filippov
  2015-07-18  8:30 ` [PATCH v2 01/13] xtensa: clean up Kconfig dependencies for custom cores Max Filippov
                   ` (12 more replies)
  0 siblings, 13 replies; 21+ messages in thread
From: Max Filippov @ 2015-07-18  8:30 UTC (permalink / raw)
  To: linux-xtensa, linux-kernel; +Cc: Chris Zankel, Marc Gauthier, Max Filippov

Hello,

this series adds support for hardware performance counters in counting
and sampling modes as well as for page fault counting. Perf IRQ is handled
as NMI when configured properly (as the only topmost medium-priority
interrupt).

It reorganizes stack tracing code to share it between old oprofile and new
perf interfaces, and improves kernel stack traces both for builtin debug
functions and for gdb.

There are also fixes and cleanups for the areas touched by the new features.

Changes v1->v2:
- make continuous stack changes conditional;
- replace l32i/s32i + add used to access spilled registers with l32e/s32e;
- use -EINVAL instead of -ENOENT for invalid PMU event configurations;
- fix kernel register spilling code;
- optionally treat perf IRQ as NMI.

Max Filippov (13):
  xtensa: clean up Kconfig dependencies for custom cores
  xtensa: keep exception/interrupt stack continuous
  xtensa: move oprofile stack tracing to stacktrace.c
  xtensa: select PERF_USE_VMALLOC for cache-aliasing configurations
  xtensa: add profiling IRQ type to xtensa_irq_map
  xtensa: count software page fault perf events
  xtensa: implement counting and sampling perf events
  perf tools: xtensa: add DWARF register names
  xtensa: reorganize irq flags tracing
  xtensa: fix kernel register spilling
  xtensa: don't touch EXC_TABLE_FIXUP in _switch_to
  xtensa: implement fake NMI
  xtensa: drop unused irq_err_count

 arch/xtensa/Kconfig                      |  22 +-
 arch/xtensa/include/asm/atomic.h         |  10 +-
 arch/xtensa/include/asm/cmpxchg.h        |   4 +-
 arch/xtensa/include/asm/irqflags.h       |  22 +-
 arch/xtensa/include/asm/processor.h      |  31 ++-
 arch/xtensa/include/asm/stacktrace.h     |   8 +
 arch/xtensa/include/asm/traps.h          |  29 +-
 arch/xtensa/kernel/Makefile              |   1 +
 arch/xtensa/kernel/entry.S               | 197 ++++++++++----
 arch/xtensa/kernel/irq.c                 |  17 +-
 arch/xtensa/kernel/perf_event.c          | 454 +++++++++++++++++++++++++++++++
 arch/xtensa/kernel/stacktrace.c          | 167 +++++++++++-
 arch/xtensa/kernel/traps.c               |  31 ++-
 arch/xtensa/kernel/vectors.S             |  10 +-
 arch/xtensa/mm/fault.c                   |   7 +
 arch/xtensa/oprofile/backtrace.c         | 158 +----------
 tools/perf/arch/xtensa/Build             |   1 +
 tools/perf/arch/xtensa/Makefile          |   3 +
 tools/perf/arch/xtensa/util/Build        |   1 +
 tools/perf/arch/xtensa/util/dwarf-regs.c |  25 ++
 20 files changed, 954 insertions(+), 244 deletions(-)
 create mode 100644 arch/xtensa/kernel/perf_event.c
 create mode 100644 tools/perf/arch/xtensa/Build
 create mode 100644 tools/perf/arch/xtensa/Makefile
 create mode 100644 tools/perf/arch/xtensa/util/Build
 create mode 100644 tools/perf/arch/xtensa/util/dwarf-regs.c

-- 
1.8.1.4


^ permalink raw reply	[flat|nested] 21+ messages in thread

* [PATCH v2 01/13] xtensa: clean up Kconfig dependencies for custom cores
  2015-07-18  8:30 [PATCH v2 00/13] Support hardware perf counters on xtensa Max Filippov
@ 2015-07-18  8:30 ` Max Filippov
  2015-07-18  8:30 ` [PATCH v2 02/13] xtensa: keep exception/interrupt stack continuous Max Filippov
                   ` (11 subsequent siblings)
  12 siblings, 0 replies; 21+ messages in thread
From: Max Filippov @ 2015-07-18  8:30 UTC (permalink / raw)
  To: linux-xtensa, linux-kernel; +Cc: Chris Zankel, Marc Gauthier, Max Filippov

Signed-off-by: Max Filippov <jcmvbkbc@gmail.com>
---
 arch/xtensa/Kconfig | 11 +++--------
 1 file changed, 3 insertions(+), 8 deletions(-)

diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig
index 87be10e..ba22699 100644
--- a/arch/xtensa/Kconfig
+++ b/arch/xtensa/Kconfig
@@ -61,9 +61,7 @@ config TRACE_IRQFLAGS_SUPPORT
 	def_bool y
 
 config MMU
-	bool
-	default n if !XTENSA_VARIANT_CUSTOM
-	default XTENSA_VARIANT_MMU if XTENSA_VARIANT_CUSTOM
+	def_bool n
 
 config VARIANT_IRQ_SWITCH
 	def_bool n
@@ -71,9 +69,6 @@ config VARIANT_IRQ_SWITCH
 config HAVE_XTENSA_GPIO32
 	def_bool n
 
-config MAY_HAVE_SMP
-	def_bool n
-
 menu "Processor type and features"
 
 choice
@@ -100,7 +95,6 @@ config XTENSA_VARIANT_DC233C
 
 config XTENSA_VARIANT_CUSTOM
 	bool "Custom Xtensa processor configuration"
-	select MAY_HAVE_SMP
 	select HAVE_XTENSA_GPIO32
 	help
 	  Select this variant to use a custom Xtensa processor configuration.
@@ -126,6 +120,7 @@ config XTENSA_VARIANT_MMU
 	bool "Core variant has a Full MMU (TLB, Pages, Protection, etc)"
 	depends on XTENSA_VARIANT_CUSTOM
 	default y
+	select MMU
 	help
 	  Build a Conventional Kernel with full MMU support,
 	  ie: it supports a TLB with auto-loading, page protection.
@@ -143,7 +138,7 @@ source "kernel/Kconfig.preempt"
 
 config HAVE_SMP
 	bool "System Supports SMP (MX)"
-	depends on MAY_HAVE_SMP
+	depends on XTENSA_VARIANT_CUSTOM
 	select XTENSA_MX
 	help
 	  This option is use to indicate that the system-on-a-chip (SOC)
-- 
1.8.1.4


^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [PATCH v2 02/13] xtensa: keep exception/interrupt stack continuous
  2015-07-18  8:30 [PATCH v2 00/13] Support hardware perf counters on xtensa Max Filippov
  2015-07-18  8:30 ` [PATCH v2 01/13] xtensa: clean up Kconfig dependencies for custom cores Max Filippov
@ 2015-07-18  8:30 ` Max Filippov
  2015-07-18  8:30 ` [PATCH v2 03/13] xtensa: move oprofile stack tracing to stacktrace.c Max Filippov
                   ` (10 subsequent siblings)
  12 siblings, 0 replies; 21+ messages in thread
From: Max Filippov @ 2015-07-18  8:30 UTC (permalink / raw)
  To: linux-xtensa, linux-kernel; +Cc: Chris Zankel, Marc Gauthier, Max Filippov

Restore the original a0 in the kernel exception stack frame. This way it
looks as if the frame that took the interrupt/exception did an alloca
(copying a0 and a1, potentially spilled under the old stack pointer, to the
new location as well) to save registers and then called the handler. The
point where the interrupt/exception was taken is not in the stack chain, only
in pt_regs (a call4 from that address can be simulated to keep it in the
stack trace).

Signed-off-by: Max Filippov <jcmvbkbc@gmail.com>
---
Changes v1->v2:
- make continuous stack changes conditional;
- replace l32i/s32i + add used to access spilled registers with l32e/s32e.

 arch/xtensa/kernel/entry.S | 59 ++++++++++++++++++++++++++++++++++------------
 1 file changed, 44 insertions(+), 15 deletions(-)

diff --git a/arch/xtensa/kernel/entry.S b/arch/xtensa/kernel/entry.S
index 82bbfa5..b64075f 100644
--- a/arch/xtensa/kernel/entry.S
+++ b/arch/xtensa/kernel/entry.S
@@ -28,6 +28,10 @@
 #include <asm/tlbflush.h>
 #include <variant/tie-asm.h>
 
+#if defined(CONFIG_DEBUG_KERNEL) || defined(CONFIG_STACKTRACE_SUPPORT)
+#define XTENSA_CONTINUOUS_STACK
+#endif
+
 /* Unimplemented features. */
 
 #undef KERNEL_STACK_OVERFLOW_CHECK
@@ -219,6 +223,10 @@ _user_exception:
 
 2:	/* Now, jump to the common exception handler. */
 
+#ifdef XTENSA_CONTINUOUS_STACK
+	movi	a0, 0			# terminate user stack trace with 0
+	wsr	a0, depc
+#endif
 	j	common_exception
 
 ENDPROC(user_exception)
@@ -264,6 +272,24 @@ ENTRY(kernel_exception)
 	.globl _kernel_exception
 _kernel_exception:
 
+	/* Copy spill slots of a0 and a1 to imitate movsp
+	 * in order to keep exception stack continuous
+	 */
+#ifdef XTENSA_CONTINUOUS_STACK
+	/* Reload previous stack pointer: l32e relative to the current stack
+	 * won't reach it, because kernel exception stack frame is definitely
+	 * larger than l32e range (64 bytes).
+	 */
+	l32i	a0, a1, PT_AREG1
+	l32e	a3, a0, -16
+	l32e	a0, a0, -12
+	s32e	a3, a1, -16
+	s32e	a0, a1, -12
+
+	l32i	a0, a1, PT_AREG0	# restore saved a0
+	wsr	a0, depc
+#endif
+
 	/* Save SAR and turn off single stepping */
 
 	movi	a2, 0
@@ -346,12 +372,12 @@ common_exception:
 	s32i	a0, a1, PT_EXCCAUSE
 	s32i	a3, a2, EXC_TABLE_FIXUP
 
-	/* All unrecoverable states are saved on stack, now, and a1 is valid,
-	 * so we can allow exceptions and interrupts (*) again.
-	 * Set PS(EXCM = 0, UM = 0, RING = 0, OWB = 0, WOE = 1, INTLEVEL = X)
+	/* All unrecoverable states are saved on stack, now, and a1 is valid.
+	 * Now we can allow exceptions again. In case we've got an interrupt
+	 * PS.INTLEVEL is set to LOCKLEVEL disabling further interrupts,
+	 * otherwise it's left unchanged.
 	 *
-	 * (*) We only allow interrupts if they were previously enabled and
-	 *     we're not handling an IRQ
+	 * Set PS(EXCM = 0, UM = 0, RING = 0, OWB = 0, WOE = 1, INTLEVEL = X)
 	 */
 
 	rsr	a3, ps
@@ -362,28 +388,32 @@ common_exception:
 	moveqz	a3, a2, a0		# a3 = LOCKLEVEL iff interrupt
 	movi	a2, 1 << PS_WOE_BIT
 	or	a3, a3, a2
-	rsr	a0, exccause
+	rsr	a2, exccause
+#ifdef XTENSA_CONTINUOUS_STACK
+	/* restore return address (or 0 if return to userspace) */
+	rsr	a0, depc
+#endif
 	xsr	a3, ps
 
 	s32i	a3, a1, PT_PS		# save ps
 
 	/* Save lbeg, lend */
 
-	rsr	a2, lbeg
+	rsr	a4, lbeg
 	rsr	a3, lend
-	s32i	a2, a1, PT_LBEG
+	s32i	a4, a1, PT_LBEG
 	s32i	a3, a1, PT_LEND
 
 	/* Save SCOMPARE1 */
 
 #if XCHAL_HAVE_S32C1I
-	rsr     a2, scompare1
-	s32i    a2, a1, PT_SCOMPARE1
+	rsr     a3, scompare1
+	s32i    a3, a1, PT_SCOMPARE1
 #endif
 
 	/* Save optional registers. */
 
-	save_xtregs_opt a1 a2 a4 a5 a6 a7 PT_XTREGS_OPT
+	save_xtregs_opt a1 a3 a4 a5 a6 a7 PT_XTREGS_OPT
 	
 #ifdef CONFIG_TRACE_IRQFLAGS
 	l32i	a4, a1, PT_DEPC
@@ -391,8 +421,7 @@ common_exception:
 	 * while PS.EXCM was set, i.e. interrupts disabled.
 	 */
 	bgeui	a4, VALID_DOUBLE_EXCEPTION_ADDRESS, 1f
-	l32i	a4, a1, PT_EXCCAUSE
-	bnei	a4, EXCCAUSE_LEVEL1_INTERRUPT, 1f
+	bnei	a2, EXCCAUSE_LEVEL1_INTERRUPT, 1f
 	/* We came here with an interrupt means interrupts were enabled
 	 * and we've just disabled them.
 	 */
@@ -407,8 +436,8 @@ common_exception:
 
 	rsr	a4, excsave1
 	mov	a6, a1			# pass stack frame
-	mov	a7, a0			# pass EXCCAUSE
-	addx4	a4, a0, a4
+	mov	a7, a2			# pass EXCCAUSE
+	addx4	a4, a2, a4
 	l32i	a4, a4, EXC_TABLE_DEFAULT		# load handler
 
 	/* Call the second-level handler */
-- 
1.8.1.4


^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [PATCH v2 03/13] xtensa: move oprofile stack tracing to stacktrace.c
  2015-07-18  8:30 [PATCH v2 00/13] Support hardware perf counters on xtensa Max Filippov
  2015-07-18  8:30 ` [PATCH v2 01/13] xtensa: clean up Kconfig dependencies for custom cores Max Filippov
  2015-07-18  8:30 ` [PATCH v2 02/13] xtensa: keep exception/interrupt stack continuous Max Filippov
@ 2015-07-18  8:30 ` Max Filippov
  2015-07-18  8:30 ` [PATCH v2 04/13] xtensa: select PERF_USE_VMALLOC for cache-aliasing configurations Max Filippov
                   ` (9 subsequent siblings)
  12 siblings, 0 replies; 21+ messages in thread
From: Max Filippov @ 2015-07-18  8:30 UTC (permalink / raw)
  To: linux-xtensa, linux-kernel; +Cc: Chris Zankel, Marc Gauthier, Max Filippov

The old oprofile interface will share user stack tracing with the new perf
interface. Move the oprofile user/kernel stack tracing to stacktrace.c to
make that possible.

Signed-off-by: Max Filippov <jcmvbkbc@gmail.com>
---
 arch/xtensa/include/asm/stacktrace.h |   8 ++
 arch/xtensa/kernel/stacktrace.c      | 167 ++++++++++++++++++++++++++++++++++-
 arch/xtensa/oprofile/backtrace.c     | 158 ++-------------------------------
 3 files changed, 182 insertions(+), 151 deletions(-)

diff --git a/arch/xtensa/include/asm/stacktrace.h b/arch/xtensa/include/asm/stacktrace.h
index 6a05fcb..fe06e8e 100644
--- a/arch/xtensa/include/asm/stacktrace.h
+++ b/arch/xtensa/include/asm/stacktrace.h
@@ -33,4 +33,12 @@ void walk_stackframe(unsigned long *sp,
 		int (*fn)(struct stackframe *frame, void *data),
 		void *data);
 
+void xtensa_backtrace_kernel(struct pt_regs *regs, unsigned int depth,
+			     int (*kfn)(struct stackframe *frame, void *data),
+			     int (*ufn)(struct stackframe *frame, void *data),
+			     void *data);
+void xtensa_backtrace_user(struct pt_regs *regs, unsigned int depth,
+			   int (*ufn)(struct stackframe *frame, void *data),
+			   void *data);
+
 #endif /* _XTENSA_STACKTRACE_H */
diff --git a/arch/xtensa/kernel/stacktrace.c b/arch/xtensa/kernel/stacktrace.c
index 7d2c317..7538d80 100644
--- a/arch/xtensa/kernel/stacktrace.c
+++ b/arch/xtensa/kernel/stacktrace.c
@@ -1,11 +1,12 @@
 /*
- * arch/xtensa/kernel/stacktrace.c
+ * Kernel and userspace stack tracing.
  *
  * This file is subject to the terms and conditions of the GNU General Public
  * License.  See the file "COPYING" in the main directory of this archive
  * for more details.
  *
  * Copyright (C) 2001 - 2013 Tensilica Inc.
+ * Copyright (C) 2015 Cadence Design Systems Inc.
  */
 #include <linux/export.h>
 #include <linux/sched.h>
@@ -13,6 +14,170 @@
 
 #include <asm/stacktrace.h>
 #include <asm/traps.h>
+#include <asm/uaccess.h>
+
+#if IS_ENABLED(CONFIG_OPROFILE) || IS_ENABLED(CONFIG_PERF_EVENTS)
+
+/* Address of common_exception_return, used to check the
+ * transition from kernel to user space.
+ */
+extern int common_exception_return;
+
+/* A struct that maps to the part of the frame containing the a0 and
+ * a1 registers.
+ */
+struct frame_start {
+	unsigned long a0;
+	unsigned long a1;
+};
+
+void xtensa_backtrace_user(struct pt_regs *regs, unsigned int depth,
+			   int (*ufn)(struct stackframe *frame, void *data),
+			   void *data)
+{
+	unsigned long windowstart = regs->windowstart;
+	unsigned long windowbase = regs->windowbase;
+	unsigned long a0 = regs->areg[0];
+	unsigned long a1 = regs->areg[1];
+	unsigned long pc = regs->pc;
+	struct stackframe frame;
+	int index;
+
+	if (!depth--)
+		return;
+
+	frame.pc = pc;
+	frame.sp = a1;
+
+	if (pc == 0 || pc >= TASK_SIZE || ufn(&frame, data))
+		return;
+
+	/* Two steps:
+	 *
+	 * 1. Look through the register window for the
+	 * previous PCs in the call trace.
+	 *
+	 * 2. Look on the stack.
+	 */
+
+	/* Step 1.  */
+	/* Rotate WINDOWSTART to move the bit corresponding to
+	 * the current window to the bit #0.
+	 */
+	windowstart = (windowstart << WSBITS | windowstart) >> windowbase;
+
+	/* Look for bits that are set, they correspond to
+	 * valid windows.
+	 */
+	for (index = WSBITS - 1; (index > 0) && depth; depth--, index--)
+		if (windowstart & (1 << index)) {
+			/* Get the PC from a0 and a1. */
+			pc = MAKE_PC_FROM_RA(a0, pc);
+			/* Read a0 and a1 from the
+			 * corresponding position in AREGs.
+			 */
+			a0 = regs->areg[index * 4];
+			a1 = regs->areg[index * 4 + 1];
+
+			frame.pc = pc;
+			frame.sp = a1;
+
+			if (pc == 0 || pc >= TASK_SIZE || ufn(&frame, data))
+				return;
+		}
+
+	/* Step 2. */
+	/* We are done with the register window, we need to
+	 * look through the stack.
+	 */
+	if (!depth)
+		return;
+
+	/* Start from the a1 register. */
+	/* a1 = regs->areg[1]; */
+	while (a0 != 0 && depth--) {
+		struct frame_start frame_start;
+		/* Get the location for a1, a0 for the
+		 * previous frame from the current a1.
+		 */
+		unsigned long *psp = (unsigned long *)a1;
+
+		psp -= 4;
+
+		/* Check if the region is OK to access. */
+		if (!access_ok(VERIFY_READ, psp, sizeof(frame_start)))
+			return;
+		/* Copy a1, a0 from user space stack frame. */
+		if (__copy_from_user_inatomic(&frame_start, psp,
+					      sizeof(frame_start)))
+			return;
+
+		pc = MAKE_PC_FROM_RA(a0, pc);
+		a0 = frame_start.a0;
+		a1 = frame_start.a1;
+
+		frame.pc = pc;
+		frame.sp = a1;
+
+		if (pc == 0 || pc >= TASK_SIZE || ufn(&frame, data))
+			return;
+	}
+}
+EXPORT_SYMBOL(xtensa_backtrace_user);
+
+void xtensa_backtrace_kernel(struct pt_regs *regs, unsigned int depth,
+			     int (*kfn)(struct stackframe *frame, void *data),
+			     int (*ufn)(struct stackframe *frame, void *data),
+			     void *data)
+{
+	unsigned long pc = regs->depc > VALID_DOUBLE_EXCEPTION_ADDRESS ?
+		regs->depc : regs->pc;
+	unsigned long sp_start, sp_end;
+	unsigned long a0 = regs->areg[0];
+	unsigned long a1 = regs->areg[1];
+
+	sp_start = a1 & ~(THREAD_SIZE - 1);
+	sp_end = sp_start + THREAD_SIZE;
+
+	/* Spill the register window to the stack first. */
+	spill_registers();
+
+	/* Read the stack frames one by one and create the PC
+	 * from the a0 and a1 registers saved there.
+	 */
+	while (a1 > sp_start && a1 < sp_end && depth--) {
+		struct stackframe frame;
+		unsigned long *psp = (unsigned long *)a1;
+
+		frame.pc = pc;
+		frame.sp = a1;
+
+		if (kernel_text_address(pc) && kfn(&frame, data))
+			return;
+
+		if (pc == (unsigned long)&common_exception_return) {
+			regs = (struct pt_regs *)a1;
+			if (user_mode(regs)) {
+				if (ufn == NULL)
+					return;
+				xtensa_backtrace_user(regs, depth, ufn, data);
+				return;
+			}
+			a0 = regs->areg[0];
+			a1 = regs->areg[1];
+			continue;
+		}
+
+		sp_start = a1;
+
+		pc = MAKE_PC_FROM_RA(a0, pc);
+		a0 = *(psp - 4);
+		a1 = *(psp - 3);
+	}
+}
+EXPORT_SYMBOL(xtensa_backtrace_kernel);
+
+#endif
 
 void walk_stackframe(unsigned long *sp,
 		int (*fn)(struct stackframe *frame, void *data),
diff --git a/arch/xtensa/oprofile/backtrace.c b/arch/xtensa/oprofile/backtrace.c
index 5f03a59..8f95203 100644
--- a/arch/xtensa/oprofile/backtrace.c
+++ b/arch/xtensa/oprofile/backtrace.c
@@ -2,168 +2,26 @@
  * @file backtrace.c
  *
  * @remark Copyright 2008 Tensilica Inc.
+ * Copyright (C) 2015 Cadence Design Systems Inc.
  * @remark Read the file COPYING
  *
  */
 
 #include <linux/oprofile.h>
-#include <linux/sched.h>
-#include <linux/mm.h>
 #include <asm/ptrace.h>
-#include <asm/uaccess.h>
-#include <asm/traps.h>
+#include <asm/stacktrace.h>
 
-/* Address of common_exception_return, used to check the
- * transition from kernel to user space.
- */
-extern int common_exception_return;
-
-/* A struct that maps to the part of the frame containing the a0 and
- * a1 registers.
- */
-struct frame_start {
-	unsigned long a0;
-	unsigned long a1;
-};
-
-static void xtensa_backtrace_user(struct pt_regs *regs, unsigned int depth)
-{
-	unsigned long windowstart = regs->windowstart;
-	unsigned long windowbase = regs->windowbase;
-	unsigned long a0 = regs->areg[0];
-	unsigned long a1 = regs->areg[1];
-	unsigned long pc = MAKE_PC_FROM_RA(a0, regs->pc);
-	int index;
-
-	/* First add the current PC to the trace. */
-	if (pc != 0 && pc <= TASK_SIZE)
-		oprofile_add_trace(pc);
-	else
-		return;
-
-	/* Two steps:
-	 *
-	 * 1. Look through the register window for the
-	 * previous PCs in the call trace.
-	 *
-	 * 2. Look on the stack.
-	 */
-
-	/* Step 1.  */
-	/* Rotate WINDOWSTART to move the bit corresponding to
-	 * the current window to the bit #0.
-	 */
-	windowstart = (windowstart << WSBITS | windowstart) >> windowbase;
-
-	/* Look for bits that are set, they correspond to
-	 * valid windows.
-	 */
-	for (index = WSBITS - 1; (index > 0) && depth; depth--, index--)
-		if (windowstart & (1 << index)) {
-			/* Read a0 and a1 from the
-			 * corresponding position in AREGs.
-			 */
-			a0 = regs->areg[index * 4];
-			a1 = regs->areg[index * 4 + 1];
-			/* Get the PC from a0 and a1. */
-			pc = MAKE_PC_FROM_RA(a0, pc);
-
-			/* Add the PC to the trace. */
-			if (pc != 0 && pc <= TASK_SIZE)
-				oprofile_add_trace(pc);
-			else
-				return;
-		}
-
-	/* Step 2. */
-	/* We are done with the register window, we need to
-	 * look through the stack.
-	 */
-	if (depth > 0) {
-		/* Start from the a1 register. */
-		/* a1 = regs->areg[1]; */
-		while (a0 != 0 && depth--) {
-
-			struct frame_start frame_start;
-			/* Get the location for a1, a0 for the
-			 * previous frame from the current a1.
-			 */
-			unsigned long *psp = (unsigned long *)a1;
-			psp -= 4;
-
-			/* Check if the region is OK to access. */
-			if (!access_ok(VERIFY_READ, psp, sizeof(frame_start)))
-				return;
-			/* Copy a1, a0 from user space stack frame. */
-			if (__copy_from_user_inatomic(&frame_start, psp,
-						sizeof(frame_start)))
-				return;
-
-			a0 = frame_start.a0;
-			a1 = frame_start.a1;
-			pc = MAKE_PC_FROM_RA(a0, pc);
-
-			if (pc != 0 && pc <= TASK_SIZE)
-				oprofile_add_trace(pc);
-			else
-				return;
-		}
-	}
-}
-
-static void xtensa_backtrace_kernel(struct pt_regs *regs, unsigned int depth)
+static int xtensa_backtrace_cb(struct stackframe *frame, void *data)
 {
-	unsigned long pc = regs->pc;
-	unsigned long *psp;
-	unsigned long sp_start, sp_end;
-	unsigned long a0 = regs->areg[0];
-	unsigned long a1 = regs->areg[1];
-
-	sp_start = a1 & ~(THREAD_SIZE-1);
-	sp_end = sp_start + THREAD_SIZE;
-
-	/* Spill the register window to the stack first. */
-	spill_registers();
-
-	/* Read the stack frames one by one and create the PC
-	 * from the a0 and a1 registers saved there.
-	 */
-	while (a1 > sp_start && a1 < sp_end && depth--) {
-		pc = MAKE_PC_FROM_RA(a0, pc);
-
-		/* Add the PC to the trace. */
-		oprofile_add_trace(pc);
-		if (pc == (unsigned long) &common_exception_return) {
-			regs = (struct pt_regs *)a1;
-			if (user_mode(regs)) {
-				pc = regs->pc;
-				if (pc != 0 && pc <= TASK_SIZE)
-					oprofile_add_trace(pc);
-				else
-					return;
-				return xtensa_backtrace_user(regs, depth);
-			}
-			a0 = regs->areg[0];
-			a1 = regs->areg[1];
-			continue;
-		}
-
-		psp = (unsigned long *)a1;
-
-		a0 = *(psp - 4);
-		a1 = *(psp - 3);
-
-		if (a1 <= (unsigned long)psp)
-			return;
-
-	}
-	return;
+	oprofile_add_trace(frame->pc);
+	return 0;
 }
 
 void xtensa_backtrace(struct pt_regs * const regs, unsigned int depth)
 {
 	if (user_mode(regs))
-		xtensa_backtrace_user(regs, depth);
+		xtensa_backtrace_user(regs, depth, xtensa_backtrace_cb, NULL);
 	else
-		xtensa_backtrace_kernel(regs, depth);
+		xtensa_backtrace_kernel(regs, depth, xtensa_backtrace_cb,
+					xtensa_backtrace_cb, NULL);
 }
-- 
1.8.1.4


^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [PATCH v2 04/13] xtensa: select PERF_USE_VMALLOC for cache-aliasing configurations
  2015-07-18  8:30 [PATCH v2 00/13] Support hardware perf counters on xtensa Max Filippov
                   ` (2 preceding siblings ...)
  2015-07-18  8:30 ` [PATCH v2 03/13] xtensa: move oprofile stack tracing to stacktrace.c Max Filippov
@ 2015-07-18  8:30 ` Max Filippov
  2015-07-18  8:30 ` [PATCH v2 05/13] xtensa: add profiling IRQ type to xtensa_irq_map Max Filippov
                   ` (8 subsequent siblings)
  12 siblings, 0 replies; 21+ messages in thread
From: Max Filippov @ 2015-07-18  8:30 UTC (permalink / raw)
  To: linux-xtensa, linux-kernel; +Cc: Chris Zankel, Marc Gauthier, Max Filippov

Signed-off-by: Max Filippov <jcmvbkbc@gmail.com>
---
 arch/xtensa/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig
index ba22699..3c57934 100644
--- a/arch/xtensa/Kconfig
+++ b/arch/xtensa/Kconfig
@@ -20,6 +20,7 @@ config XTENSA
 	select HAVE_PERF_EVENTS
 	select IRQ_DOMAIN
 	select MODULES_USE_ELF_RELA
+	select PERF_USE_VMALLOC
 	select VIRT_TO_BUS
 	help
 	  Xtensa processors are 32-bit RISC machines designed by Tensilica
-- 
1.8.1.4


^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [PATCH v2 05/13] xtensa: add profiling IRQ type to xtensa_irq_map
  2015-07-18  8:30 [PATCH v2 00/13] Support hardware perf counters on xtensa Max Filippov
                   ` (3 preceding siblings ...)
  2015-07-18  8:30 ` [PATCH v2 04/13] xtensa: select PERF_USE_VMALLOC for cache-aliasing configurations Max Filippov
@ 2015-07-18  8:30 ` Max Filippov
  2015-07-18  8:30 ` [PATCH v2 06/13] xtensa: count software page fault perf events Max Filippov
                   ` (7 subsequent siblings)
  12 siblings, 0 replies; 21+ messages in thread
From: Max Filippov @ 2015-07-18  8:30 UTC (permalink / raw)
  To: linux-xtensa, linux-kernel; +Cc: Chris Zankel, Marc Gauthier, Max Filippov

Signed-off-by: Max Filippov <jcmvbkbc@gmail.com>
---
 arch/xtensa/kernel/irq.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/arch/xtensa/kernel/irq.c b/arch/xtensa/kernel/irq.c
index 3eee94f..32b6056 100644
--- a/arch/xtensa/kernel/irq.c
+++ b/arch/xtensa/kernel/irq.c
@@ -106,6 +106,12 @@ int xtensa_irq_map(struct irq_domain *d, unsigned int irq,
 		irq_set_chip_and_handler_name(irq, irq_chip,
 				handle_percpu_irq, "timer");
 		irq_clear_status_flags(irq, IRQ_LEVEL);
+#ifdef XCHAL_INTTYPE_MASK_PROFILING
+	} else if (mask & XCHAL_INTTYPE_MASK_PROFILING) {
+		irq_set_chip_and_handler_name(irq, irq_chip,
+				handle_percpu_irq, "profiling");
+		irq_set_status_flags(irq, IRQ_LEVEL);
+#endif
 	} else {/* XCHAL_INTTYPE_MASK_WRITE_ERROR */
 		/* XCHAL_INTTYPE_MASK_NMI */
 		irq_set_chip_and_handler_name(irq, irq_chip,
-- 
1.8.1.4


^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [PATCH v2 06/13] xtensa: count software page fault perf events
  2015-07-18  8:30 [PATCH v2 00/13] Support hardware perf counters on xtensa Max Filippov
                   ` (4 preceding siblings ...)
  2015-07-18  8:30 ` [PATCH v2 05/13] xtensa: add profiling IRQ type to xtensa_irq_map Max Filippov
@ 2015-07-18  8:30 ` Max Filippov
  2015-07-18  8:30 ` [PATCH v2 07/13] xtensa: implement counting and sampling " Max Filippov
                   ` (6 subsequent siblings)
  12 siblings, 0 replies; 21+ messages in thread
From: Max Filippov @ 2015-07-18  8:30 UTC (permalink / raw)
  To: linux-xtensa, linux-kernel; +Cc: Chris Zankel, Marc Gauthier, Max Filippov

Signed-off-by: Max Filippov <jcmvbkbc@gmail.com>
---
 arch/xtensa/mm/fault.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/arch/xtensa/mm/fault.c b/arch/xtensa/mm/fault.c
index 9e3571a..76360a2 100644
--- a/arch/xtensa/mm/fault.c
+++ b/arch/xtensa/mm/fault.c
@@ -15,6 +15,7 @@
 #include <linux/mm.h>
 #include <linux/module.h>
 #include <linux/hardirq.h>
+#include <linux/perf_event.h>
 #include <asm/mmu_context.h>
 #include <asm/cacheflush.h>
 #include <asm/hardirq.h>
@@ -142,6 +143,12 @@ good_area:
 	}
 
 	up_read(&mm->mmap_sem);
+	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
+	if (flags & VM_FAULT_MAJOR)
+		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs, address);
+	else if (flags & VM_FAULT_MINOR)
+		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs, address);
+
 	return;
 
 	/* Something tried to access memory that isn't in our memory map..
-- 
1.8.1.4


^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [PATCH v2 07/13] xtensa: implement counting and sampling perf events
  2015-07-18  8:30 [PATCH v2 00/13] Support hardware perf counters on xtensa Max Filippov
                   ` (5 preceding siblings ...)
  2015-07-18  8:30 ` [PATCH v2 06/13] xtensa: count software page fault perf events Max Filippov
@ 2015-07-18  8:30 ` Max Filippov
  2015-08-06 19:46   ` Arnaldo Carvalho de Melo
  2015-07-18  8:30 ` [PATCH v2 08/13] perf tools: xtensa: add DWARF register names Max Filippov
                   ` (5 subsequent siblings)
  12 siblings, 1 reply; 21+ messages in thread
From: Max Filippov @ 2015-07-18  8:30 UTC (permalink / raw)
  To: linux-xtensa, linux-kernel
  Cc: Chris Zankel, Marc Gauthier, Max Filippov, Peter Zijlstra,
	Paul Mackerras, Ingo Molnar, Arnaldo Carvalho de Melo

The Xtensa Performance Monitor Module has up to eight 32-bit wide performance
counters. Each counter may be enabled independently and can count any
single type of hardware performance event. Event counting may be enabled
and disabled globally (per PMM).
Each counter has a status register with bits indicating whether the counter
has overflowed, and may be programmed to raise the profiling IRQ on overflow.
This IRQ is used to rewind counters, allowing counting events to count more
than 2^32 samples, and to report samples for sampling events.

For more details see Tensilica Debug User's Guide, chapter 8
"Performance monitor module".

Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Signed-off-by: Max Filippov <jcmvbkbc@gmail.com>
---
Changes v1->v2:
- use -EINVAL instead of -ENOENT for invalid PMU event configurations.

 arch/xtensa/Kconfig             |  10 +
 arch/xtensa/kernel/Makefile     |   1 +
 arch/xtensa/kernel/perf_event.c | 450 ++++++++++++++++++++++++++++++++++++++++
 3 files changed, 461 insertions(+)
 create mode 100644 arch/xtensa/kernel/perf_event.c

diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig
index 3c57934..0e92885 100644
--- a/arch/xtensa/Kconfig
+++ b/arch/xtensa/Kconfig
@@ -126,6 +126,16 @@ config XTENSA_VARIANT_MMU
 	  Build a Conventional Kernel with full MMU support,
 	  ie: it supports a TLB with auto-loading, page protection.
 
+config XTENSA_VARIANT_HAVE_PERF_EVENTS
+	bool "Core variant has Performance Monitor Module"
+	depends on XTENSA_VARIANT_CUSTOM
+	default n
+	help
+	  Enable if core variant has Performance Monitor Module with
+	  External Registers Interface.
+
+	  If unsure, say N.
+
 config XTENSA_UNALIGNED_USER
 	bool "Unaligned memory access in use space"
 	help
diff --git a/arch/xtensa/kernel/Makefile b/arch/xtensa/kernel/Makefile
index d3a0f0f..547a757 100644
--- a/arch/xtensa/kernel/Makefile
+++ b/arch/xtensa/kernel/Makefile
@@ -13,6 +13,7 @@ obj-$(CONFIG_PCI) += pci.o
 obj-$(CONFIG_MODULES) += xtensa_ksyms.o module.o
 obj-$(CONFIG_FUNCTION_TRACER) += mcount.o
 obj-$(CONFIG_SMP) += smp.o mxhead.o
+obj-$(CONFIG_XTENSA_VARIANT_HAVE_PERF_EVENTS) += perf_event.o
 
 AFLAGS_head.o += -mtext-section-literals
 
diff --git a/arch/xtensa/kernel/perf_event.c b/arch/xtensa/kernel/perf_event.c
new file mode 100644
index 0000000..b44df3c
--- /dev/null
+++ b/arch/xtensa/kernel/perf_event.c
@@ -0,0 +1,450 @@
+/*
+ * Xtensa Performance Monitor Module driver
+ * See Tensilica Debug User's Guide for PMU registers documentation.
+ *
+ * Copyright (C) 2015 Cadence Design Systems Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/interrupt.h>
+#include <linux/irqdomain.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/perf_event.h>
+#include <linux/platform_device.h>
+
+#include <asm/processor.h>
+#include <asm/stacktrace.h>
+
+/* Global control/status for all perf counters */
+#define XTENSA_PMU_PMG			0x1000
+/* Perf counter values */
+#define XTENSA_PMU_PM(i)		(0x1080 + (i) * 4)
+/* Perf counter control registers */
+#define XTENSA_PMU_PMCTRL(i)		(0x1100 + (i) * 4)
+/* Perf counter status registers */
+#define XTENSA_PMU_PMSTAT(i)		(0x1180 + (i) * 4)
+
+#define XTENSA_PMU_PMG_PMEN		0x1
+
+#define XTENSA_PMU_COUNTER_MASK		0xffffffffULL
+#define XTENSA_PMU_COUNTER_MAX		0x7fffffff
+
+#define XTENSA_PMU_PMCTRL_INTEN		0x00000001
+#define XTENSA_PMU_PMCTRL_KRNLCNT	0x00000008
+#define XTENSA_PMU_PMCTRL_TRACELEVEL	0x000000f0
+#define XTENSA_PMU_PMCTRL_SELECT_SHIFT	8
+#define XTENSA_PMU_PMCTRL_SELECT	0x00001f00
+#define XTENSA_PMU_PMCTRL_MASK_SHIFT	16
+#define XTENSA_PMU_PMCTRL_MASK		0xffff0000
+
+#define XTENSA_PMU_MASK(select, mask) \
+	(((select) << XTENSA_PMU_PMCTRL_SELECT_SHIFT) | \
+	 ((mask) << XTENSA_PMU_PMCTRL_MASK_SHIFT) | \
+	 XTENSA_PMU_PMCTRL_TRACELEVEL | \
+	 XTENSA_PMU_PMCTRL_INTEN)
+
+#define XTENSA_PMU_PMSTAT_OVFL		0x00000001
+#define XTENSA_PMU_PMSTAT_INTASRT	0x00000010
+
+struct xtensa_pmu_events {
+	/* Array of events currently on this core */
+	struct perf_event *event[XCHAL_NUM_PERF_COUNTERS];
+	/* Bitmap of used hardware counters */
+	unsigned long used_mask[BITS_TO_LONGS(XCHAL_NUM_PERF_COUNTERS)];
+};
+static DEFINE_PER_CPU(struct xtensa_pmu_events, xtensa_pmu_events);
+
+static const u32 xtensa_hw_ctl[] = {
+	[PERF_COUNT_HW_CPU_CYCLES]		= XTENSA_PMU_MASK(0, 0x1),
+	[PERF_COUNT_HW_INSTRUCTIONS]		= XTENSA_PMU_MASK(2, 0xffff),
+	[PERF_COUNT_HW_CACHE_REFERENCES]	= XTENSA_PMU_MASK(10, 0x1),
+	[PERF_COUNT_HW_CACHE_MISSES]		= XTENSA_PMU_MASK(12, 0x1),
+	/* Taken and non-taken branches + taken loop ends */
+	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= XTENSA_PMU_MASK(2, 0x490),
+	/* Instruction-related + other global stall cycles */
+	[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND]	= XTENSA_PMU_MASK(4, 0x1ff),
+	/* Data-related global stall cycles */
+	[PERF_COUNT_HW_STALLED_CYCLES_BACKEND]	= XTENSA_PMU_MASK(3, 0x1ff),
+};
+
+#define C(_x) PERF_COUNT_HW_CACHE_##_x
+
+static const u32 xtensa_cache_ctl[][C(OP_MAX)][C(RESULT_MAX)] = {
+	[C(L1D)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]	= XTENSA_PMU_MASK(10, 0x1),
+			[C(RESULT_MISS)]	= XTENSA_PMU_MASK(10, 0x2),
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]	= XTENSA_PMU_MASK(11, 0x1),
+			[C(RESULT_MISS)]	= XTENSA_PMU_MASK(11, 0x2),
+		},
+	},
+	[C(L1I)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]	= XTENSA_PMU_MASK(8, 0x1),
+			[C(RESULT_MISS)]	= XTENSA_PMU_MASK(8, 0x2),
+		},
+	},
+	[C(DTLB)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]	= XTENSA_PMU_MASK(9, 0x1),
+			[C(RESULT_MISS)]	= XTENSA_PMU_MASK(9, 0x8),
+		},
+	},
+	[C(ITLB)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]	= XTENSA_PMU_MASK(7, 0x1),
+			[C(RESULT_MISS)]	= XTENSA_PMU_MASK(7, 0x8),
+		},
+	},
+};
+
+static int xtensa_pmu_cache_event(u64 config)
+{
+	unsigned int cache_type, cache_op, cache_result;
+	int ret;
+
+	cache_type = (config >>  0) & 0xff;
+	cache_op = (config >>  8) & 0xff;
+	cache_result = (config >> 16) & 0xff;
+
+	if (cache_type >= ARRAY_SIZE(xtensa_cache_ctl) ||
+	    cache_op >= C(OP_MAX) ||
+	    cache_result >= C(RESULT_MAX))
+		return -EINVAL;
+
+	ret = xtensa_cache_ctl[cache_type][cache_op][cache_result];
+
+	if (ret == 0)
+		return -EINVAL;
+
+	return ret;
+}
+
+static inline uint32_t xtensa_pmu_read_counter(int idx)
+{
+	return get_er(XTENSA_PMU_PM(idx));
+}
+
+static inline void xtensa_pmu_write_counter(int idx, uint32_t v)
+{
+	set_er(v, XTENSA_PMU_PM(idx));
+}
+
+static void xtensa_perf_event_update(struct perf_event *event,
+				     struct hw_perf_event *hwc, int idx)
+{
+	uint64_t prev_raw_count, new_raw_count;
+	int64_t delta;
+
+	do {
+		prev_raw_count = local64_read(&hwc->prev_count);
+		new_raw_count = xtensa_pmu_read_counter(event->hw.idx);
+	} while (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
+				 new_raw_count) != prev_raw_count);
+
+	delta = (new_raw_count - prev_raw_count) & XTENSA_PMU_COUNTER_MASK;
+
+	local64_add(delta, &event->count);
+	local64_sub(delta, &hwc->period_left);
+}
+
+static bool xtensa_perf_event_set_period(struct perf_event *event,
+					 struct hw_perf_event *hwc, int idx)
+{
+	bool rc = false;
+	s64 left;
+
+	if (!is_sampling_event(event)) {
+		left = XTENSA_PMU_COUNTER_MAX;
+	} else {
+		s64 period = hwc->sample_period;
+
+		left = local64_read(&hwc->period_left);
+		if (left <= -period) {
+			left = period;
+			local64_set(&hwc->period_left, left);
+			hwc->last_period = period;
+			rc = true;
+		} else if (left <= 0) {
+			left += period;
+			local64_set(&hwc->period_left, left);
+			hwc->last_period = period;
+			rc = true;
+		}
+		if (left > XTENSA_PMU_COUNTER_MAX)
+			left = XTENSA_PMU_COUNTER_MAX;
+	}
+
+	local64_set(&hwc->prev_count, -left);
+	xtensa_pmu_write_counter(idx, -left);
+	perf_event_update_userpage(event);
+
+	return rc;
+}
+
+static void xtensa_pmu_enable(struct pmu *pmu)
+{
+	set_er(get_er(XTENSA_PMU_PMG) | XTENSA_PMU_PMG_PMEN, XTENSA_PMU_PMG);
+}
+
+static void xtensa_pmu_disable(struct pmu *pmu)
+{
+	set_er(get_er(XTENSA_PMU_PMG) & ~XTENSA_PMU_PMG_PMEN, XTENSA_PMU_PMG);
+}
+
+static int xtensa_pmu_event_init(struct perf_event *event)
+{
+	int ret;
+
+	switch (event->attr.type) {
+	case PERF_TYPE_HARDWARE:
+		if (event->attr.config >= ARRAY_SIZE(xtensa_hw_ctl) ||
+		    xtensa_hw_ctl[event->attr.config] == 0)
+			return -EINVAL;
+		event->hw.config = xtensa_hw_ctl[event->attr.config];
+		return 0;
+
+	case PERF_TYPE_HW_CACHE:
+		ret = xtensa_pmu_cache_event(event->attr.config);
+		if (ret < 0)
+			return ret;
+		event->hw.config = ret;
+		return 0;
+
+	case PERF_TYPE_RAW:
+		/* Not 'previous counter' select */
+		if ((event->attr.config & XTENSA_PMU_PMCTRL_SELECT) ==
+		    (1 << XTENSA_PMU_PMCTRL_SELECT_SHIFT))
+			return -EINVAL;
+		event->hw.config = (event->attr.config &
+				    (XTENSA_PMU_PMCTRL_KRNLCNT |
+				     XTENSA_PMU_PMCTRL_TRACELEVEL |
+				     XTENSA_PMU_PMCTRL_SELECT |
+				     XTENSA_PMU_PMCTRL_MASK)) |
+			XTENSA_PMU_PMCTRL_INTEN;
+		return 0;
+
+	default:
+		return -ENOENT;
+	}
+}
+
+/*
+ * Starts/Stops a counter present on the PMU. The PMI handler
+ * should stop the counter when perf_event_overflow() returns
+ * !0. ->start() will be used to continue.
+ */
+static void xtensa_pmu_start(struct perf_event *event, int flags)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	int idx = hwc->idx;
+
+	if (WARN_ON_ONCE(idx == -1))
+		return;
+
+	if (flags & PERF_EF_RELOAD) {
+		WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));
+		xtensa_perf_event_set_period(event, hwc, idx);
+	}
+
+	hwc->state = 0;
+
+	set_er(hwc->config, XTENSA_PMU_PMCTRL(idx));
+}
+
+static void xtensa_pmu_stop(struct perf_event *event, int flags)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	int idx = hwc->idx;
+
+	if (!(hwc->state & PERF_HES_STOPPED)) {
+		set_er(0, XTENSA_PMU_PMCTRL(idx));
+		set_er(get_er(XTENSA_PMU_PMSTAT(idx)),
+		       XTENSA_PMU_PMSTAT(idx));
+		hwc->state |= PERF_HES_STOPPED;
+	}
+
+	if ((flags & PERF_EF_UPDATE) &&
+	    !(event->hw.state & PERF_HES_UPTODATE)) {
+		xtensa_perf_event_update(event, &event->hw, idx);
+		event->hw.state |= PERF_HES_UPTODATE;
+	}
+}
+
+/*
+ * Adds/Removes a counter to/from the PMU, can be done inside
+ * a transaction, see the ->*_txn() methods.
+ */
+static int xtensa_pmu_add(struct perf_event *event, int flags)
+{
+	struct xtensa_pmu_events *ev = this_cpu_ptr(&xtensa_pmu_events);
+	struct hw_perf_event *hwc = &event->hw;
+	int idx = hwc->idx;
+
+	if (__test_and_set_bit(idx, ev->used_mask)) {
+		idx = find_first_zero_bit(ev->used_mask,
+					  XCHAL_NUM_PERF_COUNTERS);
+		if (idx == XCHAL_NUM_PERF_COUNTERS)
+			return -EAGAIN;
+
+		__set_bit(idx, ev->used_mask);
+		hwc->idx = idx;
+	}
+	ev->event[idx] = event;
+
+	hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
+
+	if (flags & PERF_EF_START)
+		xtensa_pmu_start(event, PERF_EF_RELOAD);
+
+	perf_event_update_userpage(event);
+	return 0;
+}
+
+static void xtensa_pmu_del(struct perf_event *event, int flags)
+{
+	struct xtensa_pmu_events *ev = this_cpu_ptr(&xtensa_pmu_events);
+
+	xtensa_pmu_stop(event, PERF_EF_UPDATE);
+	__clear_bit(event->hw.idx, ev->used_mask);
+	perf_event_update_userpage(event);
+}
+
+static void xtensa_pmu_read(struct perf_event *event)
+{
+	xtensa_perf_event_update(event, &event->hw, event->hw.idx);
+}
+
+static int callchain_trace(struct stackframe *frame, void *data)
+{
+	struct perf_callchain_entry *entry = data;
+
+	perf_callchain_store(entry, frame->pc);
+	return 0;
+}
+
+void perf_callchain_kernel(struct perf_callchain_entry *entry,
+			   struct pt_regs *regs)
+{
+	xtensa_backtrace_kernel(regs, PERF_MAX_STACK_DEPTH,
+				callchain_trace, NULL, entry);
+}
+
+void perf_callchain_user(struct perf_callchain_entry *entry,
+			 struct pt_regs *regs)
+{
+	xtensa_backtrace_user(regs, PERF_MAX_STACK_DEPTH,
+			      callchain_trace, entry);
+}
+
+void perf_event_print_debug(void)
+{
+	unsigned long flags;
+	unsigned i;
+
+	local_irq_save(flags);
+	pr_info("CPU#%d: PMG: 0x%08lx\n", smp_processor_id(),
+		get_er(XTENSA_PMU_PMG));
+	for (i = 0; i < XCHAL_NUM_PERF_COUNTERS; ++i)
+		pr_info("PM%d: 0x%08lx, PMCTRL%d: 0x%08lx, PMSTAT%d: 0x%08lx\n",
+			i, get_er(XTENSA_PMU_PM(i)),
+			i, get_er(XTENSA_PMU_PMCTRL(i)),
+			i, get_er(XTENSA_PMU_PMSTAT(i)));
+	local_irq_restore(flags);
+}
+
+static irqreturn_t xtensa_pmu_irq_handler(int irq, void *dev_id)
+{
+	irqreturn_t rc = IRQ_NONE;
+	struct xtensa_pmu_events *ev = this_cpu_ptr(&xtensa_pmu_events);
+	unsigned i;
+
+	for (i = find_first_bit(ev->used_mask, XCHAL_NUM_PERF_COUNTERS);
+	     i < XCHAL_NUM_PERF_COUNTERS;
+	     i = find_next_bit(ev->used_mask, XCHAL_NUM_PERF_COUNTERS, i + 1)) {
+		uint32_t v = get_er(XTENSA_PMU_PMSTAT(i));
+		struct perf_event *event = ev->event[i];
+		struct hw_perf_event *hwc = &event->hw;
+		u64 last_period;
+
+		if (!(v & XTENSA_PMU_PMSTAT_OVFL))
+			continue;
+
+		set_er(v, XTENSA_PMU_PMSTAT(i));
+		xtensa_perf_event_update(event, hwc, i);
+		last_period = hwc->last_period;
+		if (xtensa_perf_event_set_period(event, hwc, i)) {
+			struct perf_sample_data data;
+			struct pt_regs *regs = get_irq_regs();
+
+			perf_sample_data_init(&data, 0, last_period);
+			if (perf_event_overflow(event, &data, regs))
+				xtensa_pmu_stop(event, 0);
+		}
+
+		rc = IRQ_HANDLED;
+	}
+	return rc;
+}
+
+static struct pmu xtensa_pmu = {
+	.pmu_enable = xtensa_pmu_enable,
+	.pmu_disable = xtensa_pmu_disable,
+	.event_init = xtensa_pmu_event_init,
+	.add = xtensa_pmu_add,
+	.del = xtensa_pmu_del,
+	.start = xtensa_pmu_start,
+	.stop = xtensa_pmu_stop,
+	.read = xtensa_pmu_read,
+};
+
+static void xtensa_pmu_setup(void)
+{
+	unsigned i;
+
+	set_er(0, XTENSA_PMU_PMG);
+	for (i = 0; i < XCHAL_NUM_PERF_COUNTERS; ++i) {
+		set_er(0, XTENSA_PMU_PMCTRL(i));
+		set_er(get_er(XTENSA_PMU_PMSTAT(i)), XTENSA_PMU_PMSTAT(i));
+	}
+}
+
+static int xtensa_pmu_notifier(struct notifier_block *self,
+			       unsigned long action, void *data)
+{
+	switch (action & ~CPU_TASKS_FROZEN) {
+	case CPU_STARTING:
+		xtensa_pmu_setup();
+		break;
+
+	default:
+		break;
+	}
+
+	return NOTIFY_OK;
+}
+
+static int __init xtensa_pmu_init(void)
+{
+	int ret;
+	int irq = irq_create_mapping(NULL, XCHAL_PROFILING_INTERRUPT);
+
+	perf_cpu_notifier(xtensa_pmu_notifier);
+	ret = request_irq(irq, xtensa_pmu_irq_handler, IRQF_PERCPU,
+			  "pmu", NULL);
+	if (ret < 0)
+		return ret;
+
+	ret = perf_pmu_register(&xtensa_pmu, "cpu", PERF_TYPE_RAW);
+	if (ret)
+		free_irq(irq, NULL);
+
+	return ret;
+}
+early_initcall(xtensa_pmu_init);
-- 
1.8.1.4


^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [PATCH v2 08/13] perf tools: xtensa: add DWARF register names
  2015-07-18  8:30 [PATCH v2 00/13] Support hardware perf counters on xtensa Max Filippov
                   ` (6 preceding siblings ...)
  2015-07-18  8:30 ` [PATCH v2 07/13] xtensa: implement counting and sampling " Max Filippov
@ 2015-07-18  8:30 ` Max Filippov
  2015-08-07  7:22   ` [tip:perf/core] perf tools xtensa: Add " tip-bot for Max Filippov
  2015-07-18  8:30 ` [PATCH v2 09/13] xtensa: reorganize irq flags tracing Max Filippov
                   ` (4 subsequent siblings)
  12 siblings, 1 reply; 21+ messages in thread
From: Max Filippov @ 2015-07-18  8:30 UTC (permalink / raw)
  To: linux-xtensa, linux-kernel
  Cc: Chris Zankel, Marc Gauthier, Max Filippov, Peter Zijlstra,
	Paul Mackerras, Ingo Molnar, Arnaldo Carvalho de Melo

Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Signed-off-by: Max Filippov <jcmvbkbc@gmail.com>
---
 tools/perf/arch/xtensa/Build             |  1 +
 tools/perf/arch/xtensa/Makefile          |  3 +++
 tools/perf/arch/xtensa/util/Build        |  1 +
 tools/perf/arch/xtensa/util/dwarf-regs.c | 25 +++++++++++++++++++++++++
 4 files changed, 30 insertions(+)
 create mode 100644 tools/perf/arch/xtensa/Build
 create mode 100644 tools/perf/arch/xtensa/Makefile
 create mode 100644 tools/perf/arch/xtensa/util/Build
 create mode 100644 tools/perf/arch/xtensa/util/dwarf-regs.c

diff --git a/tools/perf/arch/xtensa/Build b/tools/perf/arch/xtensa/Build
new file mode 100644
index 0000000..54afe4a
--- /dev/null
+++ b/tools/perf/arch/xtensa/Build
@@ -0,0 +1 @@
+libperf-y += util/
diff --git a/tools/perf/arch/xtensa/Makefile b/tools/perf/arch/xtensa/Makefile
new file mode 100644
index 0000000..7fbca17
--- /dev/null
+++ b/tools/perf/arch/xtensa/Makefile
@@ -0,0 +1,3 @@
+ifndef NO_DWARF
+PERF_HAVE_DWARF_REGS := 1
+endif
diff --git a/tools/perf/arch/xtensa/util/Build b/tools/perf/arch/xtensa/util/Build
new file mode 100644
index 0000000..954e287
--- /dev/null
+++ b/tools/perf/arch/xtensa/util/Build
@@ -0,0 +1 @@
+libperf-$(CONFIG_DWARF) += dwarf-regs.o
diff --git a/tools/perf/arch/xtensa/util/dwarf-regs.c b/tools/perf/arch/xtensa/util/dwarf-regs.c
new file mode 100644
index 0000000..4dba76b
--- /dev/null
+++ b/tools/perf/arch/xtensa/util/dwarf-regs.c
@@ -0,0 +1,25 @@
+/*
+ * Mapping of DWARF debug register numbers into register names.
+ *
+ * Copyright (c) 2015 Cadence Design Systems Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <stddef.h>
+#include <dwarf-regs.h>
+
+#define XTENSA_MAX_REGS 16
+
+const char *xtensa_regs_table[XTENSA_MAX_REGS] = {
+	"a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7",
+	"a8", "a9", "a10", "a11", "a12", "a13", "a14", "a15",
+};
+
+const char *get_arch_regstr(unsigned int n)
+{
+	return n < XTENSA_MAX_REGS ? xtensa_regs_table[n] : NULL;
+}
-- 
1.8.1.4


^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [PATCH v2 09/13] xtensa: reorganize irq flags tracing
  2015-07-18  8:30 [PATCH v2 00/13] Support hardware perf counters on xtensa Max Filippov
                   ` (7 preceding siblings ...)
  2015-07-18  8:30 ` [PATCH v2 08/13] perf tools: xtensa: add DWARF register names Max Filippov
@ 2015-07-18  8:30 ` Max Filippov
  2015-07-18  8:30 ` [PATCH v2 10/13] xtensa: fix kernel register spilling Max Filippov
                   ` (3 subsequent siblings)
  12 siblings, 0 replies; 21+ messages in thread
From: Max Filippov @ 2015-07-18  8:30 UTC (permalink / raw)
  To: linux-xtensa, linux-kernel; +Cc: Chris Zankel, Marc Gauthier, Max Filippov

entry.S only disables IRQs on hardware IRQ; move trace_hardirqs_off call
into do_interrupt. Check actual intlevel that will be restored on return
from exception handler to decide if trace_hardirqs_on should be called.
Annotate IRQ on/off points in the TIF_* handling loop on return from
exception handler.

Signed-off-by: Max Filippov <jcmvbkbc@gmail.com>
---
 arch/xtensa/kernel/entry.S | 39 ++++++++++++++-------------------------
 arch/xtensa/kernel/traps.c |  5 ++++-
 2 files changed, 18 insertions(+), 26 deletions(-)

diff --git a/arch/xtensa/kernel/entry.S b/arch/xtensa/kernel/entry.S
index b64075f..d060fc2 100644
--- a/arch/xtensa/kernel/entry.S
+++ b/arch/xtensa/kernel/entry.S
@@ -415,21 +415,6 @@ common_exception:
 
 	save_xtregs_opt a1 a3 a4 a5 a6 a7 PT_XTREGS_OPT
 	
-#ifdef CONFIG_TRACE_IRQFLAGS
-	l32i	a4, a1, PT_DEPC
-	/* Double exception means we came here with an exception
-	 * while PS.EXCM was set, i.e. interrupts disabled.
-	 */
-	bgeui	a4, VALID_DOUBLE_EXCEPTION_ADDRESS, 1f
-	bnei	a2, EXCCAUSE_LEVEL1_INTERRUPT, 1f
-	/* We came here with an interrupt means interrupts were enabled
-	 * and we've just disabled them.
-	 */
-	movi	a4, trace_hardirqs_off
-	callx4	a4
-1:
-#endif
-
 	/* Go to second-level dispatcher. Set up parameters to pass to the
 	 * exception handler and call the exception handler.
 	 */
@@ -450,6 +435,10 @@ common_exception_return:
 
 1:
 	rsil	a2, LOCKLEVEL
+#ifdef CONFIG_TRACE_IRQFLAGS
+	movi	a4, trace_hardirqs_off
+	callx4	a4
+#endif
 
 	/* Jump if we are returning from kernel exceptions. */
 
@@ -474,6 +463,10 @@ common_exception_return:
 
 	/* Call do_signal() */
 
+#ifdef CONFIG_TRACE_IRQFLAGS
+	movi	a4, trace_hardirqs_on
+	callx4	a4
+#endif
 	rsil	a2, 0
 	movi	a4, do_notify_resume	# int do_notify_resume(struct pt_regs*)
 	mov	a6, a1
@@ -482,6 +475,10 @@ common_exception_return:
 
 3:	/* Reschedule */
 
+#ifdef CONFIG_TRACE_IRQFLAGS
+	movi	a4, trace_hardirqs_on
+	callx4	a4
+#endif
 	rsil	a2, 0
 	movi	a4, schedule	# void schedule (void)
 	callx4	a4
@@ -510,16 +507,8 @@ common_exception_return:
 6:
 4:
 #ifdef CONFIG_TRACE_IRQFLAGS
-	l32i	a4, a1, PT_DEPC
-	/* Double exception means we came here with an exception
-	 * while PS.EXCM was set, i.e. interrupts disabled.
-	 */
-	bgeui	a4, VALID_DOUBLE_EXCEPTION_ADDRESS, 1f
-	l32i	a4, a1, PT_EXCCAUSE
-	bnei	a4, EXCCAUSE_LEVEL1_INTERRUPT, 1f
-	/* We came here with an interrupt means interrupts were enabled
-	 * and we'll reenable them on return.
-	 */
+	extui	a4, a3, PS_INTLEVEL_SHIFT, PS_INTLEVEL_WIDTH
+	bgei	a4, LOCKLEVEL, 1f
 	movi	a4, trace_hardirqs_on
 	callx4	a4
 1:
diff --git a/arch/xtensa/kernel/traps.c b/arch/xtensa/kernel/traps.c
index 9d2f45f..a1b5bd2 100644
--- a/arch/xtensa/kernel/traps.c
+++ b/arch/xtensa/kernel/traps.c
@@ -211,8 +211,11 @@ void do_interrupt(struct pt_regs *regs)
 		XCHAL_INTLEVEL6_MASK,
 		XCHAL_INTLEVEL7_MASK,
 	};
-	struct pt_regs *old_regs = set_irq_regs(regs);
+	struct pt_regs *old_regs;
 
+	trace_hardirqs_off();
+
+	old_regs = set_irq_regs(regs);
 	irq_enter();
 
 	for (;;) {
-- 
1.8.1.4


^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [PATCH v2 10/13] xtensa: fix kernel register spilling
  2015-07-18  8:30 [PATCH v2 00/13] Support hardware perf counters on xtensa Max Filippov
                   ` (8 preceding siblings ...)
  2015-07-18  8:30 ` [PATCH v2 09/13] xtensa: reorganize irq flags tracing Max Filippov
@ 2015-07-18  8:30 ` Max Filippov
  2015-07-18  8:30 ` [PATCH v2 11/13] xtensa: don't touch EXC_TABLE_FIXUP in _switch_to Max Filippov
                   ` (2 subsequent siblings)
  12 siblings, 0 replies; 21+ messages in thread
From: Max Filippov @ 2015-07-18  8:30 UTC (permalink / raw)
  To: linux-xtensa, linux-kernel
  Cc: Chris Zankel, Marc Gauthier, Max Filippov, stable

call12 can't be safely used as the first call in the inline function,
because the compiler does not extend the stack frame of the bounding
function accordingly, which may result in corruption of local variables.

If a call needs to be done, do call8 first followed by call12.

For pure assembly code in _switch_to increase stack frame size of the
bounding function.

Cc: stable@vger.kernel.org
Signed-off-by: Max Filippov <jcmvbkbc@gmail.com>
---
 arch/xtensa/include/asm/traps.h | 29 +++++++++++++++++++----------
 arch/xtensa/kernel/entry.S      |  4 ++--
 2 files changed, 21 insertions(+), 12 deletions(-)

diff --git a/arch/xtensa/include/asm/traps.h b/arch/xtensa/include/asm/traps.h
index 677bfcf..28f33a8 100644
--- a/arch/xtensa/include/asm/traps.h
+++ b/arch/xtensa/include/asm/traps.h
@@ -25,30 +25,39 @@ static inline void spill_registers(void)
 {
 #if XCHAL_NUM_AREGS > 16
 	__asm__ __volatile__ (
-		"	call12	1f\n"
+		"	call8	1f\n"
 		"	_j	2f\n"
 		"	retw\n"
 		"	.align	4\n"
 		"1:\n"
+#if XCHAL_NUM_AREGS == 32
+		"	_entry	a1, 32\n"
+		"	addi	a8, a0, 3\n"
+		"	_entry	a1, 16\n"
+		"	mov	a12, a12\n"
+		"	retw\n"
+#else
 		"	_entry	a1, 48\n"
-		"	addi	a12, a0, 3\n"
-#if XCHAL_NUM_AREGS > 32
-		"	.rept	(" __stringify(XCHAL_NUM_AREGS) " - 32) / 12\n"
+		"	call12	1f\n"
+		"	retw\n"
+		"	.align	4\n"
+		"1:\n"
+		"	.rept	(" __stringify(XCHAL_NUM_AREGS) " - 16) / 12\n"
 		"	_entry	a1, 48\n"
 		"	mov	a12, a0\n"
 		"	.endr\n"
-#endif
-		"	_entry	a1, 48\n"
+		"	_entry	a1, 16\n"
 #if XCHAL_NUM_AREGS % 12 == 0
-		"	mov	a8, a8\n"
-#elif XCHAL_NUM_AREGS % 12 == 4
 		"	mov	a12, a12\n"
-#elif XCHAL_NUM_AREGS % 12 == 8
+#elif XCHAL_NUM_AREGS % 12 == 4
 		"	mov	a4, a4\n"
+#elif XCHAL_NUM_AREGS % 12 == 8
+		"	mov	a8, a8\n"
 #endif
 		"	retw\n"
+#endif
 		"2:\n"
-		: : : "a12", "a13", "memory");
+		: : : "a8", "a9", "memory");
 #else
 	__asm__ __volatile__ (
 		"	mov	a12, a12\n"
diff --git a/arch/xtensa/kernel/entry.S b/arch/xtensa/kernel/entry.S
index d060fc2..5771d5c 100644
--- a/arch/xtensa/kernel/entry.S
+++ b/arch/xtensa/kernel/entry.S
@@ -1838,7 +1838,7 @@ ENDPROC(system_call)
 	mov	a12, a0
 	.endr
 #endif
-	_entry	a1, 48
+	_entry	a1, 16
 #if XCHAL_NUM_AREGS % 12 == 0
 	mov	a8, a8
 #elif XCHAL_NUM_AREGS % 12 == 4
@@ -1862,7 +1862,7 @@ ENDPROC(system_call)
 
 ENTRY(_switch_to)
 
-	entry	a1, 16
+	entry	a1, 48
 
 	mov	a11, a3			# and 'next' (a3)
 
-- 
1.8.1.4


^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [PATCH v2 11/13] xtensa: don't touch EXC_TABLE_FIXUP in _switch_to
  2015-07-18  8:30 [PATCH v2 00/13] Support hardware perf counters on xtensa Max Filippov
                   ` (9 preceding siblings ...)
  2015-07-18  8:30 ` [PATCH v2 10/13] xtensa: fix kernel register spilling Max Filippov
@ 2015-07-18  8:30 ` Max Filippov
  2015-07-18  8:30 ` [PATCH v2 12/13] xtensa: implement fake NMI Max Filippov
  2015-07-18  8:30 ` [PATCH v2 13/13] xtensa: drop unused irq_err_count Max Filippov
  12 siblings, 0 replies; 21+ messages in thread
From: Max Filippov @ 2015-07-18  8:30 UTC (permalink / raw)
  To: linux-xtensa, linux-kernel; +Cc: Chris Zankel, Marc Gauthier, Max Filippov

There's no way _switch_to can produce double exceptions now, so don't
enter/leave the EXC_TABLE_FIXUP critical section.

Signed-off-by: Max Filippov <jcmvbkbc@gmail.com>
---
 arch/xtensa/kernel/entry.S | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/arch/xtensa/kernel/entry.S b/arch/xtensa/kernel/entry.S
index 5771d5c..309e71b 100644
--- a/arch/xtensa/kernel/entry.S
+++ b/arch/xtensa/kernel/entry.S
@@ -1883,9 +1883,7 @@ ENTRY(_switch_to)
 	/* Disable ints while we manipulate the stack pointer. */
 
 	rsil	a14, LOCKLEVEL
-	rsr	a3, excsave1
 	rsync
-	s32i	a3, a3, EXC_TABLE_FIXUP	/* enter critical section */
 
 	/* Switch CPENABLE */
 
@@ -1906,9 +1904,7 @@ ENTRY(_switch_to)
 	 */
 
 	rsr	a3, excsave1		# exc_table
-	movi	a6, 0
 	addi	a7, a5, PT_REGS_OFFSET
-	s32i	a6, a3, EXC_TABLE_FIXUP
 	s32i	a7, a3, EXC_TABLE_KSTK
 
 	/* restore context of the task 'next' */
-- 
1.8.1.4


^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [PATCH v2 12/13] xtensa: implement fake NMI
  2015-07-18  8:30 [PATCH v2 00/13] Support hardware perf counters on xtensa Max Filippov
                   ` (10 preceding siblings ...)
  2015-07-18  8:30 ` [PATCH v2 11/13] xtensa: don't touch EXC_TABLE_FIXUP in _switch_to Max Filippov
@ 2015-07-18  8:30 ` Max Filippov
  2015-07-27 14:36   ` Max Filippov
  2015-07-27 15:14   ` Peter Zijlstra
  2015-07-18  8:30 ` [PATCH v2 13/13] xtensa: drop unused irq_err_count Max Filippov
  12 siblings, 2 replies; 21+ messages in thread
From: Max Filippov @ 2015-07-18  8:30 UTC (permalink / raw)
  To: linux-xtensa, linux-kernel
  Cc: Chris Zankel, Marc Gauthier, Max Filippov, Peter Zijlstra

In case perf IRQ is the highest of the medium-level IRQs, and is alone
on its level, it may be treated as NMI:
- LOCKLEVEL is defined to be one level less than EXCM level,
- IRQ masking never lowers current IRQ level,
- new fake exception cause code, EXCCAUSE_MAPPED_NMI is assigned to that
  IRQ; new second level exception handler, do_nmi, assigned to it
  handles it as NMI,
- atomic operations in configurations without s32c1i still need to mask
  all interrupts.

Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Max Filippov <jcmvbkbc@gmail.com>
---
 arch/xtensa/include/asm/atomic.h    | 10 ++--
 arch/xtensa/include/asm/cmpxchg.h   |  4 +-
 arch/xtensa/include/asm/irqflags.h  | 22 ++++++++-
 arch/xtensa/include/asm/processor.h | 31 ++++++++++++-
 arch/xtensa/kernel/entry.S          | 93 +++++++++++++++++++++++++++++++------
 arch/xtensa/kernel/irq.c            |  8 ++++
 arch/xtensa/kernel/perf_event.c     |  6 ++-
 arch/xtensa/kernel/traps.c          | 26 +++++++++++
 arch/xtensa/kernel/vectors.S        | 10 +++-
 9 files changed, 183 insertions(+), 27 deletions(-)

diff --git a/arch/xtensa/include/asm/atomic.h b/arch/xtensa/include/asm/atomic.h
index 00b7d46..ebcd1f6 100644
--- a/arch/xtensa/include/asm/atomic.h
+++ b/arch/xtensa/include/asm/atomic.h
@@ -29,7 +29,7 @@
  *
  * Locking interrupts looks like this:
  *
- *    rsil a15, LOCKLEVEL
+ *    rsil a15, TOPLEVEL
  *    <code>
  *    wsr  a15, PS
  *    rsync
@@ -106,7 +106,7 @@ static inline void atomic_##op(int i, atomic_t * v)			\
 	unsigned int vval;						\
 									\
 	__asm__ __volatile__(						\
-			"       rsil    a15, "__stringify(LOCKLEVEL)"\n"\
+			"       rsil    a15, "__stringify(TOPLEVEL)"\n"\
 			"       l32i    %0, %2, 0\n"			\
 			"       " #op " %0, %0, %1\n"			\
 			"       s32i    %0, %2, 0\n"			\
@@ -124,7 +124,7 @@ static inline int atomic_##op##_return(int i, atomic_t * v)		\
 	unsigned int vval;						\
 									\
 	__asm__ __volatile__(						\
-			"       rsil    a15,"__stringify(LOCKLEVEL)"\n"	\
+			"       rsil    a15,"__stringify(TOPLEVEL)"\n"	\
 			"       l32i    %0, %2, 0\n"			\
 			"       " #op " %0, %0, %1\n"			\
 			"       s32i    %0, %2, 0\n"			\
@@ -272,7 +272,7 @@ static inline void atomic_clear_mask(unsigned int mask, atomic_t *v)
 	unsigned int vval;
 
 	__asm__ __volatile__(
-			"       rsil    a15,"__stringify(LOCKLEVEL)"\n"
+			"       rsil    a15,"__stringify(TOPLEVEL)"\n"
 			"       l32i    %0, %2, 0\n"
 			"       xor     %1, %4, %3\n"
 			"       and     %0, %0, %4\n"
@@ -306,7 +306,7 @@ static inline void atomic_set_mask(unsigned int mask, atomic_t *v)
 	unsigned int vval;
 
 	__asm__ __volatile__(
-			"       rsil    a15,"__stringify(LOCKLEVEL)"\n"
+			"       rsil    a15,"__stringify(TOPLEVEL)"\n"
 			"       l32i    %0, %2, 0\n"
 			"       or      %0, %0, %1\n"
 			"       s32i    %0, %2, 0\n"
diff --git a/arch/xtensa/include/asm/cmpxchg.h b/arch/xtensa/include/asm/cmpxchg.h
index 370b26f..201e900 100644
--- a/arch/xtensa/include/asm/cmpxchg.h
+++ b/arch/xtensa/include/asm/cmpxchg.h
@@ -34,7 +34,7 @@ __cmpxchg_u32(volatile int *p, int old, int new)
 	return new;
 #else
 	__asm__ __volatile__(
-			"       rsil    a15, "__stringify(LOCKLEVEL)"\n"
+			"       rsil    a15, "__stringify(TOPLEVEL)"\n"
 			"       l32i    %0, %1, 0\n"
 			"       bne     %0, %2, 1f\n"
 			"       s32i    %3, %1, 0\n"
@@ -123,7 +123,7 @@ static inline unsigned long xchg_u32(volatile int * m, unsigned long val)
 #else
 	unsigned long tmp;
 	__asm__ __volatile__(
-			"       rsil    a15, "__stringify(LOCKLEVEL)"\n"
+			"       rsil    a15, "__stringify(TOPLEVEL)"\n"
 			"       l32i    %0, %1, 0\n"
 			"       s32i    %2, %1, 0\n"
 			"       wsr     a15, ps\n"
diff --git a/arch/xtensa/include/asm/irqflags.h b/arch/xtensa/include/asm/irqflags.h
index ea36674..8e090c7 100644
--- a/arch/xtensa/include/asm/irqflags.h
+++ b/arch/xtensa/include/asm/irqflags.h
@@ -6,6 +6,7 @@
  * for more details.
  *
  * Copyright (C) 2001 - 2005 Tensilica Inc.
+ * Copyright (C) 2015 Cadence Design Systems Inc.
  */
 
 #ifndef _XTENSA_IRQFLAGS_H
@@ -23,8 +24,27 @@ static inline unsigned long arch_local_save_flags(void)
 static inline unsigned long arch_local_irq_save(void)
 {
 	unsigned long flags;
-	asm volatile("rsil %0, "__stringify(LOCKLEVEL)
+#if XTENSA_FAKE_NMI
+#if defined(CONFIG_DEBUG_KERNEL) && (LOCKLEVEL | TOPLEVEL) >= XCHAL_DEBUGLEVEL
+	unsigned long tmp;
+
+	asm volatile("rsr	%0, ps\t\n"
+		     "extui	%1, %0, 0, 4\t\n"
+		     "bgei	%1, "__stringify(LOCKLEVEL)", 1f\t\n"
+		     "rsil	%0, "__stringify(LOCKLEVEL)"\n"
+		     "1:"
+		     : "=a" (flags), "=a" (tmp) :: "memory");
+#else
+	asm volatile("rsr	%0, ps\t\n"
+		     "or	%0, %0, %1\t\n"
+		     "xsr	%0, ps\t\n"
+		     "rsync"
+		     : "=&a" (flags) : "a" (LOCKLEVEL) : "memory");
+#endif
+#else
+	asm volatile("rsil	%0, "__stringify(LOCKLEVEL)
 		     : "=a" (flags) :: "memory");
+#endif
 	return flags;
 }
 
diff --git a/arch/xtensa/include/asm/processor.h b/arch/xtensa/include/asm/processor.h
index b61bdf0..9ed9b4a 100644
--- a/arch/xtensa/include/asm/processor.h
+++ b/arch/xtensa/include/asm/processor.h
@@ -1,11 +1,10 @@
 /*
- * include/asm-xtensa/processor.h
- *
  * This file is subject to the terms and conditions of the GNU General Public
  * License.  See the file "COPYING" in the main directory of this archive
  * for more details.
  *
  * Copyright (C) 2001 - 2008 Tensilica Inc.
+ * Copyright (C) 2015 Cadence Design Systems Inc.
  */
 
 #ifndef _XTENSA_PROCESSOR_H
@@ -45,6 +44,14 @@
 #define STACK_TOP_MAX	STACK_TOP
 
 /*
+ * General exception cause assigned to fake NMI. Fake NMI needs to be handled
+ * differently from other interrupts, but it uses common kernel entry/exit
+ * code.
+ */
+
+#define EXCCAUSE_MAPPED_NMI	62
+
+/*
  * General exception cause assigned to debug exceptions. Debug exceptions go
  * to their own vector, rather than the general exception vectors (user,
  * kernel, double); and their specific causes are reported via DEBUGCAUSE
@@ -65,10 +72,30 @@
 
 #define VALID_DOUBLE_EXCEPTION_ADDRESS	64
 
+#define XTENSA_INT_LEVEL(a) _XTENSA_INT_LEVEL(a)
+#define _XTENSA_INT_LEVEL(a) XCHAL_INT##a##_LEVEL
+
+#define XTENSA_INTLEVEL_MASK(a) _XTENSA_INTLEVEL_MASK(a)
+#define _XTENSA_INTLEVEL_MASK(a) (XCHAL_INTLEVEL##a##_MASK)
+
+#define IS_POW2(v) (((v) & ((v) - 1)) == 0)
+
+#define PROFILING_INTLEVEL XTENSA_INT_LEVEL(XCHAL_PROFILING_INTERRUPT)
+
 /* LOCKLEVEL defines the interrupt level that masks all
  * general-purpose interrupts.
  */
+#if defined(CONFIG_XTENSA_VARIANT_HAVE_PERF_EVENTS) && \
+	defined(XCHAL_PROFILING_INTERRUPT) && \
+	PROFILING_INTLEVEL == XCHAL_EXCM_LEVEL && \
+	XCHAL_EXCM_LEVEL > 1 && \
+	IS_POW2(XTENSA_INTLEVEL_MASK(PROFILING_INTLEVEL))
+#define LOCKLEVEL (XCHAL_EXCM_LEVEL - 1)
+#else
 #define LOCKLEVEL XCHAL_EXCM_LEVEL
+#endif
+#define TOPLEVEL XCHAL_EXCM_LEVEL
+#define XTENSA_FAKE_NMI (LOCKLEVEL < TOPLEVEL)
 
 /* WSBITS and WBBITS are the width of the WINDOWSTART and WINDOWBASE
  * registers
diff --git a/arch/xtensa/kernel/entry.S b/arch/xtensa/kernel/entry.S
index 309e71b..dd766ed 100644
--- a/arch/xtensa/kernel/entry.S
+++ b/arch/xtensa/kernel/entry.S
@@ -1,6 +1,4 @@
 /*
- * arch/xtensa/kernel/entry.S
- *
  * Low-level exception handling
  *
  * This file is subject to the terms and conditions of the GNU General Public
@@ -8,6 +6,7 @@
  * for more details.
  *
  * Copyright (C) 2004 - 2008 by Tensilica Inc.
+ * Copyright (C) 2015 Cadence Design Systems Inc.
  *
  * Chris Zankel <chris@zankel.net>
  *
@@ -79,6 +78,27 @@
 #endif
 	.endm
 
+
+	.macro	irq_save flags tmp
+#if XTENSA_FAKE_NMI
+#if defined(CONFIG_DEBUG_KERNEL) && (LOCKLEVEL | TOPLEVEL) >= XCHAL_DEBUGLEVEL
+	rsr	\flags, ps
+	extui	\tmp, \flags, PS_INTLEVEL_SHIFT, PS_INTLEVEL_WIDTH
+	bgei	\tmp, LOCKLEVEL, 99f
+	rsil	\tmp, LOCKLEVEL
+99:
+#else
+	movi	\tmp, LOCKLEVEL
+	rsr	\flags, ps
+	or	\flags, \flags, \tmp
+	xsr	\flags, ps
+	rsync
+#endif
+#else
+	rsil	\flags, LOCKLEVEL
+#endif
+	.endm
+
 /* ----------------- DEFAULT FIRST LEVEL EXCEPTION HANDLERS ----------------- */
 
 /*
@@ -366,11 +386,11 @@ common_exception:
 
 	/* It is now save to restore the EXC_TABLE_FIXUP variable. */
 
-	rsr	a0, exccause
+	rsr	a2, exccause
 	movi	a3, 0
-	rsr	a2, excsave1
-	s32i	a0, a1, PT_EXCCAUSE
-	s32i	a3, a2, EXC_TABLE_FIXUP
+	rsr	a0, excsave1
+	s32i	a2, a1, PT_EXCCAUSE
+	s32i	a3, a0, EXC_TABLE_FIXUP
 
 	/* All unrecoverable states are saved on stack, now, and a1 is valid.
 	 * Now we can allow exceptions again. In case we've got an interrupt
@@ -381,21 +401,48 @@ common_exception:
 	 */
 
 	rsr	a3, ps
-	addi	a0, a0, -EXCCAUSE_LEVEL1_INTERRUPT
-	movi	a2, LOCKLEVEL
+	s32i	a3, a1, PT_PS		# save ps
+
+#if XTENSA_FAKE_NMI
+	/* Correct PS needs to be saved in the PT_PS:
+	 * - in case of exception or level-1 interrupt it's in the PS,
+	 *   and is already saved.
+	 * - in case of medium level interrupt it's in the excsave2.
+	 */
+	movi	a0, EXCCAUSE_MAPPED_NMI
+	extui	a3, a3, PS_INTLEVEL_SHIFT, PS_INTLEVEL_WIDTH
+	beq	a2, a0, .Lmedium_level_irq
+	bnei	a2, EXCCAUSE_LEVEL1_INTERRUPT, .Lexception
+	beqz	a3, .Llevel1_irq	# level-1 IRQ sets ps.intlevel to 0
+
+.Lmedium_level_irq:
+	rsr	a0, excsave2
+	s32i	a0, a1, PT_PS		# save medium-level interrupt ps
+	bgei	a3, LOCKLEVEL, .Lexception
+
+.Llevel1_irq:
+	movi	a3, LOCKLEVEL
+
+.Lexception:
+	movi	a0, 1 << PS_WOE_BIT
+	or	a3, a3, a0
+#else
+	addi	a2, a2, -EXCCAUSE_LEVEL1_INTERRUPT
+	movi	a0, LOCKLEVEL
 	extui	a3, a3, PS_INTLEVEL_SHIFT, PS_INTLEVEL_WIDTH
 					# a3 = PS.INTLEVEL
-	moveqz	a3, a2, a0		# a3 = LOCKLEVEL iff interrupt
+	moveqz	a3, a0, a2		# a3 = LOCKLEVEL iff interrupt
 	movi	a2, 1 << PS_WOE_BIT
 	or	a3, a3, a2
 	rsr	a2, exccause
+#endif
+
 #ifdef XTENSA_CONTINUOUS_STACK
 	/* restore return address (or 0 if return to userspace) */
 	rsr	a0, depc
 #endif
-	xsr	a3, ps
-
-	s32i	a3, a1, PT_PS		# save ps
+	wsr	a3, ps
+	rsync				# PS.WOE => rsync => overflow
 
 	/* Save lbeg, lend */
 
@@ -433,8 +480,13 @@ common_exception:
 	.global common_exception_return
 common_exception_return:
 
+#if XTENSA_FAKE_NMI
+	l32i	a2, a1, PT_EXCCAUSE
+	movi	a3, EXCCAUSE_MAPPED_NMI
+	beq	a2, a3, .LNMIexit
+#endif
 1:
-	rsil	a2, LOCKLEVEL
+	irq_save a2, a3
 #ifdef CONFIG_TRACE_IRQFLAGS
 	movi	a4, trace_hardirqs_off
 	callx4	a4
@@ -497,6 +549,12 @@ common_exception_return:
 	j	1b
 #endif
 
+#if XTENSA_FAKE_NMI
+.LNMIexit:
+	l32i	a3, a1, PT_PS
+	_bbci.l	a3, PS_UM_BIT, 4f
+#endif
+
 5:
 #ifdef CONFIG_DEBUG_TLB_SANITY
 	l32i	a4, a1, PT_DEPC
@@ -1579,6 +1637,13 @@ ENTRY(fast_second_level_miss)
 	rfde
 
 9:	l32i	a0, a1, TASK_ACTIVE_MM	# unlikely case mm == 0
+	bnez	a0, 8b
+
+	/* Even more unlikely case active_mm == 0.
+	 * We can get here with NMI in the middle of context_switch that
+	 * touches vmalloc area.
+	 */
+	movi	a0, init_mm
 	j	8b
 
 #if (DCACHE_WAY_SIZE > PAGE_SIZE)
@@ -1882,7 +1947,7 @@ ENTRY(_switch_to)
 
 	/* Disable ints while we manipulate the stack pointer. */
 
-	rsil	a14, LOCKLEVEL
+	irq_save a14, a3
 	rsync
 
 	/* Switch CPENABLE */
diff --git a/arch/xtensa/kernel/irq.c b/arch/xtensa/kernel/irq.c
index 32b6056..8d4f5de 100644
--- a/arch/xtensa/kernel/irq.c
+++ b/arch/xtensa/kernel/irq.c
@@ -29,6 +29,7 @@
 #include <asm/platform.h>
 
 atomic_t irq_err_count;
+DECLARE_PER_CPU(unsigned long, nmi_count);
 
 asmlinkage void do_IRQ(int hwirq, struct pt_regs *regs)
 {
@@ -57,11 +58,18 @@ asmlinkage void do_IRQ(int hwirq, struct pt_regs *regs)
 
 int arch_show_interrupts(struct seq_file *p, int prec)
 {
+	unsigned cpu __maybe_unused;
 #ifdef CONFIG_SMP
 	show_ipi_list(p, prec);
 #endif
 	seq_printf(p, "%*s: ", prec, "ERR");
 	seq_printf(p, "%10u\n", atomic_read(&irq_err_count));
+#ifdef XTENSA_FAKE_NMI
+	seq_printf(p, "%*s:", prec, "NMI");
+	for_each_online_cpu(cpu)
+		seq_printf(p, " %10lu", per_cpu(nmi_count, cpu));
+	seq_puts(p, "   Non-maskable interrupts\n");
+#endif
 	return 0;
 }
 
diff --git a/arch/xtensa/kernel/perf_event.c b/arch/xtensa/kernel/perf_event.c
index b44df3c..54f0118 100644
--- a/arch/xtensa/kernel/perf_event.c
+++ b/arch/xtensa/kernel/perf_event.c
@@ -359,7 +359,7 @@ void perf_event_print_debug(void)
 	local_irq_restore(flags);
 }
 
-static irqreturn_t xtensa_pmu_irq_handler(int irq, void *dev_id)
+irqreturn_t xtensa_pmu_irq_handler(int irq, void *dev_id)
 {
 	irqreturn_t rc = IRQ_NONE;
 	struct xtensa_pmu_events *ev = this_cpu_ptr(&xtensa_pmu_events);
@@ -436,10 +436,14 @@ static int __init xtensa_pmu_init(void)
 	int irq = irq_create_mapping(NULL, XCHAL_PROFILING_INTERRUPT);
 
 	perf_cpu_notifier(xtensa_pmu_notifier);
+#if XTENSA_FAKE_NMI
+	enable_irq(irq);
+#else
 	ret = request_irq(irq, xtensa_pmu_irq_handler, IRQF_PERCPU,
 			  "pmu", NULL);
 	if (ret < 0)
 		return ret;
+#endif
 
 	ret = perf_pmu_register(&xtensa_pmu, "cpu", PERF_TYPE_RAW);
 	if (ret)
diff --git a/arch/xtensa/kernel/traps.c b/arch/xtensa/kernel/traps.c
index a1b5bd2..42d441f 100644
--- a/arch/xtensa/kernel/traps.c
+++ b/arch/xtensa/kernel/traps.c
@@ -62,6 +62,7 @@ extern void fast_coprocessor(void);
 
 extern void do_illegal_instruction (struct pt_regs*);
 extern void do_interrupt (struct pt_regs*);
+extern void do_nmi(struct pt_regs *);
 extern void do_unaligned_user (struct pt_regs*);
 extern void do_multihit (struct pt_regs*, unsigned long);
 extern void do_page_fault (struct pt_regs*, unsigned long);
@@ -146,6 +147,9 @@ COPROCESSOR(6),
 #if XTENSA_HAVE_COPROCESSOR(7)
 COPROCESSOR(7),
 #endif
+#if XTENSA_FAKE_NMI
+{ EXCCAUSE_MAPPED_NMI,			0,		do_nmi },
+#endif
 { EXCCAUSE_MAPPED_DEBUG,		0,		do_debug },
 { -1, -1, 0 }
 
@@ -199,6 +203,28 @@ void do_multihit(struct pt_regs *regs, unsigned long exccause)
 
 extern void do_IRQ(int, struct pt_regs *);
 
+#if XTENSA_FAKE_NMI
+
+irqreturn_t xtensa_pmu_irq_handler(int irq, void *dev_id);
+
+DEFINE_PER_CPU(unsigned long, nmi_count);
+
+void do_nmi(struct pt_regs *regs)
+{
+	struct pt_regs *old_regs;
+
+	if ((regs->ps & PS_INTLEVEL_MASK) < LOCKLEVEL)
+		trace_hardirqs_off();
+
+	old_regs = set_irq_regs(regs);
+	nmi_enter();
+	++*this_cpu_ptr(&nmi_count);
+	xtensa_pmu_irq_handler(0, NULL);
+	nmi_exit();
+	set_irq_regs(old_regs);
+}
+#endif
+
 void do_interrupt(struct pt_regs *regs)
 {
 	static const unsigned int_level_mask[] = {
diff --git a/arch/xtensa/kernel/vectors.S b/arch/xtensa/kernel/vectors.S
index 1b397a9..abcdb52 100644
--- a/arch/xtensa/kernel/vectors.S
+++ b/arch/xtensa/kernel/vectors.S
@@ -627,7 +627,11 @@ ENTRY(_Level\level\()InterruptVector)
 	wsr	a0, excsave2
 	rsr	a0, epc\level
 	wsr	a0, epc1
+	.if	\level <= LOCKLEVEL
 	movi	a0, EXCCAUSE_LEVEL1_INTERRUPT
+	.else
+	movi	a0, EXCCAUSE_MAPPED_NMI
+	.endif
 	wsr	a0, exccause
 	rsr	a0, eps\level
 					# branch to user or kernel vector
@@ -682,11 +686,13 @@ ENDPROC(_WindowOverflow4)
 	.align 4
 _SimulateUserKernelVectorException:
 	addi	a0, a0, (1 << PS_EXCM_BIT)
+#if !XTENSA_FAKE_NMI
 	wsr	a0, ps
+#endif
 	bbsi.l	a0, PS_UM_BIT, 1f	# branch if user mode
-	rsr	a0, excsave2		# restore a0
+	xsr	a0, excsave2		# restore a0
 	j	_KernelExceptionVector	# simulate kernel vector exception
-1:	rsr	a0, excsave2		# restore a0
+1:	xsr	a0, excsave2		# restore a0
 	j	_UserExceptionVector	# simulate user vector exception
 #endif
 
-- 
1.8.1.4


^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [PATCH v2 13/13] xtensa: drop unused irq_err_count
  2015-07-18  8:30 [PATCH v2 00/13] Support hardware perf counters on xtensa Max Filippov
                   ` (11 preceding siblings ...)
  2015-07-18  8:30 ` [PATCH v2 12/13] xtensa: implement fake NMI Max Filippov
@ 2015-07-18  8:30 ` Max Filippov
  12 siblings, 0 replies; 21+ messages in thread
From: Max Filippov @ 2015-07-18  8:30 UTC (permalink / raw)
  To: linux-xtensa, linux-kernel; +Cc: Chris Zankel, Marc Gauthier, Max Filippov

Signed-off-by: Max Filippov <jcmvbkbc@gmail.com>
---
 arch/xtensa/kernel/irq.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/arch/xtensa/kernel/irq.c b/arch/xtensa/kernel/irq.c
index 8d4f5de..4228603 100644
--- a/arch/xtensa/kernel/irq.c
+++ b/arch/xtensa/kernel/irq.c
@@ -28,7 +28,6 @@
 #include <asm/uaccess.h>
 #include <asm/platform.h>
 
-atomic_t irq_err_count;
 DECLARE_PER_CPU(unsigned long, nmi_count);
 
 asmlinkage void do_IRQ(int hwirq, struct pt_regs *regs)
@@ -62,8 +61,6 @@ int arch_show_interrupts(struct seq_file *p, int prec)
 #ifdef CONFIG_SMP
 	show_ipi_list(p, prec);
 #endif
-	seq_printf(p, "%*s: ", prec, "ERR");
-	seq_printf(p, "%10u\n", atomic_read(&irq_err_count));
 #ifdef XTENSA_FAKE_NMI
 	seq_printf(p, "%*s:", prec, "NMI");
 	for_each_online_cpu(cpu)
-- 
1.8.1.4


^ permalink raw reply related	[flat|nested] 21+ messages in thread

* Re: [PATCH v2 12/13] xtensa: implement fake NMI
  2015-07-18  8:30 ` [PATCH v2 12/13] xtensa: implement fake NMI Max Filippov
@ 2015-07-27 14:36   ` Max Filippov
  2015-07-27 15:14   ` Peter Zijlstra
  1 sibling, 0 replies; 21+ messages in thread
From: Max Filippov @ 2015-07-27 14:36 UTC (permalink / raw)
  To: linux-xtensa, LKML
  Cc: Chris Zankel, Marc Gauthier, Max Filippov, Peter Zijlstra

On Sat, Jul 18, 2015 at 11:30 AM, Max Filippov <jcmvbkbc@gmail.com> wrote:
> In case perf IRQ is the highest of the medium-level IRQs, and is alone
> on its level, it may be treated as NMI:
> - LOCKLEVEL is defined to be one level less than EXCM level,
> - IRQ masking never lowers current IRQ level,
> - new fake exception cause code, EXCCAUSE_MAPPED_NMI is assigned to that
>   IRQ; new second level exception handler, do_nmi, assigned to it
>   handles it as NMI,
> - atomic operations in configurations without s32c1i still need to mask
>   all interrupts.
>
> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
> Signed-off-by: Max Filippov <jcmvbkbc@gmail.com>
> ---
>  arch/xtensa/include/asm/atomic.h    | 10 ++--
>  arch/xtensa/include/asm/cmpxchg.h   |  4 +-
>  arch/xtensa/include/asm/irqflags.h  | 22 ++++++++-
>  arch/xtensa/include/asm/processor.h | 31 ++++++++++++-
>  arch/xtensa/kernel/entry.S          | 93 +++++++++++++++++++++++++++++++------
>  arch/xtensa/kernel/irq.c            |  8 ++++
>  arch/xtensa/kernel/perf_event.c     |  6 ++-
>  arch/xtensa/kernel/traps.c          | 26 +++++++++++
>  arch/xtensa/kernel/vectors.S        | 10 +++-
>  9 files changed, 183 insertions(+), 27 deletions(-)

Ping?

-- 
Thanks.
-- Max

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v2 12/13] xtensa: implement fake NMI
  2015-07-18  8:30 ` [PATCH v2 12/13] xtensa: implement fake NMI Max Filippov
  2015-07-27 14:36   ` Max Filippov
@ 2015-07-27 15:14   ` Peter Zijlstra
  1 sibling, 0 replies; 21+ messages in thread
From: Peter Zijlstra @ 2015-07-27 15:14 UTC (permalink / raw)
  To: Max Filippov; +Cc: linux-xtensa, linux-kernel, Chris Zankel, Marc Gauthier

On Sat, Jul 18, 2015 at 11:30:15AM +0300, Max Filippov wrote:
> In case perf IRQ is the highest of the medium-level IRQs, and is alone
> on its level, it may be treated as NMI:
> - LOCKLEVEL is defined to be one level less than EXCM level,
> - IRQ masking never lowers current IRQ level,
> - new fake exception cause code, EXCCAUSE_MAPPED_NMI is assigned to that
>   IRQ; new second level exception handler, do_nmi, assigned to it
>   handles it as NMI,
> - atomic operations in configurations without s32c1i still need to mask
>   all interrupts.
> 
> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
> Signed-off-by: Max Filippov <jcmvbkbc@gmail.com>
> ---
>  arch/xtensa/include/asm/atomic.h    | 10 ++--
>  arch/xtensa/include/asm/cmpxchg.h   |  4 +-
>  arch/xtensa/include/asm/irqflags.h  | 22 ++++++++-
>  arch/xtensa/include/asm/processor.h | 31 ++++++++++++-
>  arch/xtensa/kernel/entry.S          | 93 +++++++++++++++++++++++++++++++------
>  arch/xtensa/kernel/irq.c            |  8 ++++
>  arch/xtensa/kernel/perf_event.c     |  6 ++-
>  arch/xtensa/kernel/traps.c          | 26 +++++++++++
>  arch/xtensa/kernel/vectors.S        | 10 +++-
>  9 files changed, 183 insertions(+), 27 deletions(-)

Looks about right, I've not really gone over the asm bits though.

Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v2 07/13] xtensa: implement counting and sampling perf events
  2015-07-18  8:30 ` [PATCH v2 07/13] xtensa: implement counting and sampling " Max Filippov
@ 2015-08-06 19:46   ` Arnaldo Carvalho de Melo
  2015-08-07  9:13     ` Peter Zijlstra
  0 siblings, 1 reply; 21+ messages in thread
From: Arnaldo Carvalho de Melo @ 2015-08-06 19:46 UTC (permalink / raw)
  To: Peter Zijlstra, Max Filippov
  Cc: linux-xtensa, linux-kernel, Chris Zankel, Marc Gauthier,
	Peter Zijlstra, Paul Mackerras, Ingo Molnar

Em Sat, Jul 18, 2015 at 11:30:10AM +0300, Max Filippov escreveu:
> Xtensa Performance Monitor Module has up to 8 32 bit wide performance
> counters. Each counter may be enabled independently and can count any
> single type of hardware performance events. Event counting may be enabled
> and disabled globally (per PMM).
> Each counter has status register with bits indicating if the counter has
> been overflown and may be programmed to raise profiling IRQ on overflow.
> This IRQ is used to rewind counters and allow for counting more than 2^32
> samples for counting events and to report samples for sampling events.
> 
> For more details see Tensilica Debug User's Guide, chapter 8
> "Performance monitor module".

Has this gone via PeterZ? I added the tools/ bits in my perf/core
branch, will go in next pull req,

- Arnaldo
 
> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
> Cc: Paul Mackerras <paulus@samba.org>
> Cc: Ingo Molnar <mingo@redhat.com>
> Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
> Signed-off-by: Max Filippov <jcmvbkbc@gmail.com>
> ---
> Changes v1->v2:
> - use -EINVAL instead of -ENOENT for invalid PMU event configurations.
> 
>  arch/xtensa/Kconfig             |  10 +
>  arch/xtensa/kernel/Makefile     |   1 +
>  arch/xtensa/kernel/perf_event.c | 450 ++++++++++++++++++++++++++++++++++++++++
>  3 files changed, 461 insertions(+)
>  create mode 100644 arch/xtensa/kernel/perf_event.c
> 
> diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig
> index 3c57934..0e92885 100644
> --- a/arch/xtensa/Kconfig
> +++ b/arch/xtensa/Kconfig
> @@ -126,6 +126,16 @@ config XTENSA_VARIANT_MMU
>  	  Build a Conventional Kernel with full MMU support,
>  	  ie: it supports a TLB with auto-loading, page protection.
>  
> +config XTENSA_VARIANT_HAVE_PERF_EVENTS
> +	bool "Core variant has Performance Monitor Module"
> +	depends on XTENSA_VARIANT_CUSTOM
> +	default n
> +	help
> +	  Enable if core variant has Performance Monitor Module with
> +	  External Registers Interface.
> +
> +	  If unsure, say N.
> +
>  config XTENSA_UNALIGNED_USER
>  	bool "Unaligned memory access in use space"
>  	help
> diff --git a/arch/xtensa/kernel/Makefile b/arch/xtensa/kernel/Makefile
> index d3a0f0f..547a757 100644
> --- a/arch/xtensa/kernel/Makefile
> +++ b/arch/xtensa/kernel/Makefile
> @@ -13,6 +13,7 @@ obj-$(CONFIG_PCI) += pci.o
>  obj-$(CONFIG_MODULES) += xtensa_ksyms.o module.o
>  obj-$(CONFIG_FUNCTION_TRACER) += mcount.o
>  obj-$(CONFIG_SMP) += smp.o mxhead.o
> +obj-$(CONFIG_XTENSA_VARIANT_HAVE_PERF_EVENTS) += perf_event.o
>  
>  AFLAGS_head.o += -mtext-section-literals
>  
> diff --git a/arch/xtensa/kernel/perf_event.c b/arch/xtensa/kernel/perf_event.c
> new file mode 100644
> index 0000000..b44df3c
> --- /dev/null
> +++ b/arch/xtensa/kernel/perf_event.c
> @@ -0,0 +1,450 @@
> +/*
> + * Xtensa Performance Monitor Module driver
> + * See Tensilica Debug User's Guide for PMU registers documentation.
> + *
> + * Copyright (C) 2015 Cadence Design Systems Inc.
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 as
> + * published by the Free Software Foundation.
> + */
> +
> +#include <linux/interrupt.h>
> +#include <linux/irqdomain.h>
> +#include <linux/module.h>
> +#include <linux/of.h>
> +#include <linux/perf_event.h>
> +#include <linux/platform_device.h>
> +
> +#include <asm/processor.h>
> +#include <asm/stacktrace.h>
> +
> +/* Global control/status for all perf counters */
> +#define XTENSA_PMU_PMG			0x1000
> +/* Perf counter values */
> +#define XTENSA_PMU_PM(i)		(0x1080 + (i) * 4)
> +/* Perf counter control registers */
> +#define XTENSA_PMU_PMCTRL(i)		(0x1100 + (i) * 4)
> +/* Perf counter status registers */
> +#define XTENSA_PMU_PMSTAT(i)		(0x1180 + (i) * 4)
> +
> +#define XTENSA_PMU_PMG_PMEN		0x1
> +
> +#define XTENSA_PMU_COUNTER_MASK		0xffffffffULL
> +#define XTENSA_PMU_COUNTER_MAX		0x7fffffff
> +
> +#define XTENSA_PMU_PMCTRL_INTEN		0x00000001
> +#define XTENSA_PMU_PMCTRL_KRNLCNT	0x00000008
> +#define XTENSA_PMU_PMCTRL_TRACELEVEL	0x000000f0
> +#define XTENSA_PMU_PMCTRL_SELECT_SHIFT	8
> +#define XTENSA_PMU_PMCTRL_SELECT	0x00001f00
> +#define XTENSA_PMU_PMCTRL_MASK_SHIFT	16
> +#define XTENSA_PMU_PMCTRL_MASK		0xffff0000
> +
> +#define XTENSA_PMU_MASK(select, mask) \
> +	(((select) << XTENSA_PMU_PMCTRL_SELECT_SHIFT) | \
> +	 ((mask) << XTENSA_PMU_PMCTRL_MASK_SHIFT) | \
> +	 XTENSA_PMU_PMCTRL_TRACELEVEL | \
> +	 XTENSA_PMU_PMCTRL_INTEN)
> +
> +#define XTENSA_PMU_PMSTAT_OVFL		0x00000001
> +#define XTENSA_PMU_PMSTAT_INTASRT	0x00000010
> +
> +struct xtensa_pmu_events {
> +	/* Array of events currently on this core */
> +	struct perf_event *event[XCHAL_NUM_PERF_COUNTERS];
> +	/* Bitmap of used hardware counters */
> +	unsigned long used_mask[BITS_TO_LONGS(XCHAL_NUM_PERF_COUNTERS)];
> +};
> +static DEFINE_PER_CPU(struct xtensa_pmu_events, xtensa_pmu_events);
> +
> +static const u32 xtensa_hw_ctl[] = {
> +	[PERF_COUNT_HW_CPU_CYCLES]		= XTENSA_PMU_MASK(0, 0x1),
> +	[PERF_COUNT_HW_INSTRUCTIONS]		= XTENSA_PMU_MASK(2, 0xffff),
> +	[PERF_COUNT_HW_CACHE_REFERENCES]	= XTENSA_PMU_MASK(10, 0x1),
> +	[PERF_COUNT_HW_CACHE_MISSES]		= XTENSA_PMU_MASK(12, 0x1),
> +	/* Taken and non-taken branches + taken loop ends */
> +	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= XTENSA_PMU_MASK(2, 0x490),
> +	/* Instruction-related + other global stall cycles */
> +	[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND]	= XTENSA_PMU_MASK(4, 0x1ff),
> +	/* Data-related global stall cycles */
> +	[PERF_COUNT_HW_STALLED_CYCLES_BACKEND]	= XTENSA_PMU_MASK(3, 0x1ff),
> +};
> +
> +#define C(_x) PERF_COUNT_HW_CACHE_##_x
> +
> +static const u32 xtensa_cache_ctl[][C(OP_MAX)][C(RESULT_MAX)] = {
> +	[C(L1D)] = {
> +		[C(OP_READ)] = {
> +			[C(RESULT_ACCESS)]	= XTENSA_PMU_MASK(10, 0x1),
> +			[C(RESULT_MISS)]	= XTENSA_PMU_MASK(10, 0x2),
> +		},
> +		[C(OP_WRITE)] = {
> +			[C(RESULT_ACCESS)]	= XTENSA_PMU_MASK(11, 0x1),
> +			[C(RESULT_MISS)]	= XTENSA_PMU_MASK(11, 0x2),
> +		},
> +	},
> +	[C(L1I)] = {
> +		[C(OP_READ)] = {
> +			[C(RESULT_ACCESS)]	= XTENSA_PMU_MASK(8, 0x1),
> +			[C(RESULT_MISS)]	= XTENSA_PMU_MASK(8, 0x2),
> +		},
> +	},
> +	[C(DTLB)] = {
> +		[C(OP_READ)] = {
> +			[C(RESULT_ACCESS)]	= XTENSA_PMU_MASK(9, 0x1),
> +			[C(RESULT_MISS)]	= XTENSA_PMU_MASK(9, 0x8),
> +		},
> +	},
> +	[C(ITLB)] = {
> +		[C(OP_READ)] = {
> +			[C(RESULT_ACCESS)]	= XTENSA_PMU_MASK(7, 0x1),
> +			[C(RESULT_MISS)]	= XTENSA_PMU_MASK(7, 0x8),
> +		},
> +	},
> +};
> +
> +static int xtensa_pmu_cache_event(u64 config)
> +{
> +	unsigned int cache_type, cache_op, cache_result;
> +	int ret;
> +
> +	cache_type = (config >>  0) & 0xff;
> +	cache_op = (config >>  8) & 0xff;
> +	cache_result = (config >> 16) & 0xff;
> +
> +	if (cache_type >= ARRAY_SIZE(xtensa_cache_ctl) ||
> +	    cache_op >= C(OP_MAX) ||
> +	    cache_result >= C(RESULT_MAX))
> +		return -EINVAL;
> +
> +	ret = xtensa_cache_ctl[cache_type][cache_op][cache_result];
> +
> +	if (ret == 0)
> +		return -EINVAL;
> +
> +	return ret;
> +}
> +
> +static inline uint32_t xtensa_pmu_read_counter(int idx)
> +{
> +	return get_er(XTENSA_PMU_PM(idx));
> +}
> +
> +static inline void xtensa_pmu_write_counter(int idx, uint32_t v)
> +{
> +	set_er(v, XTENSA_PMU_PM(idx));
> +}
> +
> +static void xtensa_perf_event_update(struct perf_event *event,
> +				     struct hw_perf_event *hwc, int idx)
> +{
> +	uint64_t prev_raw_count, new_raw_count;
> +	int64_t delta;
> +
> +	do {
> +		prev_raw_count = local64_read(&hwc->prev_count);
> +		new_raw_count = xtensa_pmu_read_counter(event->hw.idx);
> +	} while (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
> +				 new_raw_count) != prev_raw_count);
> +
> +	delta = (new_raw_count - prev_raw_count) & XTENSA_PMU_COUNTER_MASK;
> +
> +	local64_add(delta, &event->count);
> +	local64_sub(delta, &hwc->period_left);
> +}
> +
> +static bool xtensa_perf_event_set_period(struct perf_event *event,
> +					 struct hw_perf_event *hwc, int idx)
> +{
> +	bool rc = false;
> +	s64 left;
> +
> +	if (!is_sampling_event(event)) {
> +		left = XTENSA_PMU_COUNTER_MAX;
> +	} else {
> +		s64 period = hwc->sample_period;
> +
> +		left = local64_read(&hwc->period_left);
> +		if (left <= -period) {
> +			left = period;
> +			local64_set(&hwc->period_left, left);
> +			hwc->last_period = period;
> +			rc = true;
> +		} else if (left <= 0) {
> +			left += period;
> +			local64_set(&hwc->period_left, left);
> +			hwc->last_period = period;
> +			rc = true;
> +		}
> +		if (left > XTENSA_PMU_COUNTER_MAX)
> +			left = XTENSA_PMU_COUNTER_MAX;
> +	}
> +
> +	local64_set(&hwc->prev_count, -left);
> +	xtensa_pmu_write_counter(idx, -left);
> +	perf_event_update_userpage(event);
> +
> +	return rc;
> +}
> +
> +static void xtensa_pmu_enable(struct pmu *pmu)
> +{
> +	set_er(get_er(XTENSA_PMU_PMG) | XTENSA_PMU_PMG_PMEN, XTENSA_PMU_PMG);
> +}
> +
> +static void xtensa_pmu_disable(struct pmu *pmu)
> +{
> +	set_er(get_er(XTENSA_PMU_PMG) & ~XTENSA_PMU_PMG_PMEN, XTENSA_PMU_PMG);
> +}
> +
> +static int xtensa_pmu_event_init(struct perf_event *event)
> +{
> +	int ret;
> +
> +	switch (event->attr.type) {
> +	case PERF_TYPE_HARDWARE:
> +		if (event->attr.config >= ARRAY_SIZE(xtensa_hw_ctl) ||
> +		    xtensa_hw_ctl[event->attr.config] == 0)
> +			return -EINVAL;
> +		event->hw.config = xtensa_hw_ctl[event->attr.config];
> +		return 0;
> +
> +	case PERF_TYPE_HW_CACHE:
> +		ret = xtensa_pmu_cache_event(event->attr.config);
> +		if (ret < 0)
> +			return ret;
> +		event->hw.config = ret;
> +		return 0;
> +
> +	case PERF_TYPE_RAW:
> +		/* Not 'previous counter' select */
> +		if ((event->attr.config & XTENSA_PMU_PMCTRL_SELECT) ==
> +		    (1 << XTENSA_PMU_PMCTRL_SELECT_SHIFT))
> +			return -EINVAL;
> +		event->hw.config = (event->attr.config &
> +				    (XTENSA_PMU_PMCTRL_KRNLCNT |
> +				     XTENSA_PMU_PMCTRL_TRACELEVEL |
> +				     XTENSA_PMU_PMCTRL_SELECT |
> +				     XTENSA_PMU_PMCTRL_MASK)) |
> +			XTENSA_PMU_PMCTRL_INTEN;
> +		return 0;
> +
> +	default:
> +		return -ENOENT;
> +	}
> +}
> +
> +/*
> + * Starts/Stops a counter present on the PMU. The PMI handler
> + * should stop the counter when perf_event_overflow() returns
> + * !0. ->start() will be used to continue.
> + */
> +static void xtensa_pmu_start(struct perf_event *event, int flags)
> +{
> +	struct hw_perf_event *hwc = &event->hw;
> +	int idx = hwc->idx;
> +
> +	if (WARN_ON_ONCE(idx == -1))
> +		return;
> +
> +	if (flags & PERF_EF_RELOAD) {
> +		WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));
> +		xtensa_perf_event_set_period(event, hwc, idx);
> +	}
> +
> +	hwc->state = 0;
> +
> +	set_er(hwc->config, XTENSA_PMU_PMCTRL(idx));
> +}
> +
> +static void xtensa_pmu_stop(struct perf_event *event, int flags)
> +{
> +	struct hw_perf_event *hwc = &event->hw;
> +	int idx = hwc->idx;
> +
> +	if (!(hwc->state & PERF_HES_STOPPED)) {
> +		set_er(0, XTENSA_PMU_PMCTRL(idx));
> +		set_er(get_er(XTENSA_PMU_PMSTAT(idx)),
> +		       XTENSA_PMU_PMSTAT(idx));
> +		hwc->state |= PERF_HES_STOPPED;
> +	}
> +
> +	if ((flags & PERF_EF_UPDATE) &&
> +	    !(event->hw.state & PERF_HES_UPTODATE)) {
> +		xtensa_perf_event_update(event, &event->hw, idx);
> +		event->hw.state |= PERF_HES_UPTODATE;
> +	}
> +}
> +
> +/*
> + * Adds/Removes a counter to/from the PMU, can be done inside
> + * a transaction, see the ->*_txn() methods.
> + */
> +static int xtensa_pmu_add(struct perf_event *event, int flags)
> +{
> +	struct xtensa_pmu_events *ev = this_cpu_ptr(&xtensa_pmu_events);
> +	struct hw_perf_event *hwc = &event->hw;
> +	int idx = hwc->idx;
> +
> +	if (__test_and_set_bit(idx, ev->used_mask)) {
> +		idx = find_first_zero_bit(ev->used_mask,
> +					  XCHAL_NUM_PERF_COUNTERS);
> +		if (idx == XCHAL_NUM_PERF_COUNTERS)
> +			return -EAGAIN;
> +
> +		__set_bit(idx, ev->used_mask);
> +		hwc->idx = idx;
> +	}
> +	ev->event[idx] = event;
> +
> +	hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
> +
> +	if (flags & PERF_EF_START)
> +		xtensa_pmu_start(event, PERF_EF_RELOAD);
> +
> +	perf_event_update_userpage(event);
> +	return 0;
> +}
> +
> +static void xtensa_pmu_del(struct perf_event *event, int flags)
> +{
> +	struct xtensa_pmu_events *ev = this_cpu_ptr(&xtensa_pmu_events);
> +
> +	xtensa_pmu_stop(event, PERF_EF_UPDATE);
> +	__clear_bit(event->hw.idx, ev->used_mask);
> +	perf_event_update_userpage(event);
> +}
> +
> +static void xtensa_pmu_read(struct perf_event *event)
> +{
> +	xtensa_perf_event_update(event, &event->hw, event->hw.idx);
> +}
> +
> +static int callchain_trace(struct stackframe *frame, void *data)
> +{
> +	struct perf_callchain_entry *entry = data;
> +
> +	perf_callchain_store(entry, frame->pc);
> +	return 0;
> +}
> +
> +void perf_callchain_kernel(struct perf_callchain_entry *entry,
> +			   struct pt_regs *regs)
> +{
> +	xtensa_backtrace_kernel(regs, PERF_MAX_STACK_DEPTH,
> +				callchain_trace, NULL, entry);
> +}
> +
> +void perf_callchain_user(struct perf_callchain_entry *entry,
> +			 struct pt_regs *regs)
> +{
> +	xtensa_backtrace_user(regs, PERF_MAX_STACK_DEPTH,
> +			      callchain_trace, entry);
> +}
> +
> +void perf_event_print_debug(void)
> +{
> +	unsigned long flags;
> +	unsigned i;
> +
> +	local_irq_save(flags);
> +	pr_info("CPU#%d: PMG: 0x%08lx\n", smp_processor_id(),
> +		get_er(XTENSA_PMU_PMG));
> +	for (i = 0; i < XCHAL_NUM_PERF_COUNTERS; ++i)
> +		pr_info("PM%d: 0x%08lx, PMCTRL%d: 0x%08lx, PMSTAT%d: 0x%08lx\n",
> +			i, get_er(XTENSA_PMU_PM(i)),
> +			i, get_er(XTENSA_PMU_PMCTRL(i)),
> +			i, get_er(XTENSA_PMU_PMSTAT(i)));
> +	local_irq_restore(flags);
> +}
> +
> +static irqreturn_t xtensa_pmu_irq_handler(int irq, void *dev_id)
> +{
> +	irqreturn_t rc = IRQ_NONE;
> +	struct xtensa_pmu_events *ev = this_cpu_ptr(&xtensa_pmu_events);
> +	unsigned i;
> +
> +	for (i = find_first_bit(ev->used_mask, XCHAL_NUM_PERF_COUNTERS);
> +	     i < XCHAL_NUM_PERF_COUNTERS;
> +	     i = find_next_bit(ev->used_mask, XCHAL_NUM_PERF_COUNTERS, i + 1)) {
> +		uint32_t v = get_er(XTENSA_PMU_PMSTAT(i));
> +		struct perf_event *event = ev->event[i];
> +		struct hw_perf_event *hwc = &event->hw;
> +		u64 last_period;
> +
> +		if (!(v & XTENSA_PMU_PMSTAT_OVFL))
> +			continue;
> +
> +		set_er(v, XTENSA_PMU_PMSTAT(i));
> +		xtensa_perf_event_update(event, hwc, i);
> +		last_period = hwc->last_period;
> +		if (xtensa_perf_event_set_period(event, hwc, i)) {
> +			struct perf_sample_data data;
> +			struct pt_regs *regs = get_irq_regs();
> +
> +			perf_sample_data_init(&data, 0, last_period);
> +			if (perf_event_overflow(event, &data, regs))
> +				xtensa_pmu_stop(event, 0);
> +		}
> +
> +		rc = IRQ_HANDLED;
> +	}
> +	return rc;
> +}
> +
> +static struct pmu xtensa_pmu = {
> +	.pmu_enable = xtensa_pmu_enable,
> +	.pmu_disable = xtensa_pmu_disable,
> +	.event_init = xtensa_pmu_event_init,
> +	.add = xtensa_pmu_add,
> +	.del = xtensa_pmu_del,
> +	.start = xtensa_pmu_start,
> +	.stop = xtensa_pmu_stop,
> +	.read = xtensa_pmu_read,
> +};
> +
> +static void xtensa_pmu_setup(void)
> +{
> +	unsigned i;
> +
> +	set_er(0, XTENSA_PMU_PMG);
> +	for (i = 0; i < XCHAL_NUM_PERF_COUNTERS; ++i) {
> +		set_er(0, XTENSA_PMU_PMCTRL(i));
> +		set_er(get_er(XTENSA_PMU_PMSTAT(i)), XTENSA_PMU_PMSTAT(i));
> +	}
> +}
> +
> +static int xtensa_pmu_notifier(struct notifier_block *self,
> +			       unsigned long action, void *data)
> +{
> +	switch (action & ~CPU_TASKS_FROZEN) {
> +	case CPU_STARTING:
> +		xtensa_pmu_setup();
> +		break;
> +
> +	default:
> +		break;
> +	}
> +
> +	return NOTIFY_OK;
> +}
> +
> +static int __init xtensa_pmu_init(void)
> +{
> +	int ret;
> +	int irq = irq_create_mapping(NULL, XCHAL_PROFILING_INTERRUPT);
> +
> +	perf_cpu_notifier(xtensa_pmu_notifier);
> +	ret = request_irq(irq, xtensa_pmu_irq_handler, IRQF_PERCPU,
> +			  "pmu", NULL);
> +	if (ret < 0)
> +		return ret;
> +
> +	ret = perf_pmu_register(&xtensa_pmu, "cpu", PERF_TYPE_RAW);
> +	if (ret)
> +		free_irq(irq, NULL);
> +
> +	return ret;
> +}
> +early_initcall(xtensa_pmu_init);
> -- 
> 1.8.1.4

^ permalink raw reply	[flat|nested] 21+ messages in thread

* [tip:perf/core] perf tools xtensa: Add DWARF register names
  2015-07-18  8:30 ` [PATCH v2 08/13] perf tools: xtensa: add DWARF register names Max Filippov
@ 2015-08-07  7:22   ` tip-bot for Max Filippov
  0 siblings, 0 replies; 21+ messages in thread
From: tip-bot for Max Filippov @ 2015-08-07  7:22 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: mingo, paulus, tglx, linux-kernel, chris, jcmvbkbc, acme,
	a.p.zijlstra, marc, hpa

Commit-ID:  74d4582f430a797564f92fbff0bd3a21945528b7
Gitweb:     http://git.kernel.org/tip/74d4582f430a797564f92fbff0bd3a21945528b7
Author:     Max Filippov <jcmvbkbc@gmail.com>
AuthorDate: Sat, 18 Jul 2015 11:30:11 +0300
Committer:  Arnaldo Carvalho de Melo <acme@redhat.com>
CommitDate: Thu, 6 Aug 2015 16:45:05 -0300

perf tools xtensa: Add DWARF register names

Signed-off-by: Max Filippov <jcmvbkbc@gmail.com>
Cc: Chris Zankel <chris@zankel.net>
Cc: Marc Gauthier <marc@cadence.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: linux-xtensa@linux-xtensa.org
Link: http://lkml.kernel.org/r/1437208216-15729-9-git-send-email-jcmvbkbc@gmail.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/arch/{powerpc => xtensa}/Build |  0
 tools/perf/arch/{arm => xtensa}/Makefile  |  0
 tools/perf/arch/{sh => xtensa}/util/Build |  0
 tools/perf/arch/xtensa/util/dwarf-regs.c  | 25 +++++++++++++++++++++++++
 4 files changed, 25 insertions(+)

diff --git a/tools/perf/arch/powerpc/Build b/tools/perf/arch/xtensa/Build
similarity index 100%
copy from tools/perf/arch/powerpc/Build
copy to tools/perf/arch/xtensa/Build
diff --git a/tools/perf/arch/arm/Makefile b/tools/perf/arch/xtensa/Makefile
similarity index 100%
copy from tools/perf/arch/arm/Makefile
copy to tools/perf/arch/xtensa/Makefile
diff --git a/tools/perf/arch/sh/util/Build b/tools/perf/arch/xtensa/util/Build
similarity index 100%
copy from tools/perf/arch/sh/util/Build
copy to tools/perf/arch/xtensa/util/Build
diff --git a/tools/perf/arch/xtensa/util/dwarf-regs.c b/tools/perf/arch/xtensa/util/dwarf-regs.c
new file mode 100644
index 0000000..4dba76b
--- /dev/null
+++ b/tools/perf/arch/xtensa/util/dwarf-regs.c
@@ -0,0 +1,25 @@
+/*
+ * Mapping of DWARF debug register numbers into register names.
+ *
+ * Copyright (c) 2015 Cadence Design Systems Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <stddef.h>
+#include <dwarf-regs.h>
+
+#define XTENSA_MAX_REGS 16
+
+const char *xtensa_regs_table[XTENSA_MAX_REGS] = {
+	"a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7",
+	"a8", "a9", "a10", "a11", "a12", "a13", "a14", "a15",
+};
+
+const char *get_arch_regstr(unsigned int n)
+{
+	return n < XTENSA_MAX_REGS ? xtensa_regs_table[n] : NULL;
+}

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* Re: [PATCH v2 07/13] xtensa: implement counting and sampling perf events
  2015-08-06 19:46   ` Arnaldo Carvalho de Melo
@ 2015-08-07  9:13     ` Peter Zijlstra
  2015-08-07 12:03       ` Max Filippov
  0 siblings, 1 reply; 21+ messages in thread
From: Peter Zijlstra @ 2015-08-07  9:13 UTC (permalink / raw)
  To: Arnaldo Carvalho de Melo
  Cc: Max Filippov, linux-xtensa, linux-kernel, Chris Zankel,
	Marc Gauthier, Paul Mackerras, Ingo Molnar

On Thu, Aug 06, 2015 at 04:46:32PM -0300, Arnaldo Carvalho de Melo wrote:
> Em Sat, Jul 18, 2015 at 11:30:10AM +0300, Max Filippov escreveu:
> > Xtensa Performance Monitor Module has up to 8 32 bit wide performance
> > counters. Each counter may be enabled independently and can count any
> > single type of hardware performance events. Event counting may be enabled
> > and disabled globally (per PMM).
> > Each counter has status register with bits indicating if the counter has
> > been overflown and may be programmed to raise profiling IRQ on overflow.
> > This IRQ is used to rewind counters and allow for counting more than 2^32
> > samples for counting events and to report samples for sampling events.
> > 
> > For more details see Tensilica Debug User's Guide, chapter 8
> > "Performance monitor module".
> 
> Has this gone via PeterZ? I added the tools/ bits in my perf/core
> branch, will go in next pull req,

I was thinking it would go through the xtensa tree. Looks fine at a
quick glance and they can actually test it.

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v2 07/13] xtensa: implement counting and sampling perf events
  2015-08-07  9:13     ` Peter Zijlstra
@ 2015-08-07 12:03       ` Max Filippov
  2015-08-07 13:12         ` Arnaldo Carvalho de Melo
  0 siblings, 1 reply; 21+ messages in thread
From: Max Filippov @ 2015-08-07 12:03 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Arnaldo Carvalho de Melo, linux-xtensa, LKML, Chris Zankel,
	Marc Gauthier, Paul Mackerras, Ingo Molnar

On Fri, Aug 7, 2015 at 12:13 PM, Peter Zijlstra <peterz@infradead.org> wrote:
> On Thu, Aug 06, 2015 at 04:46:32PM -0300, Arnaldo Carvalho de Melo wrote:
>> Em Sat, Jul 18, 2015 at 11:30:10AM +0300, Max Filippov escreveu:
>> > Xtensa Performance Monitor Module has up to 8 32 bit wide performance
>> > counters. Each counter may be enabled independently and can count any
>> > single type of hardware performance events. Event counting may be enabled
>> > and disabled globally (per PMM).
>> > Each counter has status register with bits indicating if the counter has
>> > been overflown and may be programmed to raise profiling IRQ on overflow.
>> > This IRQ is used to rewind counters and allow for counting more than 2^32
>> > samples for counting events and to report samples for sampling events.
>> >
>> > For more details see Tensilica Debug User's Guide, chapter 8
>> > "Performance monitor module".
>>
>> Has this gone via PeterZ? I added the tools/ bits in my perf/core
>> branch, will go in next pull req,
>
> I was thinking it would go through the xtensa tree. Looks fine at a
> quick glance and they can actually test it.

Right, had the same idea.

-- 
Thanks.
-- Max

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v2 07/13] xtensa: implement counting and sampling perf events
  2015-08-07 12:03       ` Max Filippov
@ 2015-08-07 13:12         ` Arnaldo Carvalho de Melo
  0 siblings, 0 replies; 21+ messages in thread
From: Arnaldo Carvalho de Melo @ 2015-08-07 13:12 UTC (permalink / raw)
  To: Max Filippov
  Cc: Peter Zijlstra, linux-xtensa, LKML, Chris Zankel, Marc Gauthier,
	Paul Mackerras, Ingo Molnar

Em Fri, Aug 07, 2015 at 03:03:02PM +0300, Max Filippov escreveu:
> On Fri, Aug 7, 2015 at 12:13 PM, Peter Zijlstra <peterz@infradead.org> wrote:
> > On Thu, Aug 06, 2015 at 04:46:32PM -0300, Arnaldo Carvalho de Melo wrote:
> >> Em Sat, Jul 18, 2015 at 11:30:10AM +0300, Max Filippov escreveu:
> >> > Xtensa Performance Monitor Module has up to 8 32 bit wide performance
> >> > counters. Each counter may be enabled independently and can count any

> >> Has this gone via PeterZ? I added the tools/ bits in my perf/core
> >> branch, will go in next pull req,

> > I was thinking it would go through the xtensa tree. Looks fine at a
> > quick glance and they can actually test it.
 
> Right, had the same idea.

Ok, when you do that, i.e. get the kernel bits in, the tooling part will
be there, as it was already pulled by Ingo, i.e. it's in tip.git and thus
should hit 4.3.

- Arnaldo

^ permalink raw reply	[flat|nested] 21+ messages in thread

end of thread, other threads:[~2015-08-07 13:12 UTC | newest]

Thread overview: 21+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-07-18  8:30 [PATCH v2 00/13] Support hardware perf counters on xtensa Max Filippov
2015-07-18  8:30 ` [PATCH v2 01/13] xtensa: clean up Kconfig dependencies for custom cores Max Filippov
2015-07-18  8:30 ` [PATCH v2 02/13] xtensa: keep exception/interrupt stack continuous Max Filippov
2015-07-18  8:30 ` [PATCH v2 03/13] xtensa: move oprofile stack tracing to stacktrace.c Max Filippov
2015-07-18  8:30 ` [PATCH v2 04/13] xtensa: select PERF_USE_VMALLOC for cache-aliasing configurations Max Filippov
2015-07-18  8:30 ` [PATCH v2 05/13] xtensa: add profiling IRQ type to xtensa_irq_map Max Filippov
2015-07-18  8:30 ` [PATCH v2 06/13] xtensa: count software page fault perf events Max Filippov
2015-07-18  8:30 ` [PATCH v2 07/13] xtensa: implement counting and sampling " Max Filippov
2015-08-06 19:46   ` Arnaldo Carvalho de Melo
2015-08-07  9:13     ` Peter Zijlstra
2015-08-07 12:03       ` Max Filippov
2015-08-07 13:12         ` Arnaldo Carvalho de Melo
2015-07-18  8:30 ` [PATCH v2 08/13] perf tools: xtensa: add DWARF register names Max Filippov
2015-08-07  7:22   ` [tip:perf/core] perf tools xtensa: Add " tip-bot for Max Filippov
2015-07-18  8:30 ` [PATCH v2 09/13] xtensa: reorganize irq flags tracing Max Filippov
2015-07-18  8:30 ` [PATCH v2 10/13] xtensa: fix kernel register spilling Max Filippov
2015-07-18  8:30 ` [PATCH v2 11/13] xtensa: don't touch EXC_TABLE_FIXUP in _switch_to Max Filippov
2015-07-18  8:30 ` [PATCH v2 12/13] xtensa: implement fake NMI Max Filippov
2015-07-27 14:36   ` Max Filippov
2015-07-27 15:14   ` Peter Zijlstra
2015-07-18  8:30 ` [PATCH v2 13/13] xtensa: drop unused irq_err_count Max Filippov

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.