All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/2] arm64: Store breakpoint single step state into pstate
@ 2016-03-21  8:37 ` He Kuang
  0 siblings, 0 replies; 24+ messages in thread
From: He Kuang @ 2016-03-21  8:37 UTC (permalink / raw)
  To: catalin.marinas, will.deacon, mark.rutland, Dave.Martin,
	hanjun.guo, james.morse, yang.shi, gregkh, marc.zyngier, richard
  Cc: wangnan0, hekuang, linux-arm-kernel, linux-kernel

From: Wang Nan <wangnan0@huawei.com>

Store breakpoint single step state into pstate to fix the
recursion issue on ARM64.

Signed-off-by: Kaixu Xia <xiakaixu@huawei.com>
Signed-off-by: Hanjun Guo <hanjun.guo@linaro.org>
---
 arch/arm64/include/asm/debug-monitors.h |  9 ++++++
 arch/arm64/include/uapi/asm/ptrace.h    | 10 +++++++
 arch/arm64/kernel/hw_breakpoint.c       | 49 +++++++++++++++++++++++++++++++++
 arch/arm64/kernel/signal.c              |  2 ++
 4 files changed, 70 insertions(+)

diff --git a/arch/arm64/include/asm/debug-monitors.h b/arch/arm64/include/asm/debug-monitors.h
index 279c85b5..b5902e8 100644
--- a/arch/arm64/include/asm/debug-monitors.h
+++ b/arch/arm64/include/asm/debug-monitors.h
@@ -132,11 +132,20 @@ int kernel_active_single_step(void);
 
 #ifdef CONFIG_HAVE_HW_BREAKPOINT
 int reinstall_suspended_bps(struct pt_regs *regs);
+u64 signal_single_step_enable_bps(void);
+void signal_reinstall_single_step(u64 pstate);
 #else
 static inline int reinstall_suspended_bps(struct pt_regs *regs)
 {
 	return -ENODEV;
 }
+
+static inline u64 signal_single_step_enable_bps(void)
+{
+	return 0;
+}
+
+static inline void signal_reinstall_single_step(u64 pstate) { }
 #endif
 
 int aarch32_break_handler(struct pt_regs *regs);
diff --git a/arch/arm64/include/uapi/asm/ptrace.h b/arch/arm64/include/uapi/asm/ptrace.h
index 208db3d..8dbfdac 100644
--- a/arch/arm64/include/uapi/asm/ptrace.h
+++ b/arch/arm64/include/uapi/asm/ptrace.h
@@ -52,6 +52,16 @@
 #define PSR_N_BIT	0x80000000
 
 /*
+ * pstate in pt_regs and user_pt_regs is 64 bits. The highest 32 bits
+ * of it can be used by the kernel. One user of them is the signal handler.
+ */
+#define PSR_LINUX_MASK        0xffffffff00000000UL
+#define PSR_LINUX_HW_BP_SS    0x0000000100000000UL    /* Single step and disable breakpoints */
+#define PSR_LINUX_HW_WP_SS    0x0000000200000000UL    /* Single step and disable watchpoints */
+
+#define PSR_LINUX_HW_SS    (PSR_LINUX_HW_BP_SS | PSR_LINUX_HW_WP_SS)
+
+/*
  * Groups of PSR bits
  */
 #define PSR_f		0xff000000	/* Flags		*/
diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c
index b45c95d..18fd3d3 100644
--- a/arch/arm64/kernel/hw_breakpoint.c
+++ b/arch/arm64/kernel/hw_breakpoint.c
@@ -954,3 +954,52 @@ int hw_breakpoint_exceptions_notify(struct notifier_block *unused,
 {
 	return NOTIFY_DONE;
 }
+
+u64 signal_single_step_enable_bps(void)
+{
+	struct debug_info *debug_info = &current->thread.debug;
+	u64 retval = 0;
+
+	if (likely(!debug_info->bps_disabled && !debug_info->wps_disabled))
+		return 0;
+
+	if (debug_info->bps_disabled) {
+		retval |= PSR_LINUX_HW_BP_SS;
+		toggle_bp_registers(AARCH64_DBG_REG_BCR, DBG_ACTIVE_EL0, 1);
+		debug_info->bps_disabled = 0;
+	}
+
+	if (debug_info->wps_disabled) {
+		retval |= PSR_LINUX_HW_WP_SS;
+		toggle_bp_registers(AARCH64_DBG_REG_WCR, DBG_ACTIVE_EL0, 1);
+		debug_info->wps_disabled = 0;
+	}
+
+	if (debug_info->suspended_step)
+		debug_info->suspended_step = 0;
+	else
+		user_disable_single_step(current);
+	return retval;
+}
+
+void signal_reinstall_single_step(u64 pstate)
+{
+	struct debug_info *debug_info = &current->thread.debug;
+
+	if (likely(!(pstate & PSR_LINUX_HW_SS)))
+		return;
+
+	if (pstate & PSR_LINUX_HW_BP_SS) {
+		debug_info->bps_disabled = 1;
+		toggle_bp_registers(AARCH64_DBG_REG_BCR, DBG_ACTIVE_EL0, 0);
+	}
+	if (pstate & PSR_LINUX_HW_WP_SS) {
+		debug_info->wps_disabled = 1;
+		toggle_bp_registers(AARCH64_DBG_REG_WCR, DBG_ACTIVE_EL0, 0);
+	}
+
+	if (test_thread_flag(TIF_SINGLESTEP))
+		debug_info->suspended_step = 1;
+	else
+		user_enable_single_step(current);
+}
diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c
index e18c48c..6cb1e49 100644
--- a/arch/arm64/kernel/signal.c
+++ b/arch/arm64/kernel/signal.c
@@ -151,6 +151,7 @@ asmlinkage long sys_rt_sigreturn(struct pt_regs *regs)
 	if (restore_altstack(&frame->uc.uc_stack))
 		goto badframe;
 
+	signal_reinstall_single_step(regs->pstate);
 	return regs->regs[0];
 
 badframe:
@@ -292,6 +293,7 @@ static void handle_signal(struct ksignal *ksig, struct pt_regs *regs)
 	int usig = ksig->sig;
 	int ret;
 
+	regs->pstate |= signal_single_step_enable_bps();
 	/*
 	 * Set up the stack frame
 	 */
-- 
1.8.5.2

^ permalink raw reply related	[flat|nested] 24+ messages in thread

* [PATCH 1/2] arm64: Store breakpoint single step state into pstate
@ 2016-03-21  8:37 ` He Kuang
  0 siblings, 0 replies; 24+ messages in thread
From: He Kuang @ 2016-03-21  8:37 UTC (permalink / raw)
  To: linux-arm-kernel

From: Wang Nan <wangnan0@huawei.com>

Store breakpoint single step state into pstate to fix the
recursion issue on ARM64.

Signed-off-by: Kaixu Xia <xiakaixu@huawei.com>
Signed-off-by: Hanjun Guo <hanjun.guo@linaro.org>
---
 arch/arm64/include/asm/debug-monitors.h |  9 ++++++
 arch/arm64/include/uapi/asm/ptrace.h    | 10 +++++++
 arch/arm64/kernel/hw_breakpoint.c       | 49 +++++++++++++++++++++++++++++++++
 arch/arm64/kernel/signal.c              |  2 ++
 4 files changed, 70 insertions(+)

diff --git a/arch/arm64/include/asm/debug-monitors.h b/arch/arm64/include/asm/debug-monitors.h
index 279c85b5..b5902e8 100644
--- a/arch/arm64/include/asm/debug-monitors.h
+++ b/arch/arm64/include/asm/debug-monitors.h
@@ -132,11 +132,20 @@ int kernel_active_single_step(void);
 
 #ifdef CONFIG_HAVE_HW_BREAKPOINT
 int reinstall_suspended_bps(struct pt_regs *regs);
+u64 signal_single_step_enable_bps(void);
+void signal_reinstall_single_step(u64 pstate);
 #else
 static inline int reinstall_suspended_bps(struct pt_regs *regs)
 {
 	return -ENODEV;
 }
+
+static inline u64 signal_single_step_enable_bps(void)
+{
+	return 0;
+}
+
+static inline void signal_reinstall_single_step(u64 pstate) { }
 #endif
 
 int aarch32_break_handler(struct pt_regs *regs);
diff --git a/arch/arm64/include/uapi/asm/ptrace.h b/arch/arm64/include/uapi/asm/ptrace.h
index 208db3d..8dbfdac 100644
--- a/arch/arm64/include/uapi/asm/ptrace.h
+++ b/arch/arm64/include/uapi/asm/ptrace.h
@@ -52,6 +52,16 @@
 #define PSR_N_BIT	0x80000000
 
 /*
+ * pstate in pt_regs and user_pt_regs is 64 bits. The highest 32 bits
+ * of it can be used by the kernel. One user of them is the signal handler.
+ */
+#define PSR_LINUX_MASK        0xffffffff00000000UL
+#define PSR_LINUX_HW_BP_SS    0x0000000100000000UL    /* Single step and disable breakpoints */
+#define PSR_LINUX_HW_WP_SS    0x0000000200000000UL    /* Single step and disable watchpoints */
+
+#define PSR_LINUX_HW_SS    (PSR_LINUX_HW_BP_SS | PSR_LINUX_HW_WP_SS)
+
+/*
  * Groups of PSR bits
  */
 #define PSR_f		0xff000000	/* Flags		*/
diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c
index b45c95d..18fd3d3 100644
--- a/arch/arm64/kernel/hw_breakpoint.c
+++ b/arch/arm64/kernel/hw_breakpoint.c
@@ -954,3 +954,52 @@ int hw_breakpoint_exceptions_notify(struct notifier_block *unused,
 {
 	return NOTIFY_DONE;
 }
+
+u64 signal_single_step_enable_bps(void)
+{
+	struct debug_info *debug_info = &current->thread.debug;
+	u64 retval = 0;
+
+	if (likely(!debug_info->bps_disabled && !debug_info->wps_disabled))
+		return 0;
+
+	if (debug_info->bps_disabled) {
+		retval |= PSR_LINUX_HW_BP_SS;
+		toggle_bp_registers(AARCH64_DBG_REG_BCR, DBG_ACTIVE_EL0, 1);
+		debug_info->bps_disabled = 0;
+	}
+
+	if (debug_info->wps_disabled) {
+		retval |= PSR_LINUX_HW_WP_SS;
+		toggle_bp_registers(AARCH64_DBG_REG_WCR, DBG_ACTIVE_EL0, 1);
+		debug_info->wps_disabled = 0;
+	}
+
+	if (debug_info->suspended_step)
+		debug_info->suspended_step = 0;
+	else
+		user_disable_single_step(current);
+	return retval;
+}
+
+void signal_reinstall_single_step(u64 pstate)
+{
+	struct debug_info *debug_info = &current->thread.debug;
+
+	if (likely(!(pstate & PSR_LINUX_HW_SS)))
+		return;
+
+	if (pstate & PSR_LINUX_HW_BP_SS) {
+		debug_info->bps_disabled = 1;
+		toggle_bp_registers(AARCH64_DBG_REG_BCR, DBG_ACTIVE_EL0, 0);
+	}
+	if (pstate & PSR_LINUX_HW_WP_SS) {
+		debug_info->wps_disabled = 1;
+		toggle_bp_registers(AARCH64_DBG_REG_WCR, DBG_ACTIVE_EL0, 0);
+	}
+
+	if (test_thread_flag(TIF_SINGLESTEP))
+		debug_info->suspended_step = 1;
+	else
+		user_enable_single_step(current);
+}
diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c
index e18c48c..6cb1e49 100644
--- a/arch/arm64/kernel/signal.c
+++ b/arch/arm64/kernel/signal.c
@@ -151,6 +151,7 @@ asmlinkage long sys_rt_sigreturn(struct pt_regs *regs)
 	if (restore_altstack(&frame->uc.uc_stack))
 		goto badframe;
 
+	signal_reinstall_single_step(regs->pstate);
 	return regs->regs[0];
 
 badframe:
@@ -292,6 +293,7 @@ static void handle_signal(struct ksignal *ksig, struct pt_regs *regs)
 	int usig = ksig->sig;
 	int ret;
 
+	regs->pstate |= signal_single_step_enable_bps();
 	/*
 	 * Set up the stack frame
 	 */
-- 
1.8.5.2

^ permalink raw reply related	[flat|nested] 24+ messages in thread

* [PATCH 2/2] arm64: Fix watchpoint recursion when single-step is wrongly triggered in irq
  2016-03-21  8:37 ` He Kuang
@ 2016-03-21  8:37   ` He Kuang
  -1 siblings, 0 replies; 24+ messages in thread
From: He Kuang @ 2016-03-21  8:37 UTC (permalink / raw)
  To: catalin.marinas, will.deacon, mark.rutland, Dave.Martin,
	hanjun.guo, james.morse, yang.shi, gregkh, marc.zyngier, richard
  Cc: wangnan0, hekuang, linux-arm-kernel, linux-kernel

On arm64, the watchpoint handler enables single-step to bypass the next
instruction and avoid recursive entry. If an irq is triggered right
after the watchpoint, a single-step will be wrongly triggered in the irq
handler, which causes the watchpoint address not to be stepped over and
the system to hang.

Problem can be found at the following URL:
"http://thread.gmane.org/gmane.linux.kernel/2167918"

This patch pushes watchpoint status and disables single step if it is
triggered in irq handler and restores them back after irq is handled.

Signed-off-by: Wang Nan <wangnan0@huawei.com>
Signed-off-by: He Kuang <hekuang@huawei.com>
---
 arch/arm64/include/asm/debug-monitors.h |  9 +++++++
 arch/arm64/kernel/debug-monitors.c      | 13 ++++++++++
 arch/arm64/kernel/entry.S               |  6 +++++
 arch/arm64/kernel/hw_breakpoint.c       | 44 +++++++++++++++++++++++++++++++--
 4 files changed, 70 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/include/asm/debug-monitors.h b/arch/arm64/include/asm/debug-monitors.h
index b5902e8..fe6939e 100644
--- a/arch/arm64/include/asm/debug-monitors.h
+++ b/arch/arm64/include/asm/debug-monitors.h
@@ -133,7 +133,10 @@ int kernel_active_single_step(void);
 #ifdef CONFIG_HAVE_HW_BREAKPOINT
 int reinstall_suspended_bps(struct pt_regs *regs);
 u64 signal_single_step_enable_bps(void);
+u64 irq_single_step_enable_bps(void);
+
 void signal_reinstall_single_step(u64 pstate);
+void irq_reinstall_single_step(struct pt_regs *regs);
 #else
 static inline int reinstall_suspended_bps(struct pt_regs *regs)
 {
@@ -145,7 +148,13 @@ static inline u64 signal_single_step_enable_bps(void)
 	return 0;
 }
 
+static inline u64 irq_single_step_enable_bps(void)
+{
+	return 0;
+}
+
 static inline void signal_reinstall_single_step(u64 pstate) { }
+static inline void irq_reinstall_single_step(struct pt_regs *regs) { }
 #endif
 
 int aarch32_break_handler(struct pt_regs *regs);
diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c
index c536c9e..fab1faa 100644
--- a/arch/arm64/kernel/debug-monitors.c
+++ b/arch/arm64/kernel/debug-monitors.c
@@ -245,9 +245,22 @@ static void send_user_sigtrap(int si_code)
 	force_sig_info(SIGTRAP, &info, current);
 }
 
+extern unsigned long el1_irq_ss_entry[];
+
 static int single_step_handler(unsigned long addr, unsigned int esr,
 			       struct pt_regs *regs)
 {
+	void *pc = (void *)instruction_pointer(regs);
+
+	if (pc == &el1_irq_ss_entry) {
+		struct pt_regs *irq_regs = (struct pt_regs *)(regs->sp);
+
+		irq_regs->pstate |= irq_single_step_enable_bps();
+		kernel_disable_single_step();
+
+		return 0;
+	}
+
 	/*
 	 * If we are stepping a pending breakpoint, call the hw_breakpoint
 	 * handler first.
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index 1f7f5a2..836d98e 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -402,12 +402,18 @@ ENDPROC(el1_sync)
 el1_irq:
 	kernel_entry 1
 	enable_dbg
+	.global el1_irq_ss_entry
+el1_irq_ss_entry:
 #ifdef CONFIG_TRACE_IRQFLAGS
 	bl	trace_hardirqs_off
 #endif
 
 	get_thread_info tsk
 	irq_handler
+#ifdef CONFIG_HAVE_HW_BREAKPOINT
+	mov	x0, sp
+	bl	irq_reinstall_single_step
+#endif
 
 #ifdef CONFIG_PREEMPT
 	ldr	w24, [tsk, #TI_PREEMPT]		// get preempt count
diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c
index 18fd3d3..0cf13ee 100644
--- a/arch/arm64/kernel/hw_breakpoint.c
+++ b/arch/arm64/kernel/hw_breakpoint.c
@@ -540,11 +540,12 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp)
  * exception level at the register level.
  * This is used when single-stepping after a breakpoint exception.
  */
-static void toggle_bp_registers(int reg, enum dbg_active_el el, int enable)
+static bool toggle_bp_registers(int reg, enum dbg_active_el el, int enable)
 {
 	int i, max_slots, privilege;
 	u32 ctrl;
 	struct perf_event **slots;
+	bool origin_state = false;
 
 	switch (reg) {
 	case AARCH64_DBG_REG_BCR:
@@ -556,7 +557,7 @@ static void toggle_bp_registers(int reg, enum dbg_active_el el, int enable)
 		max_slots = core_num_wrps;
 		break;
 	default:
-		return;
+		return false;
 	}
 
 	for (i = 0; i < max_slots; ++i) {
@@ -568,12 +569,16 @@ static void toggle_bp_registers(int reg, enum dbg_active_el el, int enable)
 			continue;
 
 		ctrl = read_wb_reg(reg, i);
+		if (ctrl & 0x1)
+			origin_state = true;
 		if (enable)
 			ctrl |= 0x1;
 		else
 			ctrl &= ~0x1;
 		write_wb_reg(reg, i, ctrl);
 	}
+
+	return origin_state;
 }
 
 /*
@@ -982,6 +987,41 @@ u64 signal_single_step_enable_bps(void)
 	return retval;
 }
 
+u64 irq_single_step_enable_bps(void)
+{
+	u64 retval = 0;
+
+	if (!toggle_bp_registers(AARCH64_DBG_REG_WCR, DBG_ACTIVE_EL1, 1))
+		retval |= PSR_LINUX_HW_WP_SS;
+
+	if (!toggle_bp_registers(AARCH64_DBG_REG_BCR, DBG_ACTIVE_EL1, 1))
+		retval |= PSR_LINUX_HW_BP_SS;
+
+	return retval;
+}
+
+void irq_reinstall_single_step(struct pt_regs *regs)
+{
+	u64 pstate = regs->pstate;
+
+	if (likely(!(regs->pstate & PSR_LINUX_HW_SS)))
+		return;
+
+	if (!user_mode(regs)) {
+		if (pstate & PSR_LINUX_HW_BP_SS)
+			toggle_bp_registers(AARCH64_DBG_REG_BCR,
+					    DBG_ACTIVE_EL1, 0);
+		if (pstate & PSR_LINUX_HW_WP_SS)
+			toggle_bp_registers(AARCH64_DBG_REG_WCR,
+					    DBG_ACTIVE_EL1, 0);
+
+		if (!kernel_active_single_step()) {
+			asm volatile ("msr     daifset, #8\n");
+			kernel_enable_single_step(regs);
+		}
+	}
+}
+
 void signal_reinstall_single_step(u64 pstate)
 {
 	struct debug_info *debug_info = &current->thread.debug;
-- 
1.8.5.2

^ permalink raw reply related	[flat|nested] 24+ messages in thread

* [PATCH 2/2] arm64: Fix watchpoint recursion when single-step is wrongly triggered in irq
@ 2016-03-21  8:37   ` He Kuang
  0 siblings, 0 replies; 24+ messages in thread
From: He Kuang @ 2016-03-21  8:37 UTC (permalink / raw)
  To: linux-arm-kernel

On arm64, the watchpoint handler enables single-step to bypass the next
instruction and avoid recursive entry. If an irq is triggered right
after the watchpoint, a single-step will be wrongly triggered in the irq
handler, which causes the watchpoint address not to be stepped over and
the system to hang.

Problem can be found at the following URL:
"http://thread.gmane.org/gmane.linux.kernel/2167918"

This patch pushes watchpoint status and disables single step if it is
triggered in irq handler and restores them back after irq is handled.

Signed-off-by: Wang Nan <wangnan0@huawei.com>
Signed-off-by: He Kuang <hekuang@huawei.com>
---
 arch/arm64/include/asm/debug-monitors.h |  9 +++++++
 arch/arm64/kernel/debug-monitors.c      | 13 ++++++++++
 arch/arm64/kernel/entry.S               |  6 +++++
 arch/arm64/kernel/hw_breakpoint.c       | 44 +++++++++++++++++++++++++++++++--
 4 files changed, 70 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/include/asm/debug-monitors.h b/arch/arm64/include/asm/debug-monitors.h
index b5902e8..fe6939e 100644
--- a/arch/arm64/include/asm/debug-monitors.h
+++ b/arch/arm64/include/asm/debug-monitors.h
@@ -133,7 +133,10 @@ int kernel_active_single_step(void);
 #ifdef CONFIG_HAVE_HW_BREAKPOINT
 int reinstall_suspended_bps(struct pt_regs *regs);
 u64 signal_single_step_enable_bps(void);
+u64 irq_single_step_enable_bps(void);
+
 void signal_reinstall_single_step(u64 pstate);
+void irq_reinstall_single_step(struct pt_regs *regs);
 #else
 static inline int reinstall_suspended_bps(struct pt_regs *regs)
 {
@@ -145,7 +148,13 @@ static inline u64 signal_single_step_enable_bps(void)
 	return 0;
 }
 
+static inline u64 irq_single_step_enable_bps(void)
+{
+	return 0;
+}
+
 static inline void signal_reinstall_single_step(u64 pstate) { }
+static inline void irq_reinstall_single_step(struct pt_regs *regs) { }
 #endif
 
 int aarch32_break_handler(struct pt_regs *regs);
diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c
index c536c9e..fab1faa 100644
--- a/arch/arm64/kernel/debug-monitors.c
+++ b/arch/arm64/kernel/debug-monitors.c
@@ -245,9 +245,22 @@ static void send_user_sigtrap(int si_code)
 	force_sig_info(SIGTRAP, &info, current);
 }
 
+extern unsigned long el1_irq_ss_entry[];
+
 static int single_step_handler(unsigned long addr, unsigned int esr,
 			       struct pt_regs *regs)
 {
+	void *pc = (void *)instruction_pointer(regs);
+
+	if (pc == &el1_irq_ss_entry) {
+		struct pt_regs *irq_regs = (struct pt_regs *)(regs->sp);
+
+		irq_regs->pstate |= irq_single_step_enable_bps();
+		kernel_disable_single_step();
+
+		return 0;
+	}
+
 	/*
 	 * If we are stepping a pending breakpoint, call the hw_breakpoint
 	 * handler first.
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index 1f7f5a2..836d98e 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -402,12 +402,18 @@ ENDPROC(el1_sync)
 el1_irq:
 	kernel_entry 1
 	enable_dbg
+	.global el1_irq_ss_entry
+el1_irq_ss_entry:
 #ifdef CONFIG_TRACE_IRQFLAGS
 	bl	trace_hardirqs_off
 #endif
 
 	get_thread_info tsk
 	irq_handler
+#ifdef CONFIG_HAVE_HW_BREAKPOINT
+	mov	x0, sp
+	bl	irq_reinstall_single_step
+#endif
 
 #ifdef CONFIG_PREEMPT
 	ldr	w24, [tsk, #TI_PREEMPT]		// get preempt count
diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c
index 18fd3d3..0cf13ee 100644
--- a/arch/arm64/kernel/hw_breakpoint.c
+++ b/arch/arm64/kernel/hw_breakpoint.c
@@ -540,11 +540,12 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp)
  * exception level at the register level.
  * This is used when single-stepping after a breakpoint exception.
  */
-static void toggle_bp_registers(int reg, enum dbg_active_el el, int enable)
+static bool toggle_bp_registers(int reg, enum dbg_active_el el, int enable)
 {
 	int i, max_slots, privilege;
 	u32 ctrl;
 	struct perf_event **slots;
+	bool origin_state = false;
 
 	switch (reg) {
 	case AARCH64_DBG_REG_BCR:
@@ -556,7 +557,7 @@ static void toggle_bp_registers(int reg, enum dbg_active_el el, int enable)
 		max_slots = core_num_wrps;
 		break;
 	default:
-		return;
+		return false;
 	}
 
 	for (i = 0; i < max_slots; ++i) {
@@ -568,12 +569,16 @@ static void toggle_bp_registers(int reg, enum dbg_active_el el, int enable)
 			continue;
 
 		ctrl = read_wb_reg(reg, i);
+		if (ctrl & 0x1)
+			origin_state = true;
 		if (enable)
 			ctrl |= 0x1;
 		else
 			ctrl &= ~0x1;
 		write_wb_reg(reg, i, ctrl);
 	}
+
+	return origin_state;
 }
 
 /*
@@ -982,6 +987,41 @@ u64 signal_single_step_enable_bps(void)
 	return retval;
 }
 
+u64 irq_single_step_enable_bps(void)
+{
+	u64 retval = 0;
+
+	if (!toggle_bp_registers(AARCH64_DBG_REG_WCR, DBG_ACTIVE_EL1, 1))
+		retval |= PSR_LINUX_HW_WP_SS;
+
+	if (!toggle_bp_registers(AARCH64_DBG_REG_BCR, DBG_ACTIVE_EL1, 1))
+		retval |= PSR_LINUX_HW_BP_SS;
+
+	return retval;
+}
+
+void irq_reinstall_single_step(struct pt_regs *regs)
+{
+	u64 pstate = regs->pstate;
+
+	if (likely(!(regs->pstate & PSR_LINUX_HW_SS)))
+		return;
+
+	if (!user_mode(regs)) {
+		if (pstate & PSR_LINUX_HW_BP_SS)
+			toggle_bp_registers(AARCH64_DBG_REG_BCR,
+					    DBG_ACTIVE_EL1, 0);
+		if (pstate & PSR_LINUX_HW_WP_SS)
+			toggle_bp_registers(AARCH64_DBG_REG_WCR,
+					    DBG_ACTIVE_EL1, 0);
+
+		if (!kernel_active_single_step()) {
+			asm volatile ("msr     daifset, #8\n");
+			kernel_enable_single_step(regs);
+		}
+	}
+}
+
 void signal_reinstall_single_step(u64 pstate)
 {
 	struct debug_info *debug_info = &current->thread.debug;
-- 
1.8.5.2

^ permalink raw reply related	[flat|nested] 24+ messages in thread

* Re: [PATCH 2/2] arm64: Fix watchpoint recursion when single-step is wrongly triggered in irq
  2016-03-21  8:37   ` He Kuang
@ 2016-03-21 10:24     ` Pratyush Anand
  -1 siblings, 0 replies; 24+ messages in thread
From: Pratyush Anand @ 2016-03-21 10:24 UTC (permalink / raw)
  To: He Kuang
  Cc: catalin.marinas, will.deacon, mark.rutland, Dave.Martin,
	hanjun.guo, james.morse, yang.shi, gregkh, marc.zyngier, richard,
	wangnan0, linux-arm-kernel, linux-kernel

On 21/03/2016:08:37:50 AM, He Kuang wrote:
> On arm64, watchpoint handler enables single-step to bypass the next
> instruction for not recursive enter. If an irq is triggered right
> after the watchpoint, a single-step will be wrongly triggered in irq
> handler, which causes the watchpoint address not stepped over and
> system hang.

Does patch [1] resolve this issue as well? I hope it does. Patch [1] has still
not been sent for review. Your test result will be helpful.

~Pratyush

[1] https://github.com/pratyushanand/linux/commit/7623c8099ac22eaa00e7e0f52430f7a4bd154652

^ permalink raw reply	[flat|nested] 24+ messages in thread

* [PATCH 2/2] arm64: Fix watchpoint recursion when single-step is wrongly triggered in irq
@ 2016-03-21 10:24     ` Pratyush Anand
  0 siblings, 0 replies; 24+ messages in thread
From: Pratyush Anand @ 2016-03-21 10:24 UTC (permalink / raw)
  To: linux-arm-kernel

On 21/03/2016:08:37:50 AM, He Kuang wrote:
> On arm64, watchpoint handler enables single-step to bypass the next
> instruction for not recursive enter. If an irq is triggered right
> after the watchpoint, a single-step will be wrongly triggered in irq
> handler, which causes the watchpoint address not stepped over and
> system hang.

Does patch [1] resolve this issue as well? I hope it does. Patch [1] has still
not been sent for review. Your test result will be helpful.

~Pratyush

[1] https://github.com/pratyushanand/linux/commit/7623c8099ac22eaa00e7e0f52430f7a4bd154652

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 2/2] arm64: Fix watchpoint recursion when single-step is wrongly triggered in irq
  2016-03-21 10:24     ` Pratyush Anand
@ 2016-03-21 10:38       ` Wangnan (F)
  -1 siblings, 0 replies; 24+ messages in thread
From: Wangnan (F) @ 2016-03-21 10:38 UTC (permalink / raw)
  To: Pratyush Anand, He Kuang
  Cc: catalin.marinas, will.deacon, mark.rutland, Dave.Martin,
	hanjun.guo, james.morse, yang.shi, gregkh, marc.zyngier, richard,
	linux-arm-kernel, linux-kernel



On 2016/3/21 18:24, Pratyush Anand wrote:
> On 21/03/2016:08:37:50 AM, He Kuang wrote:
>> On arm64, watchpoint handler enables single-step to bypass the next
>> instruction for not recursive enter. If an irq is triggered right
>> after the watchpoint, a single-step will be wrongly triggered in irq
>> handler, which causes the watchpoint address not stepped over and
>> system hang.
> Does patch [1] resolves this issue as well? I hope it should. Patch[1] has still
> not been sent for review. Your test result will be helpful.
>
> ~Pratyush
>
> [1] https://github.com/pratyushanand/linux/commit/7623c8099ac22eaa00e7e0f52430f7a4bd154652

Could you please provide a test program for your case so we can test
it on our devices? I guess setting breakpoint on a "copy_from_user()"
accessing an invalid address can trigger this problem?

Thank you.

^ permalink raw reply	[flat|nested] 24+ messages in thread

* [PATCH 2/2] arm64: Fix watchpoint recursion when single-step is wrongly triggered in irq
@ 2016-03-21 10:38       ` Wangnan (F)
  0 siblings, 0 replies; 24+ messages in thread
From: Wangnan (F) @ 2016-03-21 10:38 UTC (permalink / raw)
  To: linux-arm-kernel



On 2016/3/21 18:24, Pratyush Anand wrote:
> On 21/03/2016:08:37:50 AM, He Kuang wrote:
>> On arm64, watchpoint handler enables single-step to bypass the next
>> instruction for not recursive enter. If an irq is triggered right
>> after the watchpoint, a single-step will be wrongly triggered in irq
>> handler, which causes the watchpoint address not stepped over and
>> system hang.
> Does patch [1] resolves this issue as well? I hope it should. Patch[1] has still
> not been sent for review. Your test result will be helpful.
>
> ~Pratyush
>
> [1] https://github.com/pratyushanand/linux/commit/7623c8099ac22eaa00e7e0f52430f7a4bd154652

Could you please provide a test program for your case so we can test
it on our devices? I guess setting breakpoint on a "copy_from_user()"
accessing an invalid address can trigger this problem?

Thank you.

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 2/2] arm64: Fix watchpoint recursion when single-step is wrongly triggered in irq
  2016-03-21 10:38       ` Wangnan (F)
@ 2016-03-21 11:05         ` Pratyush Anand
  -1 siblings, 0 replies; 24+ messages in thread
From: Pratyush Anand @ 2016-03-21 11:05 UTC (permalink / raw)
  To: Wangnan (F)
  Cc: He Kuang, catalin.marinas, will.deacon, mark.rutland,
	Dave.Martin, hanjun.guo, james.morse, yang.shi, gregkh,
	marc.zyngier, richard, linux-arm-kernel, linux-kernel

On 21/03/2016:06:38:31 PM, Wangnan (F) wrote:
> 
> 
> On 2016/3/21 18:24, Pratyush Anand wrote:
> >On 21/03/2016:08:37:50 AM, He Kuang wrote:
> >>On arm64, watchpoint handler enables single-step to bypass the next
> >>instruction for not recursive enter. If an irq is triggered right
> >>after the watchpoint, a single-step will be wrongly triggered in irq
> >>handler, which causes the watchpoint address not stepped over and
> >>system hang.
> >Does patch [1] resolves this issue as well? I hope it should. Patch[1] has still
> >not been sent for review. Your test result will be helpful.
> >
> >~Pratyush
> >
> >[1] https://github.com/pratyushanand/linux/commit/7623c8099ac22eaa00e7e0f52430f7a4bd154652
> 
> Could you please provide a test program for your case so we can test
> it on our devices? I guess setting breakpoint on a "copy_from_user()"
> accessing an invalid address can trigger this problem?

My test case was to test kprobing of copy_from_user. I used kprobe64-v11.

I reverted "patch v11 3/9" and used following script  for __copy_to_user(),
which instruments kprobe at every instruction of a given function. I can easily
see "Unexpected kernel single-step exception at EL1".
-------------------------------------------------------------
#kprobe_at_function_all_inst.sh  
-------------------------------------------------------------
#! /bin/sh
#$1: function name
echo 0 > /sys/kernel/debug/tracing/events/kprobes/enable
echo >  /sys/kernel/debug/tracing/trace
echo > /sys/kernel/debug/tracing/kprobe_events
func=$(cat /proc/kallsyms | grep -A 1 -w $1 | cut -d ' ' -f 1)
func_start=$((0x$(echo $func | cut -d ' ' -f 1)))
func_end=$((0x$(echo $func | cut -d ' ' -f 2)))
offset=0
while [ $(($func_start + $offset)) -lt $func_end ]
  do
          printf -v cmd "p:probe_%x $1+0x%x" $offset $offset
          echo $cmd >> /sys/kernel/debug/tracing/kprobe_events
          offset=$((offset + 4))
  done
echo 1 > /sys/kernel/debug/tracing/events/kprobes/enable
-------------------------------------------------------------

# ./kprobe_at_function_all_inst.sh __copy_to_user

Now, if I apply the patch which I referred in [1], I can no longer see any
"Unexpected kernel single-step exception at EL1" with above test script.

If I understood correctly, then the problem you described in your patch is that
an irq (el1_irq) is raised while a watchpoint was being handled by the kernel
(specifically, before the kernel could call reinstall_suspended_bps() to disable
single stepping). Since I disable single stepping for all el1 exception modes,
this also covers the case where kernel_enable_single_step() had been called but
kernel_disable_single_step() hadn't been called yet. So, your test case could be
another good test for my patch.
~Pratyush

^ permalink raw reply	[flat|nested] 24+ messages in thread

* [PATCH 2/2] arm64: Fix watchpoint recursion when single-step is wrongly triggered in irq
@ 2016-03-21 11:05         ` Pratyush Anand
  0 siblings, 0 replies; 24+ messages in thread
From: Pratyush Anand @ 2016-03-21 11:05 UTC (permalink / raw)
  To: linux-arm-kernel

On 21/03/2016:06:38:31 PM, Wangnan (F) wrote:
> 
> 
> On 2016/3/21 18:24, Pratyush Anand wrote:
> >On 21/03/2016:08:37:50 AM, He Kuang wrote:
> >>On arm64, watchpoint handler enables single-step to bypass the next
> >>instruction for not recursive enter. If an irq is triggered right
> >>after the watchpoint, a single-step will be wrongly triggered in irq
> >>handler, which causes the watchpoint address not stepped over and
> >>system hang.
> >Does patch [1] resolves this issue as well? I hope it should. Patch[1] has still
> >not been sent for review. Your test result will be helpful.
> >
> >~Pratyush
> >
> >[1] https://github.com/pratyushanand/linux/commit/7623c8099ac22eaa00e7e0f52430f7a4bd154652
> 
> Could you please provide a test program for your case so we can test
> it on our devices? I guess setting breakpoint on a "copy_from_user()"
> accessing an invalid address can trigger this problem?

My test case was to test kprobing of copy_from_user. I used kprobe64-v11.

I reverted "patch v11 3/9" and used following script  for __copy_to_user(),
which instruments kprobe at every instruction of a given function. I can easily
see "Unexpected kernel single-step exception at EL1".
-------------------------------------------------------------
#kprobe_at_function_all_inst.sh  
-------------------------------------------------------------
#! /bin/sh
#$1: function name
echo 0 > /sys/kernel/debug/tracing/events/kprobes/enable
echo >  /sys/kernel/debug/tracing/trace
echo > /sys/kernel/debug/tracing/kprobe_events
func=$(cat /proc/kallsyms | grep -A 1 -w $1 | cut -d ' ' -f 1)
func_start=$((0x$(echo $func | cut -d ' ' -f 1)))
func_end=$((0x$(echo $func | cut -d ' ' -f 2)))
offset=0
while [ $(($func_start + $offset)) -lt $func_end ]
  do
          printf -v cmd "p:probe_%x $1+0x%x" $offset $offset
          echo $cmd >> /sys/kernel/debug/tracing/kprobe_events
          offset=$((offset + 4))
  done
echo 1 > /sys/kernel/debug/tracing/events/kprobes/enable
-------------------------------------------------------------

# ./kprobe_at_function_all_inst.sh __copy_to_user

Now, if I apply the patch which I referred in [1], I can no longer see any
"Unexpected kernel single-step exception at EL1" with above test script.

If I understood correctly, then the problem you described in your patch is that
an irq (el1_irq) is raised while a watchpoint was being handled by the kernel
(specifically, before the kernel could call reinstall_suspended_bps() to disable
single stepping). Since I disable single stepping for all el1 exception modes,
this also covers the case where kernel_enable_single_step() had been called but
kernel_disable_single_step() hadn't been called yet. So, your test case could be
another good test for my patch.

~Pratyush

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 1/2] arm64: Store breakpoint single step state into pstate
  2016-03-21  8:37 ` He Kuang
@ 2016-03-21 16:08   ` Will Deacon
  -1 siblings, 0 replies; 24+ messages in thread
From: Will Deacon @ 2016-03-21 16:08 UTC (permalink / raw)
  To: He Kuang
  Cc: catalin.marinas, mark.rutland, Dave.Martin, hanjun.guo,
	james.morse, yang.shi, gregkh, marc.zyngier, richard, wangnan0,
	linux-arm-kernel, linux-kernel

On Mon, Mar 21, 2016 at 08:37:49AM +0000, He Kuang wrote:
> From: Wang Nan <wangnan0@huawei.com>
> 
> Store breakpoint single step state into pstate to fix the
> recursion issue on ARM64.
> 
> Signed-off-by: Kaixu Xia <xiakaixu@huawei.com>
> Signed-off-by: Hanjun Guo <hanjun.guo@linaro.org>
> ---
>  arch/arm64/include/asm/debug-monitors.h |  9 ++++++
>  arch/arm64/include/uapi/asm/ptrace.h    | 10 +++++++
>  arch/arm64/kernel/hw_breakpoint.c       | 49 +++++++++++++++++++++++++++++++++
>  arch/arm64/kernel/signal.c              |  2 ++
>  4 files changed, 70 insertions(+)
> 
> diff --git a/arch/arm64/include/asm/debug-monitors.h b/arch/arm64/include/asm/debug-monitors.h
> index 279c85b5..b5902e8 100644
> --- a/arch/arm64/include/asm/debug-monitors.h
> +++ b/arch/arm64/include/asm/debug-monitors.h
> @@ -132,11 +132,20 @@ int kernel_active_single_step(void);
>  
>  #ifdef CONFIG_HAVE_HW_BREAKPOINT
>  int reinstall_suspended_bps(struct pt_regs *regs);
> +u64 signal_single_step_enable_bps(void);
> +void signal_reinstall_single_step(u64 pstate);
>  #else
>  static inline int reinstall_suspended_bps(struct pt_regs *regs)
>  {
>  	return -ENODEV;
>  }
> +
> +static inline u64 signal_single_step_enable_bps(void)
> +{
> +	return 0;
> +}
> +
> +static inline void signal_reinstall_single_step(u64 pstate) { }
>  #endif
>  
>  int aarch32_break_handler(struct pt_regs *regs);
> diff --git a/arch/arm64/include/uapi/asm/ptrace.h b/arch/arm64/include/uapi/asm/ptrace.h
> index 208db3d..8dbfdac 100644
> --- a/arch/arm64/include/uapi/asm/ptrace.h
> +++ b/arch/arm64/include/uapi/asm/ptrace.h
> @@ -52,6 +52,16 @@
>  #define PSR_N_BIT	0x80000000
>  
>  /*
> + * pstat in pt_regs and user_pt_regs are 64 bits. The highest 32 bits
> + * of it can be used by kernel. One user of them is signal handler.
> + */
> +#define PSR_LINUX_MASK        0xffffffff00000000UL
> +#define PSR_LINUX_HW_BP_SS    0x0000000100000000UL    /* Single step and disable breakpoints */
> +#define PSR_LINUX_HW_WP_SS    0x0000000200000000UL    /* Single step and disable watchpoints */
> +
> +#define PSR_LINUX_HW_SS    (PSR_LINUX_HW_BP_SS | PSR_LINUX_HW_WP_SS)

As I've said before, I'm not at all keen on this approach. We're changing
a UAPI header to include magic numbers that may or may not conflict with
the architecture in future in order to fix a problem that doesn't exist
outside of a contrived test case.

I'd much rather place a restriction on .wakeup_events of the hw_breakpoint
and simply refuse to initialise things in a way that leads to problems down
the line. Ptrace and GDB are the primary users of this interface and I'm not
willing to risk breaking them with these sorts of invasive changes.

Will

^ permalink raw reply	[flat|nested] 24+ messages in thread

* [PATCH 1/2] arm64: Store breakpoint single step state into pstate
@ 2016-03-21 16:08   ` Will Deacon
  0 siblings, 0 replies; 24+ messages in thread
From: Will Deacon @ 2016-03-21 16:08 UTC (permalink / raw)
  To: linux-arm-kernel

On Mon, Mar 21, 2016 at 08:37:49AM +0000, He Kuang wrote:
> From: Wang Nan <wangnan0@huawei.com>
> 
> Store breakpoint single step state into pstate to fix the
> recursion issue on ARM64.
> 
> Signed-off-by: Kaixu Xia <xiakaixu@huawei.com>
> Signed-off-by: Hanjun Guo <hanjun.guo@linaro.org>
> ---
>  arch/arm64/include/asm/debug-monitors.h |  9 ++++++
>  arch/arm64/include/uapi/asm/ptrace.h    | 10 +++++++
>  arch/arm64/kernel/hw_breakpoint.c       | 49 +++++++++++++++++++++++++++++++++
>  arch/arm64/kernel/signal.c              |  2 ++
>  4 files changed, 70 insertions(+)
> 
> diff --git a/arch/arm64/include/asm/debug-monitors.h b/arch/arm64/include/asm/debug-monitors.h
> index 279c85b5..b5902e8 100644
> --- a/arch/arm64/include/asm/debug-monitors.h
> +++ b/arch/arm64/include/asm/debug-monitors.h
> @@ -132,11 +132,20 @@ int kernel_active_single_step(void);
>  
>  #ifdef CONFIG_HAVE_HW_BREAKPOINT
>  int reinstall_suspended_bps(struct pt_regs *regs);
> +u64 signal_single_step_enable_bps(void);
> +void signal_reinstall_single_step(u64 pstate);
>  #else
>  static inline int reinstall_suspended_bps(struct pt_regs *regs)
>  {
>  	return -ENODEV;
>  }
> +
> +static inline u64 signal_single_step_enable_bps(void)
> +{
> +	return 0;
> +}
> +
> +static inline void signal_reinstall_single_step(u64 pstate) { }
>  #endif
>  
>  int aarch32_break_handler(struct pt_regs *regs);
> diff --git a/arch/arm64/include/uapi/asm/ptrace.h b/arch/arm64/include/uapi/asm/ptrace.h
> index 208db3d..8dbfdac 100644
> --- a/arch/arm64/include/uapi/asm/ptrace.h
> +++ b/arch/arm64/include/uapi/asm/ptrace.h
> @@ -52,6 +52,16 @@
>  #define PSR_N_BIT	0x80000000
>  
>  /*
> + * pstat in pt_regs and user_pt_regs are 64 bits. The highest 32 bits
> + * of it can be used by kernel. One user of them is signal handler.
> + */
> +#define PSR_LINUX_MASK        0xffffffff00000000UL
> +#define PSR_LINUX_HW_BP_SS    0x0000000100000000UL    /* Single step and disable breakpoints */
> +#define PSR_LINUX_HW_WP_SS    0x0000000200000000UL    /* Single step and disable watchpoints */
> +
> +#define PSR_LINUX_HW_SS    (PSR_LINUX_HW_BP_SS | PSR_LINUX_HW_WP_SS)

As I've said before, I'm not at all keen on this approach. We're changing
a UAPI header to include magic numbers that may or may not conflict with
the architecture in future in order to fix a problem that doesn't exist
outside of a contrived test case.

I'd much rather place a restriction on .wakeup_events of the hw_breakpoint
and simply refuse to initialise things in a way that leads to problems down
the line. Ptrace and GDB are the primary users of this interface and I'm not
willing to risk breaking them with these sorts of invasive changes.

Will

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 2/2] arm64: Fix watchpoint recursion when single-step is wrongly triggered in irq
  2016-03-21 10:24     ` Pratyush Anand
@ 2016-03-31 12:45       ` Li Bin
  -1 siblings, 0 replies; 24+ messages in thread
From: Li Bin @ 2016-03-31 12:45 UTC (permalink / raw)
  To: Pratyush Anand, He Kuang
  Cc: mark.rutland, yang.shi, wangnan0, marc.zyngier, catalin.marinas,
	will.deacon, linux-kernel, richard, james.morse, hanjun.guo,
	gregkh, Dave.Martin, linux-arm-kernel, Hanjun Guo, Ding Tianhong

Hi Pratyush,

on 2016/3/21 18:24, Pratyush Anand wrote:
> On 21/03/2016:08:37:50 AM, He Kuang wrote:
>> On arm64, watchpoint handler enables single-step to bypass the next
>> instruction for not recursive enter. If an irq is triggered right
>> after the watchpoint, a single-step will be wrongly triggered in irq
>> handler, which causes the watchpoint address not stepped over and
>> system hang.
> 
> Does patch [1] resolves this issue as well? I hope it should. Patch[1] has still
> not been sent for review. Your test result will be helpful.
> 
> ~Pratyush
> 
> [1] https://github.com/pratyushanand/linux/commit/7623c8099ac22eaa00e7e0f52430f7a4bd154652

This patch did not consider that, when the exception returns, the singlestep flag
should be restored; otherwise the expected singlestep will not be triggered.
Right?

Thanks,
Li Bin

> 
> _______________________________________________
> linux-arm-kernel mailing list
> linux-arm-kernel@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
> 
> .
> 

^ permalink raw reply	[flat|nested] 24+ messages in thread

* [PATCH 2/2] arm64: Fix watchpoint recursion when single-step is wrongly triggered in irq
@ 2016-03-31 12:45       ` Li Bin
  0 siblings, 0 replies; 24+ messages in thread
From: Li Bin @ 2016-03-31 12:45 UTC (permalink / raw)
  To: linux-arm-kernel

Hi Pratyush,

on 2016/3/21 18:24, Pratyush Anand wrote:
> On 21/03/2016:08:37:50 AM, He Kuang wrote:
>> On arm64, watchpoint handler enables single-step to bypass the next
>> instruction for not recursive enter. If an irq is triggered right
>> after the watchpoint, a single-step will be wrongly triggered in irq
>> handler, which causes the watchpoint address not stepped over and
>> system hang.
> 
> Does patch [1] resolves this issue as well? I hope it should. Patch[1] has still
> not been sent for review. Your test result will be helpful.
> 
> ~Pratyush
> 
> [1] https://github.com/pratyushanand/linux/commit/7623c8099ac22eaa00e7e0f52430f7a4bd154652

This patch did not consider that, when the exception returns, the singlestep flag
should be restored; otherwise the expected singlestep will not be triggered.
Right?

Thanks,
Li Bin

> 
> _______________________________________________
> linux-arm-kernel mailing list
> linux-arm-kernel at lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
> 
> .
> 

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 2/2] arm64: Fix watchpoint recursion when single-step is wrongly triggered in irq
  2016-03-31 12:45       ` Li Bin
@ 2016-04-04  5:17         ` Pratyush Anand
  -1 siblings, 0 replies; 24+ messages in thread
From: Pratyush Anand @ 2016-04-04  5:17 UTC (permalink / raw)
  To: Li Bin
  Cc: He Kuang, mark.rutland, yang.shi, wangnan0, marc.zyngier,
	catalin.marinas, will.deacon, linux-kernel, richard, james.morse,
	hanjun.guo, gregkh, Dave.Martin, linux-arm-kernel, Hanjun Guo,
	Ding Tianhong

Hi Li,

On 31/03/2016:08:45:05 PM, Li Bin wrote:
> Hi Pratyush,
> 
> on 2016/3/21 18:24, Pratyush Anand wrote:
> > On 21/03/2016:08:37:50 AM, He Kuang wrote:
> >> On arm64, watchpoint handler enables single-step to bypass the next
> >> instruction for not recursive enter. If an irq is triggered right
> >> after the watchpoint, a single-step will be wrongly triggered in irq
> >> handler, which causes the watchpoint address not stepped over and
> >> system hang.
> > 
> > Does patch [1] resolves this issue as well? I hope it should. Patch[1] has still
> > not been sent for review. Your test result will be helpful.
> > 
> > ~Pratyush
> > 
> > [1] https://github.com/pratyushanand/linux/commit/7623c8099ac22eaa00e7e0f52430f7a4bd154652
> 
> This patch did not consider that, when excetpion return, the singlestep flag
> should be restored, otherwise the right singlestep will not triggered.
> Right?

Yes, you are right, and there are other problems as well. Will Deacon pointed
out [1] that kernel debugging is per-cpu rather than per-task. So, I thought
of a per-cpu implementation by introducing a new element "flags" in struct
pt_regs. But even with that I see issues. For example:
- While executing single step instruction, we get a data abort
- In the kernel_entry of data abort we disable single stepping based on "flags"
  bit field
- While handling data abort we receive another interrupt, so we are again in
  kernel_entry (for el1_irq). Single stepping will be disabled again (although
  it does not matter).

Now the issue is that, what condition should be verified in kernel_exit for
enabling single step again? In the above scenario, kernel_exit for el1_irq
should not enable single stepping, but how to prevent that elegantly?

[1] http://www.spinics.net/lists/arm-kernel/msg491844.html

^ permalink raw reply	[flat|nested] 24+ messages in thread

* [PATCH 2/2] arm64: Fix watchpoint recursion when single-step is wrongly triggered in irq
@ 2016-04-04  5:17         ` Pratyush Anand
  0 siblings, 0 replies; 24+ messages in thread
From: Pratyush Anand @ 2016-04-04  5:17 UTC (permalink / raw)
  To: linux-arm-kernel

Hi Li,

On 31/03/2016:08:45:05 PM, Li Bin wrote:
> Hi Pratyush,
> 
> on 2016/3/21 18:24, Pratyush Anand wrote:
> > On 21/03/2016:08:37:50 AM, He Kuang wrote:
> >> On arm64, watchpoint handler enables single-step to bypass the next
> >> instruction for not recursive enter. If an irq is triggered right
> >> after the watchpoint, a single-step will be wrongly triggered in irq
> >> handler, which causes the watchpoint address not stepped over and
> >> system hang.
> > 
> > Does patch [1] resolves this issue as well? I hope it should. Patch[1] has still
> > not been sent for review. Your test result will be helpful.
> > 
> > ~Pratyush
> > 
> > [1] https://github.com/pratyushanand/linux/commit/7623c8099ac22eaa00e7e0f52430f7a4bd154652
> 
> This patch did not consider that, when excetpion return, the singlestep flag
> should be restored, otherwise the right singlestep will not triggered.
> Right?

Yes, you are right, and there are other problems as well. Will Deacon pointed
out [1] that kernel debugging is per-cpu rather than per-task. So, I thought
of a per-cpu implementation by introducing a new element "flags" in struct
pt_regs. But even with that I see issues. For example:
- While executing single step instruction, we get a data abort
- In the kernel_entry of data abort we disable single stepping based on "flags"
  bit field
- While handling data abort we receive another interrupt, so we are again in
  kernel_entry (for el1_irq). Single stepping will be disabled again (although
  it does not matter).

Now the issue is that, what condition should be verified in kernel_exit for
enabling single step again? In the above scenario, kernel_exit for el1_irq
should not enable single stepping, but how to prevent that elegantly?

[1] http://www.spinics.net/lists/arm-kernel/msg491844.html

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 2/2] arm64: Fix watchpoint recursion when single-step is wrongly triggered in irq
  2016-04-04  5:17         ` Pratyush Anand
@ 2016-04-07 11:34           ` Li Bin
  -1 siblings, 0 replies; 24+ messages in thread
From: Li Bin @ 2016-04-07 11:34 UTC (permalink / raw)
  To: Pratyush Anand
  Cc: He Kuang, mark.rutland, yang.shi, wangnan0, marc.zyngier,
	catalin.marinas, will.deacon, linux-kernel, richard, james.morse,
	hanjun.guo, gregkh, Dave.Martin, linux-arm-kernel, Hanjun Guo,
	Ding Tianhong

Hi Pratyush,

on 2016/4/4 13:17, Pratyush Anand wrote:
> Hi Li,
> 
> On 31/03/2016:08:45:05 PM, Li Bin wrote:
>> Hi Pratyush,
>>
>> on 2016/3/21 18:24, Pratyush Anand wrote:
>>> On 21/03/2016:08:37:50 AM, He Kuang wrote:
>>>> On arm64, watchpoint handler enables single-step to bypass the next
>>>> instruction for not recursive enter. If an irq is triggered right
>>>> after the watchpoint, a single-step will be wrongly triggered in irq
>>>> handler, which causes the watchpoint address not stepped over and
>>>> system hang.
>>>
>>> Does patch [1] resolves this issue as well? I hope it should. Patch[1] has still
>>> not been sent for review. Your test result will be helpful.
>>>
>>> ~Pratyush
>>>
>>> [1] https://github.com/pratyushanand/linux/commit/7623c8099ac22eaa00e7e0f52430f7a4bd154652
>>
>> This patch did not consider that, when excetpion return, the singlestep flag
>> should be restored, otherwise the right singlestep will not triggered.
>> Right?
> 
> Yes, you are right, and there are other problems as well. Will Deacon pointed
> out [1] that kernel debugging is per-cpu rather than per-task. So, I did thought
> of a per-cpu implementation by introducing a new element "flags" in struct
> pt_regs. But even with that I see issues. For example:
> - While executing single step instruction, we get a data abort
> - In the kernel_entry of data abort we disable single stepping based on "flags"
>   bit field
> - While handling data abort we receive anther interrupt, so we are again in
>   kernel_entry (for el1_irq). Single stepping will be disabled again (although
>   it does not matter).
> 
> Now the issue is that, what condition should be verified in kernel_exit for
> enabling single step again? In the above scenario, kernel_exit for el1_irq
> should not enable single stepping, but how to prevent that elegantly?

The condition for kernel_entry to disable single step is that MDSCR_EL1.SS
has been set. Only the kernel_exit corresponding to the kernel_entry that disabled
single step should re-enable it. However, the kernel_exit of the single-step exception
must be handled specially: when single step is disabled in the single-step exception
handler, the flag in the pt_regs stored on the stack should be cleared, to prevent
single step from being re-enabled by kernel_exit.

Thanks,
Li Bin

---
 arch/arm64/include/asm/assembler.h      |   11 +++++++++++
 arch/arm64/include/asm/debug-monitors.h |    2 +-
 arch/arm64/include/asm/ptrace.h         |    2 ++
 arch/arm64/kernel/asm-offsets.c         |    1 +
 arch/arm64/kernel/debug-monitors.c      |    3 ++-
 arch/arm64/kernel/entry.S               |   18 ++++++++++++++++++
 arch/arm64/kernel/hw_breakpoint.c       |    2 +-
 arch/arm64/kernel/kgdb.c                |    2 +-
 8 files changed, 37 insertions(+), 4 deletions(-)

diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
index 70f7b9e..56a4335 100644
--- a/arch/arm64/include/asm/assembler.h
+++ b/arch/arm64/include/asm/assembler.h
@@ -60,6 +60,17 @@
 	msr	daifclr, #8
 	.endm

+	.macro	disable_step, orig
+	bic \orig, \orig, #1
+	msr	mdscr_el1, \orig
+	.endm
+
+	.macro	enable_step, orig
+	disable_dbg
+	orr \orig, \orig, #1
+	msr	mdscr_el1, \orig
+	.endm
+
 	.macro	disable_step_tsk, flgs, tmp
 	tbz	\flgs, #TIF_SINGLESTEP, 9990f
 	mrs	\tmp, mdscr_el1
diff --git a/arch/arm64/include/asm/debug-monitors.h b/arch/arm64/include/asm/debug-monitors.h
index 2fcb9b7..50f114c 100644
--- a/arch/arm64/include/asm/debug-monitors.h
+++ b/arch/arm64/include/asm/debug-monitors.h
@@ -115,7 +115,7 @@ void user_rewind_single_step(struct task_struct *task);
 void user_fastforward_single_step(struct task_struct *task);

 void kernel_enable_single_step(struct pt_regs *regs);
-void kernel_disable_single_step(void);
+void kernel_disable_single_step(struct pt_regs *regs);
 int kernel_active_single_step(void);

 #ifdef CONFIG_HAVE_HW_BREAKPOINT
diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h
index a307eb6..da00cd8 100644
--- a/arch/arm64/include/asm/ptrace.h
+++ b/arch/arm64/include/asm/ptrace.h
@@ -113,6 +113,8 @@ struct pt_regs {
 			u64 sp;
 			u64 pc;
 			u64 pstate;
+			u64 flags;
+			u64 padding;
 		};
 	};
 	u64 orig_x0;
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index 3ae6b31..7936ba9 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -56,6 +56,7 @@ int main(void)
   DEFINE(S_COMPAT_SP,		offsetof(struct pt_regs, compat_sp));
 #endif
   DEFINE(S_PSTATE,		offsetof(struct pt_regs, pstate));
+  DEFINE(S_FLAGS,		offsetof(struct pt_regs, flags));
   DEFINE(S_PC,			offsetof(struct pt_regs, pc));
   DEFINE(S_ORIG_X0,		offsetof(struct pt_regs, orig_x0));
   DEFINE(S_SYSCALLNO,		offsetof(struct pt_regs, syscallno));
diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c
index c45f296..9d2bdbf 100644
--- a/arch/arm64/kernel/debug-monitors.c
+++ b/arch/arm64/kernel/debug-monitors.c
@@ -403,9 +403,10 @@ void kernel_enable_single_step(struct pt_regs *regs)
 	enable_debug_monitors(DBG_ACTIVE_EL1);
 }

-void kernel_disable_single_step(void)
+void kernel_disable_single_step(struct pt_regs *regs)
 {
 	WARN_ON(!irqs_disabled());
+	regs->flags = 0;
 	mdscr_write(mdscr_read() & ~DBG_MDSCR_SS);
 	disable_debug_monitors(DBG_ACTIVE_EL1);
 }
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index 12e8d2b..753bef2 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -87,6 +87,15 @@
 	stp	x26, x27, [sp, #16 * 13]
 	stp	x28, x29, [sp, #16 * 14]

+	.if \el == 1
+	mrs     x19, mdscr_el1
+	and     x20, x19, #1
+	str     x20, [sp, #S_FLAGS]
+	tbz     x20, #0, 1f
+	disable_step x19
+1:
+	.endif
+
 	.if	\el == 0
 	mrs	x21, sp_el0
 	mov	tsk, sp
@@ -154,6 +163,15 @@ alternative_endif
 	.endif
 	msr	elr_el1, x21			// set up the return data
 	msr	spsr_el1, x22
+
+	.if	\el == 1
+	ldr	x19, [sp, #S_FLAGS]
+	tbz	x19, #0, 1f
+	mrs	x19, mdscr_el1
+	enable_step	x19
+1:
+	.endif
+
 	ldp	x0, x1, [sp, #16 * 0]
 	ldp	x2, x3, [sp, #16 * 1]
 	ldp	x4, x5, [sp, #16 * 2]
diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c
index b45c95d..6afbb80 100644
--- a/arch/arm64/kernel/hw_breakpoint.c
+++ b/arch/arm64/kernel/hw_breakpoint.c
@@ -802,7 +802,7 @@ int reinstall_suspended_bps(struct pt_regs *regs)
 			toggle_bp_registers(AARCH64_DBG_REG_WCR, DBG_ACTIVE_EL0, 1);

 		if (*kernel_step != ARM_KERNEL_STEP_SUSPEND) {
-			kernel_disable_single_step();
+			kernel_disable_single_step(regs);
 			handled_exception = 1;
 		} else {
 			handled_exception = 0;
diff --git a/arch/arm64/kernel/kgdb.c b/arch/arm64/kernel/kgdb.c
index b67531a..f92c5fb 100644
--- a/arch/arm64/kernel/kgdb.c
+++ b/arch/arm64/kernel/kgdb.c
@@ -183,7 +183,7 @@ int kgdb_arch_handle_exception(int exception_vector, int signo,
 		 * Received continue command, disable single step
 		 */
 		if (kernel_active_single_step())
-			kernel_disable_single_step();
+			kernel_disable_single_step(linux_regs);

 		err = 0;
 		break;
-- 
1.7.1

> 
> [1] http://www.spinics.net/lists/arm-kernel/msg491844.html
> 
> .
> 

^ permalink raw reply related	[flat|nested] 24+ messages in thread

* [PATCH 2/2] arm64: Fix watchpoint recursion when single-step is wrongly triggered in irq
@ 2016-04-07 11:34           ` Li Bin
  0 siblings, 0 replies; 24+ messages in thread
From: Li Bin @ 2016-04-07 11:34 UTC (permalink / raw)
  To: linux-arm-kernel

Hi Pratyush,

on 2016/4/4 13:17, Pratyush Anand wrote:
> Hi Li,
> 
> On 31/03/2016:08:45:05 PM, Li Bin wrote:
>> Hi Pratyush,
>>
>> on 2016/3/21 18:24, Pratyush Anand wrote:
>>> On 21/03/2016:08:37:50 AM, He Kuang wrote:
>>>> On arm64, watchpoint handler enables single-step to bypass the next
>>>> instruction for not recursive enter. If an irq is triggered right
>>>> after the watchpoint, a single-step will be wrongly triggered in irq
>>>> handler, which causes the watchpoint address not stepped over and
>>>> system hang.
>>>
>>> Does patch [1] resolves this issue as well? I hope it should. Patch[1] has still
>>> not been sent for review. Your test result will be helpful.
>>>
>>> ~Pratyush
>>>
>>> [1] https://github.com/pratyushanand/linux/commit/7623c8099ac22eaa00e7e0f52430f7a4bd154652
>>
>> This patch did not consider that, when excetpion return, the singlestep flag
>> should be restored, otherwise the right singlestep will not triggered.
>> Right?
> 
> Yes, you are right, and there are other problems as well. Will Deacon pointed
> out [1] that kernel debugging is per-cpu rather than per-task. So, I did thought
> of a per-cpu implementation by introducing a new element "flags" in struct
> pt_regs. But even with that I see issues. For example:
> - While executing single step instruction, we get a data abort
> - In the kernel_entry of data abort we disable single stepping based on "flags"
>   bit field
> - While handling data abort we receive anther interrupt, so we are again in
>   kernel_entry (for el1_irq). Single stepping will be disabled again (although
>   it does not matter).
> 
> Now the issue is that, what condition should be verified in kernel_exit for
> enabling single step again? In the above scenario, kernel_exit for el1_irq
> should not enable single stepping, but how to prevent that elegantly?

The condition for kernel_entry to disable single step is that MDSCR_EL1.SS
has been set. Only the kernel_exit corresponding to the kernel_entry that disabled
single step should re-enable it. However, the kernel_exit of the single-step exception
must be handled specially: when single step is disabled in the single-step exception
handler, the flag in the pt_regs stored on the stack should be cleared, to prevent
single step from being re-enabled by kernel_exit.

Thanks,
Li Bin

---
 arch/arm64/include/asm/assembler.h      |   11 +++++++++++
 arch/arm64/include/asm/debug-monitors.h |    2 +-
 arch/arm64/include/asm/ptrace.h         |    2 ++
 arch/arm64/kernel/asm-offsets.c         |    1 +
 arch/arm64/kernel/debug-monitors.c      |    3 ++-
 arch/arm64/kernel/entry.S               |   18 ++++++++++++++++++
 arch/arm64/kernel/hw_breakpoint.c       |    2 +-
 arch/arm64/kernel/kgdb.c                |    2 +-
 8 files changed, 37 insertions(+), 4 deletions(-)

diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
index 70f7b9e..56a4335 100644
--- a/arch/arm64/include/asm/assembler.h
+++ b/arch/arm64/include/asm/assembler.h
@@ -60,6 +60,17 @@
 	msr	daifclr, #8
 	.endm

+	.macro	disable_step, orig
+	bic \orig, \orig, #1
+	msr	mdscr_el1, \orig
+	.endm
+
+	.macro	enable_step, orig
+	disable_dbg
+	orr \orig, \orig, #1
+	msr	mdscr_el1, \orig
+	.endm
+
 	.macro	disable_step_tsk, flgs, tmp
 	tbz	\flgs, #TIF_SINGLESTEP, 9990f
 	mrs	\tmp, mdscr_el1
diff --git a/arch/arm64/include/asm/debug-monitors.h b/arch/arm64/include/asm/debug-monitors.h
index 2fcb9b7..50f114c 100644
--- a/arch/arm64/include/asm/debug-monitors.h
+++ b/arch/arm64/include/asm/debug-monitors.h
@@ -115,7 +115,7 @@ void user_rewind_single_step(struct task_struct *task);
 void user_fastforward_single_step(struct task_struct *task);

 void kernel_enable_single_step(struct pt_regs *regs);
-void kernel_disable_single_step(void);
+void kernel_disable_single_step(struct pt_regs *regs);
 int kernel_active_single_step(void);

 #ifdef CONFIG_HAVE_HW_BREAKPOINT
diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h
index a307eb6..da00cd8 100644
--- a/arch/arm64/include/asm/ptrace.h
+++ b/arch/arm64/include/asm/ptrace.h
@@ -113,6 +113,8 @@ struct pt_regs {
 			u64 sp;
 			u64 pc;
 			u64 pstate;
+			u64 flags;
+			u64 padding;
 		};
 	};
 	u64 orig_x0;
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index 3ae6b31..7936ba9 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -56,6 +56,7 @@ int main(void)
   DEFINE(S_COMPAT_SP,		offsetof(struct pt_regs, compat_sp));
 #endif
   DEFINE(S_PSTATE,		offsetof(struct pt_regs, pstate));
+  DEFINE(S_FLAGS,		offsetof(struct pt_regs, flags));
   DEFINE(S_PC,			offsetof(struct pt_regs, pc));
   DEFINE(S_ORIG_X0,		offsetof(struct pt_regs, orig_x0));
   DEFINE(S_SYSCALLNO,		offsetof(struct pt_regs, syscallno));
diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c
index c45f296..9d2bdbf 100644
--- a/arch/arm64/kernel/debug-monitors.c
+++ b/arch/arm64/kernel/debug-monitors.c
@@ -403,9 +403,10 @@ void kernel_enable_single_step(struct pt_regs *regs)
 	enable_debug_monitors(DBG_ACTIVE_EL1);
 }

-void kernel_disable_single_step(void)
+void kernel_disable_single_step(struct pt_regs *regs)
 {
 	WARN_ON(!irqs_disabled());
+	regs->flags = 0;
 	mdscr_write(mdscr_read() & ~DBG_MDSCR_SS);
 	disable_debug_monitors(DBG_ACTIVE_EL1);
 }
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index 12e8d2b..753bef2 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -87,6 +87,15 @@
 	stp	x26, x27, [sp, #16 * 13]
 	stp	x28, x29, [sp, #16 * 14]

+	.if \el == 1
+	mrs     x19, mdscr_el1
+	and     x20, x19, #1
+	str     x20, [sp, #S_FLAGS]
+	tbz     x20, #0, 1f
+	disable_step x19
+1:
+	.endif
+
 	.if	\el == 0
 	mrs	x21, sp_el0
 	mov	tsk, sp
@@ -154,6 +163,15 @@ alternative_endif
 	.endif
 	msr	elr_el1, x21			// set up the return data
 	msr	spsr_el1, x22
+
+	.if	\el == 1
+	ldr	x19, [sp, #S_FLAGS]
+	tbz	x19, #0, 1f
+	mrs	x19, mdscr_el1
+	enable_step	x19
+1:
+	.endif
+
 	ldp	x0, x1, [sp, #16 * 0]
 	ldp	x2, x3, [sp, #16 * 1]
 	ldp	x4, x5, [sp, #16 * 2]
diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c
index b45c95d..6afbb80 100644
--- a/arch/arm64/kernel/hw_breakpoint.c
+++ b/arch/arm64/kernel/hw_breakpoint.c
@@ -802,7 +802,7 @@ int reinstall_suspended_bps(struct pt_regs *regs)
 			toggle_bp_registers(AARCH64_DBG_REG_WCR, DBG_ACTIVE_EL0, 1);

 		if (*kernel_step != ARM_KERNEL_STEP_SUSPEND) {
-			kernel_disable_single_step();
+			kernel_disable_single_step(regs);
 			handled_exception = 1;
 		} else {
 			handled_exception = 0;
diff --git a/arch/arm64/kernel/kgdb.c b/arch/arm64/kernel/kgdb.c
index b67531a..f92c5fb 100644
--- a/arch/arm64/kernel/kgdb.c
+++ b/arch/arm64/kernel/kgdb.c
@@ -183,7 +183,7 @@ int kgdb_arch_handle_exception(int exception_vector, int signo,
 		 * Received continue command, disable single step
 		 */
 		if (kernel_active_single_step())
-			kernel_disable_single_step();
+			kernel_disable_single_step(linux_regs);

 		err = 0;
 		break;
-- 
1.7.1

> 
> [1] http://www.spinics.net/lists/arm-kernel/msg491844.html
> 
> .
> 

^ permalink raw reply related	[flat|nested] 24+ messages in thread

* Re: [PATCH 2/2] arm64: Fix watchpoint recursion when single-step is wrongly triggered in irq
  2016-04-07 11:34           ` Li Bin
@ 2016-04-08  5:14             ` Pratyush Anand
  -1 siblings, 0 replies; 24+ messages in thread
From: Pratyush Anand @ 2016-04-08  5:14 UTC (permalink / raw)
  To: Li Bin
  Cc: He Kuang, mark.rutland, yang.shi, wangnan0, marc.zyngier,
	catalin.marinas, will.deacon, linux-kernel, richard, james.morse,
	hanjun.guo, gregkh, Dave.Martin, linux-arm-kernel, Hanjun Guo,
	Ding Tianhong

Hi Li,

On 07/04/2016:07:34:37 PM, Li Bin wrote:
> Hi Pratyush,
> 
> on 2016/4/4 13:17, Pratyush Anand wrote:
> > Hi Li,
> > 
> > On 31/03/2016:08:45:05 PM, Li Bin wrote:
> >> Hi Pratyush,
> >>
> >> on 2016/3/21 18:24, Pratyush Anand wrote:
> >>> On 21/03/2016:08:37:50 AM, He Kuang wrote:
> >>>> On arm64, watchpoint handler enables single-step to bypass the next
> >>>> instruction for not recursive enter. If an irq is triggered right
> >>>> after the watchpoint, a single-step will be wrongly triggered in irq
> >>>> handler, which causes the watchpoint address not stepped over and
> >>>> system hang.
> >>>
> >>> Does patch [1] resolves this issue as well? I hope it should. Patch[1] has still
> >>> not been sent for review. Your test result will be helpful.
> >>>
> >>> ~Pratyush
> >>>
> >>> [1] https://github.com/pratyushanand/linux/commit/7623c8099ac22eaa00e7e0f52430f7a4bd154652
> >>
> >> This patch did not consider that, when excetpion return, the singlestep flag
> >> should be restored, otherwise the right singlestep will not triggered.
> >> Right?
> > 
> > Yes, you are right, and there are other problems as well. Will Deacon pointed
> > out [1] that kernel debugging is per-cpu rather than per-task. So, I did thought
> > of a per-cpu implementation by introducing a new element "flags" in struct
> > pt_regs. But even with that I see issues. For example:
> > - While executing single step instruction, we get a data abort
> > - In the kernel_entry of data abort we disable single stepping based on "flags"
> >   bit field
> > - While handling data abort we receive anther interrupt, so we are again in
> >   kernel_entry (for el1_irq). Single stepping will be disabled again (although
> >   it does not matter).
> > 
> > Now the issue is that, what condition should be verified in kernel_exit for
> > enabling single step again? In the above scenario, kernel_exit for el1_irq
> > should not enable single stepping, but how to prevent that elegantly?
> 
> The condition for kernel_entry to disable the single step is that MDSCR_EL1.SS
> has been set. And only when the corresponding kernel_entry has disabled the single
> step, the kernel_exit should enable it, but the kernel_exit of single-step exception
> should be handled specially, that when disable single step in single-step exception
> handler, flag of pt_regs stored in stack should be cleard to prevent to be re-enabled
> by kernel_exit.

Nice, :-)
I had later worked on an almost similar patch [1], but it did fail when I merged two of
the tests.
-- I inserted kprobe to an instruction in function __copy_to_user() which could
generate data abort.
-- In parallel I also ran the test case which is defined here [2]
-- As soon as I did `cat /proc/version`, kernel crashed.

Although, just by comparing visually I do not see any functional difference with
the patch you inlined below, still can you please try both of the above test
cases in parallel and see how they behave? To insert a kprobe within
__copy_to_user() you need to revert "arm64: add copy_to/from_user to kprobes
blacklist".

[1] https://github.com/pratyushanand/linux/commit/9182afbe640ea7caf52e209eb8e5f00bdf91b0c0
[2] http://thread.gmane.org/gmane.linux.kernel/2167918
> 
> Thanks,
> Li Bin
> 
> ---
>  arch/arm64/include/asm/assembler.h      |   11 +++++++++++
>  arch/arm64/include/asm/debug-monitors.h |    2 +-
>  arch/arm64/include/asm/ptrace.h         |    2 ++
>  arch/arm64/kernel/asm-offsets.c         |    1 +
>  arch/arm64/kernel/debug-monitors.c      |    3 ++-
>  arch/arm64/kernel/entry.S               |   18 ++++++++++++++++++
>  arch/arm64/kernel/hw_breakpoint.c       |    2 +-
>  arch/arm64/kernel/kgdb.c                |    2 +-
>  8 files changed, 37 insertions(+), 4 deletions(-)
> 
> diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
> index 70f7b9e..56a4335 100644
> --- a/arch/arm64/include/asm/assembler.h
> +++ b/arch/arm64/include/asm/assembler.h
> @@ -60,6 +60,17 @@
>  	msr	daifclr, #8
>  	.endm
> 
> +	.macro	disable_step, orig
> +	bic \orig, \orig, #1
> +	msr	mdscr_el1, \orig
> +	.endm
> +
> +	.macro	enable_step, orig
> +	disable_dbg
> +	orr \orig, \orig, #1
> +	msr	mdscr_el1, \orig
> +	.endm
> +
>  	.macro	disable_step_tsk, flgs, tmp
>  	tbz	\flgs, #TIF_SINGLESTEP, 9990f
>  	mrs	\tmp, mdscr_el1
> diff --git a/arch/arm64/include/asm/debug-monitors.h b/arch/arm64/include/asm/debug-monitors.h
> index 2fcb9b7..50f114c 100644
> --- a/arch/arm64/include/asm/debug-monitors.h
> +++ b/arch/arm64/include/asm/debug-monitors.h
> @@ -115,7 +115,7 @@ void user_rewind_single_step(struct task_struct *task);
>  void user_fastforward_single_step(struct task_struct *task);
> 
>  void kernel_enable_single_step(struct pt_regs *regs);
> -void kernel_disable_single_step(void);
> +void kernel_disable_single_step(struct pt_regs *regs);
>  int kernel_active_single_step(void);
> 
>  #ifdef CONFIG_HAVE_HW_BREAKPOINT
> diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h
> index a307eb6..da00cd8 100644
> --- a/arch/arm64/include/asm/ptrace.h
> +++ b/arch/arm64/include/asm/ptrace.h
> @@ -113,6 +113,8 @@ struct pt_regs {
>  			u64 sp;
>  			u64 pc;
>  			u64 pstate;
> +			u64 flags;
> +			u64 padding;
>  		};
>  	};
>  	u64 orig_x0;
> diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
> index 3ae6b31..7936ba9 100644
> --- a/arch/arm64/kernel/asm-offsets.c
> +++ b/arch/arm64/kernel/asm-offsets.c
> @@ -56,6 +56,7 @@ int main(void)
>    DEFINE(S_COMPAT_SP,		offsetof(struct pt_regs, compat_sp));
>  #endif
>    DEFINE(S_PSTATE,		offsetof(struct pt_regs, pstate));
> +  DEFINE(S_FLAGS,		offsetof(struct pt_regs, flags));
>    DEFINE(S_PC,			offsetof(struct pt_regs, pc));
>    DEFINE(S_ORIG_X0,		offsetof(struct pt_regs, orig_x0));
>    DEFINE(S_SYSCALLNO,		offsetof(struct pt_regs, syscallno));
> diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c
> index c45f296..9d2bdbf 100644
> --- a/arch/arm64/kernel/debug-monitors.c
> +++ b/arch/arm64/kernel/debug-monitors.c
> @@ -403,9 +403,10 @@ void kernel_enable_single_step(struct pt_regs *regs)
>  	enable_debug_monitors(DBG_ACTIVE_EL1);
>  }
> 
> -void kernel_disable_single_step(void)
> +void kernel_disable_single_step(struct pt_regs *regs)
>  {
>  	WARN_ON(!irqs_disabled());
> +	regs->flags = 0;
>  	mdscr_write(mdscr_read() & ~DBG_MDSCR_SS);
>  	disable_debug_monitors(DBG_ACTIVE_EL1);
>  }
> diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
> index 12e8d2b..753bef2 100644
> --- a/arch/arm64/kernel/entry.S
> +++ b/arch/arm64/kernel/entry.S
> @@ -87,6 +87,15 @@
>  	stp	x26, x27, [sp, #16 * 13]
>  	stp	x28, x29, [sp, #16 * 14]
> 
> +	.if \el == 1
> +	mrs     x19, mdscr_el1
> +	and     x20, x19, #1
> +	str     x20, [sp, #S_FLAGS]
> +	tbz     x20, #0, 1f
> +	disable_step x19
> +1:
> +	.endif
> +
>  	.if	\el == 0
>  	mrs	x21, sp_el0
>  	mov	tsk, sp
> @@ -154,6 +163,15 @@ alternative_endif
>  	.endif
>  	msr	elr_el1, x21			// set up the return data
>  	msr	spsr_el1, x22
> +
> +	.if	\el == 1
> +	ldr	x19, [sp, #S_FLAGS]
> +	tbz	x19, #0, 1f
> +	mrs	x19, mdscr_el1
> +	enable_step	x19
> +1:
> +	.endif
> +
>  	ldp	x0, x1, [sp, #16 * 0]
>  	ldp	x2, x3, [sp, #16 * 1]
>  	ldp	x4, x5, [sp, #16 * 2]
> diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c
> index b45c95d..6afbb80 100644
> --- a/arch/arm64/kernel/hw_breakpoint.c
> +++ b/arch/arm64/kernel/hw_breakpoint.c
> @@ -802,7 +802,7 @@ int reinstall_suspended_bps(struct pt_regs *regs)
>  			toggle_bp_registers(AARCH64_DBG_REG_WCR, DBG_ACTIVE_EL0, 1);
> 
>  		if (*kernel_step != ARM_KERNEL_STEP_SUSPEND) {
> -			kernel_disable_single_step();
> +			kernel_disable_single_step(regs);
>  			handled_exception = 1;
>  		} else {
>  			handled_exception = 0;
> diff --git a/arch/arm64/kernel/kgdb.c b/arch/arm64/kernel/kgdb.c
> index b67531a..f92c5fb 100644
> --- a/arch/arm64/kernel/kgdb.c
> +++ b/arch/arm64/kernel/kgdb.c
> @@ -183,7 +183,7 @@ int kgdb_arch_handle_exception(int exception_vector, int signo,
>  		 * Received continue command, disable single step
>  		 */
>  		if (kernel_active_single_step())
> -			kernel_disable_single_step();
> +			kernel_disable_single_step(linux_regs);
> 
>  		err = 0;
>  		break;
> -- 
> 1.7.1
> 
> > 
> > [1] http://www.spinics.net/lists/arm-kernel/msg491844.html
> > 
> > .
> > 

^ permalink raw reply	[flat|nested] 24+ messages in thread

* [PATCH 2/2] arm64: Fix watchpoint recursion when single-step is wrongly triggered in irq
@ 2016-04-08  5:14             ` Pratyush Anand
  0 siblings, 0 replies; 24+ messages in thread
From: Pratyush Anand @ 2016-04-08  5:14 UTC (permalink / raw)
  To: linux-arm-kernel

Hi Li,

On 07/04/2016:07:34:37 PM, Li Bin wrote:
> Hi Pratyush,
> 
> on 2016/4/4 13:17, Pratyush Anand wrote:
> > Hi Li,
> > 
> > On 31/03/2016:08:45:05 PM, Li Bin wrote:
> >> Hi Pratyush,
> >>
> >> on 2016/3/21 18:24, Pratyush Anand wrote:
> >>> On 21/03/2016:08:37:50 AM, He Kuang wrote:
> >>>> On arm64, watchpoint handler enables single-step to bypass the next
> >>>> instruction for not recursive enter. If an irq is triggered right
> >>>> after the watchpoint, a single-step will be wrongly triggered in irq
> >>>> handler, which causes the watchpoint address not stepped over and
> >>>> system hang.
> >>>
> >>> Does patch [1] resolves this issue as well? I hope it should. Patch[1] has still
> >>> not been sent for review. Your test result will be helpful.
> >>>
> >>> ~Pratyush
> >>>
> >>> [1] https://github.com/pratyushanand/linux/commit/7623c8099ac22eaa00e7e0f52430f7a4bd154652
> >>
> >> This patch did not consider that, when excetpion return, the singlestep flag
> >> should be restored, otherwise the right singlestep will not triggered.
> >> Right?
> > 
> > Yes, you are right, and there are other problems as well. Will Deacon pointed
> > out [1] that kernel debugging is per-cpu rather than per-task. So, I did thought
> > of a per-cpu implementation by introducing a new element "flags" in struct
> > pt_regs. But even with that I see issues. For example:
> > - While executing single step instruction, we get a data abort
> > - In the kernel_entry of data abort we disable single stepping based on "flags"
> >   bit field
> > - While handling data abort we receive anther interrupt, so we are again in
> >   kernel_entry (for el1_irq). Single stepping will be disabled again (although
> >   it does not matter).
> > 
> > Now the issue is that, what condition should be verified in kernel_exit for
> > enabling single step again? In the above scenario, kernel_exit for el1_irq
> > should not enable single stepping, but how to prevent that elegantly?
> 
> The condition for kernel_entry to disable the single step is that MDSCR_EL1.SS
> has been set. And only when the corresponding kernel_entry has disabled the single
> step, the kernel_exit should enable it, but the kernel_exit of single-step exception
> should be handled specially, that when disable single step in single-step exception
> handler, flag of pt_regs stored in stack should be cleard to prevent to be re-enabled
> by kernel_exit.

Nice, :-)
I had later worked on an almost similar patch [1], but it did fail when I merged two of
the tests.
-- I inserted kprobe to an instruction in function __copy_to_user() which could
generate data abort.
-- In parallel I also ran the test case which is defined here [2]
-- As soon as I did `cat /proc/version`, kernel crashed.

Although, just by comparing visually I do not see any functional difference with
the patch you inlined below, still can you please try both of the above test
cases in parallel and see how they behave? To insert a kprobe within
__copy_to_user() you need to revert "arm64: add copy_to/from_user to kprobes
blacklist".

[1] https://github.com/pratyushanand/linux/commit/9182afbe640ea7caf52e209eb8e5f00bdf91b0c0
[2] http://thread.gmane.org/gmane.linux.kernel/2167918
> 
> Thanks,
> Li Bin
> 
> ---
>  arch/arm64/include/asm/assembler.h      |   11 +++++++++++
>  arch/arm64/include/asm/debug-monitors.h |    2 +-
>  arch/arm64/include/asm/ptrace.h         |    2 ++
>  arch/arm64/kernel/asm-offsets.c         |    1 +
>  arch/arm64/kernel/debug-monitors.c      |    3 ++-
>  arch/arm64/kernel/entry.S               |   18 ++++++++++++++++++
>  arch/arm64/kernel/hw_breakpoint.c       |    2 +-
>  arch/arm64/kernel/kgdb.c                |    2 +-
>  8 files changed, 37 insertions(+), 4 deletions(-)
> 
> diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
> index 70f7b9e..56a4335 100644
> --- a/arch/arm64/include/asm/assembler.h
> +++ b/arch/arm64/include/asm/assembler.h
> @@ -60,6 +60,17 @@
>  	msr	daifclr, #8
>  	.endm
> 
> +	.macro	disable_step, orig
> +	bic \orig, \orig, #1
> +	msr	mdscr_el1, \orig
> +	.endm
> +
> +	.macro	enable_step, orig
> +	disable_dbg
> +	orr \orig, \orig, #1
> +	msr	mdscr_el1, \orig
> +	.endm
> +
>  	.macro	disable_step_tsk, flgs, tmp
>  	tbz	\flgs, #TIF_SINGLESTEP, 9990f
>  	mrs	\tmp, mdscr_el1
> diff --git a/arch/arm64/include/asm/debug-monitors.h b/arch/arm64/include/asm/debug-monitors.h
> index 2fcb9b7..50f114c 100644
> --- a/arch/arm64/include/asm/debug-monitors.h
> +++ b/arch/arm64/include/asm/debug-monitors.h
> @@ -115,7 +115,7 @@ void user_rewind_single_step(struct task_struct *task);
>  void user_fastforward_single_step(struct task_struct *task);
> 
>  void kernel_enable_single_step(struct pt_regs *regs);
> -void kernel_disable_single_step(void);
> +void kernel_disable_single_step(struct pt_regs *regs);
>  int kernel_active_single_step(void);
> 
>  #ifdef CONFIG_HAVE_HW_BREAKPOINT
> diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h
> index a307eb6..da00cd8 100644
> --- a/arch/arm64/include/asm/ptrace.h
> +++ b/arch/arm64/include/asm/ptrace.h
> @@ -113,6 +113,8 @@ struct pt_regs {
>  			u64 sp;
>  			u64 pc;
>  			u64 pstate;
> +			u64 flags;
> +			u64 padding;
>  		};
>  	};
>  	u64 orig_x0;
> diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
> index 3ae6b31..7936ba9 100644
> --- a/arch/arm64/kernel/asm-offsets.c
> +++ b/arch/arm64/kernel/asm-offsets.c
> @@ -56,6 +56,7 @@ int main(void)
>    DEFINE(S_COMPAT_SP,		offsetof(struct pt_regs, compat_sp));
>  #endif
>    DEFINE(S_PSTATE,		offsetof(struct pt_regs, pstate));
> +  DEFINE(S_FLAGS,		offsetof(struct pt_regs, flags));
>    DEFINE(S_PC,			offsetof(struct pt_regs, pc));
>    DEFINE(S_ORIG_X0,		offsetof(struct pt_regs, orig_x0));
>    DEFINE(S_SYSCALLNO,		offsetof(struct pt_regs, syscallno));
> diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c
> index c45f296..9d2bdbf 100644
> --- a/arch/arm64/kernel/debug-monitors.c
> +++ b/arch/arm64/kernel/debug-monitors.c
> @@ -403,9 +403,10 @@ void kernel_enable_single_step(struct pt_regs *regs)
>  	enable_debug_monitors(DBG_ACTIVE_EL1);
>  }
> 
> -void kernel_disable_single_step(void)
> +void kernel_disable_single_step(struct pt_regs *regs)
>  {
>  	WARN_ON(!irqs_disabled());
> +	regs->flags = 0;
>  	mdscr_write(mdscr_read() & ~DBG_MDSCR_SS);
>  	disable_debug_monitors(DBG_ACTIVE_EL1);
>  }
> diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
> index 12e8d2b..753bef2 100644
> --- a/arch/arm64/kernel/entry.S
> +++ b/arch/arm64/kernel/entry.S
> @@ -87,6 +87,15 @@
>  	stp	x26, x27, [sp, #16 * 13]
>  	stp	x28, x29, [sp, #16 * 14]
> 
> +	.if \el == 1
> +	mrs     x19, mdscr_el1
> +	and     x20, x19, #1
> +	str     x20, [sp, #S_FLAGS]
> +	tbz     x20, #0, 1f
> +	disable_step x19
> +1:
> +	.endif
> +
>  	.if	\el == 0
>  	mrs	x21, sp_el0
>  	mov	tsk, sp
> @@ -154,6 +163,15 @@ alternative_endif
>  	.endif
>  	msr	elr_el1, x21			// set up the return data
>  	msr	spsr_el1, x22
> +
> +	.if	\el == 1
> +	ldr	x19, [sp, #S_FLAGS]
> +	tbz	x19, #0, 1f
> +	mrs	x19, mdscr_el1
> +	enable_step	x19
> +1:
> +	.endif
> +
>  	ldp	x0, x1, [sp, #16 * 0]
>  	ldp	x2, x3, [sp, #16 * 1]
>  	ldp	x4, x5, [sp, #16 * 2]
> diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c
> index b45c95d..6afbb80 100644
> --- a/arch/arm64/kernel/hw_breakpoint.c
> +++ b/arch/arm64/kernel/hw_breakpoint.c
> @@ -802,7 +802,7 @@ int reinstall_suspended_bps(struct pt_regs *regs)
>  			toggle_bp_registers(AARCH64_DBG_REG_WCR, DBG_ACTIVE_EL0, 1);
> 
>  		if (*kernel_step != ARM_KERNEL_STEP_SUSPEND) {
> -			kernel_disable_single_step();
> +			kernel_disable_single_step(regs);
>  			handled_exception = 1;
>  		} else {
>  			handled_exception = 0;
> diff --git a/arch/arm64/kernel/kgdb.c b/arch/arm64/kernel/kgdb.c
> index b67531a..f92c5fb 100644
> --- a/arch/arm64/kernel/kgdb.c
> +++ b/arch/arm64/kernel/kgdb.c
> @@ -183,7 +183,7 @@ int kgdb_arch_handle_exception(int exception_vector, int signo,
>  		 * Received continue command, disable single step
>  		 */
>  		if (kernel_active_single_step())
> -			kernel_disable_single_step();
> +			kernel_disable_single_step(linux_regs);
> 
>  		err = 0;
>  		break;
> -- 
> 1.7.1
> 
> > 
> > [1] http://www.spinics.net/lists/arm-kernel/msg491844.html
> > 
> > .
> > 

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 2/2] arm64: Fix watchpoint recursion when single-step is wrongly triggered in irq
  2016-04-08  5:14             ` Pratyush Anand
@ 2016-04-08  8:07               ` Li Bin
  -1 siblings, 0 replies; 24+ messages in thread
From: Li Bin @ 2016-04-08  8:07 UTC (permalink / raw)
  To: Pratyush Anand
  Cc: He Kuang, mark.rutland, yang.shi, wangnan0, marc.zyngier,
	catalin.marinas, will.deacon, linux-kernel, richard, james.morse,
	hanjun.guo, gregkh, Dave.Martin, linux-arm-kernel, Hanjun Guo,
	Ding Tianhong, huawei.libin



on 2016/4/8 13:14, Pratyush Anand wrote:
> Hi Li,
> 
> On 07/04/2016:07:34:37 PM, Li Bin wrote:
>> Hi Pratyush,
>>
>> on 2016/4/4 13:17, Pratyush Anand wrote:
>>> Hi Li,
>>>
>>> On 31/03/2016:08:45:05 PM, Li Bin wrote:
>>>> Hi Pratyush,
>>>>
>>>> on 2016/3/21 18:24, Pratyush Anand wrote:
>>>>> On 21/03/2016:08:37:50 AM, He Kuang wrote:
>>>>>> On arm64, watchpoint handler enables single-step to bypass the next
>>>>>> instruction for not recursive enter. If an irq is triggered right
>>>>>> after the watchpoint, a single-step will be wrongly triggered in irq
>>>>>> handler, which causes the watchpoint address not stepped over and
>>>>>> system hang.
>>>>>
>>>>> Does patch [1] resolves this issue as well? I hope it should. Patch[1] has still
>>>>> not been sent for review. Your test result will be helpful.
>>>>>
>>>>> ~Pratyush
>>>>>
>>>>> [1] https://github.com/pratyushanand/linux/commit/7623c8099ac22eaa00e7e0f52430f7a4bd154652
>>>>
>>>> This patch did not consider that, when excetpion return, the singlestep flag
>>>> should be restored, otherwise the right singlestep will not triggered.
>>>> Right?
>>>
>>> Yes, you are right, and there are other problems as well. Will Deacon pointed
>>> out [1] that kernel debugging is per-cpu rather than per-task. So, I did thought
>>> of a per-cpu implementation by introducing a new element "flags" in struct
>>> pt_regs. But even with that I see issues. For example:
>>> - While executing single step instruction, we get a data abort
>>> - In the kernel_entry of data abort we disable single stepping based on "flags"
>>>   bit field
>>> - While handling data abort we receive anther interrupt, so we are again in
>>>   kernel_entry (for el1_irq). Single stepping will be disabled again (although
>>>   it does not matter).
>>>
>>> Now the issue is that, what condition should be verified in kernel_exit for
>>> enabling single step again? In the above scenario, kernel_exit for el1_irq
>>> should not enable single stepping, but how to prevent that elegantly?
>>
>> The condition for kernel_entry to disable the single step is that MDSCR_EL1.SS
>> has been set. And only when the corresponding kernel_entry has disabled the single
>> step, the kernel_exit should enable it, but the kernel_exit of single-step exception
>> should be handled specially, that when disable single step in single-step exception
>> handler, flag of pt_regs stored in stack should be cleard to prevent to be re-enabled
>> by kernel_exit.
> 
> Nice, :-)
> I had latter on almost similar patch [1], but it did fail when I merged two of
> the tests.
> -- I inserted kprobe to an instruction in function __copy_to_user() which could
> generate data abort.
> -- In parallel I also run test case which is defined here [2]
> -- As soon as I did `cat /proc/version`, kernel crashed.

Hi Pratyush,

Firstly, I have tested this case, and it does not trigger the failure you are describing.
But it may indeed trigger a problem, and it is another issue: if an exception
triggered before the single-step exception changes the regs->pc (a data abort exception will
run fixup_exception), the current implementation of kprobes does not support that, for example:
1. kprobes brk exception setup single-step, regs->pc points to the slot, MDSCR.SS=1,
SPSR_EL1.SS=1 (Inactive state)
2. brk exception eret (Active-not-pending state)
3. execute the slot instruction and trigger data abort exception, and this case the
return addr is also the slot instruction, so the SPSR_EL1.SS is set to 1 (Inactive state)
4. but in the data abort exception, fixup_exception will change the regs->pc to the fixup
code
5. data abort exception eret, going into Active-not-pending state, executing fixup code
without taking an exception, going into Active-pending state, triggering single-step
exception. But the single-stepped instruction is not the target instruction, so the kprobe fails.

And so this case, including copy_to/from_user, should be added to the kprobes blacklist.
Right, or am I missing something?

Thanks,
Li Bin

> 
> Although, just by comparing visually I do not see any functional difference with
> the patch you inlined below, still can you please try both of the above test
> case in parallel and see how does it behave? To insert kprobe within
> __copy_to_user() you need to revert "arm64: add copy_to/from_user to kprobes
> blacklist".
> 
> [1] https://github.com/pratyushanand/linux/commit/9182afbe640ea7caf52e209eb8e5f00bdf91b0c0
> [2] http://thread.gmane.org/gmane.linux.kernel/2167918
>>
>> Thanks,
>> Li Bin
>>

^ permalink raw reply	[flat|nested] 24+ messages in thread

* [PATCH 2/2] arm64: Fix watchpoint recursion when single-step is wrongly triggered in irq
@ 2016-04-08  8:07               ` Li Bin
  0 siblings, 0 replies; 24+ messages in thread
From: Li Bin @ 2016-04-08  8:07 UTC (permalink / raw)
  To: linux-arm-kernel



on 2016/4/8 13:14, Pratyush Anand wrote:
> Hi Li,
> 
> On 07/04/2016:07:34:37 PM, Li Bin wrote:
>> Hi Pratyush,
>>
>> on 2016/4/4 13:17, Pratyush Anand wrote:
>>> Hi Li,
>>>
>>> On 31/03/2016:08:45:05 PM, Li Bin wrote:
>>>> Hi Pratyush,
>>>>
>>>> on 2016/3/21 18:24, Pratyush Anand wrote:
>>>>> On 21/03/2016:08:37:50 AM, He Kuang wrote:
>>>>>> On arm64, watchpoint handler enables single-step to bypass the next
>>>>>> instruction for not recursive enter. If an irq is triggered right
>>>>>> after the watchpoint, a single-step will be wrongly triggered in irq
>>>>>> handler, which causes the watchpoint address not stepped over and
>>>>>> system hang.
>>>>>
>>>>> Does patch [1] resolves this issue as well? I hope it should. Patch[1] has still
>>>>> not been sent for review. Your test result will be helpful.
>>>>>
>>>>> ~Pratyush
>>>>>
>>>>> [1] https://github.com/pratyushanand/linux/commit/7623c8099ac22eaa00e7e0f52430f7a4bd154652
>>>>
>>>> This patch did not consider that, when excetpion return, the singlestep flag
>>>> should be restored, otherwise the right singlestep will not triggered.
>>>> Right?
>>>
>>> Yes, you are right, and there are other problems as well. Will Deacon pointed
>>> out [1] that kernel debugging is per-cpu rather than per-task. So, I did thought
>>> of a per-cpu implementation by introducing a new element "flags" in struct
>>> pt_regs. But even with that I see issues. For example:
>>> - While executing single step instruction, we get a data abort
>>> - In the kernel_entry of data abort we disable single stepping based on "flags"
>>>   bit field
>>> - While handling data abort we receive anther interrupt, so we are again in
>>>   kernel_entry (for el1_irq). Single stepping will be disabled again (although
>>>   it does not matter).
>>>
>>> Now the issue is that, what condition should be verified in kernel_exit for
>>> enabling single step again? In the above scenario, kernel_exit for el1_irq
>>> should not enable single stepping, but how to prevent that elegantly?
>>
>> The condition for kernel_entry to disable the single step is that MDSCR_EL1.SS
>> has been set. And only when the corresponding kernel_entry has disabled the single
>> step, the kernel_exit should enable it, but the kernel_exit of single-step exception
>> should be handled specially, that when disable single step in single-step exception
>> handler, flag of pt_regs stored in stack should be cleard to prevent to be re-enabled
>> by kernel_exit.
> 
> Nice, :-)
> I had latter on almost similar patch [1], but it did fail when I merged two of
> the tests.
> -- I inserted kprobe to an instruction in function __copy_to_user() which could
> generate data abort.
> -- In parallel I also run test case which is defined here [2]
> -- As soon as I did `cat /proc/version`, kernel crashed.

Hi Pratyush,

Firstly, I have tested this case, and it does not trigger the failure you are describing.
But it may indeed trigger a problem, and it is another issue: if an exception
triggered before the single-step exception changes the regs->pc (a data abort exception will
run fixup_exception), the current implementation of kprobes does not support that, for example:
1. kprobes brk exception setup single-step, regs->pc points to the slot, MDSCR.SS=1,
SPSR_EL1.SS=1 (Inactive state)
2. brk exception eret (Active-not-pending state)
3. execute the slot instruction and trigger data abort exception, and this case the
return addr is also the slot instruction, so the SPSR_EL1.SS is set to 1 (Inactive state)
4. but in the data abort exception, fixup_exception will change the regs->pc to the fixup
code
5. data abort exception eret, going into Active-not-pending state, executing fixup code
without taking an exception, going into Active-pending state, triggering single-step
exception. But the single-stepped instruction is not the target instruction, so the kprobe fails.

And so this case, including copy_to/from_user, should be added to the kprobes blacklist.
Right, or am I missing something?

Thanks,
Li Bin

> 
> Although, just by comparing visually I do not see any functional difference with
> the patch you inlined below, still can you please try both of the above test
> case in parallel and see how does it behave? To insert kprobe within
> __copy_to_user() you need to revert "arm64: add copy_to/from_user to kprobes
> blacklist".
> 
> [1] https://github.com/pratyushanand/linux/commit/9182afbe640ea7caf52e209eb8e5f00bdf91b0c0
> [2] http://thread.gmane.org/gmane.linux.kernel/2167918
>>
>> Thanks,
>> Li Bin
>>

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 2/2] arm64: Fix watchpoint recursion when single-step is wrongly triggered in irq
  2016-04-08  8:07               ` Li Bin
@ 2016-04-08  8:58                 ` Pratyush Anand
  -1 siblings, 0 replies; 24+ messages in thread
From: Pratyush Anand @ 2016-04-08  8:58 UTC (permalink / raw)
  To: Li Bin
  Cc: He Kuang, mark.rutland, yang.shi, wangnan0, marc.zyngier,
	catalin.marinas, will.deacon, linux-kernel, richard, james.morse,
	hanjun.guo, gregkh, Dave.Martin, linux-arm-kernel, Hanjun Guo,
	Ding Tianhong

On 08/04/2016:04:07:12 PM, Li Bin wrote:
> 
> 
> on 2016/4/8 13:14, Pratyush Anand wrote:
> > Hi Li,
> > 
> > On 07/04/2016:07:34:37 PM, Li Bin wrote:
> >> Hi Pratyush,
> >>
> >> on 2016/4/4 13:17, Pratyush Anand wrote:
> >>> Hi Li,
> >>>
> >>> On 31/03/2016:08:45:05 PM, Li Bin wrote:
> >>>> Hi Pratyush,
> >>>>
> >>>> on 2016/3/21 18:24, Pratyush Anand wrote:
> >>>>> On 21/03/2016:08:37:50 AM, He Kuang wrote:
> >>>>>> On arm64, watchpoint handler enables single-step to bypass the next
> >>>>>> instruction for not recursive enter. If an irq is triggered right
> >>>>>> after the watchpoint, a single-step will be wrongly triggered in irq
> >>>>>> handler, which causes the watchpoint address not stepped over and
> >>>>>> system hang.
> >>>>>
> >>>>> Does patch [1] resolves this issue as well? I hope it should. Patch[1] has still
> >>>>> not been sent for review. Your test result will be helpful.
> >>>>>
> >>>>> ~Pratyush
> >>>>>
> >>>>> [1] https://github.com/pratyushanand/linux/commit/7623c8099ac22eaa00e7e0f52430f7a4bd154652
> >>>>
> >>>> This patch did not consider that, when excetpion return, the singlestep flag
> >>>> should be restored, otherwise the right singlestep will not triggered.
> >>>> Right?
> >>>
> >>> Yes, you are right, and there are other problems as well. Will Deacon pointed
> >>> out [1] that kernel debugging is per-cpu rather than per-task. So, I did thought
> >>> of a per-cpu implementation by introducing a new element "flags" in struct
> >>> pt_regs. But even with that I see issues. For example:
> >>> - While executing single step instruction, we get a data abort
> >>> - In the kernel_entry of data abort we disable single stepping based on "flags"
> >>>   bit field
> >>> - While handling data abort we receive anther interrupt, so we are again in
> >>>   kernel_entry (for el1_irq). Single stepping will be disabled again (although
> >>>   it does not matter).
> >>>
> >>> Now the issue is that, what condition should be verified in kernel_exit for
> >>> enabling single step again? In the above scenario, kernel_exit for el1_irq
> >>> should not enable single stepping, but how to prevent that elegantly?
> >>
> >> The condition for kernel_entry to disable the single step is that MDSCR_EL1.SS
> >> has been set. And only when the corresponding kernel_entry has disabled the single
> >> step, the kernel_exit should enable it, but the kernel_exit of single-step exception
> >> should be handled specially, that when disable single step in single-step exception
> >> handler, flag of pt_regs stored in stack should be cleard to prevent to be re-enabled
> >> by kernel_exit.
> > 
> > Nice, :-)
> > I had latter on almost similar patch [1], but it did fail when I merged two of
> > the tests.
> > -- I inserted kprobe to an instruction in function __copy_to_user() which could
> > generate data abort.
> > -- In parallel I also run test case which is defined here [2]
> > -- As soon as I did `cat /proc/version`, kernel crashed.
> 
> Hi Pratyush,
> 
> Firstly, I have test this case, and it does not trigger failture as you describing.
> But it indeed may trigger problem, and it is an another issue that if an exception
> triggered before single-step exception, changes the regs->pc (data abort exception will
> fixup_exception), the current implemetion of kprobes does not support, for example:

Yes, you are right, I missed it. All those aborts which have a fixup defined
will fail. However, I did not see any issue when running the test individually, ie
only hitting the kprobe at __copy_to_user() instructions, because there is no fixup
for them. I was able to trace the instruction which was aborting. The problem occurred
only when I ran the perf memory read to linux_proc_banner in parallel. Since I do
not see a failure due to fixup_exception in this test case, I think we are
missing some more pitfalls.

But certainly it is going to fail in the case  __get_user/__put_user etc are
being traced, because there exists a fixup section for them.

> 1. kprobes brk exception setup single-step, regs->pc points to the slot, MDSCR.SS=1,
> SPSR_EL1.SS=1 (Inactive state)
> 2. brk exception eret (Active-not-pending state)
> 3. execute the slot instruction and trigger data abort exception, and this case the
> return addr is also the slot instruction, so the SPSR_EL1.SS is set to 1 (Inactive state)
> 4. but in the data abort exception, fixup_exception will change the regs->pc to the fixup

Yes, for the instructions with fixup defined.

> code
> 5. data abort exception eret, going into Active-not-pending state, executing fixup code
> without taking an exception, going into Active-pending state, triggering single-step
> exception. But the single-step instruction is not the target instrution, so kprobe fails.
> 
> And so this case including copy_to/from_user should be added to kprobes blacklist.
> Right, or am i missing something?

As of now, we are be going with blacklisting approach only.

~Pratyush

^ permalink raw reply	[flat|nested] 24+ messages in thread

* [PATCH 2/2] arm64: Fix watchpoint recursion when single-step is wrongly triggered in irq
@ 2016-04-08  8:58                 ` Pratyush Anand
  0 siblings, 0 replies; 24+ messages in thread
From: Pratyush Anand @ 2016-04-08  8:58 UTC (permalink / raw)
  To: linux-arm-kernel

On 08/04/2016:04:07:12 PM, Li Bin wrote:
> 
> 
> on 2016/4/8 13:14, Pratyush Anand wrote:
> > Hi Li,
> > 
> > On 07/04/2016:07:34:37 PM, Li Bin wrote:
> >> Hi Pratyush,
> >>
> >> on 2016/4/4 13:17, Pratyush Anand wrote:
> >>> Hi Li,
> >>>
> >>> On 31/03/2016:08:45:05 PM, Li Bin wrote:
> >>>> Hi Pratyush,
> >>>>
> >>>> on 2016/3/21 18:24, Pratyush Anand wrote:
> >>>>> On 21/03/2016:08:37:50 AM, He Kuang wrote:
> >>>>>> On arm64, watchpoint handler enables single-step to bypass the next
> >>>>>> instruction for not recursive enter. If an irq is triggered right
> >>>>>> after the watchpoint, a single-step will be wrongly triggered in irq
> >>>>>> handler, which causes the watchpoint address not stepped over and
> >>>>>> system hang.
> >>>>>
> >>>>> Does patch [1] resolves this issue as well? I hope it should. Patch[1] has still
> >>>>> not been sent for review. Your test result will be helpful.
> >>>>>
> >>>>> ~Pratyush
> >>>>>
> >>>>> [1] https://github.com/pratyushanand/linux/commit/7623c8099ac22eaa00e7e0f52430f7a4bd154652
> >>>>
> >>>> This patch did not consider that, when the exception returns, the single-step flag
> >>>> should be restored, otherwise the right single-step will not be triggered.
> >>>> Right?
> >>>
> >>> Yes, you are right, and there are other problems as well. Will Deacon pointed
> >>> out [1] that kernel debugging is per-cpu rather than per-task. So, I did thought
> >>> of a per-cpu implementation by introducing a new element "flags" in struct
> >>> pt_regs. But even with that I see issues. For example:
> >>> - While executing single step instruction, we get a data abort
> >>> - In the kernel_entry of data abort we disable single stepping based on "flags"
> >>>   bit field
> >>> - While handling data abort we receive another interrupt, so we are again in
> >>>   kernel_entry (for el1_irq). Single stepping will be disabled again (although
> >>>   it does not matter).
> >>>
> >>> Now the issue is that, what condition should be verified in kernel_exit for
> >>> enabling single step again? In the above scenario, kernel_exit for el1_irq
> >>> should not enable single stepping, but how to prevent that elegantly?
> >>
> >> The condition for kernel_entry to disable the single step is that MDSCR_EL1.SS
> >> has been set. And only when the corresponding kernel_entry has disabled the single
> >> step, the kernel_exit should enable it, but the kernel_exit of single-step exception
> >> should be handled specially, that when disable single step in single-step exception
> >> handler, the flag of pt_regs stored in the stack should be cleared to prevent it from being re-enabled
> >> by kernel_exit.
> > 
> > Nice, :-)
> > I had latter on almost similar patch [1], but it did fail when I merged two of
> > the tests.
> > -- I inserted kprobe to an instruction in function __copy_to_user() which could
> > generate data abort.
> > -- In parallel I also run test case which is defined here [2]
> > -- As soon as I did `cat /proc/version`, kernel crashed.
> 
> Hi Pratyush,
> 
> Firstly, I have tested this case, and it does not trigger the failure you are describing.
> But it indeed may trigger problem, and it is an another issue that if an exception
> triggered before single-step exception, changes the regs->pc (data abort exception will
> fixup_exception), the current implementation of kprobes does not support, for example:

Yes, you are right, I missed it. All those aborts which have a fixup defined,
will fail. While, I did not see any issue when running test individually, ie
only hitting kprobe at __copy_to_user() instructions, because there is no fixup
for them. I was able to trace instruction which was aborting. Problem occurred
only when I run perf memory read to linux_proc_banner in parallel. Since I do
not see failure due to fixup_exception in this test case, so I think we are
missing some more pitfalls.

But certainly it is going to fail in the case  __get_user/__put_user etc are
being traced, because there exists a fixup section for them.

> 1. kprobes brk exception setup single-step, regs->pc points to the slot, MDSCR.SS=1,
> SPSR_EL1.SS=1 (Inactive state)
> 2. brk exception eret (Active-not-pending state)
> 3. execute the slot instruction and trigger data abort exception, and this case the
> return addr is also the slot instruction, so the SPSR_EL1.SS is set to 1 (Inactive state)
> 4. but in the data abort exception, fixup_exception will change the regs->pc to the fixup

Yes, for the instructions with fixup defined.

> code
> 5. data abort exception eret, going into Active-not-pending state, executing fixup code
> without taking an exception, going into Active-pending state, triggering single-step
> exception. But the single-step instruction is not the target instruction, so kprobe fails.
> 
> And so this case including copy_to/from_user should be added to kprobes blacklist.
> Right, or am i missing something?

As of now, we are going with the blacklisting approach only.

~Pratyush

^ permalink raw reply	[flat|nested] 24+ messages in thread

end of thread, other threads:[~2016-04-08  8:58 UTC | newest]

Thread overview: 24+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-03-21  8:37 [PATCH 1/2] arm64: Store breakpoint single step state into pstate He Kuang
2016-03-21  8:37 ` He Kuang
2016-03-21  8:37 ` [PATCH 2/2] arm64: Fix watchpoint recursion when single-step is wrongly triggered in irq He Kuang
2016-03-21  8:37   ` He Kuang
2016-03-21 10:24   ` Pratyush Anand
2016-03-21 10:24     ` Pratyush Anand
2016-03-21 10:38     ` Wangnan (F)
2016-03-21 10:38       ` Wangnan (F)
2016-03-21 11:05       ` Pratyush Anand
2016-03-21 11:05         ` Pratyush Anand
2016-03-31 12:45     ` Li Bin
2016-03-31 12:45       ` Li Bin
2016-04-04  5:17       ` Pratyush Anand
2016-04-04  5:17         ` Pratyush Anand
2016-04-07 11:34         ` Li Bin
2016-04-07 11:34           ` Li Bin
2016-04-08  5:14           ` Pratyush Anand
2016-04-08  5:14             ` Pratyush Anand
2016-04-08  8:07             ` Li Bin
2016-04-08  8:07               ` Li Bin
2016-04-08  8:58               ` Pratyush Anand
2016-04-08  8:58                 ` Pratyush Anand
2016-03-21 16:08 ` [PATCH 1/2] arm64: Store breakpoint single step state into pstate Will Deacon
2016-03-21 16:08   ` Will Deacon

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.