linuxppc-dev.lists.ozlabs.org archive mirror
From: Benjamin Gray <bgray@linux.ibm.com>
To: linuxppc-dev@lists.ozlabs.org
Cc: Benjamin Gray <bgray@linux.ibm.com>
Subject: [PATCH 5/7] powerpc/watchpoints: Remove ptrace/perf exclusion tracking
Date: Tue,  1 Aug 2023 11:17:42 +1000	[thread overview]
Message-ID: <20230801011744.153973-6-bgray@linux.ibm.com> (raw)
In-Reply-To: <20230801011744.153973-1-bgray@linux.ibm.com>

ptrace and perf watchpoints were considered incompatible in
commit 29da4f91c0c1 ("powerpc/watchpoint: Don't allow concurrent perf
and ptrace events"), but the logic in that commit doesn't really apply.

Ptrace doesn't automatically single step; the ptracer must request this
explicitly, and it can do so regardless of whether a ptrace or perf
watchpoint triggered: it could single step every
instruction if it wanted to. Whatever stopped the ptracee before
executing the instruction that would trigger the perf watchpoint is no
longer relevant by this point.
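For illustration, here's what that looks like from the tracer's side:
single stepping is an explicit ptrace(2) request, made whether or not
any watchpoint exists. A userspace sketch using the standard API
(trace_child_steps is a made-up helper name, not part of this patch):

```c
#include <signal.h>
#include <sys/ptrace.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>

/*
 * Sketch: fork a child, then single step it a few times purely at the
 * tracer's discretion. No watchpoint is involved anywhere.
 * Returns the number of steps performed (or -1 on fork failure).
 */
static int trace_child_steps(int max_steps)
{
	int status, steps = 0;
	pid_t pid = fork();

	if (pid == 0) {
		/* Child: opt in to tracing, then stop so the parent gains control. */
		ptrace(PTRACE_TRACEME, 0, NULL, NULL);
		raise(SIGSTOP);
		_exit(0);
	}
	if (pid < 0)
		return -1;

	waitpid(pid, &status, 0);	/* wait for the initial SIGSTOP */
	while (steps < max_steps && WIFSTOPPED(status)) {
		if (ptrace(PTRACE_SINGLESTEP, pid, NULL, NULL) == -1)
			break;		/* e.g. ptrace restricted in this environment */
		waitpid(pid, &status, 0);
		if (!WIFSTOPPED(status))
			break;		/* child exited */
		steps++;
	}
	if (!WIFEXITED(status)) {
		kill(pid, SIGKILL);	/* SIGKILL works even on a stopped child */
		waitpid(pid, &status, 0);
	}
	return steps;
}
```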

To get correct behaviour when perf and ptrace are watching the same
data we must ignore the perf watchpoint. After all, ptrace has
before-execute semantics, and perf is after-execute, so perf doesn't
actually care about the watchpoint trigger at this point in time.
Pausing before execution does not mean we will actually end up executing
the instruction.
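The resulting reporting rule can be sketched as pure logic (illustrative
only; the function and parameter names are made up, this is not the
kernel code):

```c
#include <stdbool.h>

/*
 * Sketch of the slot reporting rule: when the exception included a
 * ptrace watchpoint hit, perf-owned slots that also hit are ignored for
 * now. perf has after-execute semantics, so it has nothing to count
 * until the access actually completes; the ptrace stop happens before
 * execution, which doesn't guarantee the instruction ever runs.
 */
static bool report_slot_now(bool ptrace_hit_somewhere, bool slot_hit,
			    bool slot_is_ptrace)
{
	if (!slot_hit)
		return false;		/* nothing triggered in this slot */
	if (ptrace_hit_somewhere && !slot_is_ptrace)
		return false;		/* perf slot during a ptrace stop: defer */
	return true;
}
```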

Importantly though, we don't remove the perf watchpoint yet. This is
key.

The ptracer is free to do whatever it likes right now. E.g., it can
continue the process, single step, or even set the child PC somewhere
completely different.

If it does try to execute the instruction though, without reinserting
the watchpoint (in which case we go back to the start of this example),
the perf watchpoint would immediately trigger. This time there is no
ptrace watchpoint, so we can safely perform a single step and increment
the perf counter. Upon receiving the single step exception, the existing
code already handles propagating or consuming it based on whether
another subsystem (e.g. ptrace) requested a single step. Again, this is
needed with or without perf/ptrace exclusion, because ptrace could be
single stepping this instruction regardless of whether a watchpoint is
involved.
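That propagate-or-consume decision reduces to something like the
following (an illustrative simplification with made-up names, not the
actual handler):

```c
#include <stdbool.h>

/*
 * Sketch: on the single step exception, consume it only when the
 * breakpoint code alone requested the step (to re-arm the watchpoint
 * and count the perf event). If another subsystem such as ptrace also
 * asked for a step, propagate the exception so it sees the trap as
 * usual.
 */
static bool consume_step(bool perf_requested, bool others_requested)
{
	return perf_requested && !others_requested;
}
```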

Signed-off-by: Benjamin Gray <bgray@linux.ibm.com>

---

Whether it's a _good_ idea to mix ptrace and perf is another matter
entirely, mind you. But they are not inherently incompatible.
---
 arch/powerpc/kernel/hw_breakpoint.c | 249 +---------------------------
 1 file changed, 1 insertion(+), 248 deletions(-)

diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c
index bf8dda1a7e04..b8513dc3e53a 100644
--- a/arch/powerpc/kernel/hw_breakpoint.c
+++ b/arch/powerpc/kernel/hw_breakpoint.c
@@ -114,253 +114,6 @@ static bool is_ptrace_bp(struct perf_event *bp)
 	return bp->overflow_handler == ptrace_triggered;
 }
 
-struct breakpoint {
-	struct list_head list;
-	struct perf_event *bp;
-	bool ptrace_bp;
-};
-
-/*
- * While kernel/events/hw_breakpoint.c does its own synchronization, we cannot
- * rely on it safely synchronizing internals here; however, we can rely on it
- * not requesting more breakpoints than available.
- */
-static DEFINE_SPINLOCK(cpu_bps_lock);
-static DEFINE_PER_CPU(struct breakpoint *, cpu_bps[HBP_NUM_MAX]);
-static DEFINE_SPINLOCK(task_bps_lock);
-static LIST_HEAD(task_bps);
-
-static struct breakpoint *alloc_breakpoint(struct perf_event *bp)
-{
-	struct breakpoint *tmp;
-
-	tmp = kzalloc(sizeof(*tmp), GFP_KERNEL);
-	if (!tmp)
-		return ERR_PTR(-ENOMEM);
-	tmp->bp = bp;
-	tmp->ptrace_bp = is_ptrace_bp(bp);
-	return tmp;
-}
-
-static bool bp_addr_range_overlap(struct perf_event *bp1, struct perf_event *bp2)
-{
-	__u64 bp1_saddr, bp1_eaddr, bp2_saddr, bp2_eaddr;
-
-	bp1_saddr = ALIGN_DOWN(bp1->attr.bp_addr, HW_BREAKPOINT_SIZE);
-	bp1_eaddr = ALIGN(bp1->attr.bp_addr + bp1->attr.bp_len, HW_BREAKPOINT_SIZE);
-	bp2_saddr = ALIGN_DOWN(bp2->attr.bp_addr, HW_BREAKPOINT_SIZE);
-	bp2_eaddr = ALIGN(bp2->attr.bp_addr + bp2->attr.bp_len, HW_BREAKPOINT_SIZE);
-
-	return (bp1_saddr < bp2_eaddr && bp1_eaddr > bp2_saddr);
-}
-
-static bool alternate_infra_bp(struct breakpoint *b, struct perf_event *bp)
-{
-	return is_ptrace_bp(bp) ? !b->ptrace_bp : b->ptrace_bp;
-}
-
-static bool can_co_exist(struct breakpoint *b, struct perf_event *bp)
-{
-	return !(alternate_infra_bp(b, bp) && bp_addr_range_overlap(b->bp, bp));
-}
-
-static int task_bps_add(struct perf_event *bp)
-{
-	struct breakpoint *tmp;
-
-	tmp = alloc_breakpoint(bp);
-	if (IS_ERR(tmp))
-		return PTR_ERR(tmp);
-
-	spin_lock(&task_bps_lock);
-	list_add(&tmp->list, &task_bps);
-	spin_unlock(&task_bps_lock);
-	return 0;
-}
-
-static void task_bps_remove(struct perf_event *bp)
-{
-	struct list_head *pos, *q;
-
-	spin_lock(&task_bps_lock);
-	list_for_each_safe(pos, q, &task_bps) {
-		struct breakpoint *tmp = list_entry(pos, struct breakpoint, list);
-
-		if (tmp->bp == bp) {
-			list_del(&tmp->list);
-			kfree(tmp);
-			break;
-		}
-	}
-	spin_unlock(&task_bps_lock);
-}
-
-/*
- * If any task has breakpoint from alternate infrastructure,
- * return true. Otherwise return false.
- */
-static bool all_task_bps_check(struct perf_event *bp)
-{
-	struct breakpoint *tmp;
-	bool ret = false;
-
-	spin_lock(&task_bps_lock);
-	list_for_each_entry(tmp, &task_bps, list) {
-		if (!can_co_exist(tmp, bp)) {
-			ret = true;
-			break;
-		}
-	}
-	spin_unlock(&task_bps_lock);
-	return ret;
-}
-
-/*
- * If same task has breakpoint from alternate infrastructure,
- * return true. Otherwise return false.
- */
-static bool same_task_bps_check(struct perf_event *bp)
-{
-	struct breakpoint *tmp;
-	bool ret = false;
-
-	spin_lock(&task_bps_lock);
-	list_for_each_entry(tmp, &task_bps, list) {
-		if (tmp->bp->hw.target == bp->hw.target &&
-		    !can_co_exist(tmp, bp)) {
-			ret = true;
-			break;
-		}
-	}
-	spin_unlock(&task_bps_lock);
-	return ret;
-}
-
-static int cpu_bps_add(struct perf_event *bp)
-{
-	struct breakpoint **cpu_bp;
-	struct breakpoint *tmp;
-	int i = 0;
-
-	tmp = alloc_breakpoint(bp);
-	if (IS_ERR(tmp))
-		return PTR_ERR(tmp);
-
-	spin_lock(&cpu_bps_lock);
-	cpu_bp = per_cpu_ptr(cpu_bps, bp->cpu);
-	for (i = 0; i < nr_wp_slots(); i++) {
-		if (!cpu_bp[i]) {
-			cpu_bp[i] = tmp;
-			break;
-		}
-	}
-	spin_unlock(&cpu_bps_lock);
-	return 0;
-}
-
-static void cpu_bps_remove(struct perf_event *bp)
-{
-	struct breakpoint **cpu_bp;
-	int i = 0;
-
-	spin_lock(&cpu_bps_lock);
-	cpu_bp = per_cpu_ptr(cpu_bps, bp->cpu);
-	for (i = 0; i < nr_wp_slots(); i++) {
-		if (!cpu_bp[i])
-			continue;
-
-		if (cpu_bp[i]->bp == bp) {
-			kfree(cpu_bp[i]);
-			cpu_bp[i] = NULL;
-			break;
-		}
-	}
-	spin_unlock(&cpu_bps_lock);
-}
-
-static bool cpu_bps_check(int cpu, struct perf_event *bp)
-{
-	struct breakpoint **cpu_bp;
-	bool ret = false;
-	int i;
-
-	spin_lock(&cpu_bps_lock);
-	cpu_bp = per_cpu_ptr(cpu_bps, cpu);
-	for (i = 0; i < nr_wp_slots(); i++) {
-		if (cpu_bp[i] && !can_co_exist(cpu_bp[i], bp)) {
-			ret = true;
-			break;
-		}
-	}
-	spin_unlock(&cpu_bps_lock);
-	return ret;
-}
-
-static bool all_cpu_bps_check(struct perf_event *bp)
-{
-	int cpu;
-
-	for_each_online_cpu(cpu) {
-		if (cpu_bps_check(cpu, bp))
-			return true;
-	}
-	return false;
-}
-
-int arch_reserve_bp_slot(struct perf_event *bp)
-{
-	int ret;
-
-	/* ptrace breakpoint */
-	if (is_ptrace_bp(bp)) {
-		if (all_cpu_bps_check(bp))
-			return -ENOSPC;
-
-		if (same_task_bps_check(bp))
-			return -ENOSPC;
-
-		return task_bps_add(bp);
-	}
-
-	/* perf breakpoint */
-	if (is_kernel_addr(bp->attr.bp_addr))
-		return 0;
-
-	if (bp->hw.target && bp->cpu == -1) {
-		if (same_task_bps_check(bp))
-			return -ENOSPC;
-
-		return task_bps_add(bp);
-	} else if (!bp->hw.target && bp->cpu != -1) {
-		if (all_task_bps_check(bp))
-			return -ENOSPC;
-
-		return cpu_bps_add(bp);
-	}
-
-	if (same_task_bps_check(bp))
-		return -ENOSPC;
-
-	ret = cpu_bps_add(bp);
-	if (ret)
-		return ret;
-	ret = task_bps_add(bp);
-	if (ret)
-		cpu_bps_remove(bp);
-
-	return ret;
-}
-
-void arch_release_bp_slot(struct perf_event *bp)
-{
-	if (!is_kernel_addr(bp->attr.bp_addr)) {
-		if (bp->hw.target)
-			task_bps_remove(bp);
-		if (bp->cpu != -1)
-			cpu_bps_remove(bp);
-	}
-}
-
 /*
  * Check for virtual address in kernel space.
  */
@@ -687,7 +440,7 @@ int hw_breakpoint_handler(struct die_args *args)
 	 */
 	if (ptrace_bp) {
 		for (i = 0; i < nr_wp_slots(); i++) {
-			if (!hit[i])
+			if (!hit[i] || !is_ptrace_bp(bp[i]))
 				continue;
 			perf_bp_event(bp[i], regs);
 			bp[i] = NULL;
-- 
2.41.0



Thread overview: 11+ messages
2023-08-01  1:17 [PATCH 0/7] Rework perf and ptrace watchpoint tracking Benjamin Gray
2023-08-01  1:17 ` [PATCH 1/7] powerpc/watchpoints: Explain thread_change_pc() more Benjamin Gray
2023-08-01  1:17 ` [PATCH 2/7] powerpc/watchpoints: Don't track info persistently Benjamin Gray
2023-08-01  1:17 ` [PATCH 3/7] powerpc/watchpoints: Track perf single step directly on the breakpoint Benjamin Gray
2023-08-01  1:17 ` [PATCH 4/7] powerpc/watchpoints: Simplify watchpoint reinsertion Benjamin Gray
2023-08-01  1:17 ` Benjamin Gray [this message]
2023-08-01  1:17 ` [PATCH 6/7] selftests/powerpc/ptrace: Update ptrace-perf watchpoint selftest Benjamin Gray
2023-08-01  1:17 ` [PATCH 7/7] perf/hw_breakpoint: Remove arch breakpoint hooks Benjamin Gray
2023-08-01  9:50 ` [PATCH 0/7] Rework perf and ptrace watchpoint tracking Christophe Leroy
2023-08-02 12:00   ` Michael Ellerman
2023-08-23 11:55 ` Michael Ellerman
