All of lore.kernel.org
 help / color / mirror / Atom feed
From: Peter Zijlstra <peterz@infradead.org>
To: Pengfei Xu <pengfei.xu@intel.com>
Cc: peter.zijlstra@intel.com, linux-kernel@vger.kernel.org,
	heng.su@intel.com, Marco Elver <elver@google.com>,
	Mark Rutland <mark.rutland@arm.com>
Subject: Re: [Syzkaller & bisect] There is "__perf_event_overflow" WARNING in v6.1-rc5 kernel in guest
Date: Thu, 17 Nov 2022 19:11:37 +0100	[thread overview]
Message-ID: <Y3Z5WTk+cvHSt0lf@hirez.programming.kicks-ass.net> (raw)
In-Reply-To: <Y3RbiRmAKrDlVCxC@xpf.sh.intel.com>

On Wed, Nov 16, 2022 at 11:39:53AM +0800, Pengfei Xu wrote:
> int main(void)
> {
>   syscall(__NR_mmap, 0x1ffff000ul, 0x1000ul, 0ul, 0x32ul, -1, 0ul);
>   syscall(__NR_mmap, 0x20000000ul, 0x1000000ul, 7ul, 0x32ul, -1, 0ul);
>   syscall(__NR_mmap, 0x21000000ul, 0x1000ul, 0ul, 0x32ul, -1, 0ul);
>   setup_sysctl();
> 
>   *(uint32_t*)0x20000200 = 0;
>   *(uint32_t*)0x20000204 = 0x80;
>   *(uint8_t*)0x20000208 = 0;
>   *(uint8_t*)0x20000209 = 0;
>   *(uint8_t*)0x2000020a = 0;
>   *(uint8_t*)0x2000020b = 0;
>   *(uint32_t*)0x2000020c = 0;
>   *(uint64_t*)0x20000210 = 8;
>   *(uint64_t*)0x20000218 = 0;
>   *(uint64_t*)0x20000220 = 0;
>   STORE_BY_BITMASK(uint64_t, , 0x20000228, 0, 0, 1);
>   STORE_BY_BITMASK(uint64_t, , 0x20000228, 0, 1, 1);
>   STORE_BY_BITMASK(uint64_t, , 0x20000228, 0, 2, 1);
>   STORE_BY_BITMASK(uint64_t, , 0x20000228, 0, 3, 1);
>   STORE_BY_BITMASK(uint64_t, , 0x20000228, 0, 4, 1);
>   STORE_BY_BITMASK(uint64_t, , 0x20000228, 1, 5, 1);
>   STORE_BY_BITMASK(uint64_t, , 0x20000228, 0, 6, 1);
>   STORE_BY_BITMASK(uint64_t, , 0x20000228, 0, 7, 1);
>   STORE_BY_BITMASK(uint64_t, , 0x20000228, 0, 8, 1);
>   STORE_BY_BITMASK(uint64_t, , 0x20000228, 0, 9, 1);
>   STORE_BY_BITMASK(uint64_t, , 0x20000228, 1, 10, 1);
>   STORE_BY_BITMASK(uint64_t, , 0x20000228, 0, 11, 1);
>   STORE_BY_BITMASK(uint64_t, , 0x20000228, 0, 12, 1);
>   STORE_BY_BITMASK(uint64_t, , 0x20000228, 0, 13, 1);
>   STORE_BY_BITMASK(uint64_t, , 0x20000228, 0, 14, 1);
>   STORE_BY_BITMASK(uint64_t, , 0x20000228, 0, 15, 2);
>   STORE_BY_BITMASK(uint64_t, , 0x20000228, 0, 17, 1);
>   STORE_BY_BITMASK(uint64_t, , 0x20000228, 0, 18, 1);
>   STORE_BY_BITMASK(uint64_t, , 0x20000228, 0, 19, 1);
>   STORE_BY_BITMASK(uint64_t, , 0x20000228, 0, 20, 1);
>   STORE_BY_BITMASK(uint64_t, , 0x20000228, 0, 21, 1);
>   STORE_BY_BITMASK(uint64_t, , 0x20000228, 0, 22, 1);
>   STORE_BY_BITMASK(uint64_t, , 0x20000228, 0, 23, 1);
>   STORE_BY_BITMASK(uint64_t, , 0x20000228, 0, 24, 1);
>   STORE_BY_BITMASK(uint64_t, , 0x20000228, 0, 25, 1);
>   STORE_BY_BITMASK(uint64_t, , 0x20000228, 0, 26, 1);
>   STORE_BY_BITMASK(uint64_t, , 0x20000228, 0, 27, 1);
>   STORE_BY_BITMASK(uint64_t, , 0x20000228, 0, 28, 1);
>   STORE_BY_BITMASK(uint64_t, , 0x20000228, 0, 29, 1);
>   STORE_BY_BITMASK(uint64_t, , 0x20000228, 0, 30, 1);
>   STORE_BY_BITMASK(uint64_t, , 0x20000228, 0, 31, 1);
>   STORE_BY_BITMASK(uint64_t, , 0x20000228, 1, 32, 1);
>   STORE_BY_BITMASK(uint64_t, , 0x20000228, 1, 33, 1);
>   STORE_BY_BITMASK(uint64_t, , 0x20000228, 0, 34, 1);
>   STORE_BY_BITMASK(uint64_t, , 0x20000228, 0, 35, 1);
>   STORE_BY_BITMASK(uint64_t, , 0x20000228, 1, 36, 1);
>   STORE_BY_BITMASK(uint64_t, , 0x20000228, 1, 37, 1);
>   STORE_BY_BITMASK(uint64_t, , 0x20000228, 0, 38, 26);
>   *(uint32_t*)0x20000230 = 0;
>   *(uint32_t*)0x20000234 = 0;
>   *(uint64_t*)0x20000238 = 0;
>   *(uint64_t*)0x20000240 = 0;
>   *(uint64_t*)0x20000248 = 0;
>   *(uint64_t*)0x20000250 = 0;
>   *(uint32_t*)0x20000258 = 0;
>   *(uint32_t*)0x2000025c = 0;
>   *(uint64_t*)0x20000260 = 0;
>   *(uint32_t*)0x20000268 = 0;
>   *(uint16_t*)0x2000026c = 0;
>   *(uint16_t*)0x2000026e = 0;
>   *(uint32_t*)0x20000270 = 0;
>   *(uint32_t*)0x20000274 = 0;
>   *(uint64_t*)0x20000278 = 0xd62;
>   syscall(__NR_perf_event_open, 0x20000200ul, 0, 0ul, -1, 3ul);
>   return 0;
> }

Putting that next to 'pahole --hex -C perf_event_attr' seems to suggest
this is HW_CPU_CYCLES with a sampling on and exclude_kernel and sigtrap
set.

Now, for hardware events like this, you'd expect the PMU OS filter to
respect exclude_kernel (specifically not set ARCH_PERFMON_EVENTSEL_OS in
the relevant MSR), except there's a bunch of erratas and skid related
'funnies' that mean a user event can trigger across the kernel boundary.

Does the below patch help -- it should filter out those sort of things.

(still also including that previous patch)

---
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 4ec3717003d5..cc86bf25f0c3 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -9273,6 +9273,21 @@ int perf_event_account_interrupt(struct perf_event *event)
 	return __perf_event_account_interrupt(event, 1);
 }
 
+static inline bool sample_is_allowed(struct perf_event *event, struct pt_regs *regs)
+{
+	/*
+	 * Due to interrupt latency (AKA "skid"), we may enter the
+	 * kernel before taking an overflow, even if the PMU is only
+	 * counting user events.
+	 * To avoid leaking information to userspace, we must always
+	 * reject kernel samples when exclude_kernel is set.
+	 */
+	if (event->attr.exclude_kernel && !user_mode(regs))
+		return false;
+
+	return true;
+}
+
 /*
  * Generic event overflow handling, sampling.
  */
@@ -9305,15 +9320,28 @@ static int __perf_event_overflow(struct perf_event *event,
 		perf_event_disable_inatomic(event);
 	}
 
-	if (event->attr.sigtrap) {
-		/*
-		 * Should not be able to return to user space without processing
-		 * pending_sigtrap (kernel events can overflow multiple times).
-		 */
-		WARN_ON_ONCE(event->pending_sigtrap && event->attr.exclude_kernel);
+	if (event->attr.sigtrap && sample_is_allowed(event, regs)) {
+		unsigned int pending_id = 1;
+
+		if (regs)
+			pending_id = hash32_ptr((void *)instruction_pointer(regs)) ?: 1;
 		if (!event->pending_sigtrap) {
-			event->pending_sigtrap = 1;
+			event->pending_sigtrap = pending_id;
 			local_inc(&event->ctx->nr_pending);
+		} else if (event->attr.exclude_kernel) {
+			/*
+			 * Should not be able to return to user space without
+			 * consuming pending_sigtrap; with exceptions:
+			 *
+			 *  1. Where !exclude_kernel, events can overflow again
+			 *     in the kernel without returning to user space.
+			 *
+			 *  2. Events that can overflow again before the IRQ-
+			 *     work without user space progress (e.g. hrtimer).
+			 *     To approximate progress (with false negatives),
+			 *     check 32-bit hash of the current IP.
+			 */
+			WARN_ON_ONCE(event->pending_sigtrap != pending_id);
 		}
 		event->pending_addr = data->addr;
 		irq_work_queue(&event->pending_irq);

  parent reply	other threads:[~2022-11-17 18:12 UTC|newest]

Thread overview: 14+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-11-16  3:39 [Syzkaller & bisect] There is "__perf_event_overflow" WARNING in v6.1-rc5 kernel in guest Pengfei Xu
2022-11-16  3:45 ` Pengfei Xu
2022-11-16 14:40 ` Peter Zijlstra
2022-11-17  1:37   ` Pengfei Xu
2022-11-17 18:11 ` Peter Zijlstra [this message]
2022-11-19  2:45   ` Pengfei Xu
2022-11-23 15:05     ` Peter Zijlstra
2022-11-23 15:06       ` Peter Zijlstra
2022-11-23 15:26       ` Pengfei Xu
2022-11-24  1:40         ` Pengfei Xu
2022-11-24  8:31       ` Marco Elver
2022-11-24  9:00         ` Peter Zijlstra
2022-11-24 10:34           ` Peter Zijlstra
2022-11-24  9:43     ` [tip: perf/urgent] perf: Consider OS filter fail tip-bot2 for Peter Zijlstra

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=Y3Z5WTk+cvHSt0lf@hirez.programming.kicks-ass.net \
    --to=peterz@infradead.org \
    --cc=elver@google.com \
    --cc=heng.su@intel.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mark.rutland@arm.com \
    --cc=pengfei.xu@intel.com \
    --cc=peter.zijlstra@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.