Stable Archive on lore.kernel.org
 help / color / Atom feed
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
To: linux-kernel@vger.kernel.org
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>,
	stable@vger.kernel.org, Tom Lendacky <thomas.lendacky@amd.com>,
	"Peter Zijlstra (Intel)" <peterz@infradead.org>,
	Alexander Shishkin <alexander.shishkin@linux.intel.com>,
	Arnaldo Carvalho de Melo <acme@kernel.org>,
	Arnaldo Carvalho de Melo <acme@redhat.com>,
	Borislav Petkov <bp@alien8.de>, Jiri Olsa <jolsa@redhat.com>,
	Linus Torvalds <torvalds@linux-foundation.org>,
	Namhyung Kim <namhyung@kernel.org>,
	Stephane Eranian <eranian@google.com>,
	Thomas Gleixner <tglx@linutronix.de>,
	Vince Weaver <vincent.weaver@maine.edu>,
	Ingo Molnar <mingo@kernel.org>
Subject: [PATCH 4.14 63/69] x86/perf/amd: Resolve race condition when disabling PMC
Date: Mon, 15 Apr 2019 20:59:21 +0200
Message-ID: <20190415183735.786951968@linuxfoundation.org> (raw)
In-Reply-To: <20190415183726.036654568@linuxfoundation.org>

From: Lendacky, Thomas <Thomas.Lendacky@amd.com>

commit 914123fa39042e651d79eaf86bbf63a1b938dddf upstream.

On AMD processors, the detection of an overflowed counter in the NMI
handler relies on the current value of the counter. So, for example, to
check for overflow on a 48 bit counter, bit 47 is checked to see if it
is 1 (not overflowed) or 0 (overflowed).

There is currently a race condition present when disabling and then
updating the PMC. Increased NMI latency in newer AMD processors makes this
race condition more pronounced. If the counter value has overflowed, it is
possible to update the PMC value before the NMI handler can run. The
updated PMC value is not an overflowed value, so when the perf NMI handler
does run, it will not find an overflowed counter. This may appear as an
unknown NMI resulting in either a panic or a series of messages, depending
on how the kernel is configured.

To eliminate this race condition, the PMC value must be checked after
disabling the counter. Add an AMD function, amd_pmu_disable_all(), that
will wait for the NMI handler to reset any active and overflowed counter
after calling x86_pmu_disable_all().

Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: <stable@vger.kernel.org> # 4.14.x-
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Link: https://lkml.kernel.org/r/Message-ID:
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

---
 arch/x86/events/amd/core.c |   65 ++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 62 insertions(+), 3 deletions(-)

--- a/arch/x86/events/amd/core.c
+++ b/arch/x86/events/amd/core.c
@@ -3,6 +3,7 @@
 #include <linux/types.h>
 #include <linux/init.h>
 #include <linux/slab.h>
+#include <linux/delay.h>
 #include <asm/apicdef.h>
 
 #include "../perf_event.h"
@@ -429,6 +430,64 @@ static void amd_pmu_cpu_dead(int cpu)
 	}
 }
 
+/*
+ * When a PMC counter overflows, an NMI is used to process the event and
+ * reset the counter. NMI latency can result in the counter being updated
+ * before the NMI can run, which can result in what appear to be spurious
+ * NMIs. This function is intended to wait for the NMI to run and reset
+ * the counter to avoid possible unhandled NMI messages.
+ */
+#define OVERFLOW_WAIT_COUNT	50
+
+static void amd_pmu_wait_on_overflow(int idx)
+{
+	unsigned int i;
+	u64 counter;
+
+	/*
+	 * Wait for the counter to be reset if it has overflowed. This loop
+	 * should exit very, very quickly, but just in case, don't wait
+	 * forever...
+	 */
+	for (i = 0; i < OVERFLOW_WAIT_COUNT; i++) {
+		rdmsrl(x86_pmu_event_addr(idx), counter);
+		if (counter & (1ULL << (x86_pmu.cntval_bits - 1)))
+			break;
+
+		/* Might be in IRQ context, so can't sleep */
+		udelay(1);
+	}
+}
+
+static void amd_pmu_disable_all(void)
+{
+	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+	int idx;
+
+	x86_pmu_disable_all();
+
+	/*
+	 * This shouldn't be called from NMI context, but add a safeguard here
+	 * to return, since if we're in NMI context we can't wait for an NMI
+	 * to reset an overflowed counter value.
+	 */
+	if (in_nmi())
+		return;
+
+	/*
+	 * Check each counter for overflow and wait for it to be reset by the
+	 * NMI if it has overflowed. This relies on the fact that all active
+	 * counters are always enabled when this function is caled and
+	 * ARCH_PERFMON_EVENTSEL_INT is always set.
+	 */
+	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
+		if (!test_bit(idx, cpuc->active_mask))
+			continue;
+
+		amd_pmu_wait_on_overflow(idx);
+	}
+}
+
 static struct event_constraint *
 amd_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
 			  struct perf_event *event)
@@ -622,7 +681,7 @@ static ssize_t amd_event_sysfs_show(char
 static __initconst const struct x86_pmu amd_pmu = {
 	.name			= "AMD",
 	.handle_irq		= x86_pmu_handle_irq,
-	.disable_all		= x86_pmu_disable_all,
+	.disable_all		= amd_pmu_disable_all,
 	.enable_all		= x86_pmu_enable_all,
 	.enable			= x86_pmu_enable_event,
 	.disable		= x86_pmu_disable_event,
@@ -728,7 +787,7 @@ void amd_pmu_enable_virt(void)
 	cpuc->perf_ctr_virt_mask = 0;
 
 	/* Reload all events */
-	x86_pmu_disable_all();
+	amd_pmu_disable_all();
 	x86_pmu_enable_all(0);
 }
 EXPORT_SYMBOL_GPL(amd_pmu_enable_virt);
@@ -746,7 +805,7 @@ void amd_pmu_disable_virt(void)
 	cpuc->perf_ctr_virt_mask = AMD64_EVENTSEL_HOSTONLY;
 
 	/* Reload all events */
-	x86_pmu_disable_all();
+	amd_pmu_disable_all();
 	x86_pmu_enable_all(0);
 }
 EXPORT_SYMBOL_GPL(amd_pmu_disable_virt);



  parent reply index

Thread overview: 88+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-04-15 18:58 [PATCH 4.14 00/69] 4.14.112-stable review Greg Kroah-Hartman
2019-04-15 18:58 ` [PATCH 4.14 01/69] net: sfp: move sfp_register_socket call from sfp_remove to sfp_probe Greg Kroah-Hartman
2019-04-15 18:58 ` [PATCH 4.14 02/69] x86/power: Fix some ordering bugs in __restore_processor_context() Greg Kroah-Hartman
2019-04-15 18:58 ` [PATCH 4.14 03/69] x86/power/64: Use struct desc_ptr for the IDT in struct saved_context Greg Kroah-Hartman
2019-04-15 20:07   ` Pavel Machek
2019-04-15 18:58 ` [PATCH 4.14 04/69] x86/power/32: Move SYSENTER MSR restoration to fix_processor_context() Greg Kroah-Hartman
2019-04-15 20:07   ` Pavel Machek
2019-04-15 22:07     ` Sasha Levin
2019-04-15 18:58 ` [PATCH 4.14 05/69] x86/power: Make restore_processor_context() sane Greg Kroah-Hartman
2019-04-15 20:04   ` Pavel Machek
2019-04-15 20:18     ` Andy Lutomirski
2019-04-16 11:56       ` Pavel Machek
2019-04-16 13:22         ` Sasha Levin
2019-04-15 18:58 ` [PATCH 4.14 06/69] drm/i915/gvt: do not let pin count of shadow mm go negative Greg Kroah-Hartman
2019-04-15 18:58 ` [PATCH 4.14 07/69] powerpc/tm: Limit TM code inside PPC_TRANSACTIONAL_MEM Greg Kroah-Hartman
2019-04-15 18:58 ` [PATCH 4.14 08/69] kbuild: clang: choose GCC_TOOLCHAIN_DIR not on LD Greg Kroah-Hartman
2019-04-15 18:58 ` [PATCH 4.14 09/69] x86: vdso: Use $LD instead of $CC to link Greg Kroah-Hartman
2019-04-26 11:41   ` Rantala, Tommi T. (Nokia - FI/Espoo)
2019-04-26 12:48     ` Nathan Chancellor
2019-04-26 13:23       ` Rantala, Tommi T. (Nokia - FI/Espoo)
2019-04-26 13:34         ` Nathan Chancellor
2019-04-27 13:54           ` gregkh
2019-04-15 18:58 ` [PATCH 4.14 10/69] x86/vdso: Drop implicit common-page-size linker flag Greg Kroah-Hartman
2019-04-15 18:58 ` [PATCH 4.14 11/69] lib/string.c: implement a basic bcmp Greg Kroah-Hartman
2019-04-15 18:58 ` [PATCH 4.14 12/69] stating: ccree: revert "staging: ccree: fix leak of import() after init()" Greg Kroah-Hartman
2019-04-15 18:58 ` [PATCH 4.14 13/69] arm64: kaslr: Reserve size of ARM64_MEMSTART_ALIGN in linear region Greg Kroah-Hartman
2019-04-15 18:58 ` [PATCH 4.14 14/69] tty: mark Siemens R3964 line discipline as BROKEN Greg Kroah-Hartman
2019-04-15 18:58 ` [PATCH 4.14 15/69] tty: ldisc: add sysctl to prevent autoloading of ldiscs Greg Kroah-Hartman
2019-04-15 18:58 ` [PATCH 4.14 16/69] ipv6: Fix dangling pointer when ipv6 fragment Greg Kroah-Hartman
2019-04-15 18:58 ` [PATCH 4.14 17/69] ipv6: sit: reset ip header pointer in ipip6_rcv Greg Kroah-Hartman
2019-04-15 18:58 ` [PATCH 4.14 18/69] kcm: switch order of device registration to fix a crash Greg Kroah-Hartman
2019-04-15 18:58 ` [PATCH 4.14 19/69] net-gro: Fix GRO flush when receiving a GSO packet Greg Kroah-Hartman
2019-04-15 18:58 ` [PATCH 4.14 20/69] net/mlx5: Decrease default mr cache size Greg Kroah-Hartman
2019-04-15 18:58 ` [PATCH 4.14 21/69] net: rds: force to destroy connection if t_sock is NULL in rds_tcp_kill_sock() Greg Kroah-Hartman
2019-04-15 18:58 ` [PATCH 4.14 22/69] net/sched: fix ->get helper of the matchall cls Greg Kroah-Hartman
2019-04-15 18:58 ` [PATCH 4.14 23/69] openvswitch: fix flow actions reallocation Greg Kroah-Hartman
2019-04-15 18:58 ` [PATCH 4.14 24/69] qmi_wwan: add Olicard 600 Greg Kroah-Hartman
2019-04-15 18:58 ` [PATCH 4.14 25/69] sctp: initialize _pad of sockaddr_in before copying to user memory Greg Kroah-Hartman
2019-04-15 18:58 ` [PATCH 4.14 26/69] tcp: Ensure DCTCP reacts to losses Greg Kroah-Hartman
2019-04-15 18:58 ` [PATCH 4.14 27/69] vrf: check accept_source_route on the original netdevice Greg Kroah-Hartman
2019-04-15 18:58 ` [PATCH 4.14 28/69] net/mlx5e: Fix error handling when refreshing TIRs Greg Kroah-Hartman
2019-04-15 18:58 ` [PATCH 4.14 29/69] net/mlx5e: Add a lock on tir list Greg Kroah-Hartman
2019-04-15 18:58 ` [PATCH 4.14 30/69] nfp: validate the return code from dev_queue_xmit() Greg Kroah-Hartman
2019-04-15 18:58 ` [PATCH 4.14 31/69] bnxt_en: Improve RX consumer index validity check Greg Kroah-Hartman
2019-04-15 18:58 ` [PATCH 4.14 32/69] bnxt_en: Reset device on RX buffer errors Greg Kroah-Hartman
2019-04-15 18:58 ` [PATCH 4.14 33/69] net/sched: act_sample: fix divide by zero in the traffic path Greg Kroah-Hartman
2019-04-15 18:58 ` [PATCH 4.14 34/69] netns: provide pure entropy for net_hash_mix() Greg Kroah-Hartman
2019-04-15 18:58 ` [PATCH 4.14 35/69] net: ethtool: not call vzalloc for zero sized memory request Greg Kroah-Hartman
2019-04-15 18:58 ` [PATCH 4.14 36/69] ALSA: seq: Fix OOB-reads from strlcpy Greg Kroah-Hartman
2019-04-15 18:58 ` [PATCH 4.14 37/69] ip6_tunnel: Match to ARPHRD_TUNNEL6 for dev type Greg Kroah-Hartman
2019-04-15 18:58 ` [PATCH 4.14 38/69] hv_netvsc: Fix unwanted wakeup after tx_disable Greg Kroah-Hartman
2019-04-15 18:58 ` [PATCH 4.14 39/69] arm64: dts: rockchip: fix rk3328 sdmmc0 write errors Greg Kroah-Hartman
2019-04-15 18:58 ` [PATCH 4.14 40/69] parisc: Detect QEMU earlier in boot process Greg Kroah-Hartman
2019-04-15 18:58 ` [PATCH 4.14 41/69] parisc: regs_return_value() should return gpr28 Greg Kroah-Hartman
2019-04-15 18:59 ` [PATCH 4.14 42/69] alarmtimer: Return correct remaining time Greg Kroah-Hartman
2019-04-15 18:59 ` [PATCH 4.14 43/69] drm/udl: add a release method and delay modeset teardown Greg Kroah-Hartman
2019-04-15 18:59 ` [PATCH 4.14 44/69] include/linux/bitrev.h: fix constant bitrev Greg Kroah-Hartman
2019-04-15 18:59 ` [PATCH 4.14 45/69] ASoC: fsl_esai: fix channel swap issue when stream starts Greg Kroah-Hartman
2019-04-15 18:59 ` [PATCH 4.14 46/69] Btrfs: do not allow trimming when a fs is mounted with the nologreplay option Greg Kroah-Hartman
2019-04-15 18:59 ` [PATCH 4.14 47/69] btrfs: prop: fix zstd compression parameter validation Greg Kroah-Hartman
2019-04-15 18:59 ` [PATCH 4.14 48/69] btrfs: prop: fix vanished compression property after failed set Greg Kroah-Hartman
2019-04-15 18:59 ` [PATCH 4.14 49/69] block: do not leak memory in bio_copy_user_iov() Greg Kroah-Hartman
2019-04-15 18:59 ` [PATCH 4.14 50/69] block: fix the return errno for direct IO Greg Kroah-Hartman
2019-04-15 18:59 ` [PATCH 4.14 51/69] genirq: Respect IRQCHIP_SKIP_SET_WAKE in irq_chip_set_wake_parent() Greg Kroah-Hartman
2019-04-15 18:59 ` [PATCH 4.14 52/69] genirq: Initialize request_mutex if CONFIG_SPARSE_IRQ=n Greg Kroah-Hartman
2019-04-15 18:59 ` [PATCH 4.14 53/69] virtio: Honour may_reduce_num in vring_create_virtqueue Greg Kroah-Hartman
2019-04-15 18:59 ` [PATCH 4.14 54/69] ARM: dts: am335x-evmsk: Correct the regulators for the audio codec Greg Kroah-Hartman
2019-04-15 18:59 ` [PATCH 4.14 55/69] ARM: dts: am335x-evm: " Greg Kroah-Hartman
2019-04-15 18:59 ` [PATCH 4.14 56/69] ARM: dts: at91: Fix typo in ISC_D0 on PC9 Greg Kroah-Hartman
2019-04-15 18:59 ` [PATCH 4.14 57/69] arm64: futex: Fix FUTEX_WAKE_OP atomic ops with non-zero result value Greg Kroah-Hartman
2019-04-15 18:59 ` [PATCH 4.14 58/69] arm64: dts: rockchip: fix rk3328 rgmii high tx error rate Greg Kroah-Hartman
2019-04-15 18:59 ` [PATCH 4.14 59/69] arm64: backtrace: Dont bother trying to unwind the userspace stack Greg Kroah-Hartman
2019-04-15 18:59 ` [PATCH 4.14 60/69] xen: Prevent buffer overflow in privcmd ioctl Greg Kroah-Hartman
2019-04-15 18:59 ` [PATCH 4.14 61/69] sched/fair: Do not re-read ->h_load_next during hierarchical load calculation Greg Kroah-Hartman
2019-04-15 18:59 ` [PATCH 4.14 62/69] xtensa: fix return_address Greg Kroah-Hartman
2019-04-15 18:59 ` Greg Kroah-Hartman [this message]
2019-04-15 18:59 ` [PATCH 4.14 64/69] x86/perf/amd: Resolve NMI latency issues for active PMCs Greg Kroah-Hartman
2019-04-15 18:59 ` [PATCH 4.14 65/69] x86/perf/amd: Remove need to check "running" bit in NMI handler Greg Kroah-Hartman
2019-04-15 18:59 ` [PATCH 4.14 66/69] PCI: Add function 1 DMA alias quirk for Marvell 9170 SATA controller Greg Kroah-Hartman
2019-04-15 18:59 ` [PATCH 4.14 67/69] dm table: propagate BDI_CAP_STABLE_WRITES to fix sporadic checksum errors Greg Kroah-Hartman
2019-04-15 18:59 ` [PATCH 4.14 68/69] arm64: dts: rockchip: fix vcc_host1_5v pin assign on rk3328-rock64 Greg Kroah-Hartman
2019-04-15 18:59 ` [PATCH 4.14 69/69] arm64: dts: rockchip: Fix vcc_host1_5v GPIO polarity " Greg Kroah-Hartman
2019-04-16  0:04 ` [PATCH 4.14 00/69] 4.14.112-stable review kernelci.org bot
2019-04-16  4:29 ` Naresh Kamboju
2019-04-16 10:33 ` Jon Hunter
2019-04-16 16:30 ` Guenter Roeck
2019-04-16 21:40 ` shuah
2019-04-16 22:17 ` Bharath Vedartham

Reply instructions:

You may reply publically to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190415183735.786951968@linuxfoundation.org \
    --to=gregkh@linuxfoundation.org \
    --cc=acme@kernel.org \
    --cc=acme@redhat.com \
    --cc=alexander.shishkin@linux.intel.com \
    --cc=bp@alien8.de \
    --cc=eranian@google.com \
    --cc=jolsa@redhat.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@kernel.org \
    --cc=namhyung@kernel.org \
    --cc=peterz@infradead.org \
    --cc=stable@vger.kernel.org \
    --cc=tglx@linutronix.de \
    --cc=thomas.lendacky@amd.com \
    --cc=torvalds@linux-foundation.org \
    --cc=vincent.weaver@maine.edu \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Stable Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/stable/0 stable/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 stable stable/ https://lore.kernel.org/stable \
		stable@vger.kernel.org
	public-inbox-index stable

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.stable


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git