All of lore.kernel.org
 help / color / mirror / Atom feed
From: Ashok Raj <ashok.raj@intel.com>
To: bp@suse.de
Cc: Ashok Raj <ashok.raj@intel.com>, X86 ML <x86@kernel.org>,
	LKML <linux-kernel@vger.kernel.org>,
	Tom Lendacky <thomas.lendacky@amd.com>,
	Thomas Gleixner <tglx@linutronix.de>,
	Ingo Molnar <mingo@kernel.org>, Tony Luck <tony.luck@intel.com>,
	Andi Kleen <andi.kleen@intel.com>,
	Arjan Van De Ven <arjan.van.de.ven@intel.com>
Subject: [PATCH 3/3] x86/microcode: Quiesce all threads before a microcode update.
Date: Wed, 21 Feb 2018 08:49:44 -0800	[thread overview]
Message-ID: <1519231784-9941-4-git-send-email-ashok.raj@intel.com> (raw)
In-Reply-To: <1519231784-9941-1-git-send-email-ashok.raj@intel.com>

Microcode updates during OS load always assumed the other hyperthread
was "quiet", but Linux never really did this. We've recently received
several issues on this, where things did not go well at scale
deployments, and the Intel microcode team asked us to make sure the
system is in a quiet state during these updates. Such updates are
rare events, so we use stop_machine() to ensure the whole system is
quiet.

Signed-off-by: Ashok Raj <ashok.raj@intel.com>
Cc: X86 ML <x86@kernel.org>
Cc: LKML <linux-kernel@vger.kernel.org>
Cc: Tom Lendacky <thomas.lendacky@amd.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Andi Kleen <andi.kleen@intel.com>
Cc: Boris Petkov <bp@suse.de>
Cc: Arjan Van De Ven <arjan.van.de.ven@intel.com>
---
 arch/x86/kernel/cpu/microcode/core.c  | 113 +++++++++++++++++++++++++++++-----
 arch/x86/kernel/cpu/microcode/intel.c |   1 +
 2 files changed, 98 insertions(+), 16 deletions(-)

diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c
index aa1b9a4..af0aeb2 100644
--- a/arch/x86/kernel/cpu/microcode/core.c
+++ b/arch/x86/kernel/cpu/microcode/core.c
@@ -31,6 +31,9 @@
 #include <linux/cpu.h>
 #include <linux/fs.h>
 #include <linux/mm.h>
+#include <linux/nmi.h>
+#include <linux/stop_machine.h>
+#include <linux/delay.h>
 
 #include <asm/microcode_intel.h>
 #include <asm/cpu_device_id.h>
@@ -489,19 +492,82 @@ static void __exit microcode_dev_exit(void)
 /* fake device for request_firmware */
 static struct platform_device	*microcode_pdev;
 
-static enum ucode_state reload_for_cpu(int cpu)
+static struct ucode_update_param {
+	spinlock_t ucode_lock;
+	atomic_t   count;
+	atomic_t   errors;
+	atomic_t   enter;
+	int	   timeout;
+} uc_data;
+
+static void do_ucode_update(int cpu, struct ucode_update_param *ucd)
 {
-	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
-	enum ucode_state ustate;
+	enum ucode_state retval = 0;
 
-	if (!uci->valid)
-		return UCODE_OK;
+	spin_lock(&ucd->ucode_lock);
+	retval = microcode_ops->apply_microcode(cpu);
+	spin_unlock(&ucd->ucode_lock);
+	if (retval > UCODE_NFOUND) {
+		atomic_inc(&ucd->errors);
+		pr_warn("microcode update to cpu %d failed\n", cpu);
+	}
+	atomic_inc(&ucd->count);
+}
+
+/*
+ * Wait for upto 1sec for all cpus
+ * to show up in the rendezvous function
+ */
+#define MAX_UCODE_RENDEZVOUS	1000000000 /* nanosec */
+#define SPINUNIT		100	   /* 100ns */
+
+/*
+ * Each cpu waits for 1sec max.
+ */
+static int ucode_wait_timedout(int *time_out, void *data)
+{
+	struct ucode_update_param *ucd = data;
+	if (*time_out < SPINUNIT) {
+		pr_err("Not all cpus entered ucode update handler %d cpus missing\n",
+			(num_online_cpus() - atomic_read(&ucd->enter)));
+		return 1;
+	}
+	*time_out -= SPINUNIT;
+	touch_nmi_watchdog();
+	return 0;
+}
+
+/*
+ * All cpus enter here before a ucode load upto 1 sec.
+ * If not all cpus showed up, we abort the ucode update
+ * and return. ucode update is serialized with the spinlock
+ */
+static int ucode_load_rendezvous(void *data)
+{
+	int cpu = smp_processor_id();
+	struct ucode_update_param *ucd = data;
+	int timeout = MAX_UCODE_RENDEZVOUS;
+	int total_cpus = num_online_cpus();
 
-	ustate = microcode_ops->request_microcode_fw(cpu, &microcode_pdev->dev, true);
-	if (ustate != UCODE_OK)
-		return ustate;
+	/*
+	 * Wait for all cpu's to arrive
+	 */
+	atomic_dec(&ucd->enter);
+	while(atomic_read(&ucd->enter)) {
+		if (ucode_wait_timedout(&timeout, ucd))
+			return 1;
+		ndelay(SPINUNIT);
+	}
+
+	do_ucode_update(cpu, ucd);
 
-	return apply_microcode_on_target(cpu);
+	/*
+	 * Wait for all cpu's to complete
+	 * ucode update
+	 */
+	while (atomic_read(&ucd->count) != total_cpus)
+		cpu_relax();
+	return 0;
 }
 
 static ssize_t reload_store(struct device *dev,
@@ -509,7 +575,6 @@ static ssize_t reload_store(struct device *dev,
 			    const char *buf, size_t size)
 {
 	enum ucode_state tmp_ret = UCODE_OK;
-	bool do_callback = false;
 	unsigned long val;
 	ssize_t ret = 0;
 	int cpu;
@@ -523,21 +588,37 @@ static ssize_t reload_store(struct device *dev,
 
 	get_online_cpus();
 	mutex_lock(&microcode_mutex);
+	/*
+	 * First load the microcode file for all cpu's
+	 */
 	for_each_online_cpu(cpu) {
-		tmp_ret = reload_for_cpu(cpu);
+		tmp_ret = microcode_ops->request_microcode_fw(cpu,
+				&microcode_pdev->dev, true);
 		if (tmp_ret > UCODE_NFOUND) {
-			pr_warn("Error reloading microcode on CPU %d\n", cpu);
+			pr_warn("Error reloading microcode file for CPU %d\n", cpu);
 
 			/* set retval for the first encountered reload error */
 			if (!ret)
 				ret = -EINVAL;
 		}
-
-		if (tmp_ret == UCODE_UPDATED)
-			do_callback = true;
 	}
+	pr_debug("Done loading microcode file for all cpus\n");
 
-	if (!ret && do_callback)
+	memset(&uc_data, 0, sizeof(struct ucode_update_param));
+	spin_lock_init(&uc_data.ucode_lock);
+	atomic_set(&uc_data.enter, num_online_cpus());
+	/*
+	 * Wait for a 1 sec
+	 */
+	uc_data.timeout = USEC_PER_SEC;
+	stop_machine(ucode_load_rendezvous, &uc_data, cpu_online_mask);
+
+	pr_debug("Total CPUS = %d uperrors = %d\n",
+		atomic_read(&uc_data.count), atomic_read(&uc_data.errors));
+
+	if (atomic_read(&uc_data.errors))
+		pr_warn("Update failed for %d cpus\n", atomic_read(&uc_data.errors));
+	else
 		microcode_check();
 
 	mutex_unlock(&microcode_mutex);
diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c
index 5d32724..0c55be6 100644
--- a/arch/x86/kernel/cpu/microcode/intel.c
+++ b/arch/x86/kernel/cpu/microcode/intel.c
@@ -809,6 +809,7 @@ static enum ucode_state apply_microcode_intel(int cpu)
 	wbinvd();
 	/* write microcode via MSR 0x79 */
 	wrmsrl(MSR_IA32_UCODE_WRITE, (unsigned long)mc->bits);
+	pr_debug("ucode loading for cpu %d done\n", cpu);
 
 	rev = intel_get_microcode_revision();
 
-- 
2.7.4

  parent reply	other threads:[~2018-02-21 16:50 UTC|newest]

Thread overview: 9+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-02-21 16:49 [PATCH 0/3] Patch series to address some limitations in OS microcode loading Ashok Raj
2018-02-21 16:49 ` [PATCH 1/3] x86/microcode/intel: Check microcode revision before updating sibling threads Ashok Raj
2018-02-21 16:49 ` [PATCH 2/3] x86/microcode/intel: Perform a cache flush before ucode update Ashok Raj
2018-02-21 18:25   ` Borislav Petkov
2018-02-21 16:49 ` Ashok Raj [this message]
2018-02-21 19:06   ` [PATCH 3/3] x86/microcode: Quiesce all threads before a microcode update Borislav Petkov
2018-02-21 20:13     ` Raj, Ashok
2018-02-21 21:15       ` Borislav Petkov
2018-02-22 15:36       ` Tom Lendacky

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1519231784-9941-4-git-send-email-ashok.raj@intel.com \
    --to=ashok.raj@intel.com \
    --cc=andi.kleen@intel.com \
    --cc=arjan.van.de.ven@intel.com \
    --cc=bp@suse.de \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@kernel.org \
    --cc=tglx@linutronix.de \
    --cc=thomas.lendacky@amd.com \
    --cc=tony.luck@intel.com \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.