linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] x86/AMD: Apply Erratum 688 fix when BIOS doesn't
@ 2017-10-22 10:47 Borislav Petkov
  2017-10-22 11:04 ` Ingo Molnar
  2017-10-22 12:16 ` [tip:x86/urgent] x86/cpu/AMD: Apply the Erratum 688 fix when the " tip-bot for Borislav Petkov
  0 siblings, 2 replies; 7+ messages in thread
From: Borislav Petkov @ 2017-10-22 10:47 UTC (permalink / raw)
  To: X86 ML; +Cc: Sherry Hurwitz, Yazen Ghannam, mirh, LKML

From: Borislav Petkov <bp@suse.de>

Some F14h machines have an erratum which, "under a highly specific
and detailed set of internal timing conditions" can lead to skipping
instructions and rIP corruption. Add the fix for those machines when
their BIOS doesn't apply it or there simply isn't a BIOS update for them.

Signed-off-by: Borislav Petkov <bp@suse.de>
Tested-by: <mirh@protonmail.ch>
Link: https://bugzilla.kernel.org/show_bug.cgi?id=197285
Cc: Sherry Hurwitz <sherry.hurwitz@amd.com>
Cc: Yazen Ghannam <Yazen.Ghannam@amd.com>
Cc: <stable@vger.kernel.org>
---
 arch/x86/kernel/amd_nb.c | 39 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 39 insertions(+)

diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c
index 458da8509b75..7ad1dfc8f40e 100644
--- a/arch/x86/kernel/amd_nb.c
+++ b/arch/x86/kernel/amd_nb.c
@@ -27,6 +27,8 @@ static const struct pci_device_id amd_root_ids[] = {
 	{}
 };
 
+#define PCI_DEVICE_ID_AMD_CNB17H_F4     0x1704
+
 const struct pci_device_id amd_nb_misc_ids[] = {
 	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB_MISC) },
 	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_MISC) },
@@ -37,6 +39,7 @@ const struct pci_device_id amd_nb_misc_ids[] = {
 	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_NB_F3) },
 	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_M30H_NB_F3) },
 	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_DF_F3) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CNB17H_F3) },
 	{}
 };
 EXPORT_SYMBOL_GPL(amd_nb_misc_ids);
@@ -48,6 +51,7 @@ static const struct pci_device_id amd_nb_link_ids[] = {
 	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_NB_F4) },
 	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_M30H_NB_F4) },
 	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_DF_F4) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CNB17H_F4) },
 	{}
 };
 
@@ -402,11 +406,46 @@ void amd_flush_garts(void)
 }
 EXPORT_SYMBOL_GPL(amd_flush_garts);
 
+static void __fix_erratum_688(void *info)
+{
+#define MSR_AMD64_IC_CFG 0xC0011021
+
+	msr_set_bit(MSR_AMD64_IC_CFG, 3);
+	msr_set_bit(MSR_AMD64_IC_CFG, 14);
+}
+
+/* Apply erratum 688 fix so machines without a BIOS fix work. */
+static __init void fix_erratum_688(void)
+{
+	struct pci_dev *F4;
+	u32 val;
+
+	if (boot_cpu_data.x86 != 0x14)
+		return;
+
+	if (!amd_northbridges.num)
+		return;
+
+	F4 = node_to_amd_nb(0)->link;
+	if (!F4)
+		return;
+
+	if (pci_read_config_dword(F4, 0x164, &val))
+		return;
+
+	if (val & BIT(2))
+		return;
+
+	on_each_cpu(__fix_erratum_688, NULL, 0);
+}
+
 static __init int init_amd_nbs(void)
 {
 	amd_cache_northbridges();
 	amd_cache_gart();
 
+	fix_erratum_688();
+
 	return 0;
 }
 
-- 
2.13.0

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH] x86/AMD: Apply Erratum 688 fix when BIOS doesn't
  2017-10-22 10:47 [PATCH] x86/AMD: Apply Erratum 688 fix when BIOS doesn't Borislav Petkov
@ 2017-10-22 11:04 ` Ingo Molnar
  2017-10-22 11:12   ` Borislav Petkov
  2017-10-22 12:16 ` [tip:x86/urgent] x86/cpu/AMD: Apply the Erratum 688 fix when the " tip-bot for Borislav Petkov
  1 sibling, 1 reply; 7+ messages in thread
From: Ingo Molnar @ 2017-10-22 11:04 UTC (permalink / raw)
  To: Borislav Petkov; +Cc: X86 ML, Sherry Hurwitz, Yazen Ghannam, mirh, LKML


* Borislav Petkov <bp@alien8.de> wrote:

> From: Borislav Petkov <bp@suse.de>
> 
> Some F14h machines have an erratum which, "under a highly specific
> and detailed set of internal timing conditions" can lead to skipping
> instructions and rIP corruption. Add the fix for those machines when
> their BIOS doesn't apply it or there simply isn't a BIOS update for them.
> 
> Signed-off-by: Borislav Petkov <bp@suse.de>
> Tested-by: <mirh@protonmail.ch>
> Link: https://bugzilla.kernel.org/show_bug.cgi?id=197285
> Cc: Sherry Hurwitz <sherry.hurwitz@amd.com>
> Cc: Yazen Ghannam <Yazen.Ghannam@amd.com>
> Cc: <stable@vger.kernel.org>
> ---
>  arch/x86/kernel/amd_nb.c | 39 +++++++++++++++++++++++++++++++++++++++
>  1 file changed, 39 insertions(+)
> 
> diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c
> index 458da8509b75..7ad1dfc8f40e 100644
> --- a/arch/x86/kernel/amd_nb.c
> +++ b/arch/x86/kernel/amd_nb.c
> @@ -27,6 +27,8 @@ static const struct pci_device_id amd_root_ids[] = {
>  	{}
>  };
>  
> +#define PCI_DEVICE_ID_AMD_CNB17H_F4     0x1704
> +
>  const struct pci_device_id amd_nb_misc_ids[] = {
>  	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB_MISC) },
>  	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_MISC) },
> @@ -37,6 +39,7 @@ const struct pci_device_id amd_nb_misc_ids[] = {
>  	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_NB_F3) },
>  	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_M30H_NB_F3) },
>  	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_DF_F3) },
> +	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CNB17H_F3) },
>  	{}
>  };
>  EXPORT_SYMBOL_GPL(amd_nb_misc_ids);
> @@ -48,6 +51,7 @@ static const struct pci_device_id amd_nb_link_ids[] = {
>  	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_NB_F4) },
>  	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_M30H_NB_F4) },
>  	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_DF_F4) },
> +	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CNB17H_F4) },
>  	{}
>  };
>  
> @@ -402,11 +406,46 @@ void amd_flush_garts(void)
>  }
>  EXPORT_SYMBOL_GPL(amd_flush_garts);
>  
> +static void __fix_erratum_688(void *info)
> +{
> +#define MSR_AMD64_IC_CFG 0xC0011021
> +
> +	msr_set_bit(MSR_AMD64_IC_CFG, 3);
> +	msr_set_bit(MSR_AMD64_IC_CFG, 14);
> +}
> +
> +/* Apply erratum 688 fix so machines without a BIOS fix work. */
> +static __init void fix_erratum_688(void)
> +{
> +	struct pci_dev *F4;
> +	u32 val;
> +
> +	if (boot_cpu_data.x86 != 0x14)
> +		return;
> +
> +	if (!amd_northbridges.num)
> +		return;
> +
> +	F4 = node_to_amd_nb(0)->link;
> +	if (!F4)
> +		return;
> +
> +	if (pci_read_config_dword(F4, 0x164, &val))
> +		return;
> +
> +	if (val & BIT(2))
> +		return;
> +
> +	on_each_cpu(__fix_erratum_688, NULL, 0);

Any objections to me adding a printk message that we applied a fix?

	pr_info("x86/cpu/AMD: CPU erratum 688 worked around\n");

or so?

That would also create some pressure for customers to prod manufacturers to prod 
BIOS makers to fix the erratum in a BIOS update or so.

Plus, in the unlikely event that the workaround was not applied due to some other 
erratum, or the erratum was mis-documented, we'd eventually discover that as well.

Thanks,

	Ingo

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] x86/AMD: Apply Erratum 688 fix when BIOS doesn't
  2017-10-22 11:04 ` Ingo Molnar
@ 2017-10-22 11:12   ` Borislav Petkov
  2017-10-22 12:14     ` Ingo Molnar
  0 siblings, 1 reply; 7+ messages in thread
From: Borislav Petkov @ 2017-10-22 11:12 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: X86 ML, Sherry Hurwitz, Yazen Ghannam, mirh, LKML

On Sun, Oct 22, 2017 at 01:04:38PM +0200, Ingo Molnar wrote:
> Any objections to me adding a printk message that we applied a fix?
> 
> 	pr_info("x86/cpu/AMD: CPU erratum 688 worked around\n");
> 
> or so?
> 
> That would also create some pressure for customers to prod manufacturers to prod 
> BIOS makers to fix the erratum in a BIOS update or so.

Adding that would be purely useless because F14h is long out of
production AFAIK. Especially those earlier models.

So getting new BIOS for those is less likely than winning the lottery.
IOW, this is, more or less, an after-the-fact fix for the remaining
machines out there.

Note that I didn't even use a bug flag because it would simply be a
waste.

Frankly, this fix could simply be a userspace script setting those two
MSR bits but then communicating it to people would be a nightmare, thus
the kernel fix.

Thx.

-- 
Regards/Gruss,
    Boris.

Good mailing practices for 400: avoid top-posting and trim the reply.

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] x86/AMD: Apply Erratum 688 fix when BIOS doesn't
  2017-10-22 11:12   ` Borislav Petkov
@ 2017-10-22 12:14     ` Ingo Molnar
  0 siblings, 0 replies; 7+ messages in thread
From: Ingo Molnar @ 2017-10-22 12:14 UTC (permalink / raw)
  To: Borislav Petkov; +Cc: X86 ML, Sherry Hurwitz, Yazen Ghannam, mirh, LKML


* Borislav Petkov <bp@alien8.de> wrote:

> On Sun, Oct 22, 2017 at 01:04:38PM +0200, Ingo Molnar wrote:
> > Any objections to me adding a printk message that we applied a fix?
> > 
> > 	pr_info("x86/cpu/AMD: CPU erratum 688 worked around\n");
> > 
> > or so?
> > 
> > That would also create some pressure for customers to prod manufacturers to prod 
> > BIOS makers to fix the erratum in a BIOS update or so.
> 
> Adding that would be purely useless because F14h is long out of
> production AFAIK. Especially those earlier models.

I've still added it, just out of principle - every time the kernel modifies low 
level state in a rare fashion we should advertise it:

	pr_info("x86/cpu/AMD: CPU erratum 688 worked around\n");

if it doesn't matter (which is likely as you say) then the message won't matter 
really.

Thanks,

	Ingo

^ permalink raw reply	[flat|nested] 7+ messages in thread

* [tip:x86/urgent] x86/cpu/AMD: Apply the Erratum 688 fix when the BIOS doesn't
  2017-10-22 10:47 [PATCH] x86/AMD: Apply Erratum 688 fix when BIOS doesn't Borislav Petkov
  2017-10-22 11:04 ` Ingo Molnar
@ 2017-10-22 12:16 ` tip-bot for Borislav Petkov
  2017-10-22 13:04   ` Peter Zijlstra
  1 sibling, 1 reply; 7+ messages in thread
From: tip-bot for Borislav Petkov @ 2017-10-22 12:16 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: hpa, peterz, torvalds, tglx, mingo, Yazen.Ghannam, mirh,
	sherry.hurwitz, stable, bp, linux-kernel

Commit-ID:  bfc1168de949cd3e9ca18c3480b5085deff1ea7c
Gitweb:     https://git.kernel.org/tip/bfc1168de949cd3e9ca18c3480b5085deff1ea7c
Author:     Borislav Petkov <bp@suse.de>
AuthorDate: Sun, 22 Oct 2017 12:47:31 +0200
Committer:  Ingo Molnar <mingo@kernel.org>
CommitDate: Sun, 22 Oct 2017 13:06:02 +0200

x86/cpu/AMD: Apply the Erratum 688 fix when the BIOS doesn't

Some F14h machines have an erratum which, "under a highly specific
and detailed set of internal timing conditions" can lead to skipping
instructions and RIP corruption.

Add the fix for those machines when their BIOS doesn't apply it or
there simply isn't a BIOS update for them.

Tested-by: <mirh@protonmail.ch>
Signed-off-by: Borislav Petkov <bp@suse.de>
Cc: <stable@vger.kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Sherry Hurwitz <sherry.hurwitz@amd.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Yazen Ghannam <Yazen.Ghannam@amd.com>
Link: http://lkml.kernel.org/r/20171022104731.28249-1-bp@alien8.de
Link: https://bugzilla.kernel.org/show_bug.cgi?id=197285
[ Added pr_info() that we activated the workaround. ]
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/amd_nb.c | 41 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 41 insertions(+)

diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c
index 458da85..6db28f1 100644
--- a/arch/x86/kernel/amd_nb.c
+++ b/arch/x86/kernel/amd_nb.c
@@ -27,6 +27,8 @@ static const struct pci_device_id amd_root_ids[] = {
 	{}
 };
 
+#define PCI_DEVICE_ID_AMD_CNB17H_F4     0x1704
+
 const struct pci_device_id amd_nb_misc_ids[] = {
 	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB_MISC) },
 	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_MISC) },
@@ -37,6 +39,7 @@ const struct pci_device_id amd_nb_misc_ids[] = {
 	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_NB_F3) },
 	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_M30H_NB_F3) },
 	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_DF_F3) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CNB17H_F3) },
 	{}
 };
 EXPORT_SYMBOL_GPL(amd_nb_misc_ids);
@@ -48,6 +51,7 @@ static const struct pci_device_id amd_nb_link_ids[] = {
 	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_NB_F4) },
 	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_M30H_NB_F4) },
 	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_DF_F4) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CNB17H_F4) },
 	{}
 };
 
@@ -402,11 +406,48 @@ void amd_flush_garts(void)
 }
 EXPORT_SYMBOL_GPL(amd_flush_garts);
 
+static void __fix_erratum_688(void *info)
+{
+#define MSR_AMD64_IC_CFG 0xC0011021
+
+	msr_set_bit(MSR_AMD64_IC_CFG, 3);
+	msr_set_bit(MSR_AMD64_IC_CFG, 14);
+}
+
+/* Apply erratum 688 fix so machines without a BIOS fix work. */
+static __init void fix_erratum_688(void)
+{
+	struct pci_dev *F4;
+	u32 val;
+
+	if (boot_cpu_data.x86 != 0x14)
+		return;
+
+	if (!amd_northbridges.num)
+		return;
+
+	F4 = node_to_amd_nb(0)->link;
+	if (!F4)
+		return;
+
+	if (pci_read_config_dword(F4, 0x164, &val))
+		return;
+
+	if (val & BIT(2))
+		return;
+
+	on_each_cpu(__fix_erratum_688, NULL, 0);
+
+	pr_info("x86/cpu/AMD: CPU erratum 688 worked around\n");
+}
+
 static __init int init_amd_nbs(void)
 {
 	amd_cache_northbridges();
 	amd_cache_gart();
 
+	fix_erratum_688();
+
 	return 0;
 }
 

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [tip:x86/urgent] x86/cpu/AMD: Apply the Erratum 688 fix when the BIOS doesn't
  2017-10-22 12:16 ` [tip:x86/urgent] x86/cpu/AMD: Apply the Erratum 688 fix when the " tip-bot for Borislav Petkov
@ 2017-10-22 13:04   ` Peter Zijlstra
  2017-10-22 14:01     ` Boris Petkov
  0 siblings, 1 reply; 7+ messages in thread
From: Peter Zijlstra @ 2017-10-22 13:04 UTC (permalink / raw)
  To: hpa, torvalds, tglx, mingo, Yazen.Ghannam, mirh, sherry.hurwitz,
	bp, stable, linux-kernel
  Cc: linux-tip-commits

On Sun, Oct 22, 2017 at 05:16:29AM -0700, tip-bot for Borislav Petkov wrote:
> +static void __fix_erratum_688(void *info)
> +{
> +#define MSR_AMD64_IC_CFG 0xC0011021
> +
> +	msr_set_bit(MSR_AMD64_IC_CFG, 3);
> +	msr_set_bit(MSR_AMD64_IC_CFG, 14);

I realize this is an erratum workaround, but would it be too much to
ask for a small comment explaining the magic values?

> +}
> +
> +/* Apply erratum 688 fix so machines without a BIOS fix work. */
> +static __init void fix_erratum_688(void)
> +{
> +	struct pci_dev *F4;
> +	u32 val;
> +
> +	if (boot_cpu_data.x86 != 0x14)
> +		return;
> +
> +	if (!amd_northbridges.num)
> +		return;
> +
> +	F4 = node_to_amd_nb(0)->link;
> +	if (!F4)
> +		return;
> +
> +	if (pci_read_config_dword(F4, 0x164, &val))
> +		return;
> +
> +	if (val & BIT(2))
> +		return;
> +
> +	on_each_cpu(__fix_erratum_688, NULL, 0);
> +
> +	pr_info("x86/cpu/AMD: CPU erratum 688 worked around\n");

Except for all CPUs that were not online at this point in time... So
suppose I boot with a limited number of CPUs and then later bring up the
rest, bad things happen.

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [tip:x86/urgent] x86/cpu/AMD: Apply the Erratum 688 fix when the BIOS doesn't
  2017-10-22 13:04   ` Peter Zijlstra
@ 2017-10-22 14:01     ` Boris Petkov
  0 siblings, 0 replies; 7+ messages in thread
From: Boris Petkov @ 2017-10-22 14:01 UTC (permalink / raw)
  To: Peter Zijlstra, hpa, torvalds, tglx, mingo, Yazen.Ghannam, mirh,
	sherry.hurwitz, stable, linux-kernel
  Cc: linux-tip-commits

On October 22, 2017 3:04:29 PM GMT+02:00, Peter Zijlstra <peterz@infradead.org> wrote:
>I realize this is an erratum workaround, but would it be too much to
>ask for a small comment explaining the magic values?

Revision guide doesn't state what those bits are. By the looks of it, they could be some sort of chicken bits turning off something.

>Except for all CPUs that were not online at this point in time... So
>suppose I boot with a limited number of CPUs and then later bring up
>the
>rest, bad things happen.

Well, those are small laptops and they're 1 or 2 CPUs tops and you really really really have to try to shoot yourself in the foot...


-- 
Sent from a small device: formatting sux and brevity is inevitable. 

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2017-10-22 14:59 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-10-22 10:47 [PATCH] x86/AMD: Apply Erratum 688 fix when BIOS doesn't Borislav Petkov
2017-10-22 11:04 ` Ingo Molnar
2017-10-22 11:12   ` Borislav Petkov
2017-10-22 12:14     ` Ingo Molnar
2017-10-22 12:16 ` [tip:x86/urgent] x86/cpu/AMD: Apply the Erratum 688 fix when the " tip-bot for Borislav Petkov
2017-10-22 13:04   ` Peter Zijlstra
2017-10-22 14:01     ` Boris Petkov

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).