* [PATCH v2 1/2] x86/AMD: collect checking for bugs in a single function
2022-02-24 13:23 [PATCH v2 0/2] x86: correct fencing around CLFLUSH (+some tidying) Jan Beulich
@ 2022-02-24 13:24 ` Jan Beulich
2022-02-24 13:24 ` [PATCH v2 2/2] x86: correct fencing around CLFLUSH Jan Beulich
2022-04-07 6:57 ` Ping: [PATCH v2 0/2] x86: correct fencing around CLFLUSH (+some tidying) Jan Beulich
2 siblings, 0 replies; 4+ messages in thread
From: Jan Beulich @ 2022-02-24 13:24 UTC (permalink / raw)
To: xen-devel; +Cc: Andrew Cooper, Wei Liu, Roger Pau Monné
Besides keeping things centralized and reducing (by folding) a few
conditionals, this also allows this helper to be put in .init.text.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
v2: New.
--- a/xen/arch/x86/cpu/amd.c
+++ b/xen/arch/x86/cpu/amd.c
@@ -744,6 +744,23 @@ void __init detect_zen2_null_seg_behavio
}
+static void __init noinline detect_bugs(const struct cpuinfo_x86 *c)
+{
+ /*
+ * Older AMD CPUs don't save/load FOP/FIP/FDP unless an FPU exception
+ * is pending. Xen works around this at (F)XRSTOR time.
+ */
+ if (!cpu_has(c, X86_FEATURE_RSTR_FP_ERR_PTRS))
+ setup_force_cpu_cap(X86_BUG_FPU_PTRS);
+
+ /*
+ * AMD CPUs before Zen2 don't clear segment bases/limits when loading
+ * a NULL selector.
+ */
+ if (!cpu_has_nscb)
+ setup_force_cpu_cap(X86_BUG_NULL_SEG);
+}
+
static void cf_check init_amd(struct cpuinfo_x86 *c)
{
u32 l, h;
@@ -781,13 +805,6 @@ static void init_amd(struct cpuinfo_x86
wrmsr_amd_safe(0xc001100d, l, h & ~1);
}
- /*
- * Older AMD CPUs don't save/load FOP/FIP/FDP unless an FPU exception
- * is pending. Xen works around this at (F)XRSTOR time.
- */
- if (c == &boot_cpu_data && !cpu_has(c, X86_FEATURE_RSTR_FP_ERR_PTRS))
- setup_force_cpu_cap(X86_BUG_FPU_PTRS);
-
if (c->x86 == 0x0f || c->x86 == 0x11)
/* Always dispatch serialising on this hardare. */
__set_bit(X86_FEATURE_LFENCE_DISPATCH, c->x86_capability);
@@ -796,17 +813,13 @@ static void init_amd(struct cpuinfo_x86
amd_init_ssbd(c);
- /* Probe for NSCB on Zen2 CPUs when not virtualised */
- if (!cpu_has_hypervisor && !cpu_has_nscb && c == &boot_cpu_data &&
- c->x86 == 0x17)
- detect_zen2_null_seg_behaviour();
+ if (c == &boot_cpu_data) {
+ /* Probe for NSCB on Zen2 CPUs when not virtualised */
+ if (!cpu_has_hypervisor && !cpu_has_nscb && c->x86 == 0x17)
+ detect_zen2_null_seg_behaviour();
- /*
- * AMD CPUs before Zen2 don't clear segment bases/limits when loading
- * a NULL selector.
- */
- if (c == &boot_cpu_data && !cpu_has_nscb)
- setup_force_cpu_cap(X86_BUG_NULL_SEG);
+ detect_bugs(c);
+ }
/* MFENCE stops RDTSC speculation */
if (!cpu_has_lfence_dispatch)
^ permalink raw reply [flat|nested] 4+ messages in thread
* [PATCH v2 2/2] x86: correct fencing around CLFLUSH
2022-02-24 13:23 [PATCH v2 0/2] x86: correct fencing around CLFLUSH (+some tidying) Jan Beulich
2022-02-24 13:24 ` [PATCH v2 1/2] x86/AMD: collect checking for bugs in a single function Jan Beulich
@ 2022-02-24 13:24 ` Jan Beulich
2022-04-07 6:57 ` Ping: [PATCH v2 0/2] x86: correct fencing around CLFLUSH (+some tidying) Jan Beulich
2 siblings, 0 replies; 4+ messages in thread
From: Jan Beulich @ 2022-02-24 13:24 UTC (permalink / raw)
To: xen-devel; +Cc: Andrew Cooper, Wei Liu, Roger Pau Monné
As noted in the context of 3330013e6739 ("VT-d / x86: re-arrange cache
syncing"): While cache_writeback() has the SFENCE on the correct side of
CLFLUSHOPT, flush_area_local() doesn't. While I can't prove it due to
lacking a copy of the old SDM version, I can only assume this placement
was a result of what had been described there originally. In any event
recent versions of the SDM have been telling us otherwise.
For AMD the situation is more complex: MFENCE is needed ahead of and/or
after CLFLUSH when the CPU doesn't also support CLFLUSHOPT. (It's "and"
in the flush_area_local() case, as we cannot know what the caller's
needs are. For cache_writeback() fencing ahead of the flush is
sufficient.)
Fixes: 623c720fc8da3 ("x86: use CLFLUSHOPT when available")
Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
TBD: I'd be okay with not touching cache_writeback(), for only being
used by VT-d right now.
---
v2: Mark AMD behavior as a bug rather than a feature. Retain quotes.
--- a/xen/arch/x86/cpu/amd.c
+++ b/xen/arch/x86/cpu/amd.c
@@ -761,6 +761,13 @@ static void __init noinline detect_bugs(
*/
if (!cpu_has_nscb)
setup_force_cpu_cap(X86_BUG_NULL_SEG);
+
+ /*
+ * AMD CPUs not supporting CLFLUSHOPT require MFENCE to serialize
+ * CLFLUSH against other memory accesses.
+ */
+ if (!cpu_has_clflushopt)
+ setup_force_cpu_cap(X86_BUG_CLFLUSH_MFENCE);
}
static void init_amd(struct cpuinfo_x86 *c)
--- a/xen/arch/x86/flushtlb.c
+++ b/xen/arch/x86/flushtlb.c
@@ -245,12 +245,15 @@ unsigned int flush_area_local(const void
c->x86_clflush_size && c->x86_cache_size && sz &&
((sz >> 10) < c->x86_cache_size) )
{
- alternative("", "sfence", X86_FEATURE_CLFLUSHOPT);
+ alternative("", "mfence", X86_BUG_CLFLUSH_MFENCE);
for ( i = 0; i < sz; i += c->x86_clflush_size )
alternative_input("ds; clflush %0",
"data16 clflush %0", /* clflushopt */
X86_FEATURE_CLFLUSHOPT,
"m" (((const char *)va)[i]));
+ alternative_2("",
+ "sfence", X86_FEATURE_CLFLUSHOPT,
+ "mfence", X86_BUG_CLFLUSH_MFENCE);
flags &= ~FLUSH_CACHE;
}
else
@@ -274,6 +277,8 @@ void cache_writeback(const void *addr, u
unsigned int clflush_size = current_cpu_data.x86_clflush_size ?: 16;
const void *end = addr + size;
+ alternative("", "mfence", X86_BUG_CLFLUSH_MFENCE);
+
addr -= (unsigned long)addr & (clflush_size - 1);
for ( ; addr < end; addr += clflush_size )
{
--- a/xen/arch/x86/include/asm/cpufeatures.h
+++ b/xen/arch/x86/include/asm/cpufeatures.h
@@ -47,6 +47,7 @@ XEN_CPUFEATURE(RET_SIMPLE, X86_SY
#define X86_BUG_FPU_PTRS X86_BUG( 0) /* (F)X{SAVE,RSTOR} doesn't save/restore FOP/FIP/FDP. */
#define X86_BUG_NULL_SEG X86_BUG( 1) /* NULL-ing a selector preserves the base and limit. */
+#define X86_BUG_CLFLUSH_MFENCE X86_BUG( 2) /* MFENCE needed to serialize CLFLUSH */
/* Total number of capability words, inc synth and bug words. */
#define NCAPINTS (FSCAPINTS + X86_NR_SYNTH + X86_NR_BUG) /* N 32-bit words worth of info */
^ permalink raw reply [flat|nested] 4+ messages in thread