* [PATCH v2 0/3] x86: RSBA and RRSBA handling
@ 2023-06-01 14:48 Andrew Cooper
  2023-06-01 14:48 ` [PATCH v2 1/3] x86/spec-ctrl: Rename retpoline_safe() to retpoline_calculations() Andrew Cooper
                   ` (2 more replies)
  0 siblings, 3 replies; 15+ messages in thread
From: Andrew Cooper @ 2023-06-01 14:48 UTC (permalink / raw)
  To: Xen-devel; +Cc: Andrew Cooper, Jan Beulich, Roger Pau Monné, Wei Liu

This series deals with the handling of the RSBA and RRSBA bits across all
parts, and the mistakes encountered in various microcode versions.

There are substantial changes from v1, following a clarification from Intel.
Importantly, CPUs are not expected to enumerate both RSBA and RRSBA, therefore
a VM should not see both either.
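
To make the end result concrete, the guest-visible derivation added in patch 3
boils down to roughly the following (condensed from that patch, not a literal
excerpt):

    /* Host suffers some form of RSBA, and the guest can see MSR_ARCH_CAPS? */
    if ( test_bit(X86_FEATURE_ARCH_CAPS, fs) &&
         (cpu_has_rsba || cpu_has_rrsba) )
        /* Show exactly one of the two bits, chosen by the visibility of eIBRS. */
        __set_bit(test_bit(X86_FEATURE_EIBRS, fs) ? X86_FEATURE_RRSBA
                                                  : X86_FEATURE_RSBA, fs);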

Andrew Cooper (3):
  x86/spec-ctrl: Rename retpoline_safe() to retpoline_calculations()
  x86/spec-ctrl: Fix up the RSBA/RRSBA bits as appropriate
  x86/cpu-policy: Derive RSBA/RRSBA for guest policies

 xen/arch/x86/cpu-policy.c                   |  53 ++++++++
 xen/arch/x86/include/asm/cpufeature.h       |   1 +
 xen/arch/x86/spec_ctrl.c                    | 131 +++++++++++++++++---
 xen/include/public/arch-x86/cpufeatureset.h |   4 +-
 xen/tools/gen-cpuid.py                      |   5 +-
 5 files changed, 172 insertions(+), 22 deletions(-)

-- 
2.30.2




* [PATCH v2 1/3] x86/spec-ctrl: Rename retpoline_safe() to retpoline_calculations()
  2023-06-01 14:48 [PATCH v2 0/3] x86: RSBA and RRSBA handling Andrew Cooper
@ 2023-06-01 14:48 ` Andrew Cooper
  2023-06-02  9:37   ` Jan Beulich
  2023-06-01 14:48 ` [PATCH v2 2/3] x86/spec-ctrl: Fix up the RSBA/RRSBA bits as appropriate Andrew Cooper
  2023-06-01 14:48 ` [PATCH v2 3/3] x86/cpu-policy: Derive RSBA/RRSBA for guest policies Andrew Cooper
  2 siblings, 1 reply; 15+ messages in thread
From: Andrew Cooper @ 2023-06-01 14:48 UTC (permalink / raw)
  To: Xen-devel; +Cc: Andrew Cooper, Jan Beulich, Roger Pau Monné, Wei Liu

This is prep work, split out to simplify the diff on the following change.

 * Rename to retpoline_calculations(), and call unconditionally.  It is
   shortly going to synthesise missing enumerations required for guest safety.
 * For the model check switch statement, store the result in a variable and
   break rather than returning directly.

No functional change.

Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
---
CC: Jan Beulich <JBeulich@suse.com>
CC: Roger Pau Monné <roger.pau@citrix.com>
CC: Wei Liu <wl@xen.org>

v2:
 * Extend the 'safe' variable to the entire switch statement.
---
 xen/arch/x86/spec_ctrl.c | 41 +++++++++++++++++++++++++---------------
 1 file changed, 26 insertions(+), 15 deletions(-)

diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c
index cd5ea6aa52d9..daee61900afa 100644
--- a/xen/arch/x86/spec_ctrl.c
+++ b/xen/arch/x86/spec_ctrl.c
@@ -580,9 +580,10 @@ static bool __init check_smt_enabled(void)
 }
 
 /* Calculate whether Retpoline is known-safe on this CPU. */
-static bool __init retpoline_safe(void)
+static bool __init retpoline_calculations(void)
 {
     unsigned int ucode_rev = this_cpu(cpu_sig).rev;
+    bool safe = false;
 
     if ( boot_cpu_data.x86_vendor & (X86_VENDOR_AMD | X86_VENDOR_HYGON) )
         return true;
@@ -620,29 +621,31 @@ static bool __init retpoline_safe(void)
     case 0x3f: /* Haswell EX/EP */
     case 0x45: /* Haswell D */
     case 0x46: /* Haswell H */
-        return true;
+        safe = true;
+        break;
 
         /*
          * Broadwell processors are retpoline-safe after specific microcode
          * versions.
          */
     case 0x3d: /* Broadwell */
-        return ucode_rev >= 0x2a;
+        safe = ucode_rev >= 0x2a;      break;
     case 0x47: /* Broadwell H */
-        return ucode_rev >= 0x1d;
+        safe = ucode_rev >= 0x1d;      break;
     case 0x4f: /* Broadwell EP/EX */
-        return ucode_rev >= 0xb000021;
+        safe = ucode_rev >= 0xb000021; break;
     case 0x56: /* Broadwell D */
         switch ( boot_cpu_data.x86_mask )
         {
-        case 2:  return ucode_rev >= 0x15;
-        case 3:  return ucode_rev >= 0x7000012;
-        case 4:  return ucode_rev >= 0xf000011;
-        case 5:  return ucode_rev >= 0xe000009;
+        case 2:  safe = ucode_rev >= 0x15;      break;
+        case 3:  safe = ucode_rev >= 0x7000012; break;
+        case 4:  safe = ucode_rev >= 0xf000011; break;
+        case 5:  safe = ucode_rev >= 0xe000009; break;
         default:
             printk("Unrecognised CPU stepping %#x - assuming not reptpoline safe\n",
                    boot_cpu_data.x86_mask);
-            return false;
+            safe = false;
+            break;
         }
         break;
 
@@ -656,7 +659,8 @@ static bool __init retpoline_safe(void)
     case 0x67: /* Cannonlake? */
     case 0x8e: /* Kabylake M */
     case 0x9e: /* Kabylake D */
-        return false;
+        safe = false;
+        break;
 
         /*
          * Atom processors before Goldmont Plus/Gemini Lake are retpoline-safe.
@@ -675,13 +679,17 @@ static bool __init retpoline_safe(void)
     case 0x5c: /* Goldmont */
     case 0x5f: /* Denverton */
     case 0x85: /* Knights Mill */
-        return true;
+        safe = true;
+        break;
 
     default:
         printk("Unrecognised CPU model %#x - assuming not reptpoline safe\n",
                boot_cpu_data.x86_model);
-        return false;
+        safe = false;
+        break;
     }
+
+    return safe;
 }
 
 /*
@@ -1114,7 +1122,7 @@ void __init init_speculation_mitigations(void)
 {
     enum ind_thunk thunk = THUNK_DEFAULT;
     bool has_spec_ctrl, ibrs = false, hw_smt_enabled;
-    bool cpu_has_bug_taa;
+    bool cpu_has_bug_taa, retpoline_safe;
 
     hw_smt_enabled = check_smt_enabled();
 
@@ -1140,6 +1148,9 @@ void __init init_speculation_mitigations(void)
             thunk = THUNK_JMP;
     }
 
+    /* Determine if retpoline is safe on this CPU. */
+    retpoline_safe = retpoline_calculations();
+
     /*
      * Has the user specified any custom BTI mitigations?  If so, follow their
      * instructions exactly and disable all heuristics.
@@ -1161,7 +1172,7 @@ void __init init_speculation_mitigations(void)
              * On all hardware, we'd like to use retpoline in preference to
              * IBRS, but only if it is safe on this hardware.
              */
-            if ( retpoline_safe() )
+            if ( retpoline_safe )
                 thunk = THUNK_RETPOLINE;
             else if ( has_spec_ctrl )
                 ibrs = true;
-- 
2.30.2




* [PATCH v2 2/3] x86/spec-ctrl: Fix up the RSBA/RRSBA bits as appropriate
  2023-06-01 14:48 [PATCH v2 0/3] x86: RSBA and RRSBA handling Andrew Cooper
  2023-06-01 14:48 ` [PATCH v2 1/3] x86/spec-ctrl: Rename retpoline_safe() to retpoline_calculations() Andrew Cooper
@ 2023-06-01 14:48 ` Andrew Cooper
  2023-06-02  9:56   ` Jan Beulich
  2023-06-01 14:48 ` [PATCH v2 3/3] x86/cpu-policy: Derive RSBA/RRSBA for guest policies Andrew Cooper
  2 siblings, 1 reply; 15+ messages in thread
From: Andrew Cooper @ 2023-06-01 14:48 UTC (permalink / raw)
  To: Xen-devel; +Cc: Andrew Cooper, Jan Beulich, Roger Pau Monné, Wei Liu

In order to level a VM safely for migration, the toolstack needs to know the
RSBA/RRSBA properties of the CPU, whether or not they happen to be enumerated.

See the code comment for details.
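
Condensed, the resulting flow of retpoline_calculations() is roughly as
follows (diagnostics and the model/microcode checks elided):

    if ( cpu_has_eibrs )
    {
        /* Pre-Aug 2023 microcode frequently doesn't enumerate RRSBA; fix up. */
        if ( !cpu_has_rrsba )
            setup_force_cpu_cap(X86_FEATURE_RRSBA);
        return false;                    /* Not retpoline-safe. */
    }

    if ( cpu_has_rsba )
        return false;                    /* Not retpoline-safe. */

    if ( cpu_has_arch_caps )
        return true;                     /* Trust the absence of RSBA/RRSBA. */

    safe = /* model/microcode checks, unchanged from before */;

    if ( !safe )
        setup_force_cpu_cap(X86_FEATURE_RSBA);

    return safe;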

Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
---
CC: Jan Beulich <JBeulich@suse.com>
CC: Roger Pau Monné <roger.pau@citrix.com>
CC: Wei Liu <wl@xen.org>

v2:
 * Rewrite almost from scratch.
---
 xen/arch/x86/include/asm/cpufeature.h |  1 +
 xen/arch/x86/spec_ctrl.c              | 92 +++++++++++++++++++++++++--
 2 files changed, 88 insertions(+), 5 deletions(-)

diff --git a/xen/arch/x86/include/asm/cpufeature.h b/xen/arch/x86/include/asm/cpufeature.h
index ace31e3b1f1a..e2cb8f3cc728 100644
--- a/xen/arch/x86/include/asm/cpufeature.h
+++ b/xen/arch/x86/include/asm/cpufeature.h
@@ -193,6 +193,7 @@ static inline bool boot_cpu_has(unsigned int feat)
 #define cpu_has_tsx_ctrl        boot_cpu_has(X86_FEATURE_TSX_CTRL)
 #define cpu_has_taa_no          boot_cpu_has(X86_FEATURE_TAA_NO)
 #define cpu_has_fb_clear        boot_cpu_has(X86_FEATURE_FB_CLEAR)
+#define cpu_has_rrsba           boot_cpu_has(X86_FEATURE_RRSBA)
 
 /* Synthesized. */
 #define cpu_has_arch_perfmon    boot_cpu_has(X86_FEATURE_ARCH_PERFMON)
diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c
index daee61900afa..29ed410da47a 100644
--- a/xen/arch/x86/spec_ctrl.c
+++ b/xen/arch/x86/spec_ctrl.c
@@ -579,7 +579,10 @@ static bool __init check_smt_enabled(void)
     return false;
 }
 
-/* Calculate whether Retpoline is known-safe on this CPU. */
+/*
+ * Calculate whether Retpoline is known-safe on this CPU.  Fix up the
+ * RSBA/RRSBA bits as necessary.
+ */
 static bool __init retpoline_calculations(void)
 {
     unsigned int ucode_rev = this_cpu(cpu_sig).rev;
@@ -593,15 +596,85 @@ static bool __init retpoline_calculations(void)
         return false;
 
     /*
-     * RSBA may be set by a hypervisor to indicate that we may move to a
-     * processor which isn't retpoline-safe.
+     * The meaning of the RSBA and RRSBA bits have evolved over time.  The
+     * agreed upon meaning at the time of writing (May 2023) is thus:
+     *
+     * - RSBA (RSB Alternative) means that an RSB may fall back to an
+     *   alternative predictor on underflow.  Skylake uarch and later all have
+     *   this property.  Broadwell too, when running microcode versions prior
+     *   to Jan 2018.
+     *
+     * - All eIBRS-capable processors suffer RSBA, but eIBRS also introduces
+     *   tagging of predictions with the mode in which they were learned.  So
+     *   when eIBRS is active, RSBA becomes RRSBA (Restricted RSBA).
+     *
+     * - CPUs are not expected to enumerate both RSBA and RRSBA.
+     *
+     * Some parts (Broadwell) are not expected to ever enumerate this
+     * behaviour directly.  Other parts have differing enumeration with
+     * microcode version.  Fix up Xen's idea, so we can advertise them safely
+     * to guests, and so toolstacks can level a VM safety for migration.
+     *
+     * The following states exist:
+     *
+     * |   | RSBA | EIBRS | RRSBA | Notes              | Action        |
+     * |---+------+-------+-------+--------------------+---------------|
+     * | 1 |    0 |     0 |     0 | OK (older parts)   | Maybe +RSBA   |
+     * | 2 |    0 |     0 |     1 | Broken             | +RSBA, -RRSBA |
+     * | 3 |    0 |     1 |     0 | OK (pre-Aug ucode) | +RRSBA        |
+     * | 4 |    0 |     1 |     1 | OK                 |               |
+     * | 5 |    1 |     0 |     0 | OK                 |               |
+     * | 6 |    1 |     0 |     1 | Broken             | -RRSBA        |
+     * | 7 |    1 |     1 |     0 | Broken             | -RSBA, +RRSBA |
+     * | 8 |    1 |     1 |     1 | Broken             | -RSBA         |
      *
+     * However, we doesn't need perfect adherence to the spec.  Identify the
+     * broken cases (so we stand a chance of spotting violated assumptions),
+     * and fix up Rows 1 and 3 so Xen can use RSBA || RRSBA to identify
+     * "alternative predictors potentially in use".
+     */
+    if ( cpu_has_eibrs ? cpu_has_rsba  /* Rows 7, 8 */
+                       : cpu_has_rrsba /* Rows 2, 6 */ )
+        printk(XENLOG_ERR
+               "FIRMWARE BUG: CPU %02x-%02x-%02x, ucode 0x%08x: RSBA %u, EIBRS %u, RRSBA %u\n",
+               boot_cpu_data.x86, boot_cpu_data.x86_model,
+               boot_cpu_data.x86_mask, ucode_rev,
+               cpu_has_rsba, cpu_has_eibrs, cpu_has_rrsba);
+
+    /*
      * Processors offering Enhanced IBRS are not guarenteed to be
      * repoline-safe.
      */
-    if ( cpu_has_rsba || cpu_has_eibrs )
+    if ( cpu_has_eibrs )
+    {
+        /*
+         * Prior to the August 2023 microcode, many eIBRS-capable parts did
+         * not enumerate RRSBA.
+         */
+        if ( !cpu_has_rrsba )
+            setup_force_cpu_cap(X86_FEATURE_RRSBA);
+
+        return false;
+    }
+
+    /*
+     * RSBA is explicitly enumerated in some cases, but may also be set by a
+     * hypervisor to indicate that we may move to a processor which isn't
+     * retpoline-safe.
+     */
+    if ( cpu_has_rsba )
         return false;
 
+    /*
+     * At this point, we've filtered all the legal RSBA || RRSBA cases (or the
+     * known non-ideal cases).  If ARCH_CAPS is visible, trust the absence of
+     * RSBA || RRSBA.  There's no known microcode which advertises ARCH_CAPS
+     * without RSBA or EIBRS, and if we're virtualised we can't rely the model
+     * check anyway.
+     */
+    if ( cpu_has_arch_caps )
+        return true;
+
     switch ( boot_cpu_data.x86_model )
     {
     case 0x17: /* Penryn */
@@ -689,6 +762,15 @@ static bool __init retpoline_calculations(void)
         break;
     }
 
+    if ( !safe )
+    {
+        /*
+         * Note: the eIBRS-capable parts are filtered out earlier, so the
+         * remainder here are the ones which suffer only RSBA behaviour.
+         */
+        setup_force_cpu_cap(X86_FEATURE_RSBA);
+    }
+
     return safe;
 }
 
@@ -1148,7 +1230,7 @@ void __init init_speculation_mitigations(void)
             thunk = THUNK_JMP;
     }
 
-    /* Determine if retpoline is safe on this CPU. */
+    /* Determine if retpoline is safe on this CPU.  Fix up RSBA/RRSBA enumerations. */
     retpoline_safe = retpoline_calculations();
 
     /*
-- 
2.30.2




* [PATCH v2 3/3] x86/cpu-policy: Derive RSBA/RRSBA for guest policies
  2023-06-01 14:48 [PATCH v2 0/3] x86: RSBA and RRSBA handling Andrew Cooper
  2023-06-01 14:48 ` [PATCH v2 1/3] x86/spec-ctrl: Rename retpoline_safe() to retpoline_calculations() Andrew Cooper
  2023-06-01 14:48 ` [PATCH v2 2/3] x86/spec-ctrl: Fix up the RSBA/RRSBA bits as appropriate Andrew Cooper
@ 2023-06-01 14:48 ` Andrew Cooper
  2023-06-02 10:20   ` Jan Beulich
  2 siblings, 1 reply; 15+ messages in thread
From: Andrew Cooper @ 2023-06-01 14:48 UTC (permalink / raw)
  To: Xen-devel; +Cc: Andrew Cooper, Jan Beulich, Roger Pau Monné, Wei Liu

The RSBA bit, "RSB Alternative", means that the RSB may use alternative
predictors when empty.  From a practical point of view, this means "Retpoline
not safe".

Enhanced IBRS (officially IBRS_ALL in Intel's docs, previously IBRS_ATT) is a
statement that IBRS is implemented in hardware (as opposed to the form
retrofitted to existing CPUs in microcode).

The RRSBA bit, "Restricted-RSBA", is a combination of RSBA, and the eIBRS
property that predictions are tagged with the mode in which they were learnt.
Therefore, it means "when eIBRS is active, the RSB may fall back to
alternative predictors but restricted to the current prediction mode".  As
such, it's a stronger statement than RSBA, but still means "Retpoline not safe".

CPUs are not expected to enumerate both RSBA and RRSBA.

Add feature dependencies for EIBRS and RRSBA.  While technically they're not
linked, absolutely nothing good can of letting the guest see RRSBA without
EIBRS.  Nor can anything good come of a guest seeing EIBRS without IBRSB.
Furthermore, we use this dependency to simplify the max derivation logic.

The max policies get RSBA and RRSBA unconditionally set (with the EIBRS
dependency maybe hiding RRSBA).  We can run any VM, even if it has been told
"somewhere you might run, Retpoline isn't safe".

The default policies are more complicated.  A guest shouldn't see both bits,
but it needs to see one if the current host suffers from any form of RSBA, and
which bit it needs to see depends on whether eIBRS is visible or not.
Therefore, the calculation must be performed after sanitise_featureset().

Finally, apply the same logic in recalculate_cpuid_policy(), as we do for
other safety settings while we're still overhauling the toolstack logic in
this area.

Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
---
CC: Jan Beulich <JBeulich@suse.com>
CC: Roger Pau Monné <roger.pau@citrix.com>
CC: Wei Liu <wl@xen.org>

v2:
 * Expand/adjust the comment for the max features.
 * Rewrite the default feature derivation in light of new information.
 * Fix up in recalculate_cpuid_policy() too.
---
 xen/arch/x86/cpu-policy.c                   | 53 +++++++++++++++++++++
 xen/include/public/arch-x86/cpufeatureset.h |  4 +-
 xen/tools/gen-cpuid.py                      |  5 +-
 3 files changed, 59 insertions(+), 3 deletions(-)

diff --git a/xen/arch/x86/cpu-policy.c b/xen/arch/x86/cpu-policy.c
index ee256ff5a137..f3bcb1ea4101 100644
--- a/xen/arch/x86/cpu-policy.c
+++ b/xen/arch/x86/cpu-policy.c
@@ -423,8 +423,17 @@ static void __init guest_common_max_feature_adjustments(uint32_t *fs)
          * Retpoline not safe)", so these need to be visible to a guest in all
          * cases, even when it's only some other server in the pool which
          * suffers the identified behaviour.
+         *
+         * We can always run any VM which has previously (or will
+         * subsequently) run on hardware where Retpoline is not safe.
+         * Note:
+         *  - The dependency logic may hide RRSBA for other reasons.
+         *  - The max policy does not constitute a sensible configuration to
+         *    run a guest in.
          */
         __set_bit(X86_FEATURE_ARCH_CAPS, fs);
+        __set_bit(X86_FEATURE_RSBA, fs);
+        __set_bit(X86_FEATURE_RRSBA, fs);
     }
 }
 
@@ -532,6 +541,21 @@ static void __init calculate_pv_def_policy(void)
     guest_common_default_feature_adjustments(fs);
 
     sanitise_featureset(fs);
+
+    /*
+     * If the host suffers from RSBA of any form, and the guest can see
+     * MSR_ARCH_CAPS, reflect the appropriate RSBA/RRSBA property to the guest
+     * depending on the visibility of eIBRS.
+     */
+    if ( test_bit(X86_FEATURE_ARCH_CAPS, fs) &&
+         (cpu_has_rsba || cpu_has_rrsba) )
+    {
+        bool eibrs = test_bit(X86_FEATURE_EIBRS, fs);
+
+        __set_bit(eibrs ? X86_FEATURE_RRSBA
+                        : X86_FEATURE_RSBA, fs);
+    }
+
     x86_cpu_featureset_to_policy(fs, p);
     recalculate_xstate(p);
 }
@@ -664,6 +688,21 @@ static void __init calculate_hvm_def_policy(void)
         __set_bit(X86_FEATURE_VIRT_SSBD, fs);
 
     sanitise_featureset(fs);
+
+    /*
+     * If the host suffers from RSBA of any form, and the guest can see
+     * MSR_ARCH_CAPS, reflect the appropriate RSBA/RRSBA property to the guest
+     * depending on the visibility of eIBRS.
+     */
+    if ( test_bit(X86_FEATURE_ARCH_CAPS, fs) &&
+         (cpu_has_rsba || cpu_has_rrsba) )
+    {
+        bool eibrs = test_bit(X86_FEATURE_EIBRS, fs);
+
+        __set_bit(eibrs ? X86_FEATURE_RRSBA
+                        : X86_FEATURE_RSBA, fs);
+    }
+
     x86_cpu_featureset_to_policy(fs, p);
     recalculate_xstate(p);
 }
@@ -786,6 +825,20 @@ void recalculate_cpuid_policy(struct domain *d)
 
     sanitise_featureset(fs);
 
+    /*
+     * If the host suffers from RSBA of any form, and the guest can see
+     * MSR_ARCH_CAPS, reflect the appropriate RSBA/RRSBA property to the guest
+     * depending on the visibility of eIBRS.
+     */
+    if ( test_bit(X86_FEATURE_ARCH_CAPS, fs) &&
+         (cpu_has_rsba || cpu_has_rrsba) )
+    {
+        bool eibrs = test_bit(X86_FEATURE_EIBRS, fs);
+
+        __set_bit(eibrs ? X86_FEATURE_RRSBA
+                        : X86_FEATURE_RSBA, fs);
+    }
+
     /* Fold host's FDP_EXCP_ONLY and NO_FPU_SEL into guest's view. */
     fs[FEATURESET_7b0] &= ~(cpufeat_mask(X86_FEATURE_FDP_EXCP_ONLY) |
                             cpufeat_mask(X86_FEATURE_NO_FPU_SEL));
diff --git a/xen/include/public/arch-x86/cpufeatureset.h b/xen/include/public/arch-x86/cpufeatureset.h
index 4edf9aba7ff6..a0e46138d763 100644
--- a/xen/include/public/arch-x86/cpufeatureset.h
+++ b/xen/include/public/arch-x86/cpufeatureset.h
@@ -311,7 +311,7 @@ XEN_CPUFEATURE(CET_SSS,            15*32+18) /*   CET Supervisor Shadow Stacks s
 /* Intel-defined CPU features, MSR_ARCH_CAPS 0x10a.eax, word 16 */
 XEN_CPUFEATURE(RDCL_NO,            16*32+ 0) /*A  No Rogue Data Cache Load (Meltdown) */
 XEN_CPUFEATURE(EIBRS,              16*32+ 1) /*A  Enhanced IBRS */
-XEN_CPUFEATURE(RSBA,               16*32+ 2) /*!A RSB Alternative (Retpoline not safe) */
+XEN_CPUFEATURE(RSBA,               16*32+ 2) /*!  RSB Alternative (Retpoline not safe) */
 XEN_CPUFEATURE(SKIP_L1DFL,         16*32+ 3) /*   Don't need to flush L1D on VMEntry */
 XEN_CPUFEATURE(INTEL_SSB_NO,       16*32+ 4) /*A  No Speculative Store Bypass */
 XEN_CPUFEATURE(MDS_NO,             16*32+ 5) /*A  No Microarchitectural Data Sampling */
@@ -327,7 +327,7 @@ XEN_CPUFEATURE(FBSDP_NO,           16*32+14) /*A  No Fill Buffer Stale Data Prop
 XEN_CPUFEATURE(PSDP_NO,            16*32+15) /*A  No Primary Stale Data Propagation */
 XEN_CPUFEATURE(FB_CLEAR,           16*32+17) /*A  Fill Buffers cleared by VERW */
 XEN_CPUFEATURE(FB_CLEAR_CTRL,      16*32+18) /*   MSR_OPT_CPU_CTRL.FB_CLEAR_DIS */
-XEN_CPUFEATURE(RRSBA,              16*32+19) /*!A Restricted RSB Alternative */
+XEN_CPUFEATURE(RRSBA,              16*32+19) /*!  Restricted RSB Alternative */
 XEN_CPUFEATURE(BHI_NO,             16*32+20) /*A  No Branch History Injection  */
 XEN_CPUFEATURE(XAPIC_STATUS,       16*32+21) /*   MSR_XAPIC_DISABLE_STATUS */
 XEN_CPUFEATURE(OVRCLK_STATUS,      16*32+23) /*   MSR_OVERCLOCKING_STATUS */
diff --git a/xen/tools/gen-cpuid.py b/xen/tools/gen-cpuid.py
index 973fcc1c64e8..72cf11654ba9 100755
--- a/xen/tools/gen-cpuid.py
+++ b/xen/tools/gen-cpuid.py
@@ -318,7 +318,7 @@ def crunch_numbers(state):
         # IBRSB/IBRS, and we pass this MSR directly to guests.  Treating them
         # as dependent features simplifies Xen's logic, and prevents the guest
         # from seeing implausible configurations.
-        IBRSB: [STIBP, SSBD, INTEL_PSFD],
+        IBRSB: [STIBP, SSBD, INTEL_PSFD, EIBRS],
         IBRS: [AMD_STIBP, AMD_SSBD, PSFD, AUTO_IBRS,
                IBRS_ALWAYS, IBRS_FAST, IBRS_SAME_MODE],
         AMD_STIBP: [STIBP_ALWAYS],
@@ -328,6 +328,9 @@ def crunch_numbers(state):
 
         # The ARCH_CAPS CPUID bit enumerates the availability of the whole register.
         ARCH_CAPS: list(range(RDCL_NO, RDCL_NO + 64)),
+
+        # The behaviour described by RRSBA depends on eIBRS being active.
+        EIBRS: [RRSBA],
     }
 
     deep_features = tuple(sorted(deps.keys()))
-- 
2.30.2




* Re: [PATCH v2 1/3] x86/spec-ctrl: Rename retpoline_safe() to retpoline_calculations()
  2023-06-01 14:48 ` [PATCH v2 1/3] x86/spec-ctrl: Rename retpoline_safe() to retpoline_calculations() Andrew Cooper
@ 2023-06-02  9:37   ` Jan Beulich
  0 siblings, 0 replies; 15+ messages in thread
From: Jan Beulich @ 2023-06-02  9:37 UTC (permalink / raw)
  To: Andrew Cooper; +Cc: Roger Pau Monné, Wei Liu, Xen-devel

On 01.06.2023 16:48, Andrew Cooper wrote:
> This is prep work, split out to simply the diff on the following change.
> 
>  * Rename to retpoline_calculations(), and call unconditionally.  It is
>    shortly going to synthesise missing enumerations required for guest safety.
>  * For the model check switch statement, store the result in a variable and
>    break rather than returning directly.
> 
> No functional change.
> 
> Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>

Reviewed-by: Jan Beulich <jbeulich@suse.com>





* Re: [PATCH v2 2/3] x86/spec-ctrl: Fix up the RSBA/RRSBA bits as appropriate
  2023-06-01 14:48 ` [PATCH v2 2/3] x86/spec-ctrl: Fix up the RSBA/RRSBA bits as appropriate Andrew Cooper
@ 2023-06-02  9:56   ` Jan Beulich
  2023-06-02 13:57     ` Andrew Cooper
  0 siblings, 1 reply; 15+ messages in thread
From: Jan Beulich @ 2023-06-02  9:56 UTC (permalink / raw)
  To: Andrew Cooper; +Cc: Roger Pau Monné, Wei Liu, Xen-devel

On 01.06.2023 16:48, Andrew Cooper wrote:
> @@ -593,15 +596,85 @@ static bool __init retpoline_calculations(void)
>          return false;
>  
>      /*
> -     * RSBA may be set by a hypervisor to indicate that we may move to a
> -     * processor which isn't retpoline-safe.
> +     * The meaning of the RSBA and RRSBA bits have evolved over time.  The
> +     * agreed upon meaning at the time of writing (May 2023) is thus:
> +     *
> +     * - RSBA (RSB Alternative) means that an RSB may fall back to an
> +     *   alternative predictor on underflow.  Skylake uarch and later all have
> +     *   this property.  Broadwell too, when running microcode versions prior
> +     *   to Jan 2018.
> +     *
> +     * - All eIBRS-capable processors suffer RSBA, but eIBRS also introduces
> +     *   tagging of predictions with the mode in which they were learned.  So
> +     *   when eIBRS is active, RSBA becomes RRSBA (Restricted RSBA).
> +     *
> +     * - CPUs are not expected to enumerate both RSBA and RRSBA.
> +     *
> +     * Some parts (Broadwell) are not expected to ever enumerate this
> +     * behaviour directly.  Other parts have differing enumeration with
> +     * microcode version.  Fix up Xen's idea, so we can advertise them safely
> +     * to guests, and so toolstacks can level a VM safety for migration.
> +     *
> +     * The following states exist:
> +     *
> +     * |   | RSBA | EIBRS | RRSBA | Notes              | Action        |
> +     * |---+------+-------+-------+--------------------+---------------|
> +     * | 1 |    0 |     0 |     0 | OK (older parts)   | Maybe +RSBA   |
> +     * | 2 |    0 |     0 |     1 | Broken             | +RSBA, -RRSBA |
> +     * | 3 |    0 |     1 |     0 | OK (pre-Aug ucode) | +RRSBA        |
> +     * | 4 |    0 |     1 |     1 | OK                 |               |
> +     * | 5 |    1 |     0 |     0 | OK                 |               |
> +     * | 6 |    1 |     0 |     1 | Broken             | -RRSBA        |
> +     * | 7 |    1 |     1 |     0 | Broken             | -RSBA, +RRSBA |
> +     * | 8 |    1 |     1 |     1 | Broken             | -RSBA         |
>       *
> +     * However, we doesn't need perfect adherence to the spec.  Identify the

Nit: "don't" or "it"?

> +     * broken cases (so we stand a chance of spotting violated assumptions),
> +     * and fix up Rows 1 and 3 so Xen can use RSBA || RRSBA to identify
> +     * "alternative predictors potentially in use".

Considering that it's rows 2, 6, 7, and 8 which are broken, I find this
comment a little misleading. To me it doesn't become clear whether them
subsequently being left alone (and merely a log message issued) is
intentional.

> +     */
> +    if ( cpu_has_eibrs ? cpu_has_rsba  /* Rows 7, 8 */
> +                       : cpu_has_rrsba /* Rows 2, 6 */ )
> +        printk(XENLOG_ERR
> +               "FIRMWARE BUG: CPU %02x-%02x-%02x, ucode 0x%08x: RSBA %u, EIBRS %u, RRSBA %u\n",
> +               boot_cpu_data.x86, boot_cpu_data.x86_model,
> +               boot_cpu_data.x86_mask, ucode_rev,
> +               cpu_has_rsba, cpu_has_eibrs, cpu_has_rrsba);
> +
> +    /*
>       * Processors offering Enhanced IBRS are not guarenteed to be
>       * repoline-safe.
>       */
> -    if ( cpu_has_rsba || cpu_has_eibrs )
> +    if ( cpu_has_eibrs )
> +    {
> +        /*
> +         * Prior to the August 2023 microcode, many eIBRS-capable parts did
> +         * not enumerate RRSBA.
> +         */
> +        if ( !cpu_has_rrsba )
> +            setup_force_cpu_cap(X86_FEATURE_RRSBA);
> +
> +        return false;
> +    }

No clearing of RSBA in this case? I fear we may end up with misbehavior if
our own records aren't kept consistent with our assumptions. (This then
extends to the "just a log message" above as well.)

Also the inner conditional continues to strike me as odd; could you add
half a sentence to the comment (or description) as to meaning to leave
is_forced_cpu_cap() function correctly (which in turn raises the question
whether - down the road - this is actually going to matter)?

> +    /*
> +     * RSBA is explicitly enumerated in some cases, but may also be set by a
> +     * hypervisor to indicate that we may move to a processor which isn't
> +     * retpoline-safe.
> +     */
> +    if ( cpu_has_rsba )
>          return false;
>  
> +    /*
> +     * At this point, we've filtered all the legal RSBA || RRSBA cases (or the
> +     * known non-ideal cases).  If ARCH_CAPS is visible, trust the absence of
> +     * RSBA || RRSBA.  There's no known microcode which advertises ARCH_CAPS
> +     * without RSBA or EIBRS, and if we're virtualised we can't rely the model
> +     * check anyway.
> +     */

I think "no known" wants further qualification: When IBRSB was first
introduced, EIBRS and RSBA weren't even known about. I also didn't
think all hardware (i.e. sufficiently old one) did get newer ucode
when these started to be known. Possibly you mean "... which wrongly
advertises ..."?

> @@ -689,6 +762,15 @@ static bool __init retpoline_calculations(void)
>          break;
>      }
>  
> +    if ( !safe )
> +    {
> +        /*
> +         * Note: the eIBRS-capable parts are filtered out earlier, so the
> +         * remainder here are the ones which suffer only RSBA behaviour.
> +         */

As I think I had mentioned already, I find "only" here odd, when RSBA
is more severe than RRSBA. Maybe the "only" could move earlier, e.g.
between "are" and "the"? Then again this may be another non-native-
speaker issue of mine ...

Jan



* Re: [PATCH v2 3/3] x86/cpu-policy: Derive RSBA/RRSBA for guest policies
  2023-06-01 14:48 ` [PATCH v2 3/3] x86/cpu-policy: Derive RSBA/RRSBA for guest policies Andrew Cooper
@ 2023-06-02 10:20   ` Jan Beulich
  2023-06-02 15:29     ` Andrew Cooper
  0 siblings, 1 reply; 15+ messages in thread
From: Jan Beulich @ 2023-06-02 10:20 UTC (permalink / raw)
  To: Andrew Cooper; +Cc: Roger Pau Monné, Wei Liu, Xen-devel

On 01.06.2023 16:48, Andrew Cooper wrote:
> The RSBA bit, "RSB Alternative", means that the RSB may use alternative
> predictors when empty.  From a practical point of view, this mean "Retpoline
> not safe".
> 
> Enhanced IBRS (officially IBRS_ALL in Intel's docs, previously IBRS_ATT) is a
> statement that IBRS is implemented in hardware (as opposed to the form
> retrofitted to existing CPUs in microcode).
> 
> The RRSBA bit, "Restricted-RSBA", is a combination of RSBA, and the eIBRS
> property that predictions are tagged with the mode in which they were learnt.
> Therefore, it means "when eIBRS is active, the RSB may fall back to
> alternative predictors but restricted to the current prediction mode".  As
> such, it's stronger statement than RSBA, but still means "Retpoline not safe".
> 
> CPUs are not expected to enumerate both RSBA and RRSBA.
> 
> Add feature dependencies for EIBRS and RRSBA.  While technically they're not
> linked, absolutely nothing good can of letting the guest see RRSBA without

Nit: missing "come"?

> EIBRS.  Nor can anything good come of a guest seeing EIBRS without IBRSB.
> Furthermore, we use this dependency to simplify the max derivation logic.
>[...]
> @@ -532,6 +541,21 @@ static void __init calculate_pv_def_policy(void)
>      guest_common_default_feature_adjustments(fs);
>  
>      sanitise_featureset(fs);
> +
> +    /*
> +     * If the host suffers from RSBA of any form, and the guest can see
> +     * MSR_ARCH_CAPS, reflect the appropriate RSBA/RRSBA property to the guest
> +     * depending on the visibility of eIBRS.
> +     */
> +    if ( test_bit(X86_FEATURE_ARCH_CAPS, fs) &&
> +         (cpu_has_rsba || cpu_has_rrsba) )
> +    {
> +        bool eibrs = test_bit(X86_FEATURE_EIBRS, fs);
> +
> +        __set_bit(eibrs ? X86_FEATURE_RRSBA
> +                        : X86_FEATURE_RSBA, fs);
> +    }
> +
>      x86_cpu_featureset_to_policy(fs, p);
>      recalculate_xstate(p);
>  }
> @@ -664,6 +688,21 @@ static void __init calculate_hvm_def_policy(void)
>          __set_bit(X86_FEATURE_VIRT_SSBD, fs);
>  
>      sanitise_featureset(fs);
> +
> +    /*
> +     * If the host suffers from RSBA of any form, and the guest can see
> +     * MSR_ARCH_CAPS, reflect the appropriate RSBA/RRSBA property to the guest
> +     * depending on the visibility of eIBRS.
> +     */
> +    if ( test_bit(X86_FEATURE_ARCH_CAPS, fs) &&
> +         (cpu_has_rsba || cpu_has_rrsba) )
> +    {
> +        bool eibrs = test_bit(X86_FEATURE_EIBRS, fs);
> +
> +        __set_bit(eibrs ? X86_FEATURE_RRSBA
> +                        : X86_FEATURE_RSBA, fs);
> +    }
> +
>      x86_cpu_featureset_to_policy(fs, p);
>      recalculate_xstate(p);
>  }
> @@ -786,6 +825,20 @@ void recalculate_cpuid_policy(struct domain *d)
>  
>      sanitise_featureset(fs);
>  
> +    /*
> +     * If the host suffers from RSBA of any form, and the guest can see
> +     * MSR_ARCH_CAPS, reflect the appropriate RSBA/RRSBA property to the guest
> +     * depending on the visibility of eIBRS.
> +     */
> +    if ( test_bit(X86_FEATURE_ARCH_CAPS, fs) &&
> +         (cpu_has_rsba || cpu_has_rrsba) )
> +    {
> +        bool eibrs = test_bit(X86_FEATURE_EIBRS, fs);
> +
> +        __set_bit(eibrs ? X86_FEATURE_RRSBA
> +                        : X86_FEATURE_RSBA, fs);
> +    }

Now that we have the same code and comment even a 3rd time, surely this
wants to be put in a helper?
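
Something like the following, for instance - just a sketch to make the shape
concrete, with an illustrative name:

    /* Illustrative helper wrapping the hunk quoted above; name invented. */
    static void guest_common_feature_adjustments_late(uint32_t *fs)
    {
        /*
         * If the host suffers from RSBA of any form, and the guest can see
         * MSR_ARCH_CAPS, reflect the appropriate RSBA/RRSBA property to the
         * guest depending on the visibility of eIBRS.  Must be called after
         * sanitise_featureset().
         */
        if ( test_bit(X86_FEATURE_ARCH_CAPS, fs) &&
             (cpu_has_rsba || cpu_has_rrsba) )
        {
            bool eibrs = test_bit(X86_FEATURE_EIBRS, fs);

            __set_bit(eibrs ? X86_FEATURE_RRSBA
                            : X86_FEATURE_RSBA, fs);
        }
    }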

What about a tool stack request leading to us setting the 2nd of the two
bits here, while the other was already set? IOW wouldn't we better clear
the other bit explicitly? (Due to the EIBRS dependency or RRSBA I think
this can really only happen when the tool stack requests RSBA+EIBRS, as
the deep deps clearing doesn't know the concept of "negative"
dependencies.)

Similarly what about a tool stack (and ultimately admin) request setting
both RSBA and RRSBA? Wouldn't we better clear the respectively wrong bit
then? People may do this in their guest configs "just to be on the safe
side" (once expressing this in guest configs is possible, of course),
and due to the max policies having both bits set this also won't occur
"automatically".

Jan



* Re: [PATCH v2 2/3] x86/spec-ctrl: Fix up the RSBA/RRSBA bits as appropriate
  2023-06-02  9:56   ` Jan Beulich
@ 2023-06-02 13:57     ` Andrew Cooper
  2023-06-05  7:48       ` Jan Beulich
  0 siblings, 1 reply; 15+ messages in thread
From: Andrew Cooper @ 2023-06-02 13:57 UTC (permalink / raw)
  To: Jan Beulich; +Cc: Roger Pau Monné, Wei Liu, Xen-devel

On 02/06/2023 10:56 am, Jan Beulich wrote:
> On 01.06.2023 16:48, Andrew Cooper wrote:
>> @@ -593,15 +596,85 @@ static bool __init retpoline_calculations(void)
>>          return false;
>>  
>>      /*
>> -     * RSBA may be set by a hypervisor to indicate that we may move to a
>> -     * processor which isn't retpoline-safe.
>> +     * The meaning of the RSBA and RRSBA bits have evolved over time.  The
>> +     * agreed upon meaning at the time of writing (May 2023) is thus:
>> +     *
>> +     * - RSBA (RSB Alternative) means that an RSB may fall back to an
>> +     *   alternative predictor on underflow.  Skylake uarch and later all have
>> +     *   this property.  Broadwell too, when running microcode versions prior
>> +     *   to Jan 2018.
>> +     *
>> +     * - All eIBRS-capable processors suffer RSBA, but eIBRS also introduces
>> +     *   tagging of predictions with the mode in which they were learned.  So
>> +     *   when eIBRS is active, RSBA becomes RRSBA (Restricted RSBA).
>> +     *
>> +     * - CPUs are not expected to enumerate both RSBA and RRSBA.
>> +     *
>> +     * Some parts (Broadwell) are not expected to ever enumerate this
>> +     * behaviour directly.  Other parts have differing enumeration with
>> +     * microcode version.  Fix up Xen's idea, so we can advertise them safely
>> +     * to guests, and so toolstacks can level a VM safety for migration.
>> +     *
>> +     * The following states exist:
>> +     *
>> +     * |   | RSBA | EIBRS | RRSBA | Notes              | Action        |
>> +     * |---+------+-------+-------+--------------------+---------------|
>> +     * | 1 |    0 |     0 |     0 | OK (older parts)   | Maybe +RSBA   |
>> +     * | 2 |    0 |     0 |     1 | Broken             | +RSBA, -RRSBA |
>> +     * | 3 |    0 |     1 |     0 | OK (pre-Aug ucode) | +RRSBA        |
>> +     * | 4 |    0 |     1 |     1 | OK                 |               |
>> +     * | 5 |    1 |     0 |     0 | OK                 |               |
>> +     * | 6 |    1 |     0 |     1 | Broken             | -RRSBA        |
>> +     * | 7 |    1 |     1 |     0 | Broken             | -RSBA, +RRSBA |
>> +     * | 8 |    1 |     1 |     1 | Broken             | -RSBA         |
>>       *
>> +     * However, we doesn't need perfect adherence to the spec.  Identify the
> Nit: "don't" or "it"?

Oops.  This used to read "Xen doesn't need".  So much for last minute
changes.

>
>> +     * broken cases (so we stand a chance of spotting violated assumptions),
>> +     * and fix up Rows 1 and 3 so Xen can use RSBA || RRSBA to identify
>> +     * "alternative predictors potentially in use".
> Considering that it's rows 2, 6, 7, and 8 which are broken, I find this
> comment a little misleading. To me it doesn't become clear whether them
> subsequently being left alone (and merely a log message issued) is
> intentional.

It is intentional.

I don't know if these combinations exist in practice anywhere or not. 
Intel think they oughtn't to, and it's quite possible that the printk()
is unreachable, but given the complexity and shifting meanings over time
here, I think it would be unwise to simply assume this to be true.

But at the same time, if it is unreachable code, it would be equally
unwise to have a load of fixup code which we can't test.  I've still got
the fixup code in a separate patch in case we need to put it back in.
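
For reference, fixup along the lines of the table's Action column would look
roughly like this (a sketch reconstructed from the table, not the actual
dropped patch):

    if ( cpu_has_eibrs && cpu_has_rsba )        /* Rows 7, 8 */
    {
        setup_clear_cpu_cap(X86_FEATURE_RSBA);
        if ( !cpu_has_rrsba )                   /* Row 7 */
            setup_force_cpu_cap(X86_FEATURE_RRSBA);
    }
    else if ( !cpu_has_eibrs && cpu_has_rrsba ) /* Rows 2, 6 */
    {
        setup_clear_cpu_cap(X86_FEATURE_RRSBA);
        if ( !cpu_has_rsba )                    /* Row 2 */
            setup_force_cpu_cap(X86_FEATURE_RSBA);
    }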

I have checked that this printk() doesn't trigger for any of the CPUs
and microcode combinations I have easily to hand, but it's not an
exhaustive test.

>
>> +     */
>> +    if ( cpu_has_eibrs ? cpu_has_rsba  /* Rows 7, 8 */
>> +                       : cpu_has_rrsba /* Rows 2, 6 */ )
>> +        printk(XENLOG_ERR
>> +               "FIRMWARE BUG: CPU %02x-%02x-%02x, ucode 0x%08x: RSBA %u, EIBRS %u, RRSBA %u\n",
>> +               boot_cpu_data.x86, boot_cpu_data.x86_model,
>> +               boot_cpu_data.x86_mask, ucode_rev,
>> +               cpu_has_rsba, cpu_has_eibrs, cpu_has_rrsba);
>> +
>> +    /*
>>       * Processors offering Enhanced IBRS are not guarenteed to be
>>       * repoline-safe.
>>       */
>> -    if ( cpu_has_rsba || cpu_has_eibrs )
>> +    if ( cpu_has_eibrs )
>> +    {
>> +        /*
>> +         * Prior to the August 2023 microcode, many eIBRS-capable parts did
>> +         * not enumerate RRSBA.
>> +         */
>> +        if ( !cpu_has_rrsba )
>> +            setup_force_cpu_cap(X86_FEATURE_RRSBA);
>> +
>> +        return false;
>> +    }
> No clearing of RSBA in this case? I fear we may end up with misbehavior if
> our own records aren't kept consistent with our assumptions. (This then
> extends to the "just a log message" above as well.)

Well quite, which is why I've gone to lengths to state what our
assumptions are.

Right now, there is nothing in Xen itself where RSBA vs RRSBA matters. 
Until this patch, we don't even have cpu_has_rrsba, and remember that
Xen was not vulnerable to CVE-2022-29901 (Intel Retbleed) because we
chose to use the microcode IBRS implementation on early Skylake, rather
than hope that Retpoline was safe enough and go with the faster option.


In v1, having RSBA and RRSBA (working as I thought they were supposed to
work) *did* matter for the default cpu-policy derivation to work nicely.

But that was invalidated by the clarification to say that RSBA and RRSBA
should never be seen together, which in turn completely changed the
derivation logic.

In v2, it doesn't matter if Xen ends up seeing both RSBA and RRSBA.  It
explicitly can cope (by treating them the same WRT Retpoline), and the
derivation logic now calculates both completely from scratch (and based
on RSBA || RRSBA).

If Xen's assumptions change, then of course we could end up with
misbehaviour, but I think it's unlikely, and I don't think this code is
any more liable to misbehave than anything else in spec-ctrl.c.

> Also the inner conditional continues to strike me as odd; could you add
> half a sentence to the comment (or description) as to meaning to leave
> is_forced_cpu_cap() function correctly (which in turn raises the question
> whether - down the road - this is actually going to matter)?

Look at the single user of is_forced_cpu_cap().

I am not micro-optimising a single branch out of the init section on the
blind hope that the contradictory behaviour it creates won't matter in
the future.  Every forced cap is an abnormal case, and it's almost
certainly my future time which will be spent unravelling the
contradictory behaviour when it comes back to bite.

>> +    /*
>> +     * RSBA is explicitly enumerated in some cases, but may also be set by a
>> +     * hypervisor to indicate that we may move to a processor which isn't
>> +     * retpoline-safe.
>> +     */
>> +    if ( cpu_has_rsba )
>>          return false;
>>  
>> +    /*
>> +     * At this point, we've filtered all the legal RSBA || RRSBA cases (or the
>> +     * known non-ideal cases).  If ARCH_CAPS is visible, trust the absence of
>> +     * RSBA || RRSBA.  There's no known microcode which advertises ARCH_CAPS
>> +     * without RSBA or EIBRS, and if we're virtualised we can't rely the model
>> +     * check anyway.
>> +     */
> I think "no known" wants further qualification: When IBRSB was first
> introduced, EIBRS and RSBA weren't even known about. I also didn't
> think all hardware (i.e. sufficiently old one) did get newer ucode
> when these started to be known. Possibly you mean "... which wrongly
> advertises ..."?

ARCH_CAPS equally didn't exist originally.  ARCH_CAPS, RSBA and EIBRS all
appeared together - see how they're bits 0 and 1 in the MSR.  RRSBA on
the other hand is bit 19, which gives you some idea of how recent it is.

The original intention (AIUI) was that ARCH_CAPS would only exist in
CLX/CFL-R and later which had EIBRS.  But it had to be retrofitted to
older parts in order to enumerate energy-filtering to fix the RAPL
attack against SGX.

The guidance (again, AIUI) was always that if you can see ARCH_CAPS you
should trust the value, if for no other reason than "your hypervisor
will want you not to use a model check".  And this is also why it's
taken so long for us to get ARCH_CAPS advertised - advertising ARCH_CAPS and
getting RSBA wrong is worse than "sorry, you're on your own".


None of this is perfect - it was put together in reaction to emergency
situations, where "doing the best we can, urgently" is far more
important than missing the deadline.

Personally, I don't think the RRSBA bit is useful, and I argued against
introducing it.  It literally means "RSBA, with EIBRS restricting which
alternative predictions to select from", and IMO adds complexity with no
benefit.  But others wanted it, and the rest is history.

>
>> @@ -689,6 +762,15 @@ static bool __init retpoline_calculations(void)
>>          break;
>>      }
>>  
>> +    if ( !safe )
>> +    {
>> +        /*
>> +         * Note: the eIBRS-capable parts are filtered out earlier, so the
>> +         * remainder here are the ones which suffer only RSBA behaviour.
>> +         */
> As I think I had mentioned already, I find "only" here odd, when RSBA
> is more severe than RRSBA. Maybe the "only" could move earlier, e.g.
> between "are" and "the"? Then again this may be another non-native-
> speaker issue of mine ...

Well, that is something which has arguably changed between v1 and v2.

Originally, this was really just the Broadwell and Skylake case, and the
point was to explain why we weren't adjusting RRSBA too.

But yeah, I think the "only" can be dropped given the other
rearrangements in v2.

~Andrew



* Re: [PATCH v2 3/3] x86/cpu-policy: Derive RSBA/RRSBA for guest policies
  2023-06-02 10:20   ` Jan Beulich
@ 2023-06-02 15:29     ` Andrew Cooper
  2023-06-02 15:38       ` Andrew Cooper
  2023-06-05  8:08       ` Jan Beulich
  0 siblings, 2 replies; 15+ messages in thread
From: Andrew Cooper @ 2023-06-02 15:29 UTC (permalink / raw)
  To: Jan Beulich; +Cc: Roger Pau Monné, Wei Liu, Xen-devel

On 02/06/2023 11:20 am, Jan Beulich wrote:
> On 01.06.2023 16:48, Andrew Cooper wrote:
>> The RSBA bit, "RSB Alternative", means that the RSB may use alternative
>> predictors when empty.  From a practical point of view, this mean "Retpoline
>> not safe".
>>
>> Enhanced IBRS (officially IBRS_ALL in Intel's docs, previously IBRS_ATT) is a
>> statement that IBRS is implemented in hardware (as opposed to the form
>> retrofitted to existing CPUs in microcode).
>>
>> The RRSBA bit, "Restricted-RSBA", is a combination of RSBA, and the eIBRS
>> property that predictions are tagged with the mode in which they were learnt.
>> Therefore, it means "when eIBRS is active, the RSB may fall back to
>> alternative predictors but restricted to the current prediction mode".  As
>> such, it's stronger statement than RSBA, but still means "Retpoline not safe".
>>
>> CPUs are not expected to enumerate both RSBA and RRSBA.
>>
>> Add feature dependencies for EIBRS and RRSBA.  While technically they're not
>> linked, absolutely nothing good can of letting the guest see RRSBA without
> Nit: missing "come"?

Yes.  Will fix.

>> @@ -786,6 +825,20 @@ void recalculate_cpuid_policy(struct domain *d)
>>  
>>      sanitise_featureset(fs);
>>  
>> +    /*
>> +     * If the host suffers from RSBA of any form, and the guest can see
>> +     * MSR_ARCH_CAPS, reflect the appropriate RSBA/RRSBA property to the guest
>> +     * depending on the visibility of eIBRS.
>> +     */
>> +    if ( test_bit(X86_FEATURE_ARCH_CAPS, fs) &&
>> +         (cpu_has_rsba || cpu_has_rrsba) )
>> +    {
>> +        bool eibrs = test_bit(X86_FEATURE_EIBRS, fs);
>> +
>> +        __set_bit(eibrs ? X86_FEATURE_RRSBA
>> +                        : X86_FEATURE_RSBA, fs);
>> +    }
> Now that we have the same code and comment even a 3rd time, surely this
> wants to be put in a helper?

I did consider that, and chose not to in this case.

One of these is going to disappear again in due course, when we start
handing errors back to the toolstack instead of fixing up behind it.

The requirement to be after sanitise_featureset() is critically
important here for safety, and out-of-lining makes that connection less
obvious.

I considered having guest_common_default_late_feature_adjustments(), but
that name is getting silly and it's already somewhat hard to navigate.

There's quite a bit of other cleanup which ought to be done, like
uniformly adding new bits first, then taking bits away (I suffered two
bugs in init_dom0_cpuid_policy() getting this wrong during development),
so I was planning to leave any decisions until then.

> What about a tool stack request leading to us setting the 2nd of the two
> bits here, while the other was already set? IOW wouldn't we better clear
> the other bit explicitly? (Due to the EIBRS dependency or RRSBA I think
> this can really only happen when the tool stack requests RSBA+EIBRS, as
> the deep deps clearing doesn't know the concept of "negative"
> dependencies.)

Hmm - I think there is a bug here, but it's not this simple.  I think
the only reasonable thing we can do is start rejecting bad input because
I don't think Xen can fix up safely.

Xen must not ever clear RSBA, or we've potentially made the VM unsafe
behind the toolstack's back.

If EIBRS != RRSBA, the toolstack has made a mistake.  Equally too for
RSBA && EIBRS.

I think this is going to take more coffee to solve...

> Similarly what about a tool stack (and ultimately admin) request setting
> both RSBA and RRSBA? Wouldn't we better clear the respectively wrong bit
> then? People may do this in their guest configs "just to be on the safe
> side" (once expressing this in guest configs is possible, of course),
> and due to the max policies having both bits set this also won't occur
> "automatically".

The only reason this series doesn't have a final patch turning
ARCH_CAPS's "a" into "A" is because libxl can't currently operate these
bits at all, let alone safely.  Roger is kindly looking into that side
of things.

It is an error to be modifying bits behind the toolstack's back to start
with.  We got away with it previously because hiding bits that the
toolstack thinks the guest saw goes in the safe direction WRT
migrate.  But no more with the semantics of RSBA/RRSBA.

I explicitly don't care about people wanting to set RSBA && RRSBA "just
in case" - this is too complicated already.  The only non-default thing
an admin needs to be able to express is +rsba,-eibrs,-rrsba to mean
"please be compatible with pre-EIBRS hardware".  (In reality, there will
also need to be some FOO_NO bits taken out too, depending on the CPUs in
question.)

~Andrew



* Re: [PATCH v2 3/3] x86/cpu-policy: Derive RSBA/RRSBA for guest policies
  2023-06-02 15:29     ` Andrew Cooper
@ 2023-06-02 15:38       ` Andrew Cooper
  2023-06-05  8:04         ` Jan Beulich
  2023-06-05  8:08       ` Jan Beulich
  1 sibling, 1 reply; 15+ messages in thread
From: Andrew Cooper @ 2023-06-02 15:38 UTC (permalink / raw)
  To: Jan Beulich; +Cc: Roger Pau Monné, Wei Liu, Xen-devel

On 02/06/2023 4:29 pm, Andrew Cooper wrote:
> On 02/06/2023 11:20 am, Jan Beulich wrote:
>> On 01.06.2023 16:48, Andrew Cooper wrote:
>> What about a tool stack request leading to us setting the 2nd of the two
>> bits here, while the other was already set? IOW wouldn't we better clear
>> the other bit explicitly? (Due to the EIBRS dependency or RRSBA I think
>> this can really only happen when the tool stack requests RSBA+EIBRS, as
>> the deep deps clearing doesn't know the concept of "negative"
>> dependencies.)
> Hmm - I think there is a bug here, but it's not this simple.  I think
> the only reasonable thing we can do is start rejecting bad input because
> I don't think Xen can fix up safely.
>
> Xen must not ever clear RSBA, or we've potentially made the VM unsafe
> behind the toolstack's back.
>
> If EIBRS != RRSBA, the toolstack has made a mistake.  Equally too for
> RSBA && EIBRS.
>
> I think this is going to take more coffee to solve...

Actually, no.

I'm going to delete the hunk modifying recalculate_cpuid(), and move
this patch back to the meaning it had in v1 which is just "get the
policies looking correct".


It's still not supported for the toolstack to request ARCH_CAPS (the "a"
marking), and the safety logic for that can come in a subsequent series
along with the unit(ish) testing I was already planning to do.

~Andrew



* Re: [PATCH v2 2/3] x86/spec-ctrl: Fix up the RSBA/RRSBA bits as appropriate
  2023-06-02 13:57     ` Andrew Cooper
@ 2023-06-05  7:48       ` Jan Beulich
  2023-06-05  7:50         ` Jan Beulich
  0 siblings, 1 reply; 15+ messages in thread
From: Jan Beulich @ 2023-06-05  7:48 UTC (permalink / raw)
  To: Andrew Cooper; +Cc: Roger Pau Monné, Wei Liu, Xen-devel

On 02.06.2023 15:57, Andrew Cooper wrote:
> On 02/06/2023 10:56 am, Jan Beulich wrote:
>> On 01.06.2023 16:48, Andrew Cooper wrote:
>>> @@ -593,15 +596,85 @@ static bool __init retpoline_calculations(void)
>>>          return false;
>>>  
>>>      /*
>>> -     * RSBA may be set by a hypervisor to indicate that we may move to a
>>> -     * processor which isn't retpoline-safe.
>>> +     * The meaning of the RSBA and RRSBA bits have evolved over time.  The
>>> +     * agreed upon meaning at the time of writing (May 2023) is thus:
>>> +     *
>>> +     * - RSBA (RSB Alternative) means that an RSB may fall back to an
>>> +     *   alternative predictor on underflow.  Skylake uarch and later all have
>>> +     *   this property.  Broadwell too, when running microcode versions prior
>>> +     *   to Jan 2018.
>>> +     *
>>> +     * - All eIBRS-capable processors suffer RSBA, but eIBRS also introduces
>>> +     *   tagging of predictions with the mode in which they were learned.  So
>>> +     *   when eIBRS is active, RSBA becomes RRSBA (Restricted RSBA).
>>> +     *
>>> +     * - CPUs are not expected to enumerate both RSBA and RRSBA.
>>> +     *
>>> +     * Some parts (Broadwell) are not expected to ever enumerate this
>>> +     * behaviour directly.  Other parts have differing enumeration with
>>> +     * microcode version.  Fix up Xen's idea, so we can advertise them safely
>>> +     * to guests, and so toolstacks can level a VM safety for migration.
>>> +     *
>>> +     * The following states exist:
>>> +     *
>>> +     * |   | RSBA | EIBRS | RRSBA | Notes              | Action        |
>>> +     * |---+------+-------+-------+--------------------+---------------|
>>> +     * | 1 |    0 |     0 |     0 | OK (older parts)   | Maybe +RSBA   |
>>> +     * | 2 |    0 |     0 |     1 | Broken             | +RSBA, -RRSBA |
>>> +     * | 3 |    0 |     1 |     0 | OK (pre-Aug ucode) | +RRSBA        |
>>> +     * | 4 |    0 |     1 |     1 | OK                 |               |
>>> +     * | 5 |    1 |     0 |     0 | OK                 |               |
>>> +     * | 6 |    1 |     0 |     1 | Broken             | -RRSBA        |
>>> +     * | 7 |    1 |     1 |     0 | Broken             | -RSBA, +RRSBA |
>>> +     * | 8 |    1 |     1 |     1 | Broken             | -RSBA         |
>>>       *
>>> +     * However, we doesn't need perfect adherence to the spec.  Identify the
>>> +     * broken cases (so we stand a chance of spotting violated assumptions),
>>> +     * and fix up Rows 1 and 3 so Xen can use RSBA || RRSBA to identify
>>> +     * "alternative predictors potentially in use".
>> Considering that it's rows 2, 6, 7, and 8 which are broken, I find this
>> comment a little misleading. To me it doesn't become clear whether them
>> subsequently being left alone (and merely a log message issued) is
>> intentional.
> 
> It is intentional.
> 
> I don't know if these combinations exist in practice anywhere or not. 
> Intel think they oughtn't to, and it's quite possible that the printk()
> is unreachable, but given the complexity and shifting meanings over time
> here, I think it would be unwise to simply assume this to be true.

I agree.

> But at the same time, if it is an unreachable code, it would be equally
> unwise to have a load of fixup code which we can't test.  I've still got
> the fixup code in a separate patch incase we need to put it back in.

Iirc the fixup code you had wasn't really "a load of code". Thing though
is: If such a combination did exist, according to our history we'd be at
least on the edge of needing to issue an XSA along with adding the
missing fixup code. From all I can tell that risk would be lower if we
had that fixup code: It might well be correct.

Nevertheless, if you decide to leave out any fixup, may I ask that you
say so very explicitly in the comment?

>>> +     */
>>> +    if ( cpu_has_eibrs ? cpu_has_rsba  /* Rows 7, 8 */
>>> +                       : cpu_has_rrsba /* Rows 2, 6 */ )
>>> +        printk(XENLOG_ERR
>>> +               "FIRMWARE BUG: CPU %02x-%02x-%02x, ucode 0x%08x: RSBA %u, EIBRS %u, RRSBA %u\n",
>>> +               boot_cpu_data.x86, boot_cpu_data.x86_model,
>>> +               boot_cpu_data.x86_mask, ucode_rev,
>>> +               cpu_has_rsba, cpu_has_eibrs, cpu_has_rrsba);
>>> +
>>> +    /*
>>>       * Processors offering Enhanced IBRS are not guarenteed to be
>>>       * repoline-safe.
>>>       */
>>> -    if ( cpu_has_rsba || cpu_has_eibrs )
>>> +    if ( cpu_has_eibrs )
>>> +    {
>>> +        /*
>>> +         * Prior to the August 2023 microcode, many eIBRS-capable parts did
>>> +         * not enumerate RRSBA.
>>> +         */
>>> +        if ( !cpu_has_rrsba )
>>> +            setup_force_cpu_cap(X86_FEATURE_RRSBA);
>>> +
>>> +        return false;
>>> +    }
>> No clearing of RSBA in this case? I fear we may end up with misbehavior if
>> our own records aren't kept consistent with our assumptions. (This then
>> extends to the "just a log message" above as well.)
> 
> Well quite, which is why I've gone to lengths to state what our
> assumptions are.
> 
> Right now, there is nothing in Xen itself where RSBA vs RRSBA matters. 
> Until this patch, we don't even have cpu_has_rrsba, and remember that
> Xen was not vulnerable to CVE-2022-29901 (Intel Retbleed) because we
> chose to use the microcode IBRS implementation on early Skylake, rather
> than hope that Retpoline was safe enough and go with the faster option.
> 
> 
> In v1, having RSBA and RRSBA (working as I thought they were supposed to
> work) *did* matter for the default cpu-policy derivation to work nicely.
> 
> But that was invalidated by the clarification to say that RSBA and RRSBA
> should never be seen together, which in turn completely changed the
> derivation logic.
> 
> In v2, it doesn't matter if Xen ends up seeing both RSBA and RRSBA.  It
> explicitly can cope (by treating them the same WRT Retpoline), and the
> derivation logic now calculates both completely from scratch (and based
> on RSBA || RRSBA).

Like above, may I ask that you say so explicitly in the / a comment right
here?

>> Also the inner conditional continues to strike me as odd; could you add
>> half a sentence to the comment (or description) saying it is meant to keep
>> is_forced_cpu_cap() functioning correctly (which in turn raises the question
>> whether - down the road - this is actually going to matter)?
> 
> Look at the single user of is_forced_cpu_cap().
> 
> I am not micro-optimising a single branch out of the init section on the
> blind hope that the contradictory behaviour it creates won't matter in
> the future.  Every forced cap is an abnormal case, and it's almost
> certainly my future time which will be spent unravelling the
> contradictory behaviour when it comes back to bite.

My request isn't about optimization at all, but about an apparent pattern
of unnecessary redundancy (which only as a side effect leads to the
elimination of a branch and hence some tiny bit of optimization). But if
you're sure this is going to be obvious to everyone but me, I'm not going
to insist.

>>> +    /*
>>> +     * RSBA is explicitly enumerated in some cases, but may also be set by a
>>> +     * hypervisor to indicate that we may move to a processor which isn't
>>> +     * retpoline-safe.
>>> +     */
>>> +    if ( cpu_has_rsba )
>>>          return false;
>>>  
>>> +    /*
>>> +     * At this point, we've filtered all the legal RSBA || RRSBA cases (or the
>>> +     * known non-ideal cases).  If ARCH_CAPS is visible, trust the absence of
>>> +     * RSBA || RRSBA.  There's no known microcode which advertises ARCH_CAPS
>>> +     * without RSBA or EIBRS, and if we're virtualised we can't rely on the model
>>> +     * check anyway.
>>> +     */
>> I think "no known" wants further qualification: When IBRSB was first
>> introduced, EIBRS and RSBA weren't even known about. I also didn't
>> think all hardware (i.e. sufficiently old one) did get newer ucode
>> when these started to be known. Possibly you mean "... which wrongly
>> advertises ..."?
> 
> ARCH_CAPS equally didn't exist originally.  ARCH_CAPS, RSBA and EIBRS all
> appeared together - see how they're bits 1 and 2 in the MSR.  RRSBA on
> the other hand is bit 19, which gives you some idea of how recent it is.

Hmm, yes, I see.
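
For context, the relevant MSR_ARCH_CAPS bits, with positions per Intel's
enumeration (the macro spellings below approximate Xen's msr-index.h and
should be treated as assumptions):

    #define ARCH_CAPS_RDCL_NO       (_AC(1, ULL) <<  0)
    #define ARCH_CAPS_IBRS_ALL      (_AC(1, ULL) <<  1)  /* eIBRS */
    #define ARCH_CAPS_RSBA          (_AC(1, ULL) <<  2)
    ...
    #define ARCH_CAPS_RRSBA         (_AC(1, ULL) << 19)

which is what makes RRSBA the much more recent addition.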

Jan


^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v2 2/3] x86/spec-ctrl: Fix up the RSBA/RRSBA bits as appropriate
  2023-06-05  7:48       ` Jan Beulich
@ 2023-06-05  7:50         ` Jan Beulich
  2023-06-05  9:44           ` Andrew Cooper
  0 siblings, 1 reply; 15+ messages in thread
From: Jan Beulich @ 2023-06-05  7:50 UTC (permalink / raw)
  To: Andrew Cooper; +Cc: Roger Pau Monné, Wei Liu, Xen-devel

On 05.06.2023 09:48, Jan Beulich wrote:
> On 02.06.2023 15:57, Andrew Cooper wrote:
>> On 02/06/2023 10:56 am, Jan Beulich wrote:
>>> On 01.06.2023 16:48, Andrew Cooper wrote:
>>>> @@ -593,15 +596,85 @@ static bool __init retpoline_calculations(void)
>>>>          return false;
>>>>  
>>>>      /*
>>>> -     * RSBA may be set by a hypervisor to indicate that we may move to a
>>>> -     * processor which isn't retpoline-safe.
>>>> +     * The meaning of the RSBA and RRSBA bits has evolved over time.  The
>>>> +     * agreed upon meaning at the time of writing (May 2023) is thus:
>>>> +     *
>>>> +     * - RSBA (RSB Alternative) means that an RSB may fall back to an
>>>> +     *   alternative predictor on underflow.  Skylake uarch and later all have
>>>> +     *   this property.  Broadwell too, when running microcode versions prior
>>>> +     *   to Jan 2018.
>>>> +     *
>>>> +     * - All eIBRS-capable processors suffer RSBA, but eIBRS also introduces
>>>> +     *   tagging of predictions with the mode in which they were learned.  So
>>>> +     *   when eIBRS is active, RSBA becomes RRSBA (Restricted RSBA).
>>>> +     *
>>>> +     * - CPUs are not expected to enumerate both RSBA and RRSBA.
>>>> +     *
>>>> +     * Some parts (Broadwell) are not expected to ever enumerate this
>>>> +     * behaviour directly.  Other parts have differing enumeration with
>>>> +     * microcode version.  Fix up Xen's idea, so we can advertise them safely
>>>> +     * to guests, and so toolstacks can level a VM safely for migration.
>>>> +     *
>>>> +     * The following states exist:
>>>> +     *
>>>> +     * |   | RSBA | EIBRS | RRSBA | Notes              | Action        |
>>>> +     * |---+------+-------+-------+--------------------+---------------|
>>>> +     * | 1 |    0 |     0 |     0 | OK (older parts)   | Maybe +RSBA   |
>>>> +     * | 2 |    0 |     0 |     1 | Broken             | +RSBA, -RRSBA |
>>>> +     * | 3 |    0 |     1 |     0 | OK (pre-Aug ucode) | +RRSBA        |
>>>> +     * | 4 |    0 |     1 |     1 | OK                 |               |
>>>> +     * | 5 |    1 |     0 |     0 | OK                 |               |
>>>> +     * | 6 |    1 |     0 |     1 | Broken             | -RRSBA        |
>>>> +     * | 7 |    1 |     1 |     0 | Broken             | -RSBA, +RRSBA |
>>>> +     * | 8 |    1 |     1 |     1 | Broken             | -RSBA         |
>>>>       *
>>>> +     * However, we don't need perfect adherence to the spec.  Identify the
>>>> +     * broken cases (so we stand a chance of spotting violated assumptions),
>>>> +     * and fix up Rows 1 and 3 so Xen can use RSBA || RRSBA to identify
>>>> +     * "alternative predictors potentially in use".
>>> Considering that it's rows 2, 6, 7, and 8 which are broken, I find this
>>> comment a little misleading. To me it doesn't become clear whether them
>>> subsequently being left alone (and merely a log message issued) is
>>> intentional.
>>
>> It is intentional.
>>
>> I don't know if these combinations exist in practice anywhere or not. 
>> Intel think they oughtn't to, and it's quite possible that the printk()
>> is unreachable, but given the complexity and shifting meanings over time
>> here, I think it would be unwise to simply assume this to be true.
> 
> I agree.

Thinking of it - would we perhaps want to go a step further and taint the
system in such a case? I would then view this as a kind of "Xen not
(security) supported on this hardware", until we manage to fix (or work
around) the issue.

Jan


^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v2 3/3] x86/cpu-policy: Derive RSBA/RRSBA for guest policies
  2023-06-02 15:38       ` Andrew Cooper
@ 2023-06-05  8:04         ` Jan Beulich
  0 siblings, 0 replies; 15+ messages in thread
From: Jan Beulich @ 2023-06-05  8:04 UTC (permalink / raw)
  To: Andrew Cooper; +Cc: Roger Pau Monné, Wei Liu, Xen-devel

On 02.06.2023 17:38, Andrew Cooper wrote:
> On 02/06/2023 4:29 pm, Andrew Cooper wrote:
>> On 02/06/2023 11:20 am, Jan Beulich wrote:
>>> On 01.06.2023 16:48, Andrew Cooper wrote:
>>> What about a tool stack request leading to us setting the 2nd of the two
>>> bits here, while the other was already set? IOW wouldn't we better clear
>>> the other bit explicitly? (Due to the EIBRS dependency of RRSBA I think
>>> this can really only happen when the tool stack requests RSBA+EIBRS, as
>>> the deep deps clearing doesn't know the concept of "negative"
>>> dependencies.)
>> Hmm - I think there is a bug here, but it's not this simple.  I think
>> the only reasonable thing we can do is start rejecting bad input because
>> I don't think Xen can fix up safely.
>>
>> Xen must not ever clear RSBA, or we've potentially made the VM unsafe
>> behind the toolstack's back.
>>
>> If EIBRS != RRSBA, the toolstack has made a mistake.  Equally too for
>> RSBA && EIBRS.
>>
>> I think this is going to take more coffee to solve...
> 
> Actually, no.
> 
> I'm going to delete the hunk modifying recalculate_cpuid(), and move
> this patch back to the meaning it had in v1 which is just "get the
> policies looking correct".

Hmm, and how are you intending to achieve that goal without adjusting
behind the tool stack's back, and without it being an option (yet) to
reject the input? Iirc you agreed with my v1 remark that some fixing up
is going to be necessary.

Since as you say (and as I should have realized when writing the earlier
reply) we may never clear RSBA, wouldn't it be an option to clear EIBRS
instead?

> It's still not supported for the toolstack to request ARCH_CAPS (the "a"
> marking), and the safely logic for that can come in a subsequent series
> along with the unit(ish) testing I was already planning to do.

Yet it not being supported doesn't mean we should leave a broken
result when we can fix things up (for the time being, i.e. until we
can report back that we don't like this feature combination).
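
As a purely illustrative rendering of that suggestion (not part of the
series), such a fix-up in recalculate_cpuid_policy() could be as small as:

    /*
     * Never drop RSBA behind the toolstack's back; drop EIBRS instead so
     * that RSBA keeps its "not retpoline-safe" meaning.  Sketch only.
     */
    if ( test_bit(X86_FEATURE_RSBA, fs) &&
         test_bit(X86_FEATURE_EIBRS, fs) )
        __clear_bit(X86_FEATURE_EIBRS, fs);

though clearing EIBRS this late would also mean taking RRSBA out by hand,
as the deep-dependency clearing has already run by this point.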

Jan


^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v2 3/3] x86/cpu-policy: Derive RSBA/RRSBA for guest policies
  2023-06-02 15:29     ` Andrew Cooper
  2023-06-02 15:38       ` Andrew Cooper
@ 2023-06-05  8:08       ` Jan Beulich
  1 sibling, 0 replies; 15+ messages in thread
From: Jan Beulich @ 2023-06-05  8:08 UTC (permalink / raw)
  To: Andrew Cooper; +Cc: Roger Pau Monné, Wei Liu, Xen-devel

On 02.06.2023 17:29, Andrew Cooper wrote:
> On 02/06/2023 11:20 am, Jan Beulich wrote:
>> On 01.06.2023 16:48, Andrew Cooper wrote:
>>> @@ -786,6 +825,20 @@ void recalculate_cpuid_policy(struct domain *d)
>>>  
>>>      sanitise_featureset(fs);
>>>  
>>> +    /*
>>> +     * If the host suffers from RSBA of any form, and the guest can see
>>> +     * MSR_ARCH_CAPS, reflect the appropriate RSBA/RRSBA property to the guest
>>> +     * depending on the visibility of eIBRS.
>>> +     */
>>> +    if ( test_bit(X86_FEATURE_ARCH_CAPS, fs) &&
>>> +         (cpu_has_rsba || cpu_has_rrsba) )
>>> +    {
>>> +        bool eibrs = test_bit(X86_FEATURE_EIBRS, fs);
>>> +
>>> +        __set_bit(eibrs ? X86_FEATURE_RRSBA
>>> +                        : X86_FEATURE_RSBA, fs);
>>> +    }
>> Now that we have the same code and comment even a 3rd time, surely this
>> wants to be put in a helper?
> 
> I did consider that, and chose not to in this case.
> 
> One of these is going to disappear again in due course, when we start
> handing errors back to the toolstack instead of fixing up behind it.
> 
> The requirement to be after sanitise_featureset() is critically
> important here for safety, and out-of-lining makes that connection less
> obvious.
> 
> I considered having guest_common_default_late_feature_adjustments(), but
> that name is getting silly and it's already somewhat hard to navigate.

Well, okay then. But may I then please ask that the three instances each
cross-reference one another, so touching one will stand a chance of the
others also being touched?
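
For the record, the helper in question would be on the order of the
following sketch (the name is invented here and it is not part of the
series):

    /* Must be called after sanitise_featureset(fs). */
    static void reflect_rsba_rrsba(uint32_t *fs)
    {
        /*
         * If the host suffers any form of RSBA and the guest can see
         * MSR_ARCH_CAPS, reflect RSBA or RRSBA depending on eIBRS visibility.
         */
        if ( test_bit(X86_FEATURE_ARCH_CAPS, fs) &&
             (cpu_has_rsba || cpu_has_rrsba) )
            __set_bit(test_bit(X86_FEATURE_EIBRS, fs) ? X86_FEATURE_RRSBA
                                                      : X86_FEATURE_RSBA, fs);
    }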

>> Similarly what about a tool stack (and ultimately admin) request setting
>> both RSBA and RRSBA? Wouldn't we better clear the respectively wrong bit
>> then? People may do this in their guest configs "just to be on the safe
>> side" (once expressing this in guest configs is possible, of course),
>> and due to the max policies having both bits set this also won't occur
>> "automatically".
> 
> The only reason this series doesn't have a final patch turning
> ARCH_CAPS's "a" into "A" is because libxl can't currently operate these
> bits at all, let alone safely.  Roger is kindly looking into that side
> of things.
> 
> It is an error to be modifying bits behind the toolstack's back to start
> with.  We got away with it previously because hiding bits that the
> toolstack thinks the guest saw goes in the safe direction WRT
> migrate.  But no more with the semantics of RSBA/RRSBA.
> 
> I explicitly don't care about people wanting to set RSBA && RRSBA "just
> in case" - this is too complicated already.  The only non-default thing
> an admin needs to be able to express is +rsba,-eibrs,-rrsba to mean
> "please be compatible with pre-EIBRS hardware".  (In reality, there will
> also need to be some FOO_NO bits taken out too, depending on the CPUs in
> question.)

Not caring about such people would mean documenting very clearly that using
the two bits together is not supported (and then also their respective
invalid combinations with EIBRS). Otherwise I'm afraid "such people" would
likely include one of our bigger customers.
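
To make the one supported non-default request concrete: once libxl can
drive these bits (it cannot at the time of this thread), the "compatible
with pre-eIBRS hardware" case Andrew describes would boil down to a guest
config fragment along the lines of the hypothetical

    cpuid = "host,rsba=1,eibrs=0,rrsba=0"

plus whichever FOO_NO bits need masking for the older parts in the
migration pool.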

Jan


^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v2 2/3] x86/spec-ctrl: Fix up the RSBA/RRSBA bits as appropriate
  2023-06-05  7:50         ` Jan Beulich
@ 2023-06-05  9:44           ` Andrew Cooper
  0 siblings, 0 replies; 15+ messages in thread
From: Andrew Cooper @ 2023-06-05  9:44 UTC (permalink / raw)
  To: Jan Beulich; +Cc: Roger Pau Monné, Wei Liu, Xen-devel

On 05/06/2023 8:50 am, Jan Beulich wrote:
> On 05.06.2023 09:48, Jan Beulich wrote:
>> On 02.06.2023 15:57, Andrew Cooper wrote:
>>> On 02/06/2023 10:56 am, Jan Beulich wrote:
>>>> On 01.06.2023 16:48, Andrew Cooper wrote:
>>>>> @@ -593,15 +596,85 @@ static bool __init retpoline_calculations(void)
>>>>>          return false;
>>>>>  
>>>>>      /*
>>>>> -     * RSBA may be set by a hypervisor to indicate that we may move to a
>>>>> -     * processor which isn't retpoline-safe.
>>>>> +     * The meaning of the RSBA and RRSBA bits has evolved over time.  The
>>>>> +     * agreed upon meaning at the time of writing (May 2023) is thus:
>>>>> +     *
>>>>> +     * - RSBA (RSB Alternative) means that an RSB may fall back to an
>>>>> +     *   alternative predictor on underflow.  Skylake uarch and later all have
>>>>> +     *   this property.  Broadwell too, when running microcode versions prior
>>>>> +     *   to Jan 2018.
>>>>> +     *
>>>>> +     * - All eIBRS-capable processors suffer RSBA, but eIBRS also introduces
>>>>> +     *   tagging of predictions with the mode in which they were learned.  So
>>>>> +     *   when eIBRS is active, RSBA becomes RRSBA (Restricted RSBA).
>>>>> +     *
>>>>> +     * - CPUs are not expected to enumerate both RSBA and RRSBA.
>>>>> +     *
>>>>> +     * Some parts (Broadwell) are not expected to ever enumerate this
>>>>> +     * behaviour directly.  Other parts have differing enumeration with
>>>>> +     * microcode version.  Fix up Xen's idea, so we can advertise them safely
>>>>> +     * to guests, and so toolstacks can level a VM safely for migration.
>>>>> +     *
>>>>> +     * The following states exist:
>>>>> +     *
>>>>> +     * |   | RSBA | EIBRS | RRSBA | Notes              | Action        |
>>>>> +     * |---+------+-------+-------+--------------------+---------------|
>>>>> +     * | 1 |    0 |     0 |     0 | OK (older parts)   | Maybe +RSBA   |
>>>>> +     * | 2 |    0 |     0 |     1 | Broken             | +RSBA, -RRSBA |
>>>>> +     * | 3 |    0 |     1 |     0 | OK (pre-Aug ucode) | +RRSBA        |
>>>>> +     * | 4 |    0 |     1 |     1 | OK                 |               |
>>>>> +     * | 5 |    1 |     0 |     0 | OK                 |               |
>>>>> +     * | 6 |    1 |     0 |     1 | Broken             | -RRSBA        |
>>>>> +     * | 7 |    1 |     1 |     0 | Broken             | -RSBA, +RRSBA |
>>>>> +     * | 8 |    1 |     1 |     1 | Broken             | -RSBA         |
>>>>>       *
>>>>> +     * However, we don't need perfect adherence to the spec.  Identify the
>>>>> +     * broken cases (so we stand a chance of spotting violated assumptions),
>>>>> +     * and fix up Rows 1 and 3 so Xen can use RSBA || RRSBA to identify
>>>>> +     * "alternative predictors potentially in use".
>>>> Considering that it's rows 2, 6, 7, and 8 which are broken, I find this
>>>> comment a little misleading. To me it doesn't become clear whether them
>>>> subsequently being left alone (and merely a log message issued) is
>>>> intentional.
>>> It is intentional.
>>>
>>> I don't know if these combinations exist in practice anywhere or not. 
>>> Intel think they oughtn't to, and it's quite possible that the printk()
>>> is unreachable, but given the complexity and shifting meanings over time
>>> here, I think it would be unwise to simply assume this to be true.
>> I agree.
> Thinking of it - would we perhaps want to go a step further and taint the
> system in such a case? I would then view this as a kind of "Xen not
> (security) supported on this hardware", until we manage to fix (or work
> around) the issue.

'S' for out-of-spec seems like it fits the bill.

In fact, that can also be used for the CET vs MSR_SPEC_CTRL cross-check
at the start of init_speculation_mitigations().
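
Concretely, the shape being agreed on here is roughly the following
sketch (assuming the existing add_taint() / TAINT_CPU_OUT_OF_SPEC
machinery behind the 'S' taint character):

    if ( cpu_has_eibrs ? cpu_has_rsba  /* Rows 7, 8 */
                       : cpu_has_rrsba /* Rows 2, 6 */ )
    {
        printk(XENLOG_ERR
               "FIRMWARE BUG: CPU %02x-%02x-%02x, ucode 0x%08x: RSBA %u, EIBRS %u, RRSBA %u\n",
               boot_cpu_data.x86, boot_cpu_data.x86_model,
               boot_cpu_data.x86_mask, ucode_rev,
               cpu_has_rsba, cpu_has_eibrs, cpu_has_rrsba);
        add_taint(TAINT_CPU_OUT_OF_SPEC);
    }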

~Andrew


^ permalink raw reply	[flat|nested] 15+ messages in thread

end of thread, other threads:[~2023-06-05  9:45 UTC | newest]

Thread overview: 15+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-06-01 14:48 [PATCH v2 0/3] x86: RSBA and RRSBA handling Andrew Cooper
2023-06-01 14:48 ` [PATCH v2 1/3] x86/spec-ctrl: Rename retpoline_safe() to retpoline_calculations() Andrew Cooper
2023-06-02  9:37   ` Jan Beulich
2023-06-01 14:48 ` [PATCH v2 2/3] x86/spec-ctrl: Fix up the RSBA/RRSBA bits as appropriate Andrew Cooper
2023-06-02  9:56   ` Jan Beulich
2023-06-02 13:57     ` Andrew Cooper
2023-06-05  7:48       ` Jan Beulich
2023-06-05  7:50         ` Jan Beulich
2023-06-05  9:44           ` Andrew Cooper
2023-06-01 14:48 ` [PATCH v2 3/3] x86/cpu-policy: Derive RSBA/RRSBA for guest policies Andrew Cooper
2023-06-02 10:20   ` Jan Beulich
2023-06-02 15:29     ` Andrew Cooper
2023-06-02 15:38       ` Andrew Cooper
2023-06-05  8:04         ` Jan Beulich
2023-06-05  8:08       ` Jan Beulich
