All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/6] Support for new CPU model SapphireRapids
@ 2022-10-27  2:00 Wang, Lei
  2022-10-27  2:00 ` [PATCH 1/6] i386: Introduce FeatureWordInfo for AMX CPUID leaf 0x1D and 0x1E Wang, Lei
                   ` (5 more replies)
  0 siblings, 6 replies; 7+ messages in thread
From: Wang, Lei @ 2022-10-27  2:00 UTC (permalink / raw)
  To: pbonzini; +Cc: qemu-devel, dgilbert, berrange, xiaoyao.li, yang.zhong

This series aims to add a new CPU model SapphireRapids, and tries to
address the problem stated in
https://lore.kernel.org/all/20220812055751.14553-1-lei4.wang@intel.com/T/#mcf67dbd1ad37c65d7988c36a2b267be9afd2fb30,
so that a named CPU model can define its own AMX values, and QEMU won't
pass the wrong AMX values to KVM on future platforms if they support
different values.

The original patch is
https://lore.kernel.org/all/20220812055751.14553-1-lei4.wang@intel.com/T/#u.

Wang, Lei (6):
  i386: Introduce FeatureWordInfo for AMX CPUID leaf 0x1D and 0x1E
  i386: Remove unused parameter "uint32_t bit" in
    feature_word_description()
  i386: Introduce new struct "MultiBitFeatureInfo" for multi-bit
    features
  i386: Mask and report unavailable multi-bit feature values
  i386: Initialize AMX CPUID leaves with corresponding env->features[]
    leaves
  i386: Add new CPU model SapphireRapids

 target/i386/cpu-internal.h |  11 ++
 target/i386/cpu.c          | 314 ++++++++++++++++++++++++++++++++++---
 target/i386/cpu.h          |  18 +++
 3 files changed, 323 insertions(+), 20 deletions(-)

-- 
2.34.1



^ permalink raw reply	[flat|nested] 7+ messages in thread

* [PATCH 1/6] i386: Introduce FeatureWordInfo for AMX CPUID leaf 0x1D and 0x1E
  2022-10-27  2:00 [PATCH 0/6] Support for new CPU model SapphireRapids Wang, Lei
@ 2022-10-27  2:00 ` Wang, Lei
  2022-10-27  2:00 ` [PATCH 2/6] i386: Remove unused parameter "uint32_t bit" in feature_word_description() Wang, Lei
                   ` (4 subsequent siblings)
  5 siblings, 0 replies; 7+ messages in thread
From: Wang, Lei @ 2022-10-27  2:00 UTC (permalink / raw)
  To: pbonzini; +Cc: qemu-devel, dgilbert, berrange, xiaoyao.li, yang.zhong

CPUID leaf 0x1D and 0x1E enumerate tile and TMUL information for AMX.

Introduce FeatureWord FEAT_1D_1_EAX, FEAT_1D_1_EBX, FEAT_1D_1_ECX and
FEAT_1E_0_EBX. Thus these AMX features can be expanded when
"-cpu host/max" is used and can be configured in a named CPU model.

Signed-off-by: Wang, Lei <lei4.wang@intel.com>
---
 target/i386/cpu.c | 55 +++++++++++++++++++++++++++++++++++++++++++++++
 target/i386/cpu.h | 12 +++++++++++
 2 files changed, 67 insertions(+)

diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index 8a11470507..e98780773c 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -1002,6 +1002,45 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = {
         },
         .tcg_features = ~0U,
     },
+    [FEAT_1D_1_EAX] = {
+        .type = CPUID_FEATURE_WORD,
+        .cpuid = {
+            .eax = 0x1D,
+            .needs_ecx = true, .ecx = 1,
+            .reg = R_EAX,
+        },
+        .migratable_flags = CPUID_AMX_PALETTE_1_TOTAL_TILE_BYTES_MASK |
+            CPUID_AMX_PALETTE_1_BYTES_PER_TILE_MASK,
+    },
+    [FEAT_1D_1_EBX] = {
+        .type = CPUID_FEATURE_WORD,
+        .cpuid = {
+            .eax = 0x1D,
+            .needs_ecx = true, .ecx = 1,
+            .reg = R_EBX,
+        },
+        .migratable_flags = CPUID_AMX_PALETTE_1_BYTES_PER_ROW_MASK |
+            CPUID_AMX_PALETTE_1_MAX_NAMES_MASK,
+    },
+    [FEAT_1D_1_ECX] = {
+        .type = CPUID_FEATURE_WORD,
+        .cpuid = {
+            .eax = 0x1D,
+            .needs_ecx = true, .ecx = 1,
+            .reg = R_ECX,
+        },
+        .migratable_flags = CPUID_AMX_PALETTE_1_MAX_ROWS_MASK,
+    },
+    [FEAT_1E_0_EBX] = {
+        .type = CPUID_FEATURE_WORD,
+        .cpuid = {
+            .eax = 0x1E,
+            .needs_ecx = true, .ecx = 0,
+            .reg = R_EBX,
+        },
+        .migratable_flags = CPUID_AMX_TMUL_MAX_K_MASK |
+            CPUID_AMX_TMUL_MAX_N_MASK,
+    },
     /*Below are MSR exposed features*/
     [FEAT_ARCH_CAPABILITIES] = {
         .type = MSR_FEATURE_WORD,
@@ -1371,6 +1410,22 @@ static FeatureDep feature_dependencies[] = {
         .from = { FEAT_7_0_EBX,             CPUID_7_0_EBX_INTEL_PT },
         .to = { FEAT_14_0_ECX,              ~0ull },
     },
+    {
+        .from = { FEAT_7_0_EDX,             CPUID_7_0_EDX_AMX_TILE },
+        .to = { FEAT_1D_1_EAX,              ~0ull },
+    },
+    {
+        .from = { FEAT_7_0_EDX,             CPUID_7_0_EDX_AMX_TILE },
+        .to = { FEAT_1D_1_EBX,              ~0ull },
+    },
+    {
+        .from = { FEAT_7_0_EDX,             CPUID_7_0_EDX_AMX_TILE },
+        .to = { FEAT_1D_1_ECX,              ~0ull },
+    },
+    {
+        .from = { FEAT_7_0_EDX,             CPUID_7_0_EDX_AMX_TILE },
+        .to = { FEAT_1E_0_EBX,              ~0ull },
+    },
     {
         .from = { FEAT_8000_0001_EDX,       CPUID_EXT2_RDTSCP },
         .to = { FEAT_VMX_SECONDARY_CTLS,    VMX_SECONDARY_EXEC_RDTSCP },
diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index 7edf5dfac3..1c90fb6c9d 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -583,6 +583,14 @@ typedef enum X86Seg {
                                  XSTATE_Hi16_ZMM_MASK | XSTATE_PKRU_MASK | \
                                  XSTATE_XTILE_CFG_MASK | XSTATE_XTILE_DATA_MASK)
 
+#define CPUID_AMX_PALETTE_1_TOTAL_TILE_BYTES_MASK 0xffffU
+#define CPUID_AMX_PALETTE_1_BYTES_PER_TILE_MASK   (0xffffU << 16)
+#define CPUID_AMX_PALETTE_1_BYTES_PER_ROW_MASK    0xffffU
+#define CPUID_AMX_PALETTE_1_MAX_NAMES_MASK        (0xffffU << 16)
+#define CPUID_AMX_PALETTE_1_MAX_ROWS_MASK         0xffffU
+#define CPUID_AMX_TMUL_MAX_K_MASK                 0xffU
+#define CPUID_AMX_TMUL_MAX_N_MASK                 (0xffffU << 8)
+
 /* CPUID feature words */
 typedef enum FeatureWord {
     FEAT_1_EDX,         /* CPUID[1].EDX */
@@ -603,6 +611,10 @@ typedef enum FeatureWord {
     FEAT_6_EAX,         /* CPUID[6].EAX */
     FEAT_XSAVE_XCR0_LO, /* CPUID[EAX=0xd,ECX=0].EAX */
     FEAT_XSAVE_XCR0_HI, /* CPUID[EAX=0xd,ECX=0].EDX */
+    FEAT_1D_1_EAX,      /* CPUID[EAX=0x1d,ECX=1].EAX */
+    FEAT_1D_1_EBX,      /* CPUID[EAX=0x1d,ECX=1].EBX */
+    FEAT_1D_1_ECX,      /* CPUID[EAX=0x1d,ECX=1].ECX */
+    FEAT_1E_0_EBX,      /* CPUID[EAX=0x1e,ECX=0].EBX */
     FEAT_ARCH_CAPABILITIES,
     FEAT_CORE_CAPABILITY,
     FEAT_PERF_CAPABILITIES,
-- 
2.34.1



^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH 2/6] i386: Remove unused parameter "uint32_t bit" in feature_word_description()
  2022-10-27  2:00 [PATCH 0/6] Support for new CPU model SapphireRapids Wang, Lei
  2022-10-27  2:00 ` [PATCH 1/6] i386: Introduce FeatureWordInfo for AMX CPUID leaf 0x1D and 0x1E Wang, Lei
@ 2022-10-27  2:00 ` Wang, Lei
  2022-10-27  2:00 ` [PATCH 3/6] i386: Introduce new struct "MultiBitFeatureInfo" for multi-bit features Wang, Lei
                   ` (3 subsequent siblings)
  5 siblings, 0 replies; 7+ messages in thread
From: Wang, Lei @ 2022-10-27  2:00 UTC (permalink / raw)
  To: pbonzini; +Cc: qemu-devel, dgilbert, berrange, xiaoyao.li, yang.zhong

Parameter "uint32_t bit" is not used in function feature_word_description(),
so remove it.

Signed-off-by: Wang, Lei <lei4.wang@intel.com>
---
 target/i386/cpu.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index e98780773c..0083a2a7f7 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -4290,7 +4290,7 @@ static const TypeInfo max_x86_cpu_type_info = {
     .class_init = max_x86_cpu_class_init,
 };
 
-static char *feature_word_description(FeatureWordInfo *f, uint32_t bit)
+static char *feature_word_description(FeatureWordInfo *f)
 {
     assert(f->type == CPUID_FEATURE_WORD || f->type == MSR_FEATURE_WORD);
 
@@ -4329,6 +4329,7 @@ static void mark_unavailable_features(X86CPU *cpu, FeatureWord w, uint64_t mask,
     CPUX86State *env = &cpu->env;
     FeatureWordInfo *f = &feature_word_info[w];
     int i;
+    g_autofree char *feat_word_str = feature_word_description(f);
 
     if (!cpu->force_features) {
         env->features[w] &= ~mask;
@@ -4341,7 +4342,6 @@ static void mark_unavailable_features(X86CPU *cpu, FeatureWord w, uint64_t mask,
 
     for (i = 0; i < 64; ++i) {
         if ((1ULL << i) & mask) {
-            g_autofree char *feat_word_str = feature_word_description(f, i);
             warn_report("%s: %s%s%s [bit %d]",
                         verbose_prefix,
                         feat_word_str,
-- 
2.34.1



^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH 3/6] i386: Introduce new struct "MultiBitFeatureInfo" for multi-bit features
  2022-10-27  2:00 [PATCH 0/6] Support for new CPU model SapphireRapids Wang, Lei
  2022-10-27  2:00 ` [PATCH 1/6] i386: Introduce FeatureWordInfo for AMX CPUID leaf 0x1D and 0x1E Wang, Lei
  2022-10-27  2:00 ` [PATCH 2/6] i386: Remove unused parameter "uint32_t bit" in feature_word_description() Wang, Lei
@ 2022-10-27  2:00 ` Wang, Lei
  2022-10-27  2:00 ` [PATCH 4/6] i386: Mask and report unavailable multi-bit feature values Wang, Lei
                   ` (2 subsequent siblings)
  5 siblings, 0 replies; 7+ messages in thread
From: Wang, Lei @ 2022-10-27  2:00 UTC (permalink / raw)
  To: pbonzini; +Cc: qemu-devel, dgilbert, berrange, xiaoyao.li, yang.zhong

Some features use multiple CPUID bits to form a value to be used, e.g.,
CPUID(0x1E,0):EBX[23:08] is regarded as the tmul_maxn value for AMX.
Introduce a new struct "MultiBitFeatureInfo" to hold the information for
those features and create a corresponding member in struct FeatureWordInfo,
so that the information can be assigned for each item in feature_word_info
array and used in the future.

Signed-off-by: Wang, Lei <lei4.wang@intel.com>
---
 target/i386/cpu-internal.h |  9 +++++++
 target/i386/cpu.c          | 54 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 63 insertions(+)

diff --git a/target/i386/cpu-internal.h b/target/i386/cpu-internal.h
index 9baac5c0b4..66b3d66cb4 100644
--- a/target/i386/cpu-internal.h
+++ b/target/i386/cpu-internal.h
@@ -25,6 +25,13 @@ typedef enum FeatureWordType {
    MSR_FEATURE_WORD,
 } FeatureWordType;
 
+typedef struct MultiBitFeatureInfo {
+    const char *feat_name;
+    uint64_t mask;
+    unsigned high_bit_position;
+    unsigned low_bit_position;
+} MultiBitFeatureInfo;
+
 typedef struct FeatureWordInfo {
     FeatureWordType type;
     /* feature flags names are taken from "Intel Processor Identification and
@@ -51,6 +58,8 @@ typedef struct FeatureWordInfo {
     uint64_t migratable_flags; /* Feature flags known to be migratable */
     /* Features that shouldn't be auto-enabled by "-cpu host" */
     uint64_t no_autoenable_flags;
+    unsigned num_multi_bit_features;
+    MultiBitFeatureInfo *multi_bit_features;
 } FeatureWordInfo;
 
 extern FeatureWordInfo feature_word_info[];
diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index 0083a2a7f7..7ae232ab18 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -1011,6 +1011,21 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = {
         },
         .migratable_flags = CPUID_AMX_PALETTE_1_TOTAL_TILE_BYTES_MASK |
             CPUID_AMX_PALETTE_1_BYTES_PER_TILE_MASK,
+        .num_multi_bit_features = 2,
+        .multi_bit_features = (MultiBitFeatureInfo[]){
+            {
+                .feat_name = "total_tile_bytes",
+                .mask = CPUID_AMX_PALETTE_1_TOTAL_TILE_BYTES_MASK,
+                .high_bit_position = 15,
+                .low_bit_position = 0,
+            },
+            {
+                .feat_name = "bytes_per_tile",
+                .mask = CPUID_AMX_PALETTE_1_BYTES_PER_TILE_MASK,
+                .high_bit_position = 31,
+                .low_bit_position = 16,
+            },
+        },
     },
     [FEAT_1D_1_EBX] = {
         .type = CPUID_FEATURE_WORD,
@@ -1021,6 +1036,21 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = {
         },
         .migratable_flags = CPUID_AMX_PALETTE_1_BYTES_PER_ROW_MASK |
             CPUID_AMX_PALETTE_1_MAX_NAMES_MASK,
+        .num_multi_bit_features = 2,
+        .multi_bit_features = (MultiBitFeatureInfo[]){
+            {
+                .feat_name = "bytes_per_row",
+                .mask = CPUID_AMX_PALETTE_1_BYTES_PER_ROW_MASK,
+                .high_bit_position = 15,
+                .low_bit_position = 0,
+            },
+            {
+                .feat_name = "max_names",
+                .mask = CPUID_AMX_PALETTE_1_MAX_NAMES_MASK,
+                .high_bit_position = 31,
+                .low_bit_position = 16,
+            },
+        },
     },
     [FEAT_1D_1_ECX] = {
         .type = CPUID_FEATURE_WORD,
@@ -1030,6 +1060,15 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = {
             .reg = R_ECX,
         },
         .migratable_flags = CPUID_AMX_PALETTE_1_MAX_ROWS_MASK,
+        .num_multi_bit_features = 1,
+        .multi_bit_features = (MultiBitFeatureInfo[]){
+            {
+                .feat_name = "max_rows",
+                .mask = CPUID_AMX_PALETTE_1_MAX_ROWS_MASK,
+                .high_bit_position = 15,
+                .low_bit_position = 0,
+            },
+        },
     },
     [FEAT_1E_0_EBX] = {
         .type = CPUID_FEATURE_WORD,
@@ -1040,6 +1079,21 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = {
         },
         .migratable_flags = CPUID_AMX_TMUL_MAX_K_MASK |
             CPUID_AMX_TMUL_MAX_N_MASK,
+        .num_multi_bit_features = 2,
+        .multi_bit_features = (MultiBitFeatureInfo[]){
+            {
+                .feat_name = "tmul_maxk",
+                .mask = CPUID_AMX_TMUL_MAX_K_MASK,
+                .high_bit_position = 7,
+                .low_bit_position = 0,
+            },
+            {
+                .feat_name = "tmul_maxn",
+                .mask = CPUID_AMX_TMUL_MAX_N_MASK,
+                .high_bit_position = 23,
+                .low_bit_position = 8,
+            },
+        },
     },
     /*Below are MSR exposed features*/
     [FEAT_ARCH_CAPABILITIES] = {
-- 
2.34.1



^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH 4/6] i386: Mask and report unavailable multi-bit feature values
  2022-10-27  2:00 [PATCH 0/6] Support for new CPU model SapphireRapids Wang, Lei
                   ` (2 preceding siblings ...)
  2022-10-27  2:00 ` [PATCH 3/6] i386: Introduce new struct "MultiBitFeatureInfo" for multi-bit features Wang, Lei
@ 2022-10-27  2:00 ` Wang, Lei
  2022-10-27  2:00 ` [PATCH 5/6] i386: Initialize AMX CPUID leaves with corresponding env->features[] leaves Wang, Lei
  2022-10-27  2:00 ` [PATCH 6/6] i386: Add new CPU model SapphireRapids Wang, Lei
  5 siblings, 0 replies; 7+ messages in thread
From: Wang, Lei @ 2022-10-27  2:00 UTC (permalink / raw)
  To: pbonzini; +Cc: qemu-devel, dgilbert, berrange, xiaoyao.li, yang.zhong

Some feature words, e.g., those in the AMX-related CPUID leaves 0x1D and
0x1E, are not bit-wise: multiple bits together represent one value. Handle
the case where the specified values differ from those reported by KVM. The
handling includes:

 - The responsibility for masking bits and giving warnings is delegated to
   the feature enabler. A framework is also provided to enable this.
 - To simplify the initialization, a default function is provided if the
   function is not specified.

This responsibility is delegated, rather than just marking the fields as
zeros when they are not the same, because different multi-bit features may
have different logic, which must be handled case by case. For example:

 1. CPUID.0x14_0x1:EBX[15:0]. Even though it's multi-bits field, it's a
    bitmap and each bit represents a separate capability.

 2. CPUID.0x14_0x1:EAX[2:0] represents the number of configurable Address
    Ranges. The 3 bits as a whole represent an integer value, which is the
    maximum capability of the HW. If KVM reports M, then any value from M
    down to 0 is legal to configure (because KVM can emulate each value
    correctly).

 3. CPUID.0x1D_0x1:EAX[31:16] represents palette 1 bytes_per_tile. The 16
    bits as a whole represent an integer value. Unlike case 2, SW needs to
    configure the same value as reported, because it is not possible for
    SW to configure a different value and KVM cannot emulate it.

So marking them blindly as zeros is incorrect, and delegating this
responsibility can let each multi-bit feature have its own way to mask bits.

Signed-off-by: Wang, Lei <lei4.wang@intel.com>
---
 target/i386/cpu-internal.h |  2 ++
 target/i386/cpu.c          | 39 ++++++++++++++++++++++++++++++++++----
 target/i386/cpu.h          |  2 ++
 3 files changed, 39 insertions(+), 4 deletions(-)

diff --git a/target/i386/cpu-internal.h b/target/i386/cpu-internal.h
index 66b3d66cb4..f973046b4e 100644
--- a/target/i386/cpu-internal.h
+++ b/target/i386/cpu-internal.h
@@ -30,6 +30,8 @@ typedef struct MultiBitFeatureInfo {
     uint64_t mask;
     unsigned high_bit_position;
     unsigned low_bit_position;
+    void (*mark_unavailable_bits)(X86CPU *cpu, FeatureWord w, int index,
+                                  const char *verbose_prefix);
 } MultiBitFeatureInfo;
 
 typedef struct FeatureWordInfo {
diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index 7ae232ab18..fc120c0694 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -4377,6 +4377,26 @@ static bool x86_cpu_have_filtered_features(X86CPU *cpu)
     return false;
 }
 
+void mark_unavailable_bits(X86CPU *cpu, FeatureWord w, int index,
+                           const char *verbose_prefix)
+{
+    FeatureWordInfo *f = &feature_word_info[w];
+    g_autofree char *feat_word_str = feature_word_description(f);
+    uint64_t host_feat = x86_cpu_get_supported_feature_word(w, false);
+    MultiBitFeatureInfo mf = f->multi_bit_features[index];
+
+    if ((cpu->env.features[w] ^ host_feat) & mf.mask) {
+        if (!cpu->force_features) {
+            cpu->env.features[w] &= ~mf.mask;
+        }
+        cpu->filtered_features[w] |= mf.mask;
+        if (verbose_prefix)
+            warn_report("%s: %s.%s [%u:%u]", verbose_prefix, feat_word_str,
+                        mf.feat_name, mf.high_bit_position,
+                        mf.low_bit_position);
+    }
+}
+
 static void mark_unavailable_features(X86CPU *cpu, FeatureWord w, uint64_t mask,
                                       const char *verbose_prefix)
 {
@@ -6424,10 +6444,21 @@ static void x86_cpu_filter_features(X86CPU *cpu, bool verbose)
     }
 
     for (w = 0; w < FEATURE_WORDS; w++) {
-        uint64_t host_feat =
-            x86_cpu_get_supported_feature_word(w, false);
-        uint64_t requested_features = env->features[w];
-        uint64_t unavailable_features = requested_features & ~host_feat;
+        uint64_t host_feat = x86_cpu_get_supported_feature_word(w, false);
+        FeatureWordInfo f = feature_word_info[w];
+        uint64_t unavailable_features = env->features[w] & ~host_feat;
+        int i;
+
+        for (i = 0; i < f.num_multi_bit_features; i++) {
+            MultiBitFeatureInfo mf = f.multi_bit_features[i];
+            if (!mf.mark_unavailable_bits) {
+                mf.mark_unavailable_bits = mark_unavailable_bits;
+            }
+            mf.mark_unavailable_bits(cpu, w, i, prefix);
+
+            unavailable_features &= ~mf.mask;
+        }
+
         mark_unavailable_features(cpu, w, unavailable_features, prefix);
     }
 
diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index 1c90fb6c9d..824a2b0f85 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -2103,6 +2103,8 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
 void cpu_clear_apic_feature(CPUX86State *env);
 void host_cpuid(uint32_t function, uint32_t count,
                 uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx);
+void mark_unavailable_bits(X86CPU *cpu, FeatureWord w, int index,
+                           const char *verbose_prefix);
 
 /* helper.c */
 void x86_cpu_set_a20(X86CPU *cpu, int a20_state);
-- 
2.34.1



^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH 5/6] i386: Initialize AMX CPUID leaves with corresponding env->features[] leaves
  2022-10-27  2:00 [PATCH 0/6] Support for new CPU model SapphireRapids Wang, Lei
                   ` (3 preceding siblings ...)
  2022-10-27  2:00 ` [PATCH 4/6] i386: Mask and report unavailable multi-bit feature values Wang, Lei
@ 2022-10-27  2:00 ` Wang, Lei
  2022-10-27  2:00 ` [PATCH 6/6] i386: Add new CPU model SapphireRapids Wang, Lei
  5 siblings, 0 replies; 7+ messages in thread
From: Wang, Lei @ 2022-10-27  2:00 UTC (permalink / raw)
  To: pbonzini; +Cc: qemu-devel, dgilbert, berrange, xiaoyao.li, yang.zhong

The AMX-related CPUID value, i.e., CPUID(0x1D,1):EAX, CPUID(0x1D,1):EBX,
CPUID(0x1D,1):ECX and CPUID(0x1E,0):EBX are hard-coded to Sapphire Rapids
without considering future platforms.

Replace these hard-coded values with env->features[], so QEMU can pass the
right value to KVM.

Signed-off-by: Wang, Lei <lei4.wang@intel.com>
---
 target/i386/cpu.c | 27 +++++++++++++--------------
 1 file changed, 13 insertions(+), 14 deletions(-)

diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index fc120c0694..21d9529d38 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -576,16 +576,16 @@ static CPUCacheInfo legacy_l3_cache = {
 #define INTEL_PT_PSB_BITMAP      (0x003f << 16) /* Support 2K,4K,8K,16K,32K,64K */
 
 /* CPUID Leaf 0x1D constants: */
-#define INTEL_AMX_TILE_MAX_SUBLEAF     0x1
-#define INTEL_AMX_TOTAL_TILE_BYTES     0x2000
-#define INTEL_AMX_BYTES_PER_TILE       0x400
-#define INTEL_AMX_BYTES_PER_ROW        0x40
-#define INTEL_AMX_TILE_MAX_NAMES       0x8
-#define INTEL_AMX_TILE_MAX_ROWS        0x10
+#define INTEL_SPR_AMX_TILE_MAX_SUBLEAF     0x1
+#define INTEL_SPR_AMX_TOTAL_TILE_BYTES     0x2000
+#define INTEL_SPR_AMX_BYTES_PER_TILE       0x400
+#define INTEL_SPR_AMX_BYTES_PER_ROW        0x40
+#define INTEL_SPR_AMX_TILE_MAX_NAMES       0x8
+#define INTEL_SPR_AMX_TILE_MAX_ROWS        0x10
 
 /* CPUID Leaf 0x1E constants: */
-#define INTEL_AMX_TMUL_MAX_K           0x10
-#define INTEL_AMX_TMUL_MAX_N           0x40
+#define INTEL_SPR_AMX_TMUL_MAX_K           0x10
+#define INTEL_SPR_AMX_TMUL_MAX_N           0x40
 
 void x86_cpu_vendor_words2str(char *dst, uint32_t vendor1,
                               uint32_t vendor2, uint32_t vendor3)
@@ -5763,12 +5763,11 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
 
         if (count == 0) {
             /* Highest numbered palette subleaf */
-            *eax = INTEL_AMX_TILE_MAX_SUBLEAF;
+            *eax = INTEL_SPR_AMX_TILE_MAX_SUBLEAF;
         } else if (count == 1) {
-            *eax = INTEL_AMX_TOTAL_TILE_BYTES |
-                   (INTEL_AMX_BYTES_PER_TILE << 16);
-            *ebx = INTEL_AMX_BYTES_PER_ROW | (INTEL_AMX_TILE_MAX_NAMES << 16);
-            *ecx = INTEL_AMX_TILE_MAX_ROWS;
+            *eax = env->features[FEAT_1D_1_EAX];
+            *ebx = env->features[FEAT_1D_1_EBX];
+            *ecx = env->features[FEAT_1D_1_ECX];
         }
         break;
     }
@@ -5784,7 +5783,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
 
         if (count == 0) {
             /* Highest numbered palette subleaf */
-            *ebx = INTEL_AMX_TMUL_MAX_K | (INTEL_AMX_TMUL_MAX_N << 8);
+            *ebx = env->features[FEAT_1E_0_EBX];
         }
         break;
     }
-- 
2.34.1



^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH 6/6] i386: Add new CPU model SapphireRapids
  2022-10-27  2:00 [PATCH 0/6] Support for new CPU model SapphireRapids Wang, Lei
                   ` (4 preceding siblings ...)
  2022-10-27  2:00 ` [PATCH 5/6] i386: Initialize AMX CPUID leaves with corresponding env->features[] leaves Wang, Lei
@ 2022-10-27  2:00 ` Wang, Lei
  5 siblings, 0 replies; 7+ messages in thread
From: Wang, Lei @ 2022-10-27  2:00 UTC (permalink / raw)
  To: pbonzini; +Cc: qemu-devel, dgilbert, berrange, xiaoyao.li, yang.zhong

The new CPU model mostly inherits features from Icelake-Server, while
adding new features:
 - AMX (Advanced Matrix Extensions)
 - Bus Lock Debug Exception
and new instructions:
 - AVX VNNI (Vector Neural Network Instruction):
    - VPDPBUSD: Multiply and Add Unsigned and Signed Bytes
    - VPDPBUSDS: Multiply and Add Unsigned and Signed Bytes with Saturation
    - VPDPWSSD: Multiply and Add Signed Word Integers
    - VPDPWSSDS: Multiply and Add Signed Word Integers with Saturation
 - FP16: Replicates existing AVX512 computational SP (FP32) instructions
   using FP16 instead of FP32 for ~2X performance gain
 - SERIALIZE: Provide software with a simple way to force the processor to
   complete all modifications, faster, allowed in all privilege levels and
   not causing an unconditional VM exit
 - TSX Suspend Load Address Tracking: Allows programmers to choose which
   memory accesses do not need to be tracked in the TSX read set
 - AVX512_BF16: Vector Neural Network Instructions supporting BFLOAT16
   inputs and conversion instructions from IEEE single precision

Features that may be added in future versions:
 - CET (virtualization support hasn't been merged)
Instructions that may be added in future versions:
 - fast zero-length MOVSB (not yet supported by KVM)
 - fast short STOSB (not yet supported by KVM)
 - fast short CMPSB, SCASB (not yet supported by KVM)

Signed-off-by: Wang, Lei <lei4.wang@intel.com>
Reviewed-by: Robert Hoo <robert.hu@linux.intel.com>
---
 target/i386/cpu.c | 135 ++++++++++++++++++++++++++++++++++++++++++++++
 target/i386/cpu.h |   4 ++
 2 files changed, 139 insertions(+)

diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index 21d9529d38..6bbca600e0 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -3576,6 +3576,141 @@ static const X86CPUDefinition builtin_x86_defs[] = {
             { /* end of list */ }
         }
     },
+    {
+        .name = "SapphireRapids",
+        .level = 0x20,
+        .vendor = CPUID_VENDOR_INTEL,
+        .family = 6,
+        .model = 143,
+        .stepping = 4,
+        /*
+         * please keep the ascending order so that we can have a clear view of
+         * bit position of each feature.
+         */
+        .features[FEAT_1_EDX] =
+            CPUID_FP87 | CPUID_VME | CPUID_DE | CPUID_PSE | CPUID_TSC |
+            CPUID_MSR | CPUID_PAE | CPUID_MCE | CPUID_CX8 | CPUID_APIC |
+            CPUID_SEP | CPUID_MTRR | CPUID_PGE | CPUID_MCA | CPUID_CMOV |
+            CPUID_PAT | CPUID_PSE36 | CPUID_CLFLUSH | CPUID_MMX | CPUID_FXSR |
+            CPUID_SSE | CPUID_SSE2,
+        .features[FEAT_1_ECX] =
+            CPUID_EXT_SSE3 | CPUID_EXT_PCLMULQDQ | CPUID_EXT_SSSE3 |
+            CPUID_EXT_FMA | CPUID_EXT_CX16 | CPUID_EXT_PCID | CPUID_EXT_SSE41 |
+            CPUID_EXT_SSE42 | CPUID_EXT_X2APIC | CPUID_EXT_MOVBE |
+            CPUID_EXT_POPCNT | CPUID_EXT_TSC_DEADLINE_TIMER | CPUID_EXT_AES |
+            CPUID_EXT_XSAVE | CPUID_EXT_AVX | CPUID_EXT_F16C | CPUID_EXT_RDRAND,
+        .features[FEAT_8000_0001_EDX] =
+            CPUID_EXT2_SYSCALL | CPUID_EXT2_NX | CPUID_EXT2_PDPE1GB |
+            CPUID_EXT2_RDTSCP | CPUID_EXT2_LM,
+        .features[FEAT_8000_0001_ECX] =
+            CPUID_EXT3_LAHF_LM | CPUID_EXT3_ABM | CPUID_EXT3_3DNOWPREFETCH,
+        .features[FEAT_8000_0008_EBX] =
+            CPUID_8000_0008_EBX_WBNOINVD,
+        .features[FEAT_7_0_EBX] =
+            CPUID_7_0_EBX_FSGSBASE | CPUID_7_0_EBX_BMI1 | CPUID_7_0_EBX_HLE |
+            CPUID_7_0_EBX_AVX2 | CPUID_7_0_EBX_SMEP | CPUID_7_0_EBX_BMI2 |
+            CPUID_7_0_EBX_ERMS | CPUID_7_0_EBX_INVPCID | CPUID_7_0_EBX_RTM |
+            CPUID_7_0_EBX_AVX512F | CPUID_7_0_EBX_AVX512DQ |
+            CPUID_7_0_EBX_RDSEED | CPUID_7_0_EBX_ADX | CPUID_7_0_EBX_SMAP |
+            CPUID_7_0_EBX_AVX512IFMA | CPUID_7_0_EBX_CLFLUSHOPT |
+            CPUID_7_0_EBX_CLWB | CPUID_7_0_EBX_AVX512CD | CPUID_7_0_EBX_SHA_NI |
+            CPUID_7_0_EBX_AVX512BW | CPUID_7_0_EBX_AVX512VL,
+        .features[FEAT_7_0_ECX] =
+            CPUID_7_0_ECX_AVX512_VBMI | CPUID_7_0_ECX_UMIP | CPUID_7_0_ECX_PKU |
+            CPUID_7_0_ECX_AVX512_VBMI2 | CPUID_7_0_ECX_GFNI |
+            CPUID_7_0_ECX_VAES | CPUID_7_0_ECX_VPCLMULQDQ |
+            CPUID_7_0_ECX_AVX512VNNI | CPUID_7_0_ECX_AVX512BITALG |
+            CPUID_7_0_ECX_AVX512_VPOPCNTDQ | CPUID_7_0_ECX_LA57 |
+            CPUID_7_0_ECX_RDPID | CPUID_7_0_ECX_BUS_LOCK_DETECT,
+        .features[FEAT_7_0_EDX] =
+            CPUID_7_0_EDX_FSRM | CPUID_7_0_EDX_SERIALIZE |
+            CPUID_7_0_EDX_TSX_LDTRK | CPUID_7_0_EDX_AMX_BF16 |
+            CPUID_7_0_EDX_AVX512_FP16 | CPUID_7_0_EDX_AMX_TILE |
+            CPUID_7_0_EDX_AMX_INT8 | CPUID_7_0_EDX_SPEC_CTRL |
+            CPUID_7_0_EDX_ARCH_CAPABILITIES | CPUID_7_0_EDX_SPEC_CTRL_SSBD,
+        .features[FEAT_ARCH_CAPABILITIES] =
+            MSR_ARCH_CAP_RDCL_NO | MSR_ARCH_CAP_IBRS_ALL |
+            MSR_ARCH_CAP_SKIP_L1DFL_VMENTRY | MSR_ARCH_CAP_MDS_NO |
+            MSR_ARCH_CAP_PSCHANGE_MC_NO | MSR_ARCH_CAP_TAA_NO,
+        .features[FEAT_XSAVE] =
+            CPUID_XSAVE_XSAVEOPT | CPUID_XSAVE_XSAVEC |
+            CPUID_XSAVE_XGETBV1 | CPUID_XSAVE_XSAVES | CPUID_D_1_EAX_XFD,
+        .features[FEAT_6_EAX] =
+            CPUID_6_EAX_ARAT,
+        .features[FEAT_7_1_EAX] =
+            CPUID_7_1_EAX_AVX_VNNI | CPUID_7_1_EAX_AVX512_BF16,
+        .features[FEAT_1D_1_EAX] = INTEL_SPR_AMX_TOTAL_TILE_BYTES |
+            (INTEL_SPR_AMX_BYTES_PER_TILE << 16),
+        .features[FEAT_1D_1_EBX] = INTEL_SPR_AMX_BYTES_PER_ROW |
+            (INTEL_SPR_AMX_TILE_MAX_NAMES << 16),
+        .features[FEAT_1D_1_ECX] = INTEL_SPR_AMX_TILE_MAX_ROWS,
+        .features[FEAT_1E_0_EBX] = INTEL_SPR_AMX_TMUL_MAX_K |
+            (INTEL_SPR_AMX_TMUL_MAX_N << 8),
+        .features[FEAT_VMX_BASIC] =
+            MSR_VMX_BASIC_INS_OUTS | MSR_VMX_BASIC_TRUE_CTLS,
+        .features[FEAT_VMX_ENTRY_CTLS] =
+            VMX_VM_ENTRY_LOAD_DEBUG_CONTROLS | VMX_VM_ENTRY_IA32E_MODE |
+            VMX_VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL |
+            VMX_VM_ENTRY_LOAD_IA32_PAT | VMX_VM_ENTRY_LOAD_IA32_EFER,
+        .features[FEAT_VMX_EPT_VPID_CAPS] =
+            MSR_VMX_EPT_EXECONLY |
+            MSR_VMX_EPT_PAGE_WALK_LENGTH_4 | MSR_VMX_EPT_PAGE_WALK_LENGTH_5 |
+            MSR_VMX_EPT_WB | MSR_VMX_EPT_2MB | MSR_VMX_EPT_1GB |
+            MSR_VMX_EPT_INVEPT | MSR_VMX_EPT_AD_BITS |
+            MSR_VMX_EPT_INVEPT_SINGLE_CONTEXT | MSR_VMX_EPT_INVEPT_ALL_CONTEXT |
+            MSR_VMX_EPT_INVVPID | MSR_VMX_EPT_INVVPID_SINGLE_ADDR |
+            MSR_VMX_EPT_INVVPID_SINGLE_CONTEXT |
+            MSR_VMX_EPT_INVVPID_ALL_CONTEXT |
+            MSR_VMX_EPT_INVVPID_SINGLE_CONTEXT_NOGLOBALS,
+        .features[FEAT_VMX_EXIT_CTLS] =
+            VMX_VM_EXIT_SAVE_DEBUG_CONTROLS |
+            VMX_VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL |
+            VMX_VM_EXIT_ACK_INTR_ON_EXIT | VMX_VM_EXIT_SAVE_IA32_PAT |
+            VMX_VM_EXIT_LOAD_IA32_PAT | VMX_VM_EXIT_SAVE_IA32_EFER |
+            VMX_VM_EXIT_LOAD_IA32_EFER | VMX_VM_EXIT_SAVE_VMX_PREEMPTION_TIMER,
+        .features[FEAT_VMX_MISC] =
+            MSR_VMX_MISC_STORE_LMA | MSR_VMX_MISC_ACTIVITY_HLT |
+            MSR_VMX_MISC_VMWRITE_VMEXIT,
+        .features[FEAT_VMX_PINBASED_CTLS] =
+            VMX_PIN_BASED_EXT_INTR_MASK | VMX_PIN_BASED_NMI_EXITING |
+            VMX_PIN_BASED_VIRTUAL_NMIS | VMX_PIN_BASED_VMX_PREEMPTION_TIMER |
+            VMX_PIN_BASED_POSTED_INTR,
+        .features[FEAT_VMX_PROCBASED_CTLS] =
+            VMX_CPU_BASED_VIRTUAL_INTR_PENDING |
+            VMX_CPU_BASED_USE_TSC_OFFSETING | VMX_CPU_BASED_HLT_EXITING |
+            VMX_CPU_BASED_INVLPG_EXITING | VMX_CPU_BASED_MWAIT_EXITING |
+            VMX_CPU_BASED_RDPMC_EXITING | VMX_CPU_BASED_RDTSC_EXITING |
+            VMX_CPU_BASED_CR3_LOAD_EXITING | VMX_CPU_BASED_CR3_STORE_EXITING |
+            VMX_CPU_BASED_CR8_LOAD_EXITING | VMX_CPU_BASED_CR8_STORE_EXITING |
+            VMX_CPU_BASED_TPR_SHADOW | VMX_CPU_BASED_VIRTUAL_NMI_PENDING |
+            VMX_CPU_BASED_MOV_DR_EXITING | VMX_CPU_BASED_UNCOND_IO_EXITING |
+            VMX_CPU_BASED_USE_IO_BITMAPS | VMX_CPU_BASED_MONITOR_TRAP_FLAG |
+            VMX_CPU_BASED_USE_MSR_BITMAPS | VMX_CPU_BASED_MONITOR_EXITING |
+            VMX_CPU_BASED_PAUSE_EXITING |
+            VMX_CPU_BASED_ACTIVATE_SECONDARY_CONTROLS,
+        .features[FEAT_VMX_SECONDARY_CTLS] =
+            VMX_SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
+            VMX_SECONDARY_EXEC_ENABLE_EPT | VMX_SECONDARY_EXEC_DESC |
+            VMX_SECONDARY_EXEC_RDTSCP |
+            VMX_SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |
+            VMX_SECONDARY_EXEC_ENABLE_VPID | VMX_SECONDARY_EXEC_WBINVD_EXITING |
+            VMX_SECONDARY_EXEC_UNRESTRICTED_GUEST |
+            VMX_SECONDARY_EXEC_APIC_REGISTER_VIRT |
+            VMX_SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
+            VMX_SECONDARY_EXEC_RDRAND_EXITING |
+            VMX_SECONDARY_EXEC_ENABLE_INVPCID |
+            VMX_SECONDARY_EXEC_ENABLE_VMFUNC | VMX_SECONDARY_EXEC_SHADOW_VMCS |
+            VMX_SECONDARY_EXEC_RDSEED_EXITING | VMX_SECONDARY_EXEC_ENABLE_PML |
+            VMX_SECONDARY_EXEC_XSAVES,
+        .features[FEAT_VMX_VMFUNC] =
+            MSR_VMX_VMFUNC_EPT_SWITCHING,
+        .xlevel = 0x80000008,
+        .model_id = "Intel Xeon Processor (SapphireRapids)",
+        .versions = (X86CPUVersionDefinition[]) {
+            { .version = 1 },
+            { /* end of list */ },
+        },
+    },
     {
         .name = "Denverton",
         .level = 21,
diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index 824a2b0f85..4a837cdb4f 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -891,10 +891,14 @@ uint64_t x86_cpu_get_supported_feature_word(FeatureWord w,
 #define CPUID_7_0_EDX_TSX_LDTRK         (1U << 16)
 /* Architectural LBRs */
 #define CPUID_7_0_EDX_ARCH_LBR          (1U << 19)
+/* AMX_BF16 instruction */
+#define CPUID_7_0_EDX_AMX_BF16          (1U << 22)
 /* AVX512_FP16 instruction */
 #define CPUID_7_0_EDX_AVX512_FP16       (1U << 23)
 /* AMX tile (two-dimensional register) */
 #define CPUID_7_0_EDX_AMX_TILE          (1U << 24)
+/* AMX_INT8 instruction */
+#define CPUID_7_0_EDX_AMX_INT8          (1U << 25)
 /* Speculation Control */
 #define CPUID_7_0_EDX_SPEC_CTRL         (1U << 26)
 /* Single Thread Indirect Branch Predictors */
-- 
2.34.1



^ permalink raw reply related	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2022-10-27  2:09 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-10-27  2:00 [PATCH 0/6] Support for new CPU model SapphireRapids Wang, Lei
2022-10-27  2:00 ` [PATCH 1/6] i386: Introduce FeatureWordInfo for AMX CPUID leaf 0x1D and 0x1E Wang, Lei
2022-10-27  2:00 ` [PATCH 2/6] i386: Remove unused parameter "uint32_t bit" in feature_word_description() Wang, Lei
2022-10-27  2:00 ` [PATCH 3/6] i386: Introduce new struct "MultiBitFeatureInfo" for multi-bit features Wang, Lei
2022-10-27  2:00 ` [PATCH 4/6] i386: Mask and report unavailable multi-bit feature values Wang, Lei
2022-10-27  2:00 ` [PATCH 5/6] i386: Initialize AMX CPUID leaves with corresponding env->features[] leaves Wang, Lei
2022-10-27  2:00 ` [PATCH 6/6] i386: Add new CPU model SapphireRapids Wang, Lei

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.