* [PATCH] i386: Add new CPU model SapphireRapids
@ 2022-08-12 5:57 Wang, Lei
2022-09-14 11:21 ` Igor Mammedov
2022-09-21 14:51 ` Dr. David Alan Gilbert
0 siblings, 2 replies; 11+ messages in thread
From: Wang, Lei @ 2022-08-12 5:57 UTC (permalink / raw)
To: pbonzini; +Cc: qemu-devel, robert.hu, xiaoyao.li, chenyi.qiang
The new CPU model mostly inherits features from Icelake-Server, while
adding new features:
- AMX (Advanced Matrix eXtensions)
- Bus Lock Debug Exception
and new instructions:
- AVX VNNI (Vector Neural Network Instruction):
- VPDPBUSD: Multiply and Add Unsigned and Signed Bytes
- VPDPBUSDS: Multiply and Add Unsigned and Signed Bytes with Saturation
- VPDPWSSD: Multiply and Add Signed Word Integers
- VPDPWSSDS: Multiply and Add Signed Word Integers with Saturation
- FP16: Replicates existing AVX512 computational SP (FP32) instructions
using FP16 instead of FP32 for ~2X performance gain
- SERIALIZE: Provide software with a simple way to force the processor to
complete all modifications, faster, allowed in all privilege levels and
not causing an unconditional VM exit
- TSX Suspend Load Address Tracking: Allows programmers to choose which
memory accesses do not need to be tracked in the TSX read set
- AVX512_BF16: Vector Neural Network Instructions supporting BFLOAT16
inputs and conversion instructions from IEEE single precision
Features that may be added in future versions:
- CET (virtualization support hasn't been merged)
Instructions that may be added in future versions:
- fast zero-length MOVSB (KVM doesn't support yet)
- fast short STOSB (KVM doesn't support yet)
- fast short CMPSB, SCASB (KVM doesn't support yet)
Signed-off-by: Wang, Lei <lei4.wang@intel.com>
Reviewed-by: Robert Hoo <robert.hu@linux.intel.com>
---
target/i386/cpu.c | 128 ++++++++++++++++++++++++++++++++++++++++++++++
target/i386/cpu.h | 4 ++
2 files changed, 132 insertions(+)
diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index 1db1278a59..abb43853d4 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -3467,6 +3467,134 @@ static const X86CPUDefinition builtin_x86_defs[] = {
{ /* end of list */ }
}
},
+ {
+ .name = "SapphireRapids",
+ .level = 0x20,
+ .vendor = CPUID_VENDOR_INTEL,
+ .family = 6,
+ .model = 143,
+ .stepping = 4,
+ /*
+ * please keep the ascending order so that we can have a clear view of
+ * bit position of each feature.
+ */
+ .features[FEAT_1_EDX] =
+ CPUID_FP87 | CPUID_VME | CPUID_DE | CPUID_PSE | CPUID_TSC |
+ CPUID_MSR | CPUID_PAE | CPUID_MCE | CPUID_CX8 | CPUID_APIC |
+ CPUID_SEP | CPUID_MTRR | CPUID_PGE | CPUID_MCA | CPUID_CMOV |
+ CPUID_PAT | CPUID_PSE36 | CPUID_CLFLUSH | CPUID_MMX | CPUID_FXSR |
+ CPUID_SSE | CPUID_SSE2,
+ .features[FEAT_1_ECX] =
+ CPUID_EXT_SSE3 | CPUID_EXT_PCLMULQDQ | CPUID_EXT_SSSE3 |
+ CPUID_EXT_FMA | CPUID_EXT_CX16 | CPUID_EXT_PCID | CPUID_EXT_SSE41 |
+ CPUID_EXT_SSE42 | CPUID_EXT_X2APIC | CPUID_EXT_MOVBE |
+ CPUID_EXT_POPCNT | CPUID_EXT_TSC_DEADLINE_TIMER | CPUID_EXT_AES |
+ CPUID_EXT_XSAVE | CPUID_EXT_AVX | CPUID_EXT_F16C | CPUID_EXT_RDRAND,
+ .features[FEAT_8000_0001_EDX] =
+ CPUID_EXT2_SYSCALL | CPUID_EXT2_NX | CPUID_EXT2_PDPE1GB |
+ CPUID_EXT2_RDTSCP | CPUID_EXT2_LM,
+ .features[FEAT_8000_0001_ECX] =
+ CPUID_EXT3_LAHF_LM | CPUID_EXT3_ABM | CPUID_EXT3_3DNOWPREFETCH,
+ .features[FEAT_8000_0008_EBX] =
+ CPUID_8000_0008_EBX_WBNOINVD,
+ .features[FEAT_7_0_EBX] =
+ CPUID_7_0_EBX_FSGSBASE | CPUID_7_0_EBX_BMI1 | CPUID_7_0_EBX_HLE |
+ CPUID_7_0_EBX_AVX2 | CPUID_7_0_EBX_SMEP | CPUID_7_0_EBX_BMI2 |
+ CPUID_7_0_EBX_ERMS | CPUID_7_0_EBX_INVPCID | CPUID_7_0_EBX_RTM |
+ CPUID_7_0_EBX_AVX512F | CPUID_7_0_EBX_AVX512DQ |
+ CPUID_7_0_EBX_RDSEED | CPUID_7_0_EBX_ADX | CPUID_7_0_EBX_SMAP |
+ CPUID_7_0_EBX_AVX512IFMA | CPUID_7_0_EBX_CLFLUSHOPT |
+ CPUID_7_0_EBX_CLWB | CPUID_7_0_EBX_AVX512CD | CPUID_7_0_EBX_SHA_NI |
+ CPUID_7_0_EBX_AVX512BW | CPUID_7_0_EBX_AVX512VL,
+ .features[FEAT_7_0_ECX] =
+ CPUID_7_0_ECX_AVX512_VBMI | CPUID_7_0_ECX_UMIP | CPUID_7_0_ECX_PKU |
+ CPUID_7_0_ECX_AVX512_VBMI2 | CPUID_7_0_ECX_GFNI |
+ CPUID_7_0_ECX_VAES | CPUID_7_0_ECX_VPCLMULQDQ |
+ CPUID_7_0_ECX_AVX512VNNI | CPUID_7_0_ECX_AVX512BITALG |
+ CPUID_7_0_ECX_AVX512_VPOPCNTDQ | CPUID_7_0_ECX_LA57 |
+ CPUID_7_0_ECX_RDPID | CPUID_7_0_ECX_BUS_LOCK_DETECT,
+ .features[FEAT_7_0_EDX] =
+ CPUID_7_0_EDX_FSRM | CPUID_7_0_EDX_SERIALIZE |
+ CPUID_7_0_EDX_TSX_LDTRK | CPUID_7_0_EDX_AMX_BF16 |
+ CPUID_7_0_EDX_AVX512_FP16 | CPUID_7_0_EDX_AMX_TILE |
+ CPUID_7_0_EDX_AMX_INT8 | CPUID_7_0_EDX_SPEC_CTRL |
+ CPUID_7_0_EDX_ARCH_CAPABILITIES | CPUID_7_0_EDX_SPEC_CTRL_SSBD,
+ .features[FEAT_ARCH_CAPABILITIES] =
+ MSR_ARCH_CAP_RDCL_NO | MSR_ARCH_CAP_IBRS_ALL |
+ MSR_ARCH_CAP_SKIP_L1DFL_VMENTRY | MSR_ARCH_CAP_MDS_NO |
+ MSR_ARCH_CAP_PSCHANGE_MC_NO | MSR_ARCH_CAP_TAA_NO,
+ .features[FEAT_XSAVE] =
+ CPUID_XSAVE_XSAVEOPT | CPUID_XSAVE_XSAVEC |
+ CPUID_XSAVE_XGETBV1 | CPUID_XSAVE_XSAVES | CPUID_D_1_EAX_XFD,
+ .features[FEAT_6_EAX] =
+ CPUID_6_EAX_ARAT,
+ .features[FEAT_7_1_EAX] =
+ CPUID_7_1_EAX_AVX_VNNI | CPUID_7_1_EAX_AVX512_BF16,
+ .features[FEAT_VMX_BASIC] =
+ MSR_VMX_BASIC_INS_OUTS | MSR_VMX_BASIC_TRUE_CTLS,
+ .features[FEAT_VMX_ENTRY_CTLS] =
+ VMX_VM_ENTRY_LOAD_DEBUG_CONTROLS | VMX_VM_ENTRY_IA32E_MODE |
+ VMX_VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL |
+ VMX_VM_ENTRY_LOAD_IA32_PAT | VMX_VM_ENTRY_LOAD_IA32_EFER,
+ .features[FEAT_VMX_EPT_VPID_CAPS] =
+ MSR_VMX_EPT_EXECONLY |
+ MSR_VMX_EPT_PAGE_WALK_LENGTH_4 | MSR_VMX_EPT_PAGE_WALK_LENGTH_5 |
+ MSR_VMX_EPT_WB | MSR_VMX_EPT_2MB | MSR_VMX_EPT_1GB |
+ MSR_VMX_EPT_INVEPT | MSR_VMX_EPT_AD_BITS |
+ MSR_VMX_EPT_INVEPT_SINGLE_CONTEXT | MSR_VMX_EPT_INVEPT_ALL_CONTEXT |
+ MSR_VMX_EPT_INVVPID | MSR_VMX_EPT_INVVPID_SINGLE_ADDR |
+ MSR_VMX_EPT_INVVPID_SINGLE_CONTEXT |
+ MSR_VMX_EPT_INVVPID_ALL_CONTEXT |
+ MSR_VMX_EPT_INVVPID_SINGLE_CONTEXT_NOGLOBALS,
+ .features[FEAT_VMX_EXIT_CTLS] =
+ VMX_VM_EXIT_SAVE_DEBUG_CONTROLS |
+ VMX_VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL |
+ VMX_VM_EXIT_ACK_INTR_ON_EXIT | VMX_VM_EXIT_SAVE_IA32_PAT |
+ VMX_VM_EXIT_LOAD_IA32_PAT | VMX_VM_EXIT_SAVE_IA32_EFER |
+ VMX_VM_EXIT_LOAD_IA32_EFER | VMX_VM_EXIT_SAVE_VMX_PREEMPTION_TIMER,
+ .features[FEAT_VMX_MISC] =
+ MSR_VMX_MISC_STORE_LMA | MSR_VMX_MISC_ACTIVITY_HLT |
+ MSR_VMX_MISC_VMWRITE_VMEXIT,
+ .features[FEAT_VMX_PINBASED_CTLS] =
+ VMX_PIN_BASED_EXT_INTR_MASK | VMX_PIN_BASED_NMI_EXITING |
+ VMX_PIN_BASED_VIRTUAL_NMIS | VMX_PIN_BASED_VMX_PREEMPTION_TIMER |
+ VMX_PIN_BASED_POSTED_INTR,
+ .features[FEAT_VMX_PROCBASED_CTLS] =
+ VMX_CPU_BASED_VIRTUAL_INTR_PENDING |
+ VMX_CPU_BASED_USE_TSC_OFFSETING | VMX_CPU_BASED_HLT_EXITING |
+ VMX_CPU_BASED_INVLPG_EXITING | VMX_CPU_BASED_MWAIT_EXITING |
+ VMX_CPU_BASED_RDPMC_EXITING | VMX_CPU_BASED_RDTSC_EXITING |
+ VMX_CPU_BASED_CR3_LOAD_EXITING | VMX_CPU_BASED_CR3_STORE_EXITING |
+ VMX_CPU_BASED_CR8_LOAD_EXITING | VMX_CPU_BASED_CR8_STORE_EXITING |
+ VMX_CPU_BASED_TPR_SHADOW | VMX_CPU_BASED_VIRTUAL_NMI_PENDING |
+ VMX_CPU_BASED_MOV_DR_EXITING | VMX_CPU_BASED_UNCOND_IO_EXITING |
+ VMX_CPU_BASED_USE_IO_BITMAPS | VMX_CPU_BASED_MONITOR_TRAP_FLAG |
+ VMX_CPU_BASED_USE_MSR_BITMAPS | VMX_CPU_BASED_MONITOR_EXITING |
+ VMX_CPU_BASED_PAUSE_EXITING |
+ VMX_CPU_BASED_ACTIVATE_SECONDARY_CONTROLS,
+ .features[FEAT_VMX_SECONDARY_CTLS] =
+ VMX_SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
+ VMX_SECONDARY_EXEC_ENABLE_EPT | VMX_SECONDARY_EXEC_DESC |
+ VMX_SECONDARY_EXEC_RDTSCP |
+ VMX_SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |
+ VMX_SECONDARY_EXEC_ENABLE_VPID | VMX_SECONDARY_EXEC_WBINVD_EXITING |
+ VMX_SECONDARY_EXEC_UNRESTRICTED_GUEST |
+ VMX_SECONDARY_EXEC_APIC_REGISTER_VIRT |
+ VMX_SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
+ VMX_SECONDARY_EXEC_RDRAND_EXITING |
+ VMX_SECONDARY_EXEC_ENABLE_INVPCID |
+ VMX_SECONDARY_EXEC_ENABLE_VMFUNC | VMX_SECONDARY_EXEC_SHADOW_VMCS |
+ VMX_SECONDARY_EXEC_RDSEED_EXITING | VMX_SECONDARY_EXEC_ENABLE_PML |
+ VMX_SECONDARY_EXEC_XSAVES,
+ .features[FEAT_VMX_VMFUNC] =
+ MSR_VMX_VMFUNC_EPT_SWITCHING,
+ .xlevel = 0x80000008,
+ .model_id = "Intel Xeon Processor (SapphireRapids)",
+ .versions = (X86CPUVersionDefinition[]) {
+ { .version = 1 },
+ { /* end of list */ },
+ },
+ },
{
.name = "Denverton",
.level = 21,
diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index 82004b65b9..ef3e8a5ed5 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -879,10 +879,14 @@ uint64_t x86_cpu_get_supported_feature_word(FeatureWord w,
#define CPUID_7_0_EDX_TSX_LDTRK (1U << 16)
/* Architectural LBRs */
#define CPUID_7_0_EDX_ARCH_LBR (1U << 19)
+/* AMX_BF16 instruction */
+#define CPUID_7_0_EDX_AMX_BF16 (1U << 22)
/* AVX512_FP16 instruction */
#define CPUID_7_0_EDX_AVX512_FP16 (1U << 23)
/* AMX tile (two-dimensional register) */
#define CPUID_7_0_EDX_AMX_TILE (1U << 24)
+/* AMX_INT8 instruction */
+#define CPUID_7_0_EDX_AMX_INT8 (1U << 25)
/* Speculation Control */
#define CPUID_7_0_EDX_SPEC_CTRL (1U << 26)
/* Single Thread Indirect Branch Predictors */
--
2.34.1
^ permalink raw reply related [flat|nested] 11+ messages in thread
* Re: [PATCH] i386: Add new CPU model SapphireRapids
2022-08-12 5:57 [PATCH] i386: Add new CPU model SapphireRapids Wang, Lei
@ 2022-09-14 11:21 ` Igor Mammedov
2022-09-21 14:51 ` Dr. David Alan Gilbert
1 sibling, 0 replies; 11+ messages in thread
From: Igor Mammedov @ 2022-09-14 11:21 UTC (permalink / raw)
To: Wang, Lei; +Cc: pbonzini, qemu-devel, robert.hu, xiaoyao.li, chenyi.qiang
On Thu, 11 Aug 2022 22:57:51 -0700
"Wang, Lei" <lei4.wang@intel.com> wrote:
> The new CPU model mostly inherits features from Icelake-Server, while
> adding new features:
> - AMX (Advance Matrix eXtensions)
> - Bus Lock Debug Exception
> and new instructions:
> - AVX VNNI (Vector Neural Network Instruction):
> - VPDPBUS: Multiply and Add Unsigned and Signed Bytes
> - VPDPBUSDS: Multiply and Add Unsigned and Signed Bytes with Saturation
> - VPDPWSSD: Multiply and Add Signed Word Integers
> - VPDPWSSDS: Multiply and Add Signed Integers with Saturation
> - FP16: Replicates existing AVX512 computational SP (FP32) instructions
> using FP16 instead of FP32 for ~2X performance gain
> - SERIALIZE: Provide software with a simple way to force the processor to
> complete all modifications, faster, allowed in all privilege levels and
> not causing an unconditional VM exit
> - TSX Suspend Load Address Tracking: Allows programmers to choose which
> memory accesses do not need to be tracked in the TSX read set
> - AVX512_BF16: Vector Neural Network Instructions supporting BFLOAT16
> inputs and conversion instructions from IEEE single precision
>
> Features may be added in future versions:
> - CET (virtualization support hasn't been merged)
> Instructions may be added in future versions:
> - fast zero-length MOVSB (KVM doesn't support yet)
> - fast short STOSB (KVM doesn't support yet)
> - fast short CMPSB, SCASB (KVM doesn't support yet)
>
> Signed-off-by: Wang, Lei <lei4.wang@intel.com>
> Reviewed-by: Robert Hoo <robert.hu@linux.intel.com>
looks fine to me,
Acked-by: Igor Mammedov <imammedo@redhat.com>
> ---
> target/i386/cpu.c | 128 ++++++++++++++++++++++++++++++++++++++++++++++
> target/i386/cpu.h | 4 ++
> 2 files changed, 132 insertions(+)
>
> diff --git a/target/i386/cpu.c b/target/i386/cpu.c
> index 1db1278a59..abb43853d4 100644
> --- a/target/i386/cpu.c
> +++ b/target/i386/cpu.c
> @@ -3467,6 +3467,134 @@ static const X86CPUDefinition builtin_x86_defs[] = {
> { /* end of list */ }
> }
> },
> + {
> + .name = "SapphireRapids",
> + .level = 0x20,
> + .vendor = CPUID_VENDOR_INTEL,
> + .family = 6,
> + .model = 143,
> + .stepping = 4,
> + /*
> + * please keep the ascending order so that we can have a clear view of
> + * bit position of each feature.
> + */
> + .features[FEAT_1_EDX] =
> + CPUID_FP87 | CPUID_VME | CPUID_DE | CPUID_PSE | CPUID_TSC |
> + CPUID_MSR | CPUID_PAE | CPUID_MCE | CPUID_CX8 | CPUID_APIC |
> + CPUID_SEP | CPUID_MTRR | CPUID_PGE | CPUID_MCA | CPUID_CMOV |
> + CPUID_PAT | CPUID_PSE36 | CPUID_CLFLUSH | CPUID_MMX | CPUID_FXSR |
> + CPUID_SSE | CPUID_SSE2,
> + .features[FEAT_1_ECX] =
> + CPUID_EXT_SSE3 | CPUID_EXT_PCLMULQDQ | CPUID_EXT_SSSE3 |
> + CPUID_EXT_FMA | CPUID_EXT_CX16 | CPUID_EXT_PCID | CPUID_EXT_SSE41 |
> + CPUID_EXT_SSE42 | CPUID_EXT_X2APIC | CPUID_EXT_MOVBE |
> + CPUID_EXT_POPCNT | CPUID_EXT_TSC_DEADLINE_TIMER | CPUID_EXT_AES |
> + CPUID_EXT_XSAVE | CPUID_EXT_AVX | CPUID_EXT_F16C | CPUID_EXT_RDRAND,
> + .features[FEAT_8000_0001_EDX] =
> + CPUID_EXT2_SYSCALL | CPUID_EXT2_NX | CPUID_EXT2_PDPE1GB |
> + CPUID_EXT2_RDTSCP | CPUID_EXT2_LM,
> + .features[FEAT_8000_0001_ECX] =
> + CPUID_EXT3_LAHF_LM | CPUID_EXT3_ABM | CPUID_EXT3_3DNOWPREFETCH,
> + .features[FEAT_8000_0008_EBX] =
> + CPUID_8000_0008_EBX_WBNOINVD,
> + .features[FEAT_7_0_EBX] =
> + CPUID_7_0_EBX_FSGSBASE | CPUID_7_0_EBX_BMI1 | CPUID_7_0_EBX_HLE |
> + CPUID_7_0_EBX_AVX2 | CPUID_7_0_EBX_SMEP | CPUID_7_0_EBX_BMI2 |
> + CPUID_7_0_EBX_ERMS | CPUID_7_0_EBX_INVPCID | CPUID_7_0_EBX_RTM |
> + CPUID_7_0_EBX_AVX512F | CPUID_7_0_EBX_AVX512DQ |
> + CPUID_7_0_EBX_RDSEED | CPUID_7_0_EBX_ADX | CPUID_7_0_EBX_SMAP |
> + CPUID_7_0_EBX_AVX512IFMA | CPUID_7_0_EBX_CLFLUSHOPT |
> + CPUID_7_0_EBX_CLWB | CPUID_7_0_EBX_AVX512CD | CPUID_7_0_EBX_SHA_NI |
> + CPUID_7_0_EBX_AVX512BW | CPUID_7_0_EBX_AVX512VL,
> + .features[FEAT_7_0_ECX] =
> + CPUID_7_0_ECX_AVX512_VBMI | CPUID_7_0_ECX_UMIP | CPUID_7_0_ECX_PKU |
> + CPUID_7_0_ECX_AVX512_VBMI2 | CPUID_7_0_ECX_GFNI |
> + CPUID_7_0_ECX_VAES | CPUID_7_0_ECX_VPCLMULQDQ |
> + CPUID_7_0_ECX_AVX512VNNI | CPUID_7_0_ECX_AVX512BITALG |
> + CPUID_7_0_ECX_AVX512_VPOPCNTDQ | CPUID_7_0_ECX_LA57 |
> + CPUID_7_0_ECX_RDPID | CPUID_7_0_ECX_BUS_LOCK_DETECT,
> + .features[FEAT_7_0_EDX] =
> + CPUID_7_0_EDX_FSRM | CPUID_7_0_EDX_SERIALIZE |
> + CPUID_7_0_EDX_TSX_LDTRK | CPUID_7_0_EDX_AMX_BF16 |
> + CPUID_7_0_EDX_AVX512_FP16 | CPUID_7_0_EDX_AMX_TILE |
> + CPUID_7_0_EDX_AMX_INT8 | CPUID_7_0_EDX_SPEC_CTRL |
> + CPUID_7_0_EDX_ARCH_CAPABILITIES | CPUID_7_0_EDX_SPEC_CTRL_SSBD,
> + .features[FEAT_ARCH_CAPABILITIES] =
> + MSR_ARCH_CAP_RDCL_NO | MSR_ARCH_CAP_IBRS_ALL |
> + MSR_ARCH_CAP_SKIP_L1DFL_VMENTRY | MSR_ARCH_CAP_MDS_NO |
> + MSR_ARCH_CAP_PSCHANGE_MC_NO | MSR_ARCH_CAP_TAA_NO,
> + .features[FEAT_XSAVE] =
> + CPUID_XSAVE_XSAVEOPT | CPUID_XSAVE_XSAVEC |
> + CPUID_XSAVE_XGETBV1 | CPUID_XSAVE_XSAVES | CPUID_D_1_EAX_XFD,
> + .features[FEAT_6_EAX] =
> + CPUID_6_EAX_ARAT,
> + .features[FEAT_7_1_EAX] =
> + CPUID_7_1_EAX_AVX_VNNI | CPUID_7_1_EAX_AVX512_BF16,
> + .features[FEAT_VMX_BASIC] =
> + MSR_VMX_BASIC_INS_OUTS | MSR_VMX_BASIC_TRUE_CTLS,
> + .features[FEAT_VMX_ENTRY_CTLS] =
> + VMX_VM_ENTRY_LOAD_DEBUG_CONTROLS | VMX_VM_ENTRY_IA32E_MODE |
> + VMX_VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL |
> + VMX_VM_ENTRY_LOAD_IA32_PAT | VMX_VM_ENTRY_LOAD_IA32_EFER,
> + .features[FEAT_VMX_EPT_VPID_CAPS] =
> + MSR_VMX_EPT_EXECONLY |
> + MSR_VMX_EPT_PAGE_WALK_LENGTH_4 | MSR_VMX_EPT_PAGE_WALK_LENGTH_5 |
> + MSR_VMX_EPT_WB | MSR_VMX_EPT_2MB | MSR_VMX_EPT_1GB |
> + MSR_VMX_EPT_INVEPT | MSR_VMX_EPT_AD_BITS |
> + MSR_VMX_EPT_INVEPT_SINGLE_CONTEXT | MSR_VMX_EPT_INVEPT_ALL_CONTEXT |
> + MSR_VMX_EPT_INVVPID | MSR_VMX_EPT_INVVPID_SINGLE_ADDR |
> + MSR_VMX_EPT_INVVPID_SINGLE_CONTEXT |
> + MSR_VMX_EPT_INVVPID_ALL_CONTEXT |
> + MSR_VMX_EPT_INVVPID_SINGLE_CONTEXT_NOGLOBALS,
> + .features[FEAT_VMX_EXIT_CTLS] =
> + VMX_VM_EXIT_SAVE_DEBUG_CONTROLS |
> + VMX_VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL |
> + VMX_VM_EXIT_ACK_INTR_ON_EXIT | VMX_VM_EXIT_SAVE_IA32_PAT |
> + VMX_VM_EXIT_LOAD_IA32_PAT | VMX_VM_EXIT_SAVE_IA32_EFER |
> + VMX_VM_EXIT_LOAD_IA32_EFER | VMX_VM_EXIT_SAVE_VMX_PREEMPTION_TIMER,
> + .features[FEAT_VMX_MISC] =
> + MSR_VMX_MISC_STORE_LMA | MSR_VMX_MISC_ACTIVITY_HLT |
> + MSR_VMX_MISC_VMWRITE_VMEXIT,
> + .features[FEAT_VMX_PINBASED_CTLS] =
> + VMX_PIN_BASED_EXT_INTR_MASK | VMX_PIN_BASED_NMI_EXITING |
> + VMX_PIN_BASED_VIRTUAL_NMIS | VMX_PIN_BASED_VMX_PREEMPTION_TIMER |
> + VMX_PIN_BASED_POSTED_INTR,
> + .features[FEAT_VMX_PROCBASED_CTLS] =
> + VMX_CPU_BASED_VIRTUAL_INTR_PENDING |
> + VMX_CPU_BASED_USE_TSC_OFFSETING | VMX_CPU_BASED_HLT_EXITING |
> + VMX_CPU_BASED_INVLPG_EXITING | VMX_CPU_BASED_MWAIT_EXITING |
> + VMX_CPU_BASED_RDPMC_EXITING | VMX_CPU_BASED_RDTSC_EXITING |
> + VMX_CPU_BASED_CR3_LOAD_EXITING | VMX_CPU_BASED_CR3_STORE_EXITING |
> + VMX_CPU_BASED_CR8_LOAD_EXITING | VMX_CPU_BASED_CR8_STORE_EXITING |
> + VMX_CPU_BASED_TPR_SHADOW | VMX_CPU_BASED_VIRTUAL_NMI_PENDING |
> + VMX_CPU_BASED_MOV_DR_EXITING | VMX_CPU_BASED_UNCOND_IO_EXITING |
> + VMX_CPU_BASED_USE_IO_BITMAPS | VMX_CPU_BASED_MONITOR_TRAP_FLAG |
> + VMX_CPU_BASED_USE_MSR_BITMAPS | VMX_CPU_BASED_MONITOR_EXITING |
> + VMX_CPU_BASED_PAUSE_EXITING |
> + VMX_CPU_BASED_ACTIVATE_SECONDARY_CONTROLS,
> + .features[FEAT_VMX_SECONDARY_CTLS] =
> + VMX_SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
> + VMX_SECONDARY_EXEC_ENABLE_EPT | VMX_SECONDARY_EXEC_DESC |
> + VMX_SECONDARY_EXEC_RDTSCP |
> + VMX_SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |
> + VMX_SECONDARY_EXEC_ENABLE_VPID | VMX_SECONDARY_EXEC_WBINVD_EXITING |
> + VMX_SECONDARY_EXEC_UNRESTRICTED_GUEST |
> + VMX_SECONDARY_EXEC_APIC_REGISTER_VIRT |
> + VMX_SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
> + VMX_SECONDARY_EXEC_RDRAND_EXITING |
> + VMX_SECONDARY_EXEC_ENABLE_INVPCID |
> + VMX_SECONDARY_EXEC_ENABLE_VMFUNC | VMX_SECONDARY_EXEC_SHADOW_VMCS |
> + VMX_SECONDARY_EXEC_RDSEED_EXITING | VMX_SECONDARY_EXEC_ENABLE_PML |
> + VMX_SECONDARY_EXEC_XSAVES,
> + .features[FEAT_VMX_VMFUNC] =
> + MSR_VMX_VMFUNC_EPT_SWITCHING,
> + .xlevel = 0x80000008,
> + .model_id = "Intel Xeon Processor (SapphireRapids)",
> + .versions = (X86CPUVersionDefinition[]) {
> + { .version = 1 },
> + { /* end of list */ },
> + },
> + },
> {
> .name = "Denverton",
> .level = 21,
> diff --git a/target/i386/cpu.h b/target/i386/cpu.h
> index 82004b65b9..ef3e8a5ed5 100644
> --- a/target/i386/cpu.h
> +++ b/target/i386/cpu.h
> @@ -879,10 +879,14 @@ uint64_t x86_cpu_get_supported_feature_word(FeatureWord w,
> #define CPUID_7_0_EDX_TSX_LDTRK (1U << 16)
> /* Architectural LBRs */
> #define CPUID_7_0_EDX_ARCH_LBR (1U << 19)
> +/* AMX_BF16 instruction */
> +#define CPUID_7_0_EDX_AMX_BF16 (1U << 22)
> /* AVX512_FP16 instruction */
> #define CPUID_7_0_EDX_AVX512_FP16 (1U << 23)
> /* AMX tile (two-dimensional register) */
> #define CPUID_7_0_EDX_AMX_TILE (1U << 24)
> +/* AMX_INT8 instruction */
> +#define CPUID_7_0_EDX_AMX_INT8 (1U << 25)
> /* Speculation Control */
> #define CPUID_7_0_EDX_SPEC_CTRL (1U << 26)
> /* Single Thread Indirect Branch Predictors */
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH] i386: Add new CPU model SapphireRapids
2022-08-12 5:57 [PATCH] i386: Add new CPU model SapphireRapids Wang, Lei
2022-09-14 11:21 ` Igor Mammedov
@ 2022-09-21 14:51 ` Dr. David Alan Gilbert
2022-09-21 15:01 ` Daniel P. Berrangé
` (2 more replies)
1 sibling, 3 replies; 11+ messages in thread
From: Dr. David Alan Gilbert @ 2022-09-21 14:51 UTC (permalink / raw)
To: Wang, Lei, paul.c.lai
Cc: pbonzini, qemu-devel, robert.hu, xiaoyao.li, chenyi.qiang
* Wang, Lei (lei4.wang@intel.com) wrote:
> The new CPU model mostly inherits features from Icelake-Server, while
> adding new features:
> - AMX (Advance Matrix eXtensions)
> - Bus Lock Debug Exception
> and new instructions:
> - AVX VNNI (Vector Neural Network Instruction):
> - VPDPBUS: Multiply and Add Unsigned and Signed Bytes
> - VPDPBUSDS: Multiply and Add Unsigned and Signed Bytes with Saturation
> - VPDPWSSD: Multiply and Add Signed Word Integers
> - VPDPWSSDS: Multiply and Add Signed Integers with Saturation
> - FP16: Replicates existing AVX512 computational SP (FP32) instructions
> using FP16 instead of FP32 for ~2X performance gain
> - SERIALIZE: Provide software with a simple way to force the processor to
> complete all modifications, faster, allowed in all privilege levels and
> not causing an unconditional VM exit
> - TSX Suspend Load Address Tracking: Allows programmers to choose which
> memory accesses do not need to be tracked in the TSX read set
> - AVX512_BF16: Vector Neural Network Instructions supporting BFLOAT16
> inputs and conversion instructions from IEEE single precision
>
> Features may be added in future versions:
> - CET (virtualization support hasn't been merged)
> Instructions may be added in future versions:
> - fast zero-length MOVSB (KVM doesn't support yet)
> - fast short STOSB (KVM doesn't support yet)
> - fast short CMPSB, SCASB (KVM doesn't support yet)
>
> Signed-off-by: Wang, Lei <lei4.wang@intel.com>
> Reviewed-by: Robert Hoo <robert.hu@linux.intel.com>
Hi,
What fills in the AMX tile and tmul information leafs
(0x1D, 0x1E)?
In particular, how would we make sure when we migrate between two
generations of AMX/Tile/Tmul capable devices with different
register/palette/tmul limits that the migration is tied to the CPU type
correctly?
Would you expect all devices called a 'SapphireRapids' to have the same
sizes?
Dave
> ---
> target/i386/cpu.c | 128 ++++++++++++++++++++++++++++++++++++++++++++++
> target/i386/cpu.h | 4 ++
> 2 files changed, 132 insertions(+)
>
> diff --git a/target/i386/cpu.c b/target/i386/cpu.c
> index 1db1278a59..abb43853d4 100644
> --- a/target/i386/cpu.c
> +++ b/target/i386/cpu.c
> @@ -3467,6 +3467,134 @@ static const X86CPUDefinition builtin_x86_defs[] = {
> { /* end of list */ }
> }
> },
> + {
> + .name = "SapphireRapids",
> + .level = 0x20,
> + .vendor = CPUID_VENDOR_INTEL,
> + .family = 6,
> + .model = 143,
> + .stepping = 4,
> + /*
> + * please keep the ascending order so that we can have a clear view of
> + * bit position of each feature.
> + */
> + .features[FEAT_1_EDX] =
> + CPUID_FP87 | CPUID_VME | CPUID_DE | CPUID_PSE | CPUID_TSC |
> + CPUID_MSR | CPUID_PAE | CPUID_MCE | CPUID_CX8 | CPUID_APIC |
> + CPUID_SEP | CPUID_MTRR | CPUID_PGE | CPUID_MCA | CPUID_CMOV |
> + CPUID_PAT | CPUID_PSE36 | CPUID_CLFLUSH | CPUID_MMX | CPUID_FXSR |
> + CPUID_SSE | CPUID_SSE2,
> + .features[FEAT_1_ECX] =
> + CPUID_EXT_SSE3 | CPUID_EXT_PCLMULQDQ | CPUID_EXT_SSSE3 |
> + CPUID_EXT_FMA | CPUID_EXT_CX16 | CPUID_EXT_PCID | CPUID_EXT_SSE41 |
> + CPUID_EXT_SSE42 | CPUID_EXT_X2APIC | CPUID_EXT_MOVBE |
> + CPUID_EXT_POPCNT | CPUID_EXT_TSC_DEADLINE_TIMER | CPUID_EXT_AES |
> + CPUID_EXT_XSAVE | CPUID_EXT_AVX | CPUID_EXT_F16C | CPUID_EXT_RDRAND,
> + .features[FEAT_8000_0001_EDX] =
> + CPUID_EXT2_SYSCALL | CPUID_EXT2_NX | CPUID_EXT2_PDPE1GB |
> + CPUID_EXT2_RDTSCP | CPUID_EXT2_LM,
> + .features[FEAT_8000_0001_ECX] =
> + CPUID_EXT3_LAHF_LM | CPUID_EXT3_ABM | CPUID_EXT3_3DNOWPREFETCH,
> + .features[FEAT_8000_0008_EBX] =
> + CPUID_8000_0008_EBX_WBNOINVD,
> + .features[FEAT_7_0_EBX] =
> + CPUID_7_0_EBX_FSGSBASE | CPUID_7_0_EBX_BMI1 | CPUID_7_0_EBX_HLE |
> + CPUID_7_0_EBX_AVX2 | CPUID_7_0_EBX_SMEP | CPUID_7_0_EBX_BMI2 |
> + CPUID_7_0_EBX_ERMS | CPUID_7_0_EBX_INVPCID | CPUID_7_0_EBX_RTM |
> + CPUID_7_0_EBX_AVX512F | CPUID_7_0_EBX_AVX512DQ |
> + CPUID_7_0_EBX_RDSEED | CPUID_7_0_EBX_ADX | CPUID_7_0_EBX_SMAP |
> + CPUID_7_0_EBX_AVX512IFMA | CPUID_7_0_EBX_CLFLUSHOPT |
> + CPUID_7_0_EBX_CLWB | CPUID_7_0_EBX_AVX512CD | CPUID_7_0_EBX_SHA_NI |
> + CPUID_7_0_EBX_AVX512BW | CPUID_7_0_EBX_AVX512VL,
> + .features[FEAT_7_0_ECX] =
> + CPUID_7_0_ECX_AVX512_VBMI | CPUID_7_0_ECX_UMIP | CPUID_7_0_ECX_PKU |
> + CPUID_7_0_ECX_AVX512_VBMI2 | CPUID_7_0_ECX_GFNI |
> + CPUID_7_0_ECX_VAES | CPUID_7_0_ECX_VPCLMULQDQ |
> + CPUID_7_0_ECX_AVX512VNNI | CPUID_7_0_ECX_AVX512BITALG |
> + CPUID_7_0_ECX_AVX512_VPOPCNTDQ | CPUID_7_0_ECX_LA57 |
> + CPUID_7_0_ECX_RDPID | CPUID_7_0_ECX_BUS_LOCK_DETECT,
> + .features[FEAT_7_0_EDX] =
> + CPUID_7_0_EDX_FSRM | CPUID_7_0_EDX_SERIALIZE |
> + CPUID_7_0_EDX_TSX_LDTRK | CPUID_7_0_EDX_AMX_BF16 |
> + CPUID_7_0_EDX_AVX512_FP16 | CPUID_7_0_EDX_AMX_TILE |
> + CPUID_7_0_EDX_AMX_INT8 | CPUID_7_0_EDX_SPEC_CTRL |
> + CPUID_7_0_EDX_ARCH_CAPABILITIES | CPUID_7_0_EDX_SPEC_CTRL_SSBD,
> + .features[FEAT_ARCH_CAPABILITIES] =
> + MSR_ARCH_CAP_RDCL_NO | MSR_ARCH_CAP_IBRS_ALL |
> + MSR_ARCH_CAP_SKIP_L1DFL_VMENTRY | MSR_ARCH_CAP_MDS_NO |
> + MSR_ARCH_CAP_PSCHANGE_MC_NO | MSR_ARCH_CAP_TAA_NO,
> + .features[FEAT_XSAVE] =
> + CPUID_XSAVE_XSAVEOPT | CPUID_XSAVE_XSAVEC |
> + CPUID_XSAVE_XGETBV1 | CPUID_XSAVE_XSAVES | CPUID_D_1_EAX_XFD,
> + .features[FEAT_6_EAX] =
> + CPUID_6_EAX_ARAT,
> + .features[FEAT_7_1_EAX] =
> + CPUID_7_1_EAX_AVX_VNNI | CPUID_7_1_EAX_AVX512_BF16,
> + .features[FEAT_VMX_BASIC] =
> + MSR_VMX_BASIC_INS_OUTS | MSR_VMX_BASIC_TRUE_CTLS,
> + .features[FEAT_VMX_ENTRY_CTLS] =
> + VMX_VM_ENTRY_LOAD_DEBUG_CONTROLS | VMX_VM_ENTRY_IA32E_MODE |
> + VMX_VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL |
> + VMX_VM_ENTRY_LOAD_IA32_PAT | VMX_VM_ENTRY_LOAD_IA32_EFER,
> + .features[FEAT_VMX_EPT_VPID_CAPS] =
> + MSR_VMX_EPT_EXECONLY |
> + MSR_VMX_EPT_PAGE_WALK_LENGTH_4 | MSR_VMX_EPT_PAGE_WALK_LENGTH_5 |
> + MSR_VMX_EPT_WB | MSR_VMX_EPT_2MB | MSR_VMX_EPT_1GB |
> + MSR_VMX_EPT_INVEPT | MSR_VMX_EPT_AD_BITS |
> + MSR_VMX_EPT_INVEPT_SINGLE_CONTEXT | MSR_VMX_EPT_INVEPT_ALL_CONTEXT |
> + MSR_VMX_EPT_INVVPID | MSR_VMX_EPT_INVVPID_SINGLE_ADDR |
> + MSR_VMX_EPT_INVVPID_SINGLE_CONTEXT |
> + MSR_VMX_EPT_INVVPID_ALL_CONTEXT |
> + MSR_VMX_EPT_INVVPID_SINGLE_CONTEXT_NOGLOBALS,
> + .features[FEAT_VMX_EXIT_CTLS] =
> + VMX_VM_EXIT_SAVE_DEBUG_CONTROLS |
> + VMX_VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL |
> + VMX_VM_EXIT_ACK_INTR_ON_EXIT | VMX_VM_EXIT_SAVE_IA32_PAT |
> + VMX_VM_EXIT_LOAD_IA32_PAT | VMX_VM_EXIT_SAVE_IA32_EFER |
> + VMX_VM_EXIT_LOAD_IA32_EFER | VMX_VM_EXIT_SAVE_VMX_PREEMPTION_TIMER,
> + .features[FEAT_VMX_MISC] =
> + MSR_VMX_MISC_STORE_LMA | MSR_VMX_MISC_ACTIVITY_HLT |
> + MSR_VMX_MISC_VMWRITE_VMEXIT,
> + .features[FEAT_VMX_PINBASED_CTLS] =
> + VMX_PIN_BASED_EXT_INTR_MASK | VMX_PIN_BASED_NMI_EXITING |
> + VMX_PIN_BASED_VIRTUAL_NMIS | VMX_PIN_BASED_VMX_PREEMPTION_TIMER |
> + VMX_PIN_BASED_POSTED_INTR,
> + .features[FEAT_VMX_PROCBASED_CTLS] =
> + VMX_CPU_BASED_VIRTUAL_INTR_PENDING |
> + VMX_CPU_BASED_USE_TSC_OFFSETING | VMX_CPU_BASED_HLT_EXITING |
> + VMX_CPU_BASED_INVLPG_EXITING | VMX_CPU_BASED_MWAIT_EXITING |
> + VMX_CPU_BASED_RDPMC_EXITING | VMX_CPU_BASED_RDTSC_EXITING |
> + VMX_CPU_BASED_CR3_LOAD_EXITING | VMX_CPU_BASED_CR3_STORE_EXITING |
> + VMX_CPU_BASED_CR8_LOAD_EXITING | VMX_CPU_BASED_CR8_STORE_EXITING |
> + VMX_CPU_BASED_TPR_SHADOW | VMX_CPU_BASED_VIRTUAL_NMI_PENDING |
> + VMX_CPU_BASED_MOV_DR_EXITING | VMX_CPU_BASED_UNCOND_IO_EXITING |
> + VMX_CPU_BASED_USE_IO_BITMAPS | VMX_CPU_BASED_MONITOR_TRAP_FLAG |
> + VMX_CPU_BASED_USE_MSR_BITMAPS | VMX_CPU_BASED_MONITOR_EXITING |
> + VMX_CPU_BASED_PAUSE_EXITING |
> + VMX_CPU_BASED_ACTIVATE_SECONDARY_CONTROLS,
> + .features[FEAT_VMX_SECONDARY_CTLS] =
> + VMX_SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
> + VMX_SECONDARY_EXEC_ENABLE_EPT | VMX_SECONDARY_EXEC_DESC |
> + VMX_SECONDARY_EXEC_RDTSCP |
> + VMX_SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |
> + VMX_SECONDARY_EXEC_ENABLE_VPID | VMX_SECONDARY_EXEC_WBINVD_EXITING |
> + VMX_SECONDARY_EXEC_UNRESTRICTED_GUEST |
> + VMX_SECONDARY_EXEC_APIC_REGISTER_VIRT |
> + VMX_SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
> + VMX_SECONDARY_EXEC_RDRAND_EXITING |
> + VMX_SECONDARY_EXEC_ENABLE_INVPCID |
> + VMX_SECONDARY_EXEC_ENABLE_VMFUNC | VMX_SECONDARY_EXEC_SHADOW_VMCS |
> + VMX_SECONDARY_EXEC_RDSEED_EXITING | VMX_SECONDARY_EXEC_ENABLE_PML |
> + VMX_SECONDARY_EXEC_XSAVES,
> + .features[FEAT_VMX_VMFUNC] =
> + MSR_VMX_VMFUNC_EPT_SWITCHING,
> + .xlevel = 0x80000008,
> + .model_id = "Intel Xeon Processor (SapphireRapids)",
> + .versions = (X86CPUVersionDefinition[]) {
> + { .version = 1 },
> + { /* end of list */ },
> + },
> + },
> {
> .name = "Denverton",
> .level = 21,
> diff --git a/target/i386/cpu.h b/target/i386/cpu.h
> index 82004b65b9..ef3e8a5ed5 100644
> --- a/target/i386/cpu.h
> +++ b/target/i386/cpu.h
> @@ -879,10 +879,14 @@ uint64_t x86_cpu_get_supported_feature_word(FeatureWord w,
> #define CPUID_7_0_EDX_TSX_LDTRK (1U << 16)
> /* Architectural LBRs */
> #define CPUID_7_0_EDX_ARCH_LBR (1U << 19)
> +/* AMX_BF16 instruction */
> +#define CPUID_7_0_EDX_AMX_BF16 (1U << 22)
> /* AVX512_FP16 instruction */
> #define CPUID_7_0_EDX_AVX512_FP16 (1U << 23)
> /* AMX tile (two-dimensional register) */
> #define CPUID_7_0_EDX_AMX_TILE (1U << 24)
> +/* AMX_INT8 instruction */
> +#define CPUID_7_0_EDX_AMX_INT8 (1U << 25)
> /* Speculation Control */
> #define CPUID_7_0_EDX_SPEC_CTRL (1U << 26)
> /* Single Thread Indirect Branch Predictors */
> --
> 2.34.1
>
>
--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH] i386: Add new CPU model SapphireRapids
2022-09-21 14:51 ` Dr. David Alan Gilbert
@ 2022-09-21 15:01 ` Daniel P. Berrangé
2022-09-21 15:05 ` Dr. David Alan Gilbert
2022-09-23 13:30 ` Yang Zhong
2023-02-17 1:17 ` Xiaoyao Li
2 siblings, 1 reply; 11+ messages in thread
From: Daniel P. Berrangé @ 2022-09-21 15:01 UTC (permalink / raw)
To: Dr. David Alan Gilbert
Cc: Wang, Lei, paul.c.lai, pbonzini, qemu-devel, robert.hu,
xiaoyao.li, chenyi.qiang
On Wed, Sep 21, 2022 at 03:51:42PM +0100, Dr. David Alan Gilbert wrote:
> * Wang, Lei (lei4.wang@intel.com) wrote:
> > The new CPU model mostly inherits features from Icelake-Server, while
> > adding new features:
> > - AMX (Advance Matrix eXtensions)
> > - Bus Lock Debug Exception
> > and new instructions:
> > - AVX VNNI (Vector Neural Network Instruction):
> > - VPDPBUS: Multiply and Add Unsigned and Signed Bytes
> > - VPDPBUSDS: Multiply and Add Unsigned and Signed Bytes with Saturation
> > - VPDPWSSD: Multiply and Add Signed Word Integers
> > - VPDPWSSDS: Multiply and Add Signed Integers with Saturation
> > - FP16: Replicates existing AVX512 computational SP (FP32) instructions
> > using FP16 instead of FP32 for ~2X performance gain
> > - SERIALIZE: Provide software with a simple way to force the processor to
> > complete all modifications, faster, allowed in all privilege levels and
> > not causing an unconditional VM exit
> > - TSX Suspend Load Address Tracking: Allows programmers to choose which
> > memory accesses do not need to be tracked in the TSX read set
> > - AVX512_BF16: Vector Neural Network Instructions supporting BFLOAT16
> > inputs and conversion instructions from IEEE single precision
> >
> > Features may be added in future versions:
> > - CET (virtualization support hasn't been merged)
> > Instructions may be added in future versions:
> > - fast zero-length MOVSB (KVM doesn't support yet)
> > - fast short STOSB (KVM doesn't support yet)
> > - fast short CMPSB, SCASB (KVM doesn't support yet)
> >
> > Signed-off-by: Wang, Lei <lei4.wang@intel.com>
> > Reviewed-by: Robert Hoo <robert.hu@linux.intel.com>
>
> Hi,
> What fills in the AMX tile and tmul information leafs
> (0x1D, 0x1E)?
> In particular, how would we make sure when we migrate between two
> generations of AMX/Tile/Tmul capable devices with different
> register/palette/tmul limits that the migration is tied to the CPU type
> correctly?
> Would you expect all devices called a 'SappireRapids' to have the same
> sizes?
We shouldn't assume this will only be used on 'SapphireRapids' host
silicon. This named CPU model is likely to be used by a guest running
on any host silicon generations that follow SapphireRapids too.
With regards,
Daniel
--
|: https://berrange.com -o- https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o- https://fstop138.berrange.com :|
|: https://entangle-photo.org -o- https://www.instagram.com/dberrange :|
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH] i386: Add new CPU model SapphireRapids
2022-09-21 15:01 ` Daniel P. Berrangé
@ 2022-09-21 15:05 ` Dr. David Alan Gilbert
0 siblings, 0 replies; 11+ messages in thread
From: Dr. David Alan Gilbert @ 2022-09-21 15:05 UTC (permalink / raw)
To: Daniel P. Berrangé
Cc: Wang, Lei, paul.c.lai, pbonzini, qemu-devel, robert.hu,
xiaoyao.li, chenyi.qiang
* Daniel P. Berrangé (berrange@redhat.com) wrote:
> On Wed, Sep 21, 2022 at 03:51:42PM +0100, Dr. David Alan Gilbert wrote:
> > * Wang, Lei (lei4.wang@intel.com) wrote:
> > > The new CPU model mostly inherits features from Icelake-Server, while
> > > adding new features:
> > > - AMX (Advance Matrix eXtensions)
> > > - Bus Lock Debug Exception
> > > and new instructions:
> > > - AVX VNNI (Vector Neural Network Instruction):
> > > - VPDPBUS: Multiply and Add Unsigned and Signed Bytes
> > > - VPDPBUSDS: Multiply and Add Unsigned and Signed Bytes with Saturation
> > > - VPDPWSSD: Multiply and Add Signed Word Integers
> > > - VPDPWSSDS: Multiply and Add Signed Integers with Saturation
> > > - FP16: Replicates existing AVX512 computational SP (FP32) instructions
> > > using FP16 instead of FP32 for ~2X performance gain
> > > - SERIALIZE: Provide software with a simple way to force the processor to
> > > complete all modifications, faster, allowed in all privilege levels and
> > > not causing an unconditional VM exit
> > > - TSX Suspend Load Address Tracking: Allows programmers to choose which
> > > memory accesses do not need to be tracked in the TSX read set
> > > - AVX512_BF16: Vector Neural Network Instructions supporting BFLOAT16
> > > inputs and conversion instructions from IEEE single precision
> > >
> > > Features may be added in future versions:
> > > - CET (virtualization support hasn't been merged)
> > > Instructions may be added in future versions:
> > > - fast zero-length MOVSB (KVM doesn't support yet)
> > > - fast short STOSB (KVM doesn't support yet)
> > > - fast short CMPSB, SCASB (KVM doesn't support yet)
> > >
> > > Signed-off-by: Wang, Lei <lei4.wang@intel.com>
> > > Reviewed-by: Robert Hoo <robert.hu@linux.intel.com>
> >
> > Hi,
> > What fills in the AMX tile and tmul information leafs
> > (0x1D, 0x1E)?
> > In particular, how would we make sure when we migrate between two
> > generations of AMX/Tile/Tmul capable devices with different
> > register/palette/tmul limits that the migration is tied to the CPU type
> > correctly?
> > Would you expect all devices called a 'SappireRapids' to have the same
> > sizes?
>
> We shouldn't assume this will only be used on 'SappireRapids' host
> silicon. Thi named CPU model is likely to be used by a guest running
> on any host silicon generations that follow SappireRapids too.
Indeed, but I wanted to check the opposite question first; whether
all SapphireRapids had the same sizes; I think you're asking the opposite
question.
Dave
> With regards,
> Daniel
> --
> |: https://berrange.com -o- https://www.flickr.com/photos/dberrange :|
> |: https://libvirt.org -o- https://fstop138.berrange.com :|
> |: https://entangle-photo.org -o- https://www.instagram.com/dberrange :|
>
--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH] i386: Add new CPU model SapphireRapids
2022-09-21 14:51 ` Dr. David Alan Gilbert
2022-09-21 15:01 ` Daniel P. Berrangé
@ 2022-09-23 13:30 ` Yang Zhong
2022-09-23 16:01 ` Xiaoyao Li
2023-02-17 1:17 ` Xiaoyao Li
2 siblings, 1 reply; 11+ messages in thread
From: Yang Zhong @ 2022-09-23 13:30 UTC (permalink / raw)
To: Dr. David Alan Gilbert
Cc: Wang, Lei, paul.c.lai, pbonzini, qemu-devel, robert.hu,
xiaoyao.li, chenyi.qiang, yang.zhong
On Wed, Sep 21, 2022 at 03:51:42PM +0100, Dr. David Alan Gilbert wrote:
> * Wang, Lei (lei4.wang@intel.com) wrote:
> > The new CPU model mostly inherits features from Icelake-Server, while
> > adding new features:
> > - AMX (Advance Matrix eXtensions)
> > - Bus Lock Debug Exception
> > and new instructions:
> > - AVX VNNI (Vector Neural Network Instruction):
> > - VPDPBUS: Multiply and Add Unsigned and Signed Bytes
> > - VPDPBUSDS: Multiply and Add Unsigned and Signed Bytes with Saturation
> > - VPDPWSSD: Multiply and Add Signed Word Integers
> > - VPDPWSSDS: Multiply and Add Signed Integers with Saturation
> > - FP16: Replicates existing AVX512 computational SP (FP32) instructions
> > using FP16 instead of FP32 for ~2X performance gain
> > - SERIALIZE: Provide software with a simple way to force the processor to
> > complete all modifications, faster, allowed in all privilege levels and
> > not causing an unconditional VM exit
> > - TSX Suspend Load Address Tracking: Allows programmers to choose which
> > memory accesses do not need to be tracked in the TSX read set
> > - AVX512_BF16: Vector Neural Network Instructions supporting BFLOAT16
> > inputs and conversion instructions from IEEE single precision
> >
> > Features may be added in future versions:
> > - CET (virtualization support hasn't been merged)
> > Instructions may be added in future versions:
> > - fast zero-length MOVSB (KVM doesn't support yet)
> > - fast short STOSB (KVM doesn't support yet)
> > - fast short CMPSB, SCASB (KVM doesn't support yet)
> >
> > Signed-off-by: Wang, Lei <lei4.wang@intel.com>
> > Reviewed-by: Robert Hoo <robert.hu@linux.intel.com>
>
> Hi,
> What fills in the AMX tile and tmul information leafs
> (0x1D, 0x1E)?
> In particular, how would we make sure when we migrate between two
> generations of AMX/Tile/Tmul capable devices with different
> register/palette/tmul limits that the migration is tied to the CPU type
> correctly?
> Would you expect all devices called a 'SappireRapids' to have the same
> sizes?
>
There is only one palette in the current design. This palette includes 8
tiles. Those two CPUID leafs define bytes_per_tile, total_tile_bytes,
max_rows, etc.; the AMX tool will configure those values into TILECFG with
the ldtilecfg instruction. Once the tiles are configured, we can use
the tileload instruction to load data into those tiles.
We did a migration between two SapphireRapids hosts with the AMX selftest tool
(tools/testing/selftests/x86/amx.c) started on both sides, and the migration
worked well.
As for SapphireRapids and newer CPU types, the definitions of those two CPUID
leafs are all the same for AMX. So, from the AMX perspective, the migration
should be workable on subsequent CPU types. Thanks!
Yang
> Dave
>
> > ---
> > target/i386/cpu.c | 128 ++++++++++++++++++++++++++++++++++++++++++++++
> > target/i386/cpu.h | 4 ++
> > 2 files changed, 132 insertions(+)
> >
> > diff --git a/target/i386/cpu.c b/target/i386/cpu.c
> > index 1db1278a59..abb43853d4 100644
> > --- a/target/i386/cpu.c
> > +++ b/target/i386/cpu.c
> > @@ -3467,6 +3467,134 @@ static const X86CPUDefinition builtin_x86_defs[] = {
> > { /* end of list */ }
> > }
> > },
> > + {
> > + .name = "SapphireRapids",
> > + .level = 0x20,
> > + .vendor = CPUID_VENDOR_INTEL,
> > + .family = 6,
> > + .model = 143,
> > + .stepping = 4,
> > + /*
> > + * please keep the ascending order so that we can have a clear view of
> > + * bit position of each feature.
> > + */
> > + .features[FEAT_1_EDX] =
> > + CPUID_FP87 | CPUID_VME | CPUID_DE | CPUID_PSE | CPUID_TSC |
> > + CPUID_MSR | CPUID_PAE | CPUID_MCE | CPUID_CX8 | CPUID_APIC |
> > + CPUID_SEP | CPUID_MTRR | CPUID_PGE | CPUID_MCA | CPUID_CMOV |
> > + CPUID_PAT | CPUID_PSE36 | CPUID_CLFLUSH | CPUID_MMX | CPUID_FXSR |
> > + CPUID_SSE | CPUID_SSE2,
> > + .features[FEAT_1_ECX] =
> > + CPUID_EXT_SSE3 | CPUID_EXT_PCLMULQDQ | CPUID_EXT_SSSE3 |
> > + CPUID_EXT_FMA | CPUID_EXT_CX16 | CPUID_EXT_PCID | CPUID_EXT_SSE41 |
> > + CPUID_EXT_SSE42 | CPUID_EXT_X2APIC | CPUID_EXT_MOVBE |
> > + CPUID_EXT_POPCNT | CPUID_EXT_TSC_DEADLINE_TIMER | CPUID_EXT_AES |
> > + CPUID_EXT_XSAVE | CPUID_EXT_AVX | CPUID_EXT_F16C | CPUID_EXT_RDRAND,
> > + .features[FEAT_8000_0001_EDX] =
> > + CPUID_EXT2_SYSCALL | CPUID_EXT2_NX | CPUID_EXT2_PDPE1GB |
> > + CPUID_EXT2_RDTSCP | CPUID_EXT2_LM,
> > + .features[FEAT_8000_0001_ECX] =
> > + CPUID_EXT3_LAHF_LM | CPUID_EXT3_ABM | CPUID_EXT3_3DNOWPREFETCH,
> > + .features[FEAT_8000_0008_EBX] =
> > + CPUID_8000_0008_EBX_WBNOINVD,
> > + .features[FEAT_7_0_EBX] =
> > + CPUID_7_0_EBX_FSGSBASE | CPUID_7_0_EBX_BMI1 | CPUID_7_0_EBX_HLE |
> > + CPUID_7_0_EBX_AVX2 | CPUID_7_0_EBX_SMEP | CPUID_7_0_EBX_BMI2 |
> > + CPUID_7_0_EBX_ERMS | CPUID_7_0_EBX_INVPCID | CPUID_7_0_EBX_RTM |
> > + CPUID_7_0_EBX_AVX512F | CPUID_7_0_EBX_AVX512DQ |
> > + CPUID_7_0_EBX_RDSEED | CPUID_7_0_EBX_ADX | CPUID_7_0_EBX_SMAP |
> > + CPUID_7_0_EBX_AVX512IFMA | CPUID_7_0_EBX_CLFLUSHOPT |
> > + CPUID_7_0_EBX_CLWB | CPUID_7_0_EBX_AVX512CD | CPUID_7_0_EBX_SHA_NI |
> > + CPUID_7_0_EBX_AVX512BW | CPUID_7_0_EBX_AVX512VL,
> > + .features[FEAT_7_0_ECX] =
> > + CPUID_7_0_ECX_AVX512_VBMI | CPUID_7_0_ECX_UMIP | CPUID_7_0_ECX_PKU |
> > + CPUID_7_0_ECX_AVX512_VBMI2 | CPUID_7_0_ECX_GFNI |
> > + CPUID_7_0_ECX_VAES | CPUID_7_0_ECX_VPCLMULQDQ |
> > + CPUID_7_0_ECX_AVX512VNNI | CPUID_7_0_ECX_AVX512BITALG |
> > + CPUID_7_0_ECX_AVX512_VPOPCNTDQ | CPUID_7_0_ECX_LA57 |
> > + CPUID_7_0_ECX_RDPID | CPUID_7_0_ECX_BUS_LOCK_DETECT,
> > + .features[FEAT_7_0_EDX] =
> > + CPUID_7_0_EDX_FSRM | CPUID_7_0_EDX_SERIALIZE |
> > + CPUID_7_0_EDX_TSX_LDTRK | CPUID_7_0_EDX_AMX_BF16 |
> > + CPUID_7_0_EDX_AVX512_FP16 | CPUID_7_0_EDX_AMX_TILE |
> > + CPUID_7_0_EDX_AMX_INT8 | CPUID_7_0_EDX_SPEC_CTRL |
> > + CPUID_7_0_EDX_ARCH_CAPABILITIES | CPUID_7_0_EDX_SPEC_CTRL_SSBD,
> > + .features[FEAT_ARCH_CAPABILITIES] =
> > + MSR_ARCH_CAP_RDCL_NO | MSR_ARCH_CAP_IBRS_ALL |
> > + MSR_ARCH_CAP_SKIP_L1DFL_VMENTRY | MSR_ARCH_CAP_MDS_NO |
> > + MSR_ARCH_CAP_PSCHANGE_MC_NO | MSR_ARCH_CAP_TAA_NO,
> > + .features[FEAT_XSAVE] =
> > + CPUID_XSAVE_XSAVEOPT | CPUID_XSAVE_XSAVEC |
> > + CPUID_XSAVE_XGETBV1 | CPUID_XSAVE_XSAVES | CPUID_D_1_EAX_XFD,
> > + .features[FEAT_6_EAX] =
> > + CPUID_6_EAX_ARAT,
> > + .features[FEAT_7_1_EAX] =
> > + CPUID_7_1_EAX_AVX_VNNI | CPUID_7_1_EAX_AVX512_BF16,
> > + .features[FEAT_VMX_BASIC] =
> > + MSR_VMX_BASIC_INS_OUTS | MSR_VMX_BASIC_TRUE_CTLS,
> > + .features[FEAT_VMX_ENTRY_CTLS] =
> > + VMX_VM_ENTRY_LOAD_DEBUG_CONTROLS | VMX_VM_ENTRY_IA32E_MODE |
> > + VMX_VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL |
> > + VMX_VM_ENTRY_LOAD_IA32_PAT | VMX_VM_ENTRY_LOAD_IA32_EFER,
> > + .features[FEAT_VMX_EPT_VPID_CAPS] =
> > + MSR_VMX_EPT_EXECONLY |
> > + MSR_VMX_EPT_PAGE_WALK_LENGTH_4 | MSR_VMX_EPT_PAGE_WALK_LENGTH_5 |
> > + MSR_VMX_EPT_WB | MSR_VMX_EPT_2MB | MSR_VMX_EPT_1GB |
> > + MSR_VMX_EPT_INVEPT | MSR_VMX_EPT_AD_BITS |
> > + MSR_VMX_EPT_INVEPT_SINGLE_CONTEXT | MSR_VMX_EPT_INVEPT_ALL_CONTEXT |
> > + MSR_VMX_EPT_INVVPID | MSR_VMX_EPT_INVVPID_SINGLE_ADDR |
> > + MSR_VMX_EPT_INVVPID_SINGLE_CONTEXT |
> > + MSR_VMX_EPT_INVVPID_ALL_CONTEXT |
> > + MSR_VMX_EPT_INVVPID_SINGLE_CONTEXT_NOGLOBALS,
> > + .features[FEAT_VMX_EXIT_CTLS] =
> > + VMX_VM_EXIT_SAVE_DEBUG_CONTROLS |
> > + VMX_VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL |
> > + VMX_VM_EXIT_ACK_INTR_ON_EXIT | VMX_VM_EXIT_SAVE_IA32_PAT |
> > + VMX_VM_EXIT_LOAD_IA32_PAT | VMX_VM_EXIT_SAVE_IA32_EFER |
> > + VMX_VM_EXIT_LOAD_IA32_EFER | VMX_VM_EXIT_SAVE_VMX_PREEMPTION_TIMER,
> > + .features[FEAT_VMX_MISC] =
> > + MSR_VMX_MISC_STORE_LMA | MSR_VMX_MISC_ACTIVITY_HLT |
> > + MSR_VMX_MISC_VMWRITE_VMEXIT,
> > + .features[FEAT_VMX_PINBASED_CTLS] =
> > + VMX_PIN_BASED_EXT_INTR_MASK | VMX_PIN_BASED_NMI_EXITING |
> > + VMX_PIN_BASED_VIRTUAL_NMIS | VMX_PIN_BASED_VMX_PREEMPTION_TIMER |
> > + VMX_PIN_BASED_POSTED_INTR,
> > + .features[FEAT_VMX_PROCBASED_CTLS] =
> > + VMX_CPU_BASED_VIRTUAL_INTR_PENDING |
> > + VMX_CPU_BASED_USE_TSC_OFFSETING | VMX_CPU_BASED_HLT_EXITING |
> > + VMX_CPU_BASED_INVLPG_EXITING | VMX_CPU_BASED_MWAIT_EXITING |
> > + VMX_CPU_BASED_RDPMC_EXITING | VMX_CPU_BASED_RDTSC_EXITING |
> > + VMX_CPU_BASED_CR3_LOAD_EXITING | VMX_CPU_BASED_CR3_STORE_EXITING |
> > + VMX_CPU_BASED_CR8_LOAD_EXITING | VMX_CPU_BASED_CR8_STORE_EXITING |
> > + VMX_CPU_BASED_TPR_SHADOW | VMX_CPU_BASED_VIRTUAL_NMI_PENDING |
> > + VMX_CPU_BASED_MOV_DR_EXITING | VMX_CPU_BASED_UNCOND_IO_EXITING |
> > + VMX_CPU_BASED_USE_IO_BITMAPS | VMX_CPU_BASED_MONITOR_TRAP_FLAG |
> > + VMX_CPU_BASED_USE_MSR_BITMAPS | VMX_CPU_BASED_MONITOR_EXITING |
> > + VMX_CPU_BASED_PAUSE_EXITING |
> > + VMX_CPU_BASED_ACTIVATE_SECONDARY_CONTROLS,
> > + .features[FEAT_VMX_SECONDARY_CTLS] =
> > + VMX_SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
> > + VMX_SECONDARY_EXEC_ENABLE_EPT | VMX_SECONDARY_EXEC_DESC |
> > + VMX_SECONDARY_EXEC_RDTSCP |
> > + VMX_SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |
> > + VMX_SECONDARY_EXEC_ENABLE_VPID | VMX_SECONDARY_EXEC_WBINVD_EXITING |
> > + VMX_SECONDARY_EXEC_UNRESTRICTED_GUEST |
> > + VMX_SECONDARY_EXEC_APIC_REGISTER_VIRT |
> > + VMX_SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
> > + VMX_SECONDARY_EXEC_RDRAND_EXITING |
> > + VMX_SECONDARY_EXEC_ENABLE_INVPCID |
> > + VMX_SECONDARY_EXEC_ENABLE_VMFUNC | VMX_SECONDARY_EXEC_SHADOW_VMCS |
> > + VMX_SECONDARY_EXEC_RDSEED_EXITING | VMX_SECONDARY_EXEC_ENABLE_PML |
> > + VMX_SECONDARY_EXEC_XSAVES,
> > + .features[FEAT_VMX_VMFUNC] =
> > + MSR_VMX_VMFUNC_EPT_SWITCHING,
> > + .xlevel = 0x80000008,
> > + .model_id = "Intel Xeon Processor (SapphireRapids)",
> > + .versions = (X86CPUVersionDefinition[]) {
> > + { .version = 1 },
> > + { /* end of list */ },
> > + },
> > + },
> > {
> > .name = "Denverton",
> > .level = 21,
> > diff --git a/target/i386/cpu.h b/target/i386/cpu.h
> > index 82004b65b9..ef3e8a5ed5 100644
> > --- a/target/i386/cpu.h
> > +++ b/target/i386/cpu.h
> > @@ -879,10 +879,14 @@ uint64_t x86_cpu_get_supported_feature_word(FeatureWord w,
> > #define CPUID_7_0_EDX_TSX_LDTRK (1U << 16)
> > /* Architectural LBRs */
> > #define CPUID_7_0_EDX_ARCH_LBR (1U << 19)
> > +/* AMX_BF16 instruction */
> > +#define CPUID_7_0_EDX_AMX_BF16 (1U << 22)
> > /* AVX512_FP16 instruction */
> > #define CPUID_7_0_EDX_AVX512_FP16 (1U << 23)
> > /* AMX tile (two-dimensional register) */
> > #define CPUID_7_0_EDX_AMX_TILE (1U << 24)
> > +/* AMX_INT8 instruction */
> > +#define CPUID_7_0_EDX_AMX_INT8 (1U << 25)
> > /* Speculation Control */
> > #define CPUID_7_0_EDX_SPEC_CTRL (1U << 26)
> > /* Single Thread Indirect Branch Predictors */
> > --
> > 2.34.1
> >
> >
> --
> Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
>
>
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH] i386: Add new CPU model SapphireRapids
2022-09-23 13:30 ` Yang Zhong
@ 2022-09-23 16:01 ` Xiaoyao Li
2022-09-26 7:42 ` Yang Zhong
0 siblings, 1 reply; 11+ messages in thread
From: Xiaoyao Li @ 2022-09-23 16:01 UTC (permalink / raw)
To: Yang Zhong, Dr. David Alan Gilbert
Cc: Wang, Lei, paul.c.lai, pbonzini, qemu-devel, robert.hu, chenyi.qiang
On 9/23/2022 9:30 PM, Yang Zhong wrote:
> On Wed, Sep 21, 2022 at 03:51:42PM +0100, Dr. David Alan Gilbert wrote:
>> * Wang, Lei (lei4.wang@intel.com) wrote:
>>> The new CPU model mostly inherits features from Icelake-Server, while
>>> adding new features:
>>> - AMX (Advance Matrix eXtensions)
>>> - Bus Lock Debug Exception
>>> and new instructions:
>>> - AVX VNNI (Vector Neural Network Instruction):
>>> - VPDPBUS: Multiply and Add Unsigned and Signed Bytes
>>> - VPDPBUSDS: Multiply and Add Unsigned and Signed Bytes with Saturation
>>> - VPDPWSSD: Multiply and Add Signed Word Integers
>>> - VPDPWSSDS: Multiply and Add Signed Integers with Saturation
>>> - FP16: Replicates existing AVX512 computational SP (FP32) instructions
>>> using FP16 instead of FP32 for ~2X performance gain
>>> - SERIALIZE: Provide software with a simple way to force the processor to
>>> complete all modifications, faster, allowed in all privilege levels and
>>> not causing an unconditional VM exit
>>> - TSX Suspend Load Address Tracking: Allows programmers to choose which
>>> memory accesses do not need to be tracked in the TSX read set
>>> - AVX512_BF16: Vector Neural Network Instructions supporting BFLOAT16
>>> inputs and conversion instructions from IEEE single precision
>>>
>>> Features may be added in future versions:
>>> - CET (virtualization support hasn't been merged)
>>> Instructions may be added in future versions:
>>> - fast zero-length MOVSB (KVM doesn't support yet)
>>> - fast short STOSB (KVM doesn't support yet)
>>> - fast short CMPSB, SCASB (KVM doesn't support yet)
>>>
>>> Signed-off-by: Wang, Lei <lei4.wang@intel.com>
>>> Reviewed-by: Robert Hoo <robert.hu@linux.intel.com>
>>
>> Hi,
>> What fills in the AMX tile and tmul information leafs
>> (0x1D, 0x1E)?
>
>> In particular, how would we make sure when we migrate between two
>> generations of AMX/Tile/Tmul capable devices with different
>> register/palette/tmul limits that the migration is tied to the CPU type
>> correctly?
>> Would you expect all devices called a 'SappireRapids' to have the same
>> sizes?
>>
>
> There is only one palette in current design. This palette include 8
> tiles. Those two CPUID leafs defined bytes_per_tile, total_tile_bytes,
> max_rows and etc, the AMX tool will configure those values into TILECFG with
> ldtilecfg instrcutions. Once tiles are configured, we can use
> tileload instruction to load data into those tiles.
>
> We did migration between two SappireRapids with amx self test tool
> (tools/testing/selftests/x86/amx.c)started in two sides, the migration
> work well.
>
> As for SappireRapids and more newer cpu types, those two CPUID leafs
> definitions are all same on AMX.
I'm not sure what "definitions" means here. Are you saying the CPUID values
of leaves 0x1D and 0x1E won't change for any future Intel silicon?
Personally, I doubt it. And we shouldn't make such an assumption unless
Intel states it in the SDM.
> So, on AMX perspective, the migration
> should be workable on subsequent cpu types. thanks!
I think what Dave is worried about is migrating a VM created with the
"SapphireRapids" model on an SPR machine to some newer platform in the
future, where the newer platform reports different values in CPUID leaves
0x1D and 0x1E than the SPR platform.
I think we need to include CPUID leaves 0x1D and 0x1E in the CPU model as
well. Otherwise we will hit the same problem as with Intel PT, where SPR
reports fewer capabilities than ICX.
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH] i386: Add new CPU model SapphireRapids
2022-09-23 16:01 ` Xiaoyao Li
@ 2022-09-26 7:42 ` Yang Zhong
2022-09-26 8:51 ` Dr. David Alan Gilbert
0 siblings, 1 reply; 11+ messages in thread
From: Yang Zhong @ 2022-09-26 7:42 UTC (permalink / raw)
To: Xiaoyao Li
Cc: Dr. David Alan Gilbert, Wang, Lei, paul.c.lai, pbonzini,
qemu-devel, robert.hu, chenyi.qiang, yang.zhong
On Sat, Sep 24, 2022 at 12:01:16AM +0800, Xiaoyao Li wrote:
> On 9/23/2022 9:30 PM, Yang Zhong wrote:
> > On Wed, Sep 21, 2022 at 03:51:42PM +0100, Dr. David Alan Gilbert wrote:
> > > * Wang, Lei (lei4.wang@intel.com) wrote:
> > > > The new CPU model mostly inherits features from Icelake-Server, while
> > > > adding new features:
> > > > - AMX (Advance Matrix eXtensions)
> > > > - Bus Lock Debug Exception
> > > > and new instructions:
> > > > - AVX VNNI (Vector Neural Network Instruction):
> > > > - VPDPBUS: Multiply and Add Unsigned and Signed Bytes
> > > > - VPDPBUSDS: Multiply and Add Unsigned and Signed Bytes with Saturation
> > > > - VPDPWSSD: Multiply and Add Signed Word Integers
> > > > - VPDPWSSDS: Multiply and Add Signed Integers with Saturation
> > > > - FP16: Replicates existing AVX512 computational SP (FP32) instructions
> > > > using FP16 instead of FP32 for ~2X performance gain
> > > > - SERIALIZE: Provide software with a simple way to force the processor to
> > > > complete all modifications, faster, allowed in all privilege levels and
> > > > not causing an unconditional VM exit
> > > > - TSX Suspend Load Address Tracking: Allows programmers to choose which
> > > > memory accesses do not need to be tracked in the TSX read set
> > > > - AVX512_BF16: Vector Neural Network Instructions supporting BFLOAT16
> > > > inputs and conversion instructions from IEEE single precision
> > > >
> > > > Features may be added in future versions:
> > > > - CET (virtualization support hasn't been merged)
> > > > Instructions may be added in future versions:
> > > > - fast zero-length MOVSB (KVM doesn't support yet)
> > > > - fast short STOSB (KVM doesn't support yet)
> > > > - fast short CMPSB, SCASB (KVM doesn't support yet)
> > > >
> > > > Signed-off-by: Wang, Lei <lei4.wang@intel.com>
> > > > Reviewed-by: Robert Hoo <robert.hu@linux.intel.com>
> > >
> > > Hi,
> > > What fills in the AMX tile and tmul information leafs
> > > (0x1D, 0x1E)?
> > > In particular, how would we make sure when we migrate between two
> > > generations of AMX/Tile/Tmul capable devices with different
> > > register/palette/tmul limits that the migration is tied to the CPU type
> > > correctly?
> > > Would you expect all devices called a 'SappireRapids' to have the same
> > > sizes?
> > >
> >
> > There is only one palette in current design. This palette include 8
> > tiles. Those two CPUID leafs defined bytes_per_tile, total_tile_bytes,
> > max_rows and etc, the AMX tool will configure those values into TILECFG with
> > ldtilecfg instrcutions. Once tiles are configured, we can use
> > tileload instruction to load data into those tiles.
> >
> > We did migration between two SappireRapids with amx self test tool
> > (tools/testing/selftests/x86/amx.c)started in two sides, the migration
> > work well.
> >
> > As for SappireRapids and more newer cpu types, those two CPUID leafs
> > definitions are all same on AMX.
>
> I'm not sure what definitions mean here. Are you saying the CPUID values of
> leaf 0x1D and 0x1E won't change for any future Intel Silicion?
>
> Personally, I doubt it. And we shouldn't take such assumption unless Intel
> states it SDM.
The current 0x1D and 0x1E definitions are as below:
/* CPUID Leaf 0x1D constants: */
#define INTEL_AMX_TILE_MAX_SUBLEAF 0x1
#define INTEL_AMX_TOTAL_TILE_BYTES 0x2000
#define INTEL_AMX_BYTES_PER_TILE 0x400
#define INTEL_AMX_BYTES_PER_ROW 0x40
#define INTEL_AMX_TILE_MAX_NAMES 0x8
#define INTEL_AMX_TILE_MAX_ROWS 0x10
/* CPUID Leaf 0x1E constants: */
#define INTEL_AMX_TMUL_MAX_K 0x10
#define INTEL_AMX_TMUL_MAX_N 0x40
These values are defined in the SDM, and on the new CPU under development,
these values are still the same as on SapphireRapids. Thanks!
Yang
>
> > So, on AMX perspective, the migration
> > should be workable on subsequent cpu types. thanks!
>
> I think what Dave worried is that when migrating one VM created with
> "SapphireRapids" model on SPR machine to some newer platform in the future,
> where the newer platform reports different value on CPUID leaves 0x1D and
> 0x1E than SPR platform.
>
> I think we need to contain CPUID leaves 0x1D and 0x1E into CPU model as
> well. Otherwise we will hit the same as Intel PT that SPR reports less
> capabilities that ICX.
>
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH] i386: Add new CPU model SapphireRapids
2022-09-26 7:42 ` Yang Zhong
@ 2022-09-26 8:51 ` Dr. David Alan Gilbert
2022-09-28 8:12 ` Yang Zhong
0 siblings, 1 reply; 11+ messages in thread
From: Dr. David Alan Gilbert @ 2022-09-26 8:51 UTC (permalink / raw)
To: Yang Zhong
Cc: Xiaoyao Li, Wang, Lei, paul.c.lai, pbonzini, qemu-devel,
robert.hu, chenyi.qiang
* Yang Zhong (yang.zhong@linux.intel.com) wrote:
> On Sat, Sep 24, 2022 at 12:01:16AM +0800, Xiaoyao Li wrote:
> > On 9/23/2022 9:30 PM, Yang Zhong wrote:
> > > On Wed, Sep 21, 2022 at 03:51:42PM +0100, Dr. David Alan Gilbert wrote:
> > > > * Wang, Lei (lei4.wang@intel.com) wrote:
> > > > > The new CPU model mostly inherits features from Icelake-Server, while
> > > > > adding new features:
> > > > > - AMX (Advance Matrix eXtensions)
> > > > > - Bus Lock Debug Exception
> > > > > and new instructions:
> > > > > - AVX VNNI (Vector Neural Network Instruction):
> > > > > - VPDPBUS: Multiply and Add Unsigned and Signed Bytes
> > > > > - VPDPBUSDS: Multiply and Add Unsigned and Signed Bytes with Saturation
> > > > > - VPDPWSSD: Multiply and Add Signed Word Integers
> > > > > - VPDPWSSDS: Multiply and Add Signed Integers with Saturation
> > > > > - FP16: Replicates existing AVX512 computational SP (FP32) instructions
> > > > > using FP16 instead of FP32 for ~2X performance gain
> > > > > - SERIALIZE: Provide software with a simple way to force the processor to
> > > > > complete all modifications, faster, allowed in all privilege levels and
> > > > > not causing an unconditional VM exit
> > > > > - TSX Suspend Load Address Tracking: Allows programmers to choose which
> > > > > memory accesses do not need to be tracked in the TSX read set
> > > > > - AVX512_BF16: Vector Neural Network Instructions supporting BFLOAT16
> > > > > inputs and conversion instructions from IEEE single precision
> > > > >
> > > > > Features may be added in future versions:
> > > > > - CET (virtualization support hasn't been merged)
> > > > > Instructions may be added in future versions:
> > > > > - fast zero-length MOVSB (KVM doesn't support yet)
> > > > > - fast short STOSB (KVM doesn't support yet)
> > > > > - fast short CMPSB, SCASB (KVM doesn't support yet)
> > > > >
> > > > > Signed-off-by: Wang, Lei <lei4.wang@intel.com>
> > > > > Reviewed-by: Robert Hoo <robert.hu@linux.intel.com>
> > > >
> > > > Hi,
> > > > What fills in the AMX tile and tmul information leafs
> > > > (0x1D, 0x1E)?
> > > > In particular, how would we make sure when we migrate between two
> > > > generations of AMX/Tile/Tmul capable devices with different
> > > > register/palette/tmul limits that the migration is tied to the CPU type
> > > > correctly?
> > > > Would you expect all devices called a 'SappireRapids' to have the same
> > > > sizes?
> > > >
> > >
> > > There is only one palette in current design. This palette include 8
> > > tiles. Those two CPUID leafs defined bytes_per_tile, total_tile_bytes,
> > > max_rows and etc, the AMX tool will configure those values into TILECFG with
> > > ldtilecfg instrcutions. Once tiles are configured, we can use
> > > tileload instruction to load data into those tiles.
> > >
> > > We did migration between two SappireRapids with amx self test tool
> > > (tools/testing/selftests/x86/amx.c)started in two sides, the migration
> > > work well.
> > >
> > > As for SappireRapids and more newer cpu types, those two CPUID leafs
> > > definitions are all same on AMX.
> >
> > I'm not sure what definitions mean here. Are you saying the CPUID values of
> > leaf 0x1D and 0x1E won't change for any future Intel Silicion?
> >
> > Personally, I doubt it. And we shouldn't take such assumption unless Intel
> > states it SDM.
>
> The current 0x1D and 0x1E definitions as below:
>
> /* CPUID Leaf 0x1D constants: */
> #define INTEL_AMX_TILE_MAX_SUBLEAF 0x1
> #define INTEL_AMX_TOTAL_TILE_BYTES 0x2000
> #define INTEL_AMX_BYTES_PER_TILE 0x400
> #define INTEL_AMX_BYTES_PER_ROW 0x40
> #define INTEL_AMX_TILE_MAX_NAMES 0x8
> #define INTEL_AMX_TILE_MAX_ROWS 0x10
>
> /* CPUID Leaf 0x1E constants: */
> #define INTEL_AMX_TMUL_MAX_K 0x10
> #define INTEL_AMX_TMUL_MAX_N 0x40
>
> These values are defined from SDM, and from the new developping CPU,
> these values are still same with SappireRapids. thanks!
But there's nothing stopping them from increasing in future versions?
Dave
> Yang
> >
> > > So, on AMX perspective, the migration
> > > should be workable on subsequent cpu types. thanks!
> >
> > I think what Dave worried is that when migrating one VM created with
> > "SapphireRapids" model on SPR machine to some newer platform in the future,
> > where the newer platform reports different value on CPUID leaves 0x1D and
> > 0x1E than SPR platform.
> >
> > I think we need to contain CPUID leaves 0x1D and 0x1E into CPU model as
> > well. Otherwise we will hit the same as Intel PT that SPR reports less
> > capabilities that ICX.
> >
>
--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH] i386: Add new CPU model SapphireRapids
2022-09-26 8:51 ` Dr. David Alan Gilbert
@ 2022-09-28 8:12 ` Yang Zhong
0 siblings, 0 replies; 11+ messages in thread
From: Yang Zhong @ 2022-09-28 8:12 UTC (permalink / raw)
To: Dr. David Alan Gilbert
Cc: Xiaoyao Li, Wang, Lei, paul.c.lai, pbonzini, qemu-devel,
robert.hu, chenyi.qiang, yang.zhong
On Mon, Sep 26, 2022 at 09:51:13AM +0100, Dr. David Alan Gilbert wrote:
> * Yang Zhong (yang.zhong@linux.intel.com) wrote:
> > On Sat, Sep 24, 2022 at 12:01:16AM +0800, Xiaoyao Li wrote:
> > > On 9/23/2022 9:30 PM, Yang Zhong wrote:
> > > > On Wed, Sep 21, 2022 at 03:51:42PM +0100, Dr. David Alan Gilbert wrote:
> > > > > * Wang, Lei (lei4.wang@intel.com) wrote:
> > > > > > The new CPU model mostly inherits features from Icelake-Server, while
> > > > > > adding new features:
> > > > > > - AMX (Advance Matrix eXtensions)
> > > > > > - Bus Lock Debug Exception
> > > > > > and new instructions:
> > > > > > - AVX VNNI (Vector Neural Network Instruction):
> > > > > > - VPDPBUS: Multiply and Add Unsigned and Signed Bytes
> > > > > > - VPDPBUSDS: Multiply and Add Unsigned and Signed Bytes with Saturation
> > > > > > - VPDPWSSD: Multiply and Add Signed Word Integers
> > > > > > - VPDPWSSDS: Multiply and Add Signed Integers with Saturation
> > > > > > - FP16: Replicates existing AVX512 computational SP (FP32) instructions
> > > > > > using FP16 instead of FP32 for ~2X performance gain
> > > > > > - SERIALIZE: Provide software with a simple way to force the processor to
> > > > > > complete all modifications, faster, allowed in all privilege levels and
> > > > > > not causing an unconditional VM exit
> > > > > > - TSX Suspend Load Address Tracking: Allows programmers to choose which
> > > > > > memory accesses do not need to be tracked in the TSX read set
> > > > > > - AVX512_BF16: Vector Neural Network Instructions supporting BFLOAT16
> > > > > > inputs and conversion instructions from IEEE single precision
> > > > > >
> > > > > > Features may be added in future versions:
> > > > > > - CET (virtualization support hasn't been merged)
> > > > > > Instructions may be added in future versions:
> > > > > > - fast zero-length MOVSB (KVM doesn't support yet)
> > > > > > - fast short STOSB (KVM doesn't support yet)
> > > > > > - fast short CMPSB, SCASB (KVM doesn't support yet)
> > > > > >
> > > > > > Signed-off-by: Wang, Lei <lei4.wang@intel.com>
> > > > > > Reviewed-by: Robert Hoo <robert.hu@linux.intel.com>
> > > > >
> > > > > Hi,
> > > > > What fills in the AMX tile and tmul information leafs
> > > > > (0x1D, 0x1E)?
> > > > > In particular, how would we make sure when we migrate between two
> > > > > generations of AMX/Tile/Tmul capable devices with different
> > > > > register/palette/tmul limits that the migration is tied to the CPU type
> > > > > correctly?
> > > > > Would you expect all devices called a 'SappireRapids' to have the same
> > > > > sizes?
> > > > >
> > > >
> > > > There is only one palette in current design. This palette include 8
> > > > tiles. Those two CPUID leafs defined bytes_per_tile, total_tile_bytes,
> > > > max_rows and etc, the AMX tool will configure those values into TILECFG with
> > > > ldtilecfg instrcutions. Once tiles are configured, we can use
> > > > tileload instruction to load data into those tiles.
> > > >
> > > > We did migration between two SappireRapids with amx self test tool
> > > > (tools/testing/selftests/x86/amx.c)started in two sides, the migration
> > > > work well.
> > > >
> > > > As for SappireRapids and more newer cpu types, those two CPUID leafs
> > > > definitions are all same on AMX.
> > >
> > > I'm not sure what definitions mean here. Are you saying the CPUID values of
> > > leaf 0x1D and 0x1E won't change for any future Intel Silicion?
> > >
> > > Personally, I doubt it. And we shouldn't take such assumption unless Intel
> > > states it SDM.
> >
> > The current 0x1D and 0x1E definitions as below:
> >
> > /* CPUID Leaf 0x1D constants: */
> > #define INTEL_AMX_TILE_MAX_SUBLEAF 0x1
> > #define INTEL_AMX_TOTAL_TILE_BYTES 0x2000
> > #define INTEL_AMX_BYTES_PER_TILE 0x400
> > #define INTEL_AMX_BYTES_PER_ROW 0x40
> > #define INTEL_AMX_TILE_MAX_NAMES 0x8
> > #define INTEL_AMX_TILE_MAX_ROWS 0x10
> >
> > /* CPUID Leaf 0x1E constants: */
> > #define INTEL_AMX_TMUL_MAX_K 0x10
> > #define INTEL_AMX_TMUL_MAX_N 0x40
> >
> > These values are defined from SDM, and from the new developping CPU,
> > these values are still same with SappireRapids. thanks!
>
> But there's nothing stopping them increasing in future versions ?
>
Okay, thanks! We will add these CPUID leafs to this CPU model.
Yang
> Dave
>
> > Yang
> > >
> > > > So, on AMX perspective, the migration
> > > > should be workable on subsequent cpu types. thanks!
> > >
> > > I think what Dave worried is that when migrating one VM created with
> > > "SapphireRapids" model on SPR machine to some newer platform in the future,
> > > where the newer platform reports different value on CPUID leaves 0x1D and
> > > 0x1E than SPR platform.
> > >
> > > I think we need to contain CPUID leaves 0x1D and 0x1E into CPU model as
> > > well. Otherwise we will hit the same as Intel PT that SPR reports less
> > > capabilities that ICX.
> > >
> >
> --
> Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
>
>
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH] i386: Add new CPU model SapphireRapids
2022-09-21 14:51 ` Dr. David Alan Gilbert
2022-09-21 15:01 ` Daniel P. Berrangé
2022-09-23 13:30 ` Yang Zhong
@ 2023-02-17 1:17 ` Xiaoyao Li
2 siblings, 0 replies; 11+ messages in thread
From: Xiaoyao Li @ 2023-02-17 1:17 UTC (permalink / raw)
To: Dr. David Alan Gilbert, Wang, Lei, paul.c.lai, Paolo Bonzini
Cc: qemu-devel, robert.hu, chenyi.qiang
On 9/21/2022 10:51 PM, Dr. David Alan Gilbert wrote:
> * Wang, Lei (lei4.wang@intel.com) wrote:
>> The new CPU model mostly inherits features from Icelake-Server, while
>> adding new features:
>> - AMX (Advanced Matrix eXtensions)
>> - Bus Lock Debug Exception
>> and new instructions:
>> - AVX VNNI (Vector Neural Network Instruction):
>> - VPDPBUSD: Multiply and Add Unsigned and Signed Bytes
>> - VPDPBUSDS: Multiply and Add Unsigned and Signed Bytes with Saturation
>> - VPDPWSSD: Multiply and Add Signed Word Integers
>> - VPDPWSSDS: Multiply and Add Signed Integers with Saturation
>> - FP16: Replicates existing AVX512 computational SP (FP32) instructions
>> using FP16 instead of FP32 for ~2X performance gain
>> - SERIALIZE: Provide software with a simple way to force the processor to
>> complete all modifications, faster, allowed in all privilege levels and
>> not causing an unconditional VM exit
>> - TSX Suspend Load Address Tracking: Allows programmers to choose which
>> memory accesses do not need to be tracked in the TSX read set
>> - AVX512_BF16: Vector Neural Network Instructions supporting BFLOAT16
>> inputs and conversion instructions from IEEE single precision
>>
>> Features may be added in future versions:
>> - CET (virtualization support hasn't been merged)
>> Instructions may be added in future versions:
>> - fast zero-length MOVSB (KVM doesn't support yet)
>> - fast short STOSB (KVM doesn't support yet)
>> - fast short CMPSB, SCASB (KVM doesn't support yet)
>>
>> Signed-off-by: Wang, Lei <lei4.wang@intel.com>
>> Reviewed-by: Robert Hoo <robert.hu@linux.intel.com>
>
> Hi,
> What fills in the AMX tile and tmul information leafs
> (0x1D, 0x1E)?
Current QEMU hard-codes the values of the AMX tile and TMUL information leaves
(0x1D, 0x1E) if AMX is exposed to the guest. In cpu_x86_cpuid(),
target/i386/cpu.c:
case 0x1D: {
/* AMX TILE */
*eax = 0;
*ebx = 0;
*ecx = 0;
*edx = 0;
if (!(env->features[FEAT_7_0_EDX] & CPUID_7_0_EDX_AMX_TILE)) {
break;
}
if (count == 0) {
/* Highest numbered palette subleaf */
*eax = INTEL_AMX_TILE_MAX_SUBLEAF;
} else if (count == 1) {
*eax = INTEL_AMX_TOTAL_TILE_BYTES |
(INTEL_AMX_BYTES_PER_TILE << 16);
*ebx = INTEL_AMX_BYTES_PER_ROW | (INTEL_AMX_TILE_MAX_NAMES
<< 16);
*ecx = INTEL_AMX_TILE_MAX_ROWS;
}
break;
}
case 0x1E: {
/* AMX TMUL */
*eax = 0;
*ebx = 0;
*ecx = 0;
*edx = 0;
if (!(env->features[FEAT_7_0_EDX] & CPUID_7_0_EDX_AMX_TILE)) {
break;
}
if (count == 0) {
/* Highest numbered palette subleaf */
*ebx = INTEL_AMX_TMUL_MAX_K | (INTEL_AMX_TMUL_MAX_N << 8);
}
break;
}
> In particular, how would we make sure when we migrate between two
> generations of AMX/Tile/Tmul capable devices with different
> register/palette/tmul limits that the migration is tied to the CPU type
> correctly?
Since they're hard-coded, the values seen by the guest never change no matter
what the HW is.
> Would you expect all devices called a 'SapphireRapids' to have the same
> sizes?
I suppose by devices here you mean the HW platform? If so, Intel commits
that the palette 1 values (CPUID leaf 0x1d, subleaf 0x1) will never change,
and that the TMUL capabilities (CPUID leaf 0x1e) are constant for a few
generations after SPR.
But this has no impact on migration safety as long as the CPUID values of
leaves 0x1d and 0x1e are tied to a named CPU model and don't vary across
different hosts. And the current QEMU code satisfies this, since the values
are hard-coded.
So, IMHO, it seems OK to define AMX in the SPR CPU model with current QEMU
as this patch does. Although the hard-coded value of 0x1E seems to have a
potential issue if a far-future product reports a smaller value (Intel can
only commit that it doesn't change for a few generations so far), that's
another thing we can handle separately for AMX.
Dave and Paolo, what do you think?
> Dave
>
>> ---
>> target/i386/cpu.c | 128 ++++++++++++++++++++++++++++++++++++++++++++++
>> target/i386/cpu.h | 4 ++
>> 2 files changed, 132 insertions(+)
>>
>> diff --git a/target/i386/cpu.c b/target/i386/cpu.c
>> index 1db1278a59..abb43853d4 100644
>> --- a/target/i386/cpu.c
>> +++ b/target/i386/cpu.c
>> @@ -3467,6 +3467,134 @@ static const X86CPUDefinition builtin_x86_defs[] = {
>> { /* end of list */ }
>> }
>> },
>> + {
>> + .name = "SapphireRapids",
>> + .level = 0x20,
>> + .vendor = CPUID_VENDOR_INTEL,
>> + .family = 6,
>> + .model = 143,
>> + .stepping = 4,
>> + /*
>> + * please keep the ascending order so that we can have a clear view of
>> + * bit position of each feature.
>> + */
>> + .features[FEAT_1_EDX] =
>> + CPUID_FP87 | CPUID_VME | CPUID_DE | CPUID_PSE | CPUID_TSC |
>> + CPUID_MSR | CPUID_PAE | CPUID_MCE | CPUID_CX8 | CPUID_APIC |
>> + CPUID_SEP | CPUID_MTRR | CPUID_PGE | CPUID_MCA | CPUID_CMOV |
>> + CPUID_PAT | CPUID_PSE36 | CPUID_CLFLUSH | CPUID_MMX | CPUID_FXSR |
>> + CPUID_SSE | CPUID_SSE2,
>> + .features[FEAT_1_ECX] =
>> + CPUID_EXT_SSE3 | CPUID_EXT_PCLMULQDQ | CPUID_EXT_SSSE3 |
>> + CPUID_EXT_FMA | CPUID_EXT_CX16 | CPUID_EXT_PCID | CPUID_EXT_SSE41 |
>> + CPUID_EXT_SSE42 | CPUID_EXT_X2APIC | CPUID_EXT_MOVBE |
>> + CPUID_EXT_POPCNT | CPUID_EXT_TSC_DEADLINE_TIMER | CPUID_EXT_AES |
>> + CPUID_EXT_XSAVE | CPUID_EXT_AVX | CPUID_EXT_F16C | CPUID_EXT_RDRAND,
>> + .features[FEAT_8000_0001_EDX] =
>> + CPUID_EXT2_SYSCALL | CPUID_EXT2_NX | CPUID_EXT2_PDPE1GB |
>> + CPUID_EXT2_RDTSCP | CPUID_EXT2_LM,
>> + .features[FEAT_8000_0001_ECX] =
>> + CPUID_EXT3_LAHF_LM | CPUID_EXT3_ABM | CPUID_EXT3_3DNOWPREFETCH,
>> + .features[FEAT_8000_0008_EBX] =
>> + CPUID_8000_0008_EBX_WBNOINVD,
>> + .features[FEAT_7_0_EBX] =
>> + CPUID_7_0_EBX_FSGSBASE | CPUID_7_0_EBX_BMI1 | CPUID_7_0_EBX_HLE |
>> + CPUID_7_0_EBX_AVX2 | CPUID_7_0_EBX_SMEP | CPUID_7_0_EBX_BMI2 |
>> + CPUID_7_0_EBX_ERMS | CPUID_7_0_EBX_INVPCID | CPUID_7_0_EBX_RTM |
>> + CPUID_7_0_EBX_AVX512F | CPUID_7_0_EBX_AVX512DQ |
>> + CPUID_7_0_EBX_RDSEED | CPUID_7_0_EBX_ADX | CPUID_7_0_EBX_SMAP |
>> + CPUID_7_0_EBX_AVX512IFMA | CPUID_7_0_EBX_CLFLUSHOPT |
>> + CPUID_7_0_EBX_CLWB | CPUID_7_0_EBX_AVX512CD | CPUID_7_0_EBX_SHA_NI |
>> + CPUID_7_0_EBX_AVX512BW | CPUID_7_0_EBX_AVX512VL,
>> + .features[FEAT_7_0_ECX] =
>> + CPUID_7_0_ECX_AVX512_VBMI | CPUID_7_0_ECX_UMIP | CPUID_7_0_ECX_PKU |
>> + CPUID_7_0_ECX_AVX512_VBMI2 | CPUID_7_0_ECX_GFNI |
>> + CPUID_7_0_ECX_VAES | CPUID_7_0_ECX_VPCLMULQDQ |
>> + CPUID_7_0_ECX_AVX512VNNI | CPUID_7_0_ECX_AVX512BITALG |
>> + CPUID_7_0_ECX_AVX512_VPOPCNTDQ | CPUID_7_0_ECX_LA57 |
>> + CPUID_7_0_ECX_RDPID | CPUID_7_0_ECX_BUS_LOCK_DETECT,
>> + .features[FEAT_7_0_EDX] =
>> + CPUID_7_0_EDX_FSRM | CPUID_7_0_EDX_SERIALIZE |
>> + CPUID_7_0_EDX_TSX_LDTRK | CPUID_7_0_EDX_AMX_BF16 |
>> + CPUID_7_0_EDX_AVX512_FP16 | CPUID_7_0_EDX_AMX_TILE |
>> + CPUID_7_0_EDX_AMX_INT8 | CPUID_7_0_EDX_SPEC_CTRL |
>> + CPUID_7_0_EDX_ARCH_CAPABILITIES | CPUID_7_0_EDX_SPEC_CTRL_SSBD,
>> + .features[FEAT_ARCH_CAPABILITIES] =
>> + MSR_ARCH_CAP_RDCL_NO | MSR_ARCH_CAP_IBRS_ALL |
>> + MSR_ARCH_CAP_SKIP_L1DFL_VMENTRY | MSR_ARCH_CAP_MDS_NO |
>> + MSR_ARCH_CAP_PSCHANGE_MC_NO | MSR_ARCH_CAP_TAA_NO,
>> + .features[FEAT_XSAVE] =
>> + CPUID_XSAVE_XSAVEOPT | CPUID_XSAVE_XSAVEC |
>> + CPUID_XSAVE_XGETBV1 | CPUID_XSAVE_XSAVES | CPUID_D_1_EAX_XFD,
>> + .features[FEAT_6_EAX] =
>> + CPUID_6_EAX_ARAT,
>> + .features[FEAT_7_1_EAX] =
>> + CPUID_7_1_EAX_AVX_VNNI | CPUID_7_1_EAX_AVX512_BF16,
>> + .features[FEAT_VMX_BASIC] =
>> + MSR_VMX_BASIC_INS_OUTS | MSR_VMX_BASIC_TRUE_CTLS,
>> + .features[FEAT_VMX_ENTRY_CTLS] =
>> + VMX_VM_ENTRY_LOAD_DEBUG_CONTROLS | VMX_VM_ENTRY_IA32E_MODE |
>> + VMX_VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL |
>> + VMX_VM_ENTRY_LOAD_IA32_PAT | VMX_VM_ENTRY_LOAD_IA32_EFER,
>> + .features[FEAT_VMX_EPT_VPID_CAPS] =
>> + MSR_VMX_EPT_EXECONLY |
>> + MSR_VMX_EPT_PAGE_WALK_LENGTH_4 | MSR_VMX_EPT_PAGE_WALK_LENGTH_5 |
>> + MSR_VMX_EPT_WB | MSR_VMX_EPT_2MB | MSR_VMX_EPT_1GB |
>> + MSR_VMX_EPT_INVEPT | MSR_VMX_EPT_AD_BITS |
>> + MSR_VMX_EPT_INVEPT_SINGLE_CONTEXT | MSR_VMX_EPT_INVEPT_ALL_CONTEXT |
>> + MSR_VMX_EPT_INVVPID | MSR_VMX_EPT_INVVPID_SINGLE_ADDR |
>> + MSR_VMX_EPT_INVVPID_SINGLE_CONTEXT |
>> + MSR_VMX_EPT_INVVPID_ALL_CONTEXT |
>> + MSR_VMX_EPT_INVVPID_SINGLE_CONTEXT_NOGLOBALS,
>> + .features[FEAT_VMX_EXIT_CTLS] =
>> + VMX_VM_EXIT_SAVE_DEBUG_CONTROLS |
>> + VMX_VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL |
>> + VMX_VM_EXIT_ACK_INTR_ON_EXIT | VMX_VM_EXIT_SAVE_IA32_PAT |
>> + VMX_VM_EXIT_LOAD_IA32_PAT | VMX_VM_EXIT_SAVE_IA32_EFER |
>> + VMX_VM_EXIT_LOAD_IA32_EFER | VMX_VM_EXIT_SAVE_VMX_PREEMPTION_TIMER,
>> + .features[FEAT_VMX_MISC] =
>> + MSR_VMX_MISC_STORE_LMA | MSR_VMX_MISC_ACTIVITY_HLT |
>> + MSR_VMX_MISC_VMWRITE_VMEXIT,
>> + .features[FEAT_VMX_PINBASED_CTLS] =
>> + VMX_PIN_BASED_EXT_INTR_MASK | VMX_PIN_BASED_NMI_EXITING |
>> + VMX_PIN_BASED_VIRTUAL_NMIS | VMX_PIN_BASED_VMX_PREEMPTION_TIMER |
>> + VMX_PIN_BASED_POSTED_INTR,
>> + .features[FEAT_VMX_PROCBASED_CTLS] =
>> + VMX_CPU_BASED_VIRTUAL_INTR_PENDING |
>> + VMX_CPU_BASED_USE_TSC_OFFSETING | VMX_CPU_BASED_HLT_EXITING |
>> + VMX_CPU_BASED_INVLPG_EXITING | VMX_CPU_BASED_MWAIT_EXITING |
>> + VMX_CPU_BASED_RDPMC_EXITING | VMX_CPU_BASED_RDTSC_EXITING |
>> + VMX_CPU_BASED_CR3_LOAD_EXITING | VMX_CPU_BASED_CR3_STORE_EXITING |
>> + VMX_CPU_BASED_CR8_LOAD_EXITING | VMX_CPU_BASED_CR8_STORE_EXITING |
>> + VMX_CPU_BASED_TPR_SHADOW | VMX_CPU_BASED_VIRTUAL_NMI_PENDING |
>> + VMX_CPU_BASED_MOV_DR_EXITING | VMX_CPU_BASED_UNCOND_IO_EXITING |
>> + VMX_CPU_BASED_USE_IO_BITMAPS | VMX_CPU_BASED_MONITOR_TRAP_FLAG |
>> + VMX_CPU_BASED_USE_MSR_BITMAPS | VMX_CPU_BASED_MONITOR_EXITING |
>> + VMX_CPU_BASED_PAUSE_EXITING |
>> + VMX_CPU_BASED_ACTIVATE_SECONDARY_CONTROLS,
>> + .features[FEAT_VMX_SECONDARY_CTLS] =
>> + VMX_SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
>> + VMX_SECONDARY_EXEC_ENABLE_EPT | VMX_SECONDARY_EXEC_DESC |
>> + VMX_SECONDARY_EXEC_RDTSCP |
>> + VMX_SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |
>> + VMX_SECONDARY_EXEC_ENABLE_VPID | VMX_SECONDARY_EXEC_WBINVD_EXITING |
>> + VMX_SECONDARY_EXEC_UNRESTRICTED_GUEST |
>> + VMX_SECONDARY_EXEC_APIC_REGISTER_VIRT |
>> + VMX_SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
>> + VMX_SECONDARY_EXEC_RDRAND_EXITING |
>> + VMX_SECONDARY_EXEC_ENABLE_INVPCID |
>> + VMX_SECONDARY_EXEC_ENABLE_VMFUNC | VMX_SECONDARY_EXEC_SHADOW_VMCS |
>> + VMX_SECONDARY_EXEC_RDSEED_EXITING | VMX_SECONDARY_EXEC_ENABLE_PML |
>> + VMX_SECONDARY_EXEC_XSAVES,
>> + .features[FEAT_VMX_VMFUNC] =
>> + MSR_VMX_VMFUNC_EPT_SWITCHING,
>> + .xlevel = 0x80000008,
>> + .model_id = "Intel Xeon Processor (SapphireRapids)",
>> + .versions = (X86CPUVersionDefinition[]) {
>> + { .version = 1 },
>> + { /* end of list */ },
>> + },
>> + },
>> {
>> .name = "Denverton",
>> .level = 21,
>> diff --git a/target/i386/cpu.h b/target/i386/cpu.h
>> index 82004b65b9..ef3e8a5ed5 100644
>> --- a/target/i386/cpu.h
>> +++ b/target/i386/cpu.h
>> @@ -879,10 +879,14 @@ uint64_t x86_cpu_get_supported_feature_word(FeatureWord w,
>> #define CPUID_7_0_EDX_TSX_LDTRK (1U << 16)
>> /* Architectural LBRs */
>> #define CPUID_7_0_EDX_ARCH_LBR (1U << 19)
>> +/* AMX_BF16 instruction */
>> +#define CPUID_7_0_EDX_AMX_BF16 (1U << 22)
>> /* AVX512_FP16 instruction */
>> #define CPUID_7_0_EDX_AVX512_FP16 (1U << 23)
>> /* AMX tile (two-dimensional register) */
>> #define CPUID_7_0_EDX_AMX_TILE (1U << 24)
>> +/* AMX_INT8 instruction */
>> +#define CPUID_7_0_EDX_AMX_INT8 (1U << 25)
>> /* Speculation Control */
>> #define CPUID_7_0_EDX_SPEC_CTRL (1U << 26)
>> /* Single Thread Indirect Branch Predictors */
>> --
>> 2.34.1
>>
>>
^ permalink raw reply [flat|nested] 11+ messages in thread
end of thread, other threads:[~2023-02-17 1:19 UTC | newest]
Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-08-12 5:57 [PATCH] i386: Add new CPU model SapphireRapids Wang, Lei
2022-09-14 11:21 ` Igor Mammedov
2022-09-21 14:51 ` Dr. David Alan Gilbert
2022-09-21 15:01 ` Daniel P. Berrangé
2022-09-21 15:05 ` Dr. David Alan Gilbert
2022-09-23 13:30 ` Yang Zhong
2022-09-23 16:01 ` Xiaoyao Li
2022-09-26 7:42 ` Yang Zhong
2022-09-26 8:51 ` Dr. David Alan Gilbert
2022-09-28 8:12 ` Yang Zhong
2023-02-17 1:17 ` Xiaoyao Li
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.