All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Chang S. Bae" <chang.seok.bae@intel.com>
To: linux-kernel@vger.kernel.org
Cc: x86@kernel.org, tglx@linutronix.de, dave.hansen@linux.intel.com,
	arjan@linux.intel.com, ravi.v.shankar@intel.com,
	chang.seok.bae@intel.com
Subject: [PATCH 20/23] x86/fpu/amx: Define AMX state components and have it used for boot-time checks
Date: Thu, 21 Oct 2021 15:55:24 -0700	[thread overview]
Message-ID: <20211021225527.10184-21-chang.seok.bae@intel.com> (raw)
In-Reply-To: <20211021225527.10184-1-chang.seok.bae@intel.com>

The XSTATE initialization uses check_xstate_against_struct() to sanity
check the size of XSTATE-enabled features. AMX is a XSAVE-enabled feature,
and its size is not hard-coded but discoverable at run-time via CPUID.

The AMX state is composed of state components 17 and 18, which are all user
state components. The first component is the XTILECFG state of a 64-byte
tile-related control register. The state component 18, called XTILEDATA,
contains the actual tile data, and the state size varies on
implementations. The architectural maximum, as defined in the CPUID(0x1d,
1): EAX[15:0], is a byte less than 64KB. The first implementation supports
8KB.

Check the XTILEDATA state size dynamically. The feature introduces the new
tile register, TMM. Define one register struct only and read the number of
registers from CPUID. Cross-check the overall size with CPUID again.

Signed-off-by: Chang S. Bae <chang.seok.bae@intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Chang S. Bae <chang.seok.bae@intel.com>
---
Changes from the tglx tree:
* Make AMX_TILE depend on XFD.
* Update the changelog.
---
 arch/x86/include/asm/cpufeatures.h |  1 +
 arch/x86/include/asm/fpu/types.h   | 32 ++++++++++++
 arch/x86/include/asm/fpu/xstate.h  |  2 +
 arch/x86/kernel/fpu/xstate.c       | 80 +++++++++++++++++++++++++++++-
 4 files changed, 114 insertions(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index ab7b3a2de85d..d5b5f2ab87a0 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -299,6 +299,7 @@
 /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */
 #define X86_FEATURE_AVX_VNNI		(12*32+ 4) /* AVX VNNI instructions */
 #define X86_FEATURE_AVX512_BF16		(12*32+ 5) /* AVX512 BFLOAT16 instructions */
+#define X86_FEATURE_AMX_TILE		(18*32+24) /* AMX tile Support */
 
 /* AMD-defined CPU features, CPUID level 0x80000008 (EBX), word 13 */
 #define X86_FEATURE_CLZERO		(13*32+ 0) /* CLZERO instruction */
diff --git a/arch/x86/include/asm/fpu/types.h b/arch/x86/include/asm/fpu/types.h
index c956db14b011..5dd2455ac230 100644
--- a/arch/x86/include/asm/fpu/types.h
+++ b/arch/x86/include/asm/fpu/types.h
@@ -120,6 +120,9 @@ enum xfeature {
 	XFEATURE_RSRVD_COMP_13,
 	XFEATURE_RSRVD_COMP_14,
 	XFEATURE_LBR,
+	XFEATURE_RSRVD_COMP_16,
+	XFEATURE_XTILE_CFG,
+	XFEATURE_XTILE_DATA,
 
 	XFEATURE_MAX,
 };
@@ -136,12 +139,21 @@ enum xfeature {
 #define XFEATURE_MASK_PKRU		(1 << XFEATURE_PKRU)
 #define XFEATURE_MASK_PASID		(1 << XFEATURE_PASID)
 #define XFEATURE_MASK_LBR		(1 << XFEATURE_LBR)
+#define XFEATURE_MASK_XTILE_CFG		(1 << XFEATURE_XTILE_CFG)
+#define XFEATURE_MASK_XTILE_DATA	(1 << XFEATURE_XTILE_DATA)
 
 #define XFEATURE_MASK_FPSSE		(XFEATURE_MASK_FP | XFEATURE_MASK_SSE)
 #define XFEATURE_MASK_AVX512		(XFEATURE_MASK_OPMASK \
 					 | XFEATURE_MASK_ZMM_Hi256 \
 					 | XFEATURE_MASK_Hi16_ZMM)
 
+#ifdef CONFIG_X86_64
+# define XFEATURE_MASK_XTILE		(XFEATURE_MASK_XTILE_DATA \
+					 | XFEATURE_MASK_XTILE_CFG)
+#else
+# define XFEATURE_MASK_XTILE		(0)
+#endif
+
 #define FIRST_EXTENDED_XFEATURE	XFEATURE_YMM
 
 struct reg_128_bit {
@@ -153,6 +165,9 @@ struct reg_256_bit {
 struct reg_512_bit {
 	u8	regbytes[512/8];
 };
+struct reg_1024_byte {
+	u8	regbytes[1024];
+};
 
 /*
  * State component 2:
@@ -255,6 +270,23 @@ struct arch_lbr_state {
 	u64 ler_to;
 	u64 ler_info;
 	struct lbr_entry		entries[];
+};
+
+/*
+ * State component 17: 64-byte tile configuration register.
+ */
+struct xtile_cfg {
+	u64				tcfg[8];
+} __packed;
+
+/*
+ * State component 18: 1KB tile data register.
+ * Each register represents 16 64-byte rows of the matrix
+ * data. But the number of registers depends on the actual
+ * implementation.
+ */
+struct xtile_data {
+	struct reg_1024_byte		tmm;
 } __packed;
 
 /*
diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h
index d23714132dbb..0c850def4d13 100644
--- a/arch/x86/include/asm/fpu/xstate.h
+++ b/arch/x86/include/asm/fpu/xstate.h
@@ -14,6 +14,8 @@
 
 #define XSTATE_CPUID		0x0000000d
 
+#define TILE_CPUID		0x0000001d
+
 #define FXSAVE_SIZE	512
 
 #define XSAVE_HDR_SIZE	    64
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index a2e17aca6318..df64b311119e 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -51,6 +51,14 @@ static const char *xfeature_names[] =
 	"Protection Keys User registers",
 	"PASID state",
 	"unknown xstate feature"	,
+	"unknown xstate feature"	,
+	"unknown xstate feature"	,
+	"unknown xstate feature"	,
+	"unknown xstate feature"	,
+	"unknown xstate feature"	,
+	"AMX Tile config"		,
+	"AMX Tile data"			,
+	"unknown xstate feature"	,
 };
 
 static unsigned short xsave_cpuid_features[] __initdata = {
@@ -65,6 +73,8 @@ static unsigned short xsave_cpuid_features[] __initdata = {
 	[XFEATURE_PT_UNIMPLEMENTED_SO_FAR]	= X86_FEATURE_INTEL_PT,
 	[XFEATURE_PKRU]				= X86_FEATURE_PKU,
 	[XFEATURE_PASID]			= X86_FEATURE_ENQCMD,
+	[XFEATURE_XTILE_CFG]			= X86_FEATURE_AMX_TILE,
+	[XFEATURE_XTILE_DATA]			= X86_FEATURE_AMX_TILE,
 };
 
 static unsigned int xstate_offsets[XFEATURE_MAX] __ro_after_init =
@@ -240,6 +250,8 @@ static void __init print_xstate_features(void)
 	print_xstate_feature(XFEATURE_MASK_Hi16_ZMM);
 	print_xstate_feature(XFEATURE_MASK_PKRU);
 	print_xstate_feature(XFEATURE_MASK_PASID);
+	print_xstate_feature(XFEATURE_MASK_XTILE_CFG);
+	print_xstate_feature(XFEATURE_MASK_XTILE_DATA);
 }
 
 /*
@@ -523,6 +535,67 @@ static void __init __xstate_dump_leaves(void)
 	}								\
 } while (0)
 
+/**
+ * check_xtile_data_against_struct - Check tile data state size.
+ *
+ * Calculate the state size by multiplying the single tile size which is
+ * recorded in a C struct, and the number of tiles that the CPU informs.
+ * Compare the provided size with the calculation.
+ *
+ * @size:	The tile data state size
+ *
+ * Returns:	0 on success, -EINVAL on mismatch.
+ */
+static int __init check_xtile_data_against_struct(int size)
+{
+	u32 max_palid, palid, state_size;
+	u32 eax, ebx, ecx, edx;
+	u16 max_tile;
+
+	/*
+	 * Check the maximum palette id:
+	 *   eax: the highest numbered palette subleaf.
+	 */
+	cpuid_count(TILE_CPUID, 0, &max_palid, &ebx, &ecx, &edx);
+
+	/*
+	 * Cross-check each tile size and find the maximum number of
+	 * supported tiles.
+	 */
+	for (palid = 1, max_tile = 0; palid <= max_palid; palid++) {
+		u16 tile_size, max;
+
+		/*
+		 * Check the tile size info:
+		 *   eax[31:16]:  bytes per title
+		 *   ebx[31:16]:  the max names (or max number of tiles)
+		 */
+		cpuid_count(TILE_CPUID, palid, &eax, &ebx, &edx, &edx);
+		tile_size = eax >> 16;
+		max = ebx >> 16;
+
+		if (tile_size != sizeof(struct xtile_data)) {
+			pr_err("%s: struct is %zu bytes, cpu xtile %d bytes\n",
+			       __stringify(XFEATURE_XTILE_DATA),
+			       sizeof(struct xtile_data), tile_size);
+			__xstate_dump_leaves();
+			return -EINVAL;
+		}
+
+		if (max > max_tile)
+			max_tile = max;
+	}
+
+	state_size = sizeof(struct xtile_data) * max_tile;
+	if (size != state_size) {
+		pr_err("%s: calculated size is %u bytes, cpu state %d bytes\n",
+		       __stringify(XFEATURE_XTILE_DATA), state_size, size);
+		__xstate_dump_leaves();
+		return -EINVAL;
+	}
+	return 0;
+}
+
 /*
  * We have a C struct for each 'xstate'.  We need to ensure
  * that our software representation matches what the CPU
@@ -546,6 +619,11 @@ static bool __init check_xstate_against_struct(int nr)
 	XCHECK_SZ(sz, nr, XFEATURE_Hi16_ZMM,  struct avx_512_hi16_state);
 	XCHECK_SZ(sz, nr, XFEATURE_PKRU,      struct pkru_state);
 	XCHECK_SZ(sz, nr, XFEATURE_PASID,     struct ia32_pasid_state);
+	XCHECK_SZ(sz, nr, XFEATURE_XTILE_CFG, struct xtile_cfg);
+
+	/* The tile data size varies between implementations. */
+	if (nr == XFEATURE_XTILE_DATA)
+		check_xtile_data_against_struct(sz);
 
 	/*
 	 * Make *SURE* to add any feature numbers in below if
@@ -555,7 +633,7 @@ static bool __init check_xstate_against_struct(int nr)
 	if ((nr < XFEATURE_YMM) ||
 	    (nr >= XFEATURE_MAX) ||
 	    (nr == XFEATURE_PT_UNIMPLEMENTED_SO_FAR) ||
-	    ((nr >= XFEATURE_RSRVD_COMP_11) && (nr <= XFEATURE_LBR))) {
+	    ((nr >= XFEATURE_RSRVD_COMP_11) && (nr <= XFEATURE_RSRVD_COMP_16))) {
 		WARN_ONCE(1, "no structure for xstate: %d\n", nr);
 		XSTATE_WARN_ON(1);
 		return false;
-- 
2.17.1


  parent reply	other threads:[~2021-10-21 23:06 UTC|newest]

Thread overview: 60+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-10-21 22:55 [PATCH 00/23] x86: Support Intel Advanced Matrix Extensions (part 4) Chang S. Bae
2021-10-21 22:55 ` [PATCH 01/23] signal: Add an optional check for altstack size Chang S. Bae
2021-10-22  0:06   ` Bae, Chang Seok
2021-10-22 15:20   ` Eric W. Biederman
2021-10-22 20:58     ` Bae, Chang Seok
2021-10-22 22:51     ` Dave Hansen
2021-10-26 16:16   ` [tip: x86/fpu] " tip-bot2 for Thomas Gleixner
2021-10-21 22:55 ` [PATCH 02/23] x86/signal: Implement sigaltstack size validation Chang S. Bae
2021-10-26 16:16   ` [tip: x86/fpu] " tip-bot2 for Thomas Gleixner
2021-10-21 22:55 ` [PATCH 03/23] x86/fpu/xstate: Provide xstate_calculate_size() Chang S. Bae
2021-10-26 16:16   ` [tip: x86/fpu] " tip-bot2 for Chang S. Bae
2021-10-21 22:55 ` [PATCH 04/23] x86/fpu: Add members to struct fpu to cache permission information Chang S. Bae
2021-10-26 16:16   ` [tip: x86/fpu] " tip-bot2 for Thomas Gleixner
2021-10-21 22:55 ` [PATCH 05/23] x86/fpu: Add fpu_state_config::legacy_features Chang S. Bae
2021-10-26 16:16   ` [tip: x86/fpu] " tip-bot2 for Thomas Gleixner
2021-10-21 22:55 ` [PATCH 06/23] x86/arch_prctl: Add controls for dynamic XSTATE components Chang S. Bae
2021-10-24 21:17   ` Borislav Petkov
2021-10-26  9:11     ` [PATCH] Documentation/x86: Add documentation for using dynamic XSTATE features Chang S. Bae
2021-10-26 16:16       ` [tip: x86/fpu] " tip-bot2 for Chang S. Bae
2021-10-28 13:10       ` tip-bot2 for Chang S. Bae
2021-10-26 16:16   ` [tip: x86/fpu] x86/arch_prctl: Add controls for dynamic XSTATE components tip-bot2 for Chang S. Bae
2021-10-21 22:55 ` [PATCH 07/23] x86/fpu: Add basic helpers for dynamically enabled features Chang S. Bae
2021-10-26 16:16   ` [tip: x86/fpu] " tip-bot2 for Thomas Gleixner
2021-10-21 22:55 ` [PATCH 08/23] x86/signal: Use fpu::__state_user_size for sigalt stack validation Chang S. Bae
2021-10-26 16:16   ` [tip: x86/fpu] " tip-bot2 for Thomas Gleixner
2021-10-21 22:55 ` [PATCH 09/23] x86/fpu/signal: Prepare for variable sigframe length Chang S. Bae
2021-10-26 16:16   ` [tip: x86/fpu] " tip-bot2 for Chang S. Bae
2021-10-21 22:55 ` [PATCH 10/23] x86/fpu: Prepare fpu_clone() for dynamically enabled features Chang S. Bae
2021-10-26 16:16   ` [tip: x86/fpu] " tip-bot2 for Thomas Gleixner
2021-10-21 22:55 ` [PATCH 11/23] x86/fpu: Reset permission and fpstate on exec() Chang S. Bae
2021-10-26 16:16   ` [tip: x86/fpu] " tip-bot2 for Chang S. Bae
2021-10-21 22:55 ` [PATCH 12/23] x86/cpufeatures: Add eXtended Feature Disabling (XFD) feature bit Chang S. Bae
2021-10-26 16:16   ` [tip: x86/fpu] " tip-bot2 for Chang S. Bae
2021-10-21 22:55 ` [PATCH 13/23] x86/msr-index: Add MSRs for XFD Chang S. Bae
2021-10-26 16:16   ` [tip: x86/fpu] " tip-bot2 for Chang S. Bae
2021-10-21 22:55 ` [PATCH 14/23] x86/fpu: Add XFD state to fpstate Chang S. Bae
2021-10-26 16:16   ` [tip: x86/fpu] " tip-bot2 for Chang S. Bae
2021-10-21 22:55 ` [PATCH 15/23] x86/fpu: Add sanity checks for XFD Chang S. Bae
2021-10-25  8:11   ` Borislav Petkov
2021-10-25 20:15     ` Thomas Gleixner
2021-10-25  8:33   ` Mika Penttilä
2021-10-25 18:13     ` Thomas Gleixner
2021-10-25 19:57       ` Dave Hansen
2021-10-26 16:16   ` [tip: x86/fpu] " tip-bot2 for Thomas Gleixner
2021-10-21 22:55 ` [PATCH 16/23] x86/fpu: Update XFD state where required Chang S. Bae
2021-10-26 16:16   ` [tip: x86/fpu] " tip-bot2 for Chang S. Bae
2021-10-21 22:55 ` [PATCH 17/23] x86/fpu/xstate: Add XFD #NM handler Chang S. Bae
2021-10-26 16:16   ` [tip: x86/fpu] " tip-bot2 for Chang S. Bae
2021-10-21 22:55 ` [PATCH 18/23] x86/fpu/xstate: Add fpstate_realloc()/free() Chang S. Bae
2021-10-26 16:16   ` [tip: x86/fpu] " tip-bot2 for Chang S. Bae
2021-10-21 22:55 ` [PATCH 19/23] x86/fpu/xstate: Prepare XSAVE feature table for gaps in state component numbers Chang S. Bae
2021-10-26 16:16   ` [tip: x86/fpu] " tip-bot2 for Chang S. Bae
2021-10-21 22:55 ` Chang S. Bae [this message]
2021-10-26 16:16   ` [tip: x86/fpu] x86/fpu/amx: Define AMX state components and have it used for boot-time checks tip-bot2 for Chang S. Bae
2021-10-21 22:55 ` [PATCH 21/23] x86/fpu: Calculate the default sizes independently Chang S. Bae
2021-10-26 16:16   ` [tip: x86/fpu] " tip-bot2 for Chang S. Bae
2021-10-21 22:55 ` [PATCH 22/23] x86/fpu: Add XFD handling for dynamic states Chang S. Bae
2021-10-26 16:16   ` [tip: x86/fpu] " tip-bot2 for Chang S. Bae
2021-10-21 22:55 ` [PATCH 23/23] x86/fpu/amx: Enable the AMX feature in 64-bit mode Chang S. Bae
2021-10-26 16:16   ` [tip: x86/fpu] " tip-bot2 for Chang S. Bae

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20211021225527.10184-21-chang.seok.bae@intel.com \
    --to=chang.seok.bae@intel.com \
    --cc=arjan@linux.intel.com \
    --cc=dave.hansen@linux.intel.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=ravi.v.shankar@intel.com \
    --cc=tglx@linutronix.de \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.