linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: "Chang S. Bae" <chang.seok.bae@intel.com>
To: bp@suse.de, luto@kernel.org, tglx@linutronix.de,
	mingo@kernel.org, x86@kernel.org
Cc: len.brown@intel.com, dave.hansen@intel.com,
	thiago.macieira@intel.com, jing2.liu@intel.com,
	ravi.v.shankar@intel.com, linux-kernel@vger.kernel.org,
	chang.seok.bae@intel.com
Subject: [PATCH v7 19/26] x86/fpu/amx: Define AMX state components and have it used for boot-time checks
Date: Sat, 10 Jul 2021 06:03:06 -0700	[thread overview]
Message-ID: <20210710130313.5072-20-chang.seok.bae@intel.com> (raw)
In-Reply-To: <20210710130313.5072-1-chang.seok.bae@intel.com>

Linux uses check_xstate_against_struct() to sanity check the size of
XSTATE-enabled features. AMX is an XSAVE-enabled feature, and its size is
not hard-coded but discoverable at run-time via CPUID.

The AMX state is composed of state components 17 and 18, which are all user
state components. The first component is the XTILECFG state of a 64-byte
tile-related control register. The state component 18, called XTILEDATA,
contains the actual tile data, and the state size varies between
implementations. The architectural maximum, as defined in the CPUID(0x1d,
1): EAX[15:0], is a byte less than 64KB. The first implementation supports
8KB.

Check the XTILEDATA state size dynamically. The feature introduces the new
tile register, TMM. Define one register struct only and read the number of
registers from CPUID. Cross-check the overall size with CPUID again.

Signed-off-by: Chang S. Bae <chang.seok.bae@intel.com>
Reviewed-by: Len Brown <len.brown@intel.com>
Cc: x86@kernel.org
Cc: linux-kernel@vger.kernel.org
---
Changes from v4:
* Changed to return an error when tile data size mismatches. (Thomas Gleixner)
* Updated the function description and code comments.

Changes from v2:
* Updated the code comments.

Changes from v1:
* Rebased on the upstream kernel (5.10)
---
 arch/x86/include/asm/fpu/types.h  | 27 +++++++++++
 arch/x86/include/asm/fpu/xstate.h |  2 +
 arch/x86/kernel/fpu/xstate.c      | 78 +++++++++++++++++++++++++++++++
 3 files changed, 107 insertions(+)

diff --git a/arch/x86/include/asm/fpu/types.h b/arch/x86/include/asm/fpu/types.h
index 03160a1a79ad..f24b58b606dc 100644
--- a/arch/x86/include/asm/fpu/types.h
+++ b/arch/x86/include/asm/fpu/types.h
@@ -120,6 +120,9 @@ enum xfeature {
 	XFEATURE_RSRVD_COMP_13,
 	XFEATURE_RSRVD_COMP_14,
 	XFEATURE_LBR,
+	XFEATURE_RSRVD_COMP_16,
+	XFEATURE_XTILE_CFG,
+	XFEATURE_XTILE_DATA,
 
 	XFEATURE_MAX,
 };
@@ -136,11 +139,15 @@ enum xfeature {
 #define XFEATURE_MASK_PKRU		(1 << XFEATURE_PKRU)
 #define XFEATURE_MASK_PASID		(1 << XFEATURE_PASID)
 #define XFEATURE_MASK_LBR		(1 << XFEATURE_LBR)
+#define XFEATURE_MASK_XTILE_CFG	(1 << XFEATURE_XTILE_CFG)
+#define XFEATURE_MASK_XTILE_DATA	(1 << XFEATURE_XTILE_DATA)
 
 #define XFEATURE_MASK_FPSSE		(XFEATURE_MASK_FP | XFEATURE_MASK_SSE)
 #define XFEATURE_MASK_AVX512		(XFEATURE_MASK_OPMASK \
 					 | XFEATURE_MASK_ZMM_Hi256 \
 					 | XFEATURE_MASK_Hi16_ZMM)
+#define XFEATURE_MASK_XTILE		(XFEATURE_MASK_XTILE_DATA \
+					 | XFEATURE_MASK_XTILE_CFG)
 
 #define FIRST_EXTENDED_XFEATURE	XFEATURE_YMM
 
@@ -153,6 +160,9 @@ struct reg_256_bit {
 struct reg_512_bit {
 	u8	regbytes[512/8];
 };
+struct reg_1024_byte {
+	u8	regbytes[1024];
+};
 
 /*
  * State component 2:
@@ -255,6 +265,23 @@ struct arch_lbr_state {
 	u64 ler_to;
 	u64 ler_info;
 	struct lbr_entry		entries[];
+};
+
+/*
+ * State component 17: 64-byte tile configuration register.
+ */
+struct xtile_cfg {
+	u64				tcfg[8];
+} __packed;
+
+/*
+ * State component 18: 1KB tile data register.
+ * Each register represents 16 64-byte rows of the matrix
+ * data. But the number of registers depends on the actual
+ * implementation.
+ */
+struct xtile_data {
+	struct reg_1024_byte		tmm;
 } __packed;
 
 /*
diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h
index 6051fd6a749f..dbf0db1df9bf 100644
--- a/arch/x86/include/asm/fpu/xstate.h
+++ b/arch/x86/include/asm/fpu/xstate.h
@@ -14,6 +14,8 @@
 
 #define XSTATE_CPUID		0x0000000d
 
+#define TILE_CPUID		0x0000001d
+
 #define FXSAVE_SIZE	512
 
 #define XSAVE_HDR_SIZE	    64
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index eda70bdca45c..e148f1202410 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -41,6 +41,14 @@ static const char *xfeature_names[] =
 	"Protection Keys User registers",
 	"PASID state",
 	"unknown xstate feature"	,
+	"unknown xstate feature"	,
+	"unknown xstate feature"	,
+	"unknown xstate feature"	,
+	"unknown xstate feature"	,
+	"unknown xstate feature"	,
+	"AMX Tile config"		,
+	"AMX Tile data"			,
+	"unknown xstate feature"	,
 };
 
 static unsigned short xsave_cpuid_features[] __initdata = {
@@ -54,6 +62,8 @@ static unsigned short xsave_cpuid_features[] __initdata = {
 	[XFEATURE_PT_UNIMPLEMENTED_SO_FAR]	= X86_FEATURE_INTEL_PT,
 	[XFEATURE_PKRU]				= X86_FEATURE_PKU,
 	[XFEATURE_PASID]			= X86_FEATURE_ENQCMD,
+	[XFEATURE_XTILE_CFG]			= X86_FEATURE_AMX_TILE,
+	[XFEATURE_XTILE_DATA]			= X86_FEATURE_AMX_TILE,
 };
 
 /*
@@ -389,6 +399,8 @@ static void __init print_xstate_features(void)
 	print_xstate_feature(XFEATURE_MASK_Hi16_ZMM);
 	print_xstate_feature(XFEATURE_MASK_PKRU);
 	print_xstate_feature(XFEATURE_MASK_PASID);
+	print_xstate_feature(XFEATURE_MASK_XTILE_CFG);
+	print_xstate_feature(XFEATURE_MASK_XTILE_DATA);
 }
 
 /*
@@ -662,6 +674,67 @@ static void __xstate_dump_leaves(void)
 	}								\
 } while (0)
 
+/**
+ * check_xtile_data_against_struct() - Check tile data state size.
+ * @size:	The tile data state size
+ *
+ * Calculate the state size by multiplying the single tile size which is
+ * recorded in a C struct, and the number of tiles that the CPU informs.
+ * Each palette's tile size must also match the C struct. Compare the
+ * provided size with the calculation.
+ *
+ * Return:	0 on success, -EINVAL on mismatch.
+ */
+static int check_xtile_data_against_struct(int size)
+{
+	u32 max_palid, palid, state_size;
+	u32 eax, ebx, ecx, edx;
+	u16 max_tile;
+
+	/*
+	 * Check the maximum palette id:
+	 *   eax: the highest numbered palette subleaf.
+	 */
+	cpuid_count(TILE_CPUID, 0, &max_palid, &ebx, &ecx, &edx);
+
+	/*
+	 * Cross-check each tile size and find the maximum number of
+	 * supported tiles.
+	 */
+	for (palid = 1, max_tile = 0; palid <= max_palid; palid++) {
+		u16 tile_size, max;
+
+		/*
+		 * Check the tile size info:
+		 *   eax[31:16]:  bytes per tile
+		 *   ebx[31:16]:  the max names (or max number of tiles)
+		 */
+		cpuid_count(TILE_CPUID, palid, &eax, &ebx, &ecx, &edx);
+		tile_size = eax >> 16;
+		max = ebx >> 16;
+
+		if (tile_size != sizeof(struct xtile_data)) {
+			pr_err("%s: struct is %zu bytes, cpu xtile %d bytes\n",
+			       __stringify(XFEATURE_XTILE_DATA),
+			       sizeof(struct xtile_data), tile_size);
+			__xstate_dump_leaves();
+			return -EINVAL;
+		}
+
+		if (max > max_tile)
+			max_tile = max;
+	}
+
+	state_size = sizeof(struct xtile_data) * max_tile;
+	if (size != state_size) {
+		pr_err("%s: calculated size is %u bytes, cpu state %d bytes\n",
+		       __stringify(XFEATURE_XTILE_DATA), state_size, size);
+		__xstate_dump_leaves();
+		return -EINVAL;
+	}
+	return 0;
+}
+
 /*
  * We have a C struct for each 'xstate'.  We need to ensure
  * that our software representation matches what the CPU
@@ -685,6 +758,11 @@ static int check_xstate_against_struct(int nr)
 	XCHECK_SZ(sz, nr, XFEATURE_Hi16_ZMM,  struct avx_512_hi16_state);
 	XCHECK_SZ(sz, nr, XFEATURE_PKRU,      struct pkru_state);
 	XCHECK_SZ(sz, nr, XFEATURE_PASID,     struct ia32_pasid_state);
+	XCHECK_SZ(sz, nr, XFEATURE_XTILE_CFG, struct xtile_cfg);
+
+	/* The tile data size varies between implementations. */
+	if (nr == XFEATURE_XTILE_DATA)
+		check_xtile_data_against_struct(sz);
 
 	/*
 	 * Make *SURE* to add any feature numbers in below if
-- 
2.17.1


  parent reply	other threads:[~2021-07-10 13:09 UTC|newest]

Thread overview: 31+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-07-10 13:02 [PATCH v7 00/26] x86: Support Intel Advanced Matrix Extensions Chang S. Bae
2021-07-10 13:02 ` [PATCH v7 01/26] x86/fpu/xstate: Modify the initialization helper to handle both static and dynamic buffers Chang S. Bae
2021-07-10 13:02 ` [PATCH v7 02/26] x86/fpu/xstate: Modify state copy helpers " Chang S. Bae
2021-07-10 13:02 ` [PATCH v7 03/26] x86/fpu/xstate: Modify address finders " Chang S. Bae
2021-07-10 13:02 ` [PATCH v7 04/26] x86/fpu/xstate: Add a new variable to indicate dynamic user states Chang S. Bae
2021-07-10 13:02 ` [PATCH v7 05/26] x86/fpu/xstate: Add new variables to indicate dynamic XSTATE buffer size Chang S. Bae
2021-07-10 13:02 ` [PATCH v7 06/26] x86/fpu/xstate: Calculate and remember dynamic XSTATE buffer sizes Chang S. Bae
2021-07-10 13:02 ` [PATCH v7 07/26] x86/fpu/xstate: Convert the struct fpu 'state' field to a pointer Chang S. Bae
2021-07-10 13:02 ` [PATCH v7 08/26] x86/fpu/xstate: Introduce helpers to manage the XSTATE buffer dynamically Chang S. Bae
2021-07-10 13:02 ` [PATCH v7 09/26] x86/fpu/xstate: Update the XSTATE save function to support dynamic states Chang S. Bae
2021-07-10 13:02 ` [PATCH v7 10/26] x86/fpu/xstate: Update the XSTATE buffer address finder " Chang S. Bae
2021-07-10 13:02 ` [PATCH v7 11/26] x86/fpu/xstate: Update the XSTATE context copy function " Chang S. Bae
2021-07-10 13:02 ` [PATCH v7 12/26] x86/fpu/xstate: Use feature disable (XFD) to protect dynamic user state Chang S. Bae
2021-07-13 19:13   ` Thiago Macieira
2021-07-17 15:47     ` Bae, Chang Seok
2021-07-10 13:03 ` [PATCH v7 13/26] x86/fpu/xstate: Support ptracer-induced XSTATE buffer expansion Chang S. Bae
2021-07-10 13:03 ` [PATCH v7 14/26] x86/arch_prctl: Create ARCH_SET_XSTATE_ENABLE/ARCH_GET_XSTATE_ENABLE Chang S. Bae
2021-07-10 13:03 ` [PATCH v7 15/26] x86/fpu/xstate: Support both legacy and expanded signal XSTATE size Chang S. Bae
2021-07-10 13:03 ` [PATCH v7 16/26] x86/fpu/xstate: Adjust the XSAVE feature table to address gaps in state component numbers Chang S. Bae
2021-07-10 13:03 ` [PATCH v7 17/26] x86/fpu/xstate: Disable XSTATE support if an inconsistent state is detected Chang S. Bae
2021-07-10 13:03 ` [PATCH v7 18/26] x86/cpufeatures/amx: Enumerate Advanced Matrix Extension (AMX) feature bits Chang S. Bae
2021-07-10 13:03 ` Chang S. Bae [this message]
2021-07-10 13:03 ` [PATCH v7 20/26] x86/fpu/amx: Initialize child's AMX state Chang S. Bae
2021-07-10 13:03 ` [PATCH v7 21/26] x86/fpu/amx: Enable the AMX feature in 64-bit mode Chang S. Bae
2021-07-10 13:03 ` [PATCH v7 22/26] x86/fpu/xstate: Skip writing zeros to signal frame for dynamic user states if in INIT-state Chang S. Bae
2021-07-10 13:03 ` [PATCH v7 23/26] selftest/x86/amx: Test cases for the AMX state management Chang S. Bae
2021-07-10 13:03 ` [PATCH v7 24/26] x86/insn/amx: Add TILERELEASE instruction to the opcode map Chang S. Bae
2021-07-10 13:03 ` [PATCH v7 25/26] intel_idle/amx: Add SPR support with XTILEDATA capability Chang S. Bae
2021-07-16 17:34   ` Rafael J. Wysocki
2021-07-16 17:37     ` Bae, Chang Seok
2021-07-10 13:03 ` [PATCH v7 26/26] x86/fpu/xstate: Add a sanity check for XFD state when saving XSTATE Chang S. Bae

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210710130313.5072-20-chang.seok.bae@intel.com \
    --to=chang.seok.bae@intel.com \
    --cc=bp@suse.de \
    --cc=dave.hansen@intel.com \
    --cc=jing2.liu@intel.com \
    --cc=len.brown@intel.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=luto@kernel.org \
    --cc=mingo@kernel.org \
    --cc=ravi.v.shankar@intel.com \
    --cc=tglx@linutronix.de \
    --cc=thiago.macieira@intel.com \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).