All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Chang S. Bae" <chang.seok.bae@intel.com>
To: tglx@linutronix.de, mingo@kernel.org, bp@suse.de,
	luto@kernel.org, x86@kernel.org
Cc: len.brown@intel.com, dave.hansen@intel.com, jing2.liu@intel.com,
	ravi.v.shankar@intel.com, linux-kernel@vger.kernel.org,
	chang.seok.bae@intel.com
Subject: [RFC PATCH 13/22] x86/fpu/xstate: Expand dynamic user state area on first use
Date: Thu,  1 Oct 2020 13:39:04 -0700	[thread overview]
Message-ID: <20201001203913.9125-14-chang.seok.bae@intel.com> (raw)
In-Reply-To: <20201001203913.9125-1-chang.seok.bae@intel.com>

Intel's Extended Feature Disable (XFD) feature is an extension of the XSAVE
architecture. XFD allows the kernel to enable a feature state in XCR0 and
to receive a #NM trap when a task uses instructions accessing that state.
In this way, Linux can allocate the large task->fpu buffer only for tasks
that use it.

XFD introduces two MSRs: IA32_XFD to enable/disable the feature and
IA32_XFD_ERR to assist the #NM trap handler. Both use the same
state-component bitmap format, used by XCR0.

Use this hardware capability to find the right time to expand xstate area.
Introduce two sets of helper functions for that:

1. The first set is primarily for interacting with the XFD hardware
   feature. Helpers for configuring disablement, e.g. in context switching,
   are:
	xdisable_setbits()
	xdisable_getbits()
	xdisable_switch()

2. The second set is for managing the first-use status and handling #NM
   trap:
	xfirstuse_enabled()
	xfirstuse_not_detected()
	xfirstuse_event_handler()

The #NM handler induces the xstate area expansion to save the first-used
states.

No functional change until the kernel enables dynamic user states and XFD.

Signed-off-by: Chang S. Bae <chang.seok.bae@intel.com>
Reviewed-by: Len Brown <len.brown@intel.com>
Cc: x86@kernel.org
Cc: linux-kernel@vger.kernel.org
---
 arch/x86/include/asm/cpufeatures.h  |  1 +
 arch/x86/include/asm/fpu/internal.h | 53 ++++++++++++++++++++++++++++-
 arch/x86/include/asm/msr-index.h    |  2 ++
 arch/x86/kernel/fpu/core.c          | 37 ++++++++++++++++++++
 arch/x86/kernel/fpu/xstate.c        | 34 ++++++++++++++++--
 arch/x86/kernel/process.c           |  5 +++
 arch/x86/kernel/process_32.c        |  2 +-
 arch/x86/kernel/process_64.c        |  2 +-
 arch/x86/kernel/traps.c             |  3 ++
 9 files changed, 133 insertions(+), 6 deletions(-)

diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 2901d5df4366..7d7fe1d82966 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -274,6 +274,7 @@
 #define X86_FEATURE_XSAVEC		(10*32+ 1) /* XSAVEC instruction */
 #define X86_FEATURE_XGETBV1		(10*32+ 2) /* XGETBV with ECX = 1 instruction */
 #define X86_FEATURE_XSAVES		(10*32+ 3) /* XSAVES/XRSTORS instructions */
+#define X86_FEATURE_XFD			(10*32+ 4) /* eXtended Feature Disabling */
 
 /*
  * Extended auxiliary flags: Linux defined - for features scattered in various
diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h
index 3b03ead87a46..f5dbbaa060fb 100644
--- a/arch/x86/include/asm/fpu/internal.h
+++ b/arch/x86/include/asm/fpu/internal.h
@@ -572,11 +572,60 @@ static inline void switch_fpu_prepare(struct fpu *old_fpu, int cpu)
  * Misc helper functions:
  */
 
+/* The first-use detection helpers: */
+
+static inline void xdisable_setbits(u64 value)
+{
+	wrmsrl_safe(MSR_IA32_XFD, value);
+}
+
+static inline u64 xdisable_getbits(void)
+{
+	u64 value;
+
+	rdmsrl_safe(MSR_IA32_XFD, &value);
+	return value;
+}
+
+static inline u64 xfirstuse_enabled(void)
+{
+	/* All the dynamic user components are first-use enabled. */
+	return xfeatures_mask_user_dynamic;
+}
+
+/*
+ * Convert fpu->firstuse_bv to xdisable configuration in MSR IA32_XFD.
+ * xdisable_setbits() only uses this.
+ */
+static inline u64 xfirstuse_not_detected(struct fpu *fpu)
+{
+	u64 firstuse_bv = (fpu->state_mask & xfirstuse_enabled());
+
+	/*
+	 * If first-use is not detected, set the bit. If the detection is
+	 * not enabled, the bit is always zero in firstuse_bv. So, make
+	 * following conversion:
+	 */
+	return  (xfirstuse_enabled() ^ firstuse_bv);
+}
+
+/* Update MSR IA32_XFD based on fpu->firstuse_bv */
+static inline void xdisable_switch(struct fpu *prev, struct fpu *next)
+{
+	if (!static_cpu_has(X86_FEATURE_XFD) || !xfirstuse_enabled())
+		return;
+
+	if (unlikely(prev->state_mask != next->state_mask))
+		xdisable_setbits(xfirstuse_not_detected(next));
+}
+
+bool xfirstuse_event_handler(struct fpu *fpu);
+
 /*
  * Load PKRU from the FPU context if available. Delay loading of the
  * complete FPU state until the return to userland.
  */
-static inline void switch_fpu_finish(struct fpu *new_fpu)
+static inline void switch_fpu_finish(struct fpu *old_fpu, struct fpu *new_fpu)
 {
 	u32 pkru_val = init_pkru_value;
 	struct pkru_state *pk;
@@ -586,6 +635,8 @@ static inline void switch_fpu_finish(struct fpu *new_fpu)
 
 	set_thread_flag(TIF_NEED_FPU_LOAD);
 
+	xdisable_switch(old_fpu, new_fpu);
+
 	if (!cpu_feature_enabled(X86_FEATURE_OSPKE))
 		return;
 
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 2859ee4f39a8..0ccbe8cc99ad 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -610,6 +610,8 @@
 #define MSR_IA32_BNDCFGS_RSVD		0x00000ffc
 
 #define MSR_IA32_XSS			0x00000da0
+#define MSR_IA32_XFD			0x000001c4
+#define MSR_IA32_XFD_ERR		0x000001c5
 
 #define MSR_IA32_APICBASE		0x0000001b
 #define MSR_IA32_APICBASE_BSP		(1<<8)
diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index ece6428ba85b..2e07bfcd54b3 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -518,3 +518,40 @@ int fpu__exception_code(struct fpu *fpu, int trap_nr)
 	 */
 	return 0;
 }
+
+bool xfirstuse_event_handler(struct fpu *fpu)
+{
+	bool handled = false;
+	u64 event_mask;
+
+	/* Check whether the first-use detection is running. */
+	if (!static_cpu_has(X86_FEATURE_XFD) || !xfirstuse_enabled())
+		return handled;
+
+	rdmsrl_safe(MSR_IA32_XFD_ERR, &event_mask);
+
+	/* The trap event should happen to one of first-use enabled features */
+	WARN_ON(!(event_mask & xfirstuse_enabled()));
+
+	/* If IA32_XFD_ERR is empty, the current trap has nothing to do with. */
+	if (!event_mask)
+		return handled;
+
+	/*
+	 * The first-use event is presumed to be from userspace, so it should have
+	 * nothing to do with interrupt context.
+	 */
+	if (WARN_ON(in_interrupt()))
+		return handled;
+
+	if (alloc_xstate_area(fpu, event_mask, NULL))
+		return handled;
+
+	xdisable_setbits(xfirstuse_not_detected(fpu));
+
+	/* Clear the trap record. */
+	wrmsrl_safe(MSR_IA32_XFD_ERR, 0);
+	handled = true;
+
+	return handled;
+}
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index b9261ab4e5e2..13e8eff7a23b 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -130,6 +130,18 @@ static bool xfeature_is_supervisor(int xfeature_nr)
 	return ecx & 1;
 }
 
+static bool xfeature_disable_supported(int xfeature_nr)
+{
+	u32 eax, ebx, ecx, edx;
+
+	/*
+	 * If state component 'i' supports xfeature disable (or first-use
+	 * detection, ECX[2] return 1; otherwise, 0.
+	 */
+	cpuid_count(XSTATE_CPUID, xfeature_nr, &eax, &ebx, &ecx, &edx);
+	return ecx & 4;
+}
+
 /*
  * Available once those arrays for the offset, size, and alignment info are set up,
  * by setup_xstate_features().
@@ -313,6 +325,9 @@ void fpu__init_cpu_xstate(void)
 		wrmsrl(MSR_IA32_XSS, xfeatures_mask_supervisor() |
 				     xfeatures_mask_supervisor_dynamic());
 	}
+
+	if (boot_cpu_has(X86_FEATURE_XFD))
+		xdisable_setbits(xfirstuse_enabled());
 }
 
 static bool xfeature_enabled(enum xfeature xfeature)
@@ -511,8 +526,9 @@ static void __init print_xstate_offset_size(void)
 	for (i = FIRST_EXTENDED_XFEATURE; i < XFEATURE_MAX; i++) {
 		if (!xfeature_enabled(i))
 			continue;
-		pr_info("x86/fpu: xstate_offset[%d]: %4d, xstate_sizes[%d]: %4d\n",
-			 i, xstate_comp_offsets[i], i, xstate_sizes[i]);
+		pr_info("x86/fpu: xstate_offset[%d]: %4d, xstate_sizes[%d]: %4d (%s)\n",
+			i, xstate_comp_offsets[i], i, xstate_sizes[i],
+			(xfeatures_mask_user_dynamic & BIT_ULL(i)) ? "on-demand" : "default");
 	}
 }
 
@@ -942,9 +958,18 @@ void __init fpu__init_system_xstate(void)
 	}
 
 	xfeatures_mask_all &= fpu__get_supported_xfeatures_mask();
-	/* Do not support the dynamically allocated area yet. */
 	xfeatures_mask_user_dynamic = 0;
 
+	for (i = FIRST_EXTENDED_XFEATURE; i < XFEATURE_MAX; i++) {
+		u64 feature_mask = BIT_ULL(i);
+
+		if (!(xfeatures_mask_user() & feature_mask))
+			continue;
+
+		if (xfeature_disable_supported(i))
+			xfeatures_mask_user_dynamic |= feature_mask;
+	}
+
 	/* Enable xstate instructions to be able to continue with initialization: */
 	fpu__init_cpu_xstate();
 	err = init_xstate_size();
@@ -999,6 +1024,9 @@ void fpu__resume_cpu(void)
 		wrmsrl(MSR_IA32_XSS, xfeatures_mask_supervisor()  |
 				     xfeatures_mask_supervisor_dynamic());
 	}
+
+	if (boot_cpu_has(X86_FEATURE_XFD))
+		xdisable_setbits(xfirstuse_not_detected(&current->thread.fpu));
 }
 
 /*
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 43d38bd09fb1..66850b63fe66 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -102,6 +102,11 @@ void arch_thread_struct_whitelist(unsigned long *offset, unsigned long *size)
 	*size = fpu_kernel_xstate_default_size;
 }
 
+void arch_release_task_struct(struct task_struct *tsk)
+{
+	free_xstate_area(&tsk->thread.fpu);
+}
+
 /*
  * Free thread data structures etc..
  */
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 4f2f54e1281c..7bd5d08eeb41 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -213,7 +213,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 
 	this_cpu_write(current_task, next_p);
 
-	switch_fpu_finish(next_fpu);
+	switch_fpu_finish(prev_fpu, next_fpu);
 
 	/* Load the Intel cache allocation PQR MSR. */
 	resctrl_sched_in();
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 9afefe325acb..3970367c02c0 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -595,7 +595,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 	this_cpu_write(current_task, next_p);
 	this_cpu_write(cpu_current_top_of_stack, task_top_of_stack(next_p));
 
-	switch_fpu_finish(next_fpu);
+	switch_fpu_finish(prev_fpu, next_fpu);
 
 	/* Reload sp0. */
 	update_task_stack(next_p);
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 81a2fb711091..0eebcae99938 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -1028,6 +1028,9 @@ DEFINE_IDTENTRY(exc_device_not_available)
 {
 	unsigned long cr0 = read_cr0();
 
+	if (xfirstuse_event_handler(&current->thread.fpu))
+		return;
+
 #ifdef CONFIG_MATH_EMULATION
 	if (!boot_cpu_has(X86_FEATURE_FPU) && (cr0 & X86_CR0_EM)) {
 		struct math_emu_info info = { };
-- 
2.17.1


  parent reply	other threads:[~2020-10-01 20:43 UTC|newest]

Thread overview: 43+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-10-01 20:38 [RFC PATCH 00/22] x86: Support Intel Advanced Matrix Extensions Chang S. Bae
2020-10-01 20:38 ` [RFC PATCH 01/22] x86/fpu/xstate: Modify area init helper prototypes to access all the possible areas Chang S. Bae
2020-10-01 20:38 ` [RFC PATCH 02/22] x86/fpu/xstate: Modify xstate copy " Chang S. Bae
2020-10-01 20:38 ` [RFC PATCH 03/22] x86/fpu/xstate: Modify address finder " Chang S. Bae
2020-10-01 20:38 ` [RFC PATCH 04/22] x86/fpu/xstate: Modify save and restore helper " Chang S. Bae
2020-10-01 20:38 ` [RFC PATCH 05/22] x86/fpu/xstate: Introduce a new variable for dynamic user states Chang S. Bae
2020-10-01 20:38 ` [RFC PATCH 06/22] x86/fpu/xstate: Outline dynamic xstate area size in the task context Chang S. Bae
2020-10-01 20:38 ` [RFC PATCH 07/22] x86/fpu/xstate: Introduce helpers to manage an xstate area dynamically Chang S. Bae
2020-10-01 23:41   ` Andy Lutomirski
2020-10-13 22:00     ` Brown, Len
2020-10-01 20:38 ` [RFC PATCH 08/22] x86/fpu/xstate: Define the scope of the initial xstate data Chang S. Bae
2020-10-01 20:39 ` [RFC PATCH 09/22] x86/fpu/xstate: Introduce wrapper functions for organizing xstate area access Chang S. Bae
2020-10-01 20:39 ` [RFC PATCH 10/22] x86/fpu/xstate: Update xstate save function for supporting dynamic user xstate Chang S. Bae
2020-10-01 20:39 ` [RFC PATCH 11/22] x86/fpu/xstate: Update xstate area address finder " Chang S. Bae
2020-10-01 20:39 ` [RFC PATCH 12/22] x86/fpu/xstate: Update xstate context copy function for supporting dynamic area Chang S. Bae
2020-10-01 20:39 ` Chang S. Bae [this message]
2020-10-01 23:39   ` [RFC PATCH 13/22] x86/fpu/xstate: Expand dynamic user state area on first use Andy Lutomirski
2020-10-13 22:31     ` Brown, Len
2020-10-13 22:43       ` Dave Hansen
2020-10-14  1:11         ` Andy Lutomirski
2020-10-14  6:03           ` Dave Hansen
2020-10-14 16:10             ` Andy Lutomirski
2020-10-14 16:29               ` Dave Hansen
2020-11-03 21:32                 ` Bae, Chang Seok
2020-11-03 21:41                   ` Dave Hansen
2020-11-03 21:53                     ` Bae, Chang Seok
2020-10-14 10:41         ` Peter Zijlstra
2020-10-14  1:47       ` Andy Lutomirski
2020-10-01 20:39 ` [RFC PATCH 14/22] x86/fpu/xstate: Inherit dynamic user state when used in the parent Chang S. Bae
2020-10-01 20:39 ` [RFC PATCH 15/22] x86/fpu/xstate: Support ptracer-induced xstate area expansion Chang S. Bae
2020-10-01 20:39 ` [RFC PATCH 16/22] x86/fpu/xstate: Support dynamic user state in the signal handling path Chang S. Bae
2020-10-01 20:39 ` [RFC PATCH 17/22] x86/fpu/xstate: Extend the table for mapping xstate components with features Chang S. Bae
2020-10-01 20:39 ` [RFC PATCH 18/22] x86/cpufeatures/amx: Enumerate Advanced Matrix Extension (AMX) feature bits Chang S. Bae
2020-10-01 20:39 ` [RFC PATCH 19/22] x86/fpu/amx: Define AMX state components and have it used for boot-time checks Chang S. Bae
2020-10-01 20:39 ` [RFC PATCH 20/22] x86/fpu/amx: Enable the AMX feature in 64-bit mode Chang S. Bae
2020-10-01 20:39 ` [RFC PATCH 21/22] selftest/x86/amx: Include test cases for the AMX state management Chang S. Bae
2020-10-01 20:39 ` [RFC PATCH 22/22] x86/fpu/xstate: Introduce boot-parameters for control some state component support Chang S. Bae
2020-10-02  2:09   ` Randy Dunlap
2020-10-13 23:00     ` Brown, Len
2020-10-02 17:15   ` Andy Lutomirski
2020-10-02 17:26     ` Dave Hansen
2020-10-13 23:38     ` Brown, Len
2020-10-02 17:25   ` Dave Hansen

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20201001203913.9125-14-chang.seok.bae@intel.com \
    --to=chang.seok.bae@intel.com \
    --cc=bp@suse.de \
    --cc=dave.hansen@intel.com \
    --cc=jing2.liu@intel.com \
    --cc=len.brown@intel.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=luto@kernel.org \
    --cc=mingo@kernel.org \
    --cc=ravi.v.shankar@intel.com \
    --cc=tglx@linutronix.de \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.