All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Chang S. Bae" <chang.seok.bae@intel.com>
To: linux-kernel@vger.kernel.org
Cc: x86@kernel.org, tglx@linutronix.de, dave.hansen@linux.intel.com,
	arjan@linux.intel.com, ravi.v.shankar@intel.com,
	chang.seok.bae@intel.com
Subject: [PATCH 18/23] x86/fpu/xstate: Add fpstate_realloc()/free()
Date: Thu, 21 Oct 2021 15:55:22 -0700	[thread overview]
Message-ID: <20211021225527.10184-19-chang.seok.bae@intel.com> (raw)
In-Reply-To: <20211021225527.10184-1-chang.seok.bae@intel.com>

The fpstate embedded in struct fpu is the default state for storing the FPU
registers. It's sized so that the default supported features can be stored.
For dynamically enabled features the register buffer is too small.

The #NM handler detects first use of a feature which is disabled in the
XFD MSR. After handling permission checks it recalculates the size for
kernel space and user space state and invokes fpstate_realloc() which
tries to reallocate fpstate and install it.

Provide the allocator function which checks whether the current buffer size
is sufficient and if not allocates one. If allocation is successful the new
fpstate is initialized with the new features and sizes and the now enabled
features is removed from the task's XFD mask.

realloc_fpstate() uses vzalloc(). If use of this mechanism grows to
re-allocate buffers larger than 64KB, a more sophisticated allocation
scheme that includes purpose-built reclaim capability might be justified.

Signed-off-by: Chang S. Bae <chang.seok.bae@intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Chang S. Bae <chang.seok.bae@intel.com>
---
 arch/x86/include/asm/fpu/api.h |  7 +++
 arch/x86/kernel/fpu/xstate.c   | 97 +++++++++++++++++++++++++++++++---
 arch/x86/kernel/process.c      | 10 ++++
 3 files changed, 106 insertions(+), 8 deletions(-)

diff --git a/arch/x86/include/asm/fpu/api.h b/arch/x86/include/asm/fpu/api.h
index 89762c28ad5a..c17d02decd65 100644
--- a/arch/x86/include/asm/fpu/api.h
+++ b/arch/x86/include/asm/fpu/api.h
@@ -130,6 +130,13 @@ static inline void fpstate_init_soft(struct swregs_state *soft) {}
 /* State tracking */
 DECLARE_PER_CPU(struct fpu *, fpu_fpregs_owner_ctx);
 
+/* Process cleanup */
+#ifdef CONFIG_X86_64
+extern void fpstate_free(struct fpu *fpu);
+#else
+static inline void fpstate_free(struct fpu *fpu) { }
+#endif
+
 /* fpstate-related functions which are exported to KVM */
 extern void fpstate_clear_xstate_component(struct fpstate *fps, unsigned int xfeature);
 
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index 1b2fad6e4964..3f65140b4e6f 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -12,6 +12,7 @@
 #include <linux/pkeys.h>
 #include <linux/seq_file.h>
 #include <linux/proc_fs.h>
+#include <linux/vmalloc.h>
 
 #include <asm/fpu/api.h>
 #include <asm/fpu/regset.h>
@@ -22,6 +23,7 @@
 #include <asm/prctl.h>
 #include <asm/elf.h>
 
+#include "context.h"
 #include "internal.h"
 #include "legacy.h"
 #include "xstate.h"
@@ -1368,6 +1370,91 @@ void xfd_validate_state(struct fpstate *fpstate, u64 mask, bool rstor)
 }
 #endif /* CONFIG_X86_DEBUG_FPU */
 
+void fpstate_free(struct fpu *fpu)
+{
+	if (fpu->fpstate || fpu->fpstate != &fpu->__fpstate)
+		vfree(fpu->fpstate);
+}
+
+/**
+ * fpu_install_fpstate - Update the active fpstate in the FPU
+ *
+ * @fpu:	A struct fpu * pointer
+ * @newfps:	A struct fpstate * pointer
+ *
+ * Returns:	A null pointer if the last active fpstate is the embedded
+ *		one or the new fpstate is already installed;
+ *		otherwise, a pointer to the old fpstate which has to
+ *		be freed by the caller.
+ */
+static struct fpstate *fpu_install_fpstate(struct fpu *fpu,
+					   struct fpstate *newfps)
+{
+	struct fpstate *oldfps = fpu->fpstate;
+
+	if (fpu->fpstate == newfps)
+		return NULL;
+
+	fpu->fpstate = newfps;
+	return oldfps != &fpu->__fpstate ? oldfps : NULL;
+}
+
+/**
+ * fpstate_realloc - Reallocate struct fpstate for the requested new features
+ *
+ * @xfeatures:	A bitmap of xstate features which extend the enabled features
+ *		of that task
+ * @ksize:	The required size for the kernel buffer
+ * @usize:	The required size for user space buffers
+ *
+ * Note vs. vmalloc(): If the task with a vzalloc()-allocated buffer
+ * terminates quickly, vfree()-induced IPIs may be a concern, but tasks
+ * with large states are likely to live longer.
+ *
+ * Returns: 0 on success, -ENOMEM on allocation error.
+ */
+static int fpstate_realloc(u64 xfeatures, unsigned int ksize,
+			   unsigned int usize)
+{
+	struct fpu *fpu = &current->thread.fpu;
+	struct fpstate *curfps, *newfps = NULL;
+	unsigned int fpsize;
+
+	curfps = fpu->fpstate;
+	fpsize = ksize + ALIGN(offsetof(struct fpstate, regs), 64);
+
+	newfps = vzalloc(fpsize);
+	if (!newfps)
+		return -ENOMEM;
+	newfps->size = ksize;
+	newfps->user_size = usize;
+	newfps->is_valloc = true;
+
+	fpregs_lock();
+	/*
+	 * Ensure that the current state is in the registers before
+	 * swapping fpstate as that might invalidate it due to layout
+	 * changes.
+	 */
+	if (test_thread_flag(TIF_NEED_FPU_LOAD))
+		fpregs_restore_userregs();
+
+	newfps->xfeatures = curfps->xfeatures | xfeatures;
+	newfps->user_xfeatures = curfps->user_xfeatures | xfeatures;
+	newfps->xfd = curfps->xfd & ~xfeatures;
+
+	curfps = fpu_install_fpstate(fpu, newfps);
+
+	/* Do the final updates within the locked region */
+	xstate_init_xcomp_bv(&newfps->regs.xsave, newfps->xfeatures);
+	xfd_update_state(newfps);
+
+	fpregs_unlock();
+
+	vfree(curfps);
+	return 0;
+}
+
 static int validate_sigaltstack(unsigned int usize)
 {
 	struct task_struct *thread, *leader = current->group_leader;
@@ -1390,7 +1477,8 @@ static int __xstate_request_perm(u64 permitted, u64 requested)
 	/*
 	 * This deliberately does not exclude !XSAVES as we still might
 	 * decide to optionally context switch XCR0 or talk the silicon
-	 * vendors into extending XFD for the pre AMX states.
+	 * vendors into extending XFD for the pre AMX states, especially
+	 * AVX512.
 	 */
 	bool compacted = cpu_feature_enabled(X86_FEATURE_XSAVES);
 	struct fpu *fpu = &current->group_leader->thread.fpu;
@@ -1462,13 +1550,6 @@ static int xstate_request_perm(unsigned long idx)
 	return ret;
 }
 
-/* Place holder for now */
-static int fpstate_realloc(u64 xfeatures, unsigned int ksize,
-			   unsigned int usize)
-{
-	return -ENOMEM;
-}
-
 int xfd_enable_feature(u64 xfd_err)
 {
 	u64 xfd_event = xfd_err & XFEATURE_MASK_USER_DYNAMIC;
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 99025e32f105..f3f251787b99 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -32,6 +32,7 @@
 #include <asm/mwait.h>
 #include <asm/fpu/api.h>
 #include <asm/fpu/sched.h>
+#include <asm/fpu/xstate.h>
 #include <asm/debugreg.h>
 #include <asm/nmi.h>
 #include <asm/tlbflush.h>
@@ -90,9 +91,18 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
 #endif
 	/* Drop the copied pointer to current's fpstate */
 	dst->thread.fpu.fpstate = NULL;
+
 	return 0;
 }
 
+#ifdef CONFIG_X86_64
+void arch_release_task_struct(struct task_struct *tsk)
+{
+	if (fpu_state_size_dynamic())
+		fpstate_free(&tsk->thread.fpu);
+}
+#endif
+
 /*
  * Free thread data structures etc..
  */
-- 
2.17.1


  parent reply	other threads:[~2021-10-21 23:05 UTC|newest]

Thread overview: 60+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-10-21 22:55 [PATCH 00/23] x86: Support Intel Advanced Matrix Extensions (part 4) Chang S. Bae
2021-10-21 22:55 ` [PATCH 01/23] signal: Add an optional check for altstack size Chang S. Bae
2021-10-22  0:06   ` Bae, Chang Seok
2021-10-22 15:20   ` Eric W. Biederman
2021-10-22 20:58     ` Bae, Chang Seok
2021-10-22 22:51     ` Dave Hansen
2021-10-26 16:16   ` [tip: x86/fpu] " tip-bot2 for Thomas Gleixner
2021-10-21 22:55 ` [PATCH 02/23] x86/signal: Implement sigaltstack size validation Chang S. Bae
2021-10-26 16:16   ` [tip: x86/fpu] " tip-bot2 for Thomas Gleixner
2021-10-21 22:55 ` [PATCH 03/23] x86/fpu/xstate: Provide xstate_calculate_size() Chang S. Bae
2021-10-26 16:16   ` [tip: x86/fpu] " tip-bot2 for Chang S. Bae
2021-10-21 22:55 ` [PATCH 04/23] x86/fpu: Add members to struct fpu to cache permission information Chang S. Bae
2021-10-26 16:16   ` [tip: x86/fpu] " tip-bot2 for Thomas Gleixner
2021-10-21 22:55 ` [PATCH 05/23] x86/fpu: Add fpu_state_config::legacy_features Chang S. Bae
2021-10-26 16:16   ` [tip: x86/fpu] " tip-bot2 for Thomas Gleixner
2021-10-21 22:55 ` [PATCH 06/23] x86/arch_prctl: Add controls for dynamic XSTATE components Chang S. Bae
2021-10-24 21:17   ` Borislav Petkov
2021-10-26  9:11     ` [PATCH] Documentation/x86: Add documentation for using dynamic XSTATE features Chang S. Bae
2021-10-26 16:16       ` [tip: x86/fpu] " tip-bot2 for Chang S. Bae
2021-10-28 13:10       ` tip-bot2 for Chang S. Bae
2021-10-26 16:16   ` [tip: x86/fpu] x86/arch_prctl: Add controls for dynamic XSTATE components tip-bot2 for Chang S. Bae
2021-10-21 22:55 ` [PATCH 07/23] x86/fpu: Add basic helpers for dynamically enabled features Chang S. Bae
2021-10-26 16:16   ` [tip: x86/fpu] " tip-bot2 for Thomas Gleixner
2021-10-21 22:55 ` [PATCH 08/23] x86/signal: Use fpu::__state_user_size for sigalt stack validation Chang S. Bae
2021-10-26 16:16   ` [tip: x86/fpu] " tip-bot2 for Thomas Gleixner
2021-10-21 22:55 ` [PATCH 09/23] x86/fpu/signal: Prepare for variable sigframe length Chang S. Bae
2021-10-26 16:16   ` [tip: x86/fpu] " tip-bot2 for Chang S. Bae
2021-10-21 22:55 ` [PATCH 10/23] x86/fpu: Prepare fpu_clone() for dynamically enabled features Chang S. Bae
2021-10-26 16:16   ` [tip: x86/fpu] " tip-bot2 for Thomas Gleixner
2021-10-21 22:55 ` [PATCH 11/23] x86/fpu: Reset permission and fpstate on exec() Chang S. Bae
2021-10-26 16:16   ` [tip: x86/fpu] " tip-bot2 for Chang S. Bae
2021-10-21 22:55 ` [PATCH 12/23] x86/cpufeatures: Add eXtended Feature Disabling (XFD) feature bit Chang S. Bae
2021-10-26 16:16   ` [tip: x86/fpu] " tip-bot2 for Chang S. Bae
2021-10-21 22:55 ` [PATCH 13/23] x86/msr-index: Add MSRs for XFD Chang S. Bae
2021-10-26 16:16   ` [tip: x86/fpu] " tip-bot2 for Chang S. Bae
2021-10-21 22:55 ` [PATCH 14/23] x86/fpu: Add XFD state to fpstate Chang S. Bae
2021-10-26 16:16   ` [tip: x86/fpu] " tip-bot2 for Chang S. Bae
2021-10-21 22:55 ` [PATCH 15/23] x86/fpu: Add sanity checks for XFD Chang S. Bae
2021-10-25  8:11   ` Borislav Petkov
2021-10-25 20:15     ` Thomas Gleixner
2021-10-25  8:33   ` Mika Penttilä
2021-10-25 18:13     ` Thomas Gleixner
2021-10-25 19:57       ` Dave Hansen
2021-10-26 16:16   ` [tip: x86/fpu] " tip-bot2 for Thomas Gleixner
2021-10-21 22:55 ` [PATCH 16/23] x86/fpu: Update XFD state where required Chang S. Bae
2021-10-26 16:16   ` [tip: x86/fpu] " tip-bot2 for Chang S. Bae
2021-10-21 22:55 ` [PATCH 17/23] x86/fpu/xstate: Add XFD #NM handler Chang S. Bae
2021-10-26 16:16   ` [tip: x86/fpu] " tip-bot2 for Chang S. Bae
2021-10-21 22:55 ` Chang S. Bae [this message]
2021-10-26 16:16   ` [tip: x86/fpu] x86/fpu/xstate: Add fpstate_realloc()/free() tip-bot2 for Chang S. Bae
2021-10-21 22:55 ` [PATCH 19/23] x86/fpu/xstate: Prepare XSAVE feature table for gaps in state component numbers Chang S. Bae
2021-10-26 16:16   ` [tip: x86/fpu] " tip-bot2 for Chang S. Bae
2021-10-21 22:55 ` [PATCH 20/23] x86/fpu/amx: Define AMX state components and have it used for boot-time checks Chang S. Bae
2021-10-26 16:16   ` [tip: x86/fpu] " tip-bot2 for Chang S. Bae
2021-10-21 22:55 ` [PATCH 21/23] x86/fpu: Calculate the default sizes independently Chang S. Bae
2021-10-26 16:16   ` [tip: x86/fpu] " tip-bot2 for Chang S. Bae
2021-10-21 22:55 ` [PATCH 22/23] x86/fpu: Add XFD handling for dynamic states Chang S. Bae
2021-10-26 16:16   ` [tip: x86/fpu] " tip-bot2 for Chang S. Bae
2021-10-21 22:55 ` [PATCH 23/23] x86/fpu/amx: Enable the AMX feature in 64-bit mode Chang S. Bae
2021-10-26 16:16   ` [tip: x86/fpu] " tip-bot2 for Chang S. Bae

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20211021225527.10184-19-chang.seok.bae@intel.com \
    --to=chang.seok.bae@intel.com \
    --cc=arjan@linux.intel.com \
    --cc=dave.hansen@linux.intel.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=ravi.v.shankar@intel.com \
    --cc=tglx@linutronix.de \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.