From: "Chang S. Bae" <chang.seok.bae@intel.com>
To: tglx@linutronix.de, mingo@kernel.org, bp@suse.de,
luto@kernel.org, x86@kernel.org
Cc: len.brown@intel.com, dave.hansen@intel.com, jing2.liu@intel.com,
ravi.v.shankar@intel.com, linux-kernel@vger.kernel.org,
chang.seok.bae@intel.com
Subject: [PATCH v2 07/22] x86/fpu/xstate: Introduce helpers to manage an xstate area dynamically
Date: Thu, 19 Nov 2020 15:32:42 -0800 [thread overview]
Message-ID: <20201119233257.2939-8-chang.seok.bae@intel.com> (raw)
In-Reply-To: <20201119233257.2939-1-chang.seok.bae@intel.com>
task->fpu has a buffer to keep the extended register states, but it is not
expandable at runtime. Introduce runtime methods and new fpu struct fields
to support the expansion.
fpu->state_mask indicates the saved states per task and fpu->state_ptr
points to the dynamically allocated area.
alloc_xstate_area() uses vmalloc() for its scalability. However, set a
threshold (64KB) to watch out for a potential need for an alternative
mechanism.
Also, introduce a new helper, get_xstate_size(), to calculate the area
size.
No functional change until the kernel supports dynamic user states.
Signed-off-by: Chang S. Bae <chang.seok.bae@intel.com>
Reviewed-by: Len Brown <len.brown@intel.com>
Cc: x86@kernel.org
Cc: linux-kernel@vger.kernel.org
---
Changes from v1:
* Removed unneeded interrupt masking (Andy Lutomirski)
* Added vmalloc() error tracing (Dave Hansen, PeterZ, and Andy Lutomirski)
---
arch/x86/include/asm/fpu/types.h | 29 +++++--
arch/x86/include/asm/fpu/xstate.h | 3 +
arch/x86/include/asm/trace/fpu.h | 5 ++
arch/x86/kernel/fpu/core.c | 3 +
arch/x86/kernel/fpu/xstate.c | 121 ++++++++++++++++++++++++++++++
5 files changed, 156 insertions(+), 5 deletions(-)
diff --git a/arch/x86/include/asm/fpu/types.h b/arch/x86/include/asm/fpu/types.h
index f5a38a5f3ae1..a91b4f4df8c8 100644
--- a/arch/x86/include/asm/fpu/types.h
+++ b/arch/x86/include/asm/fpu/types.h
@@ -336,14 +336,33 @@ struct fpu {
*/
unsigned long avx512_timestamp;
+ /*
+ * @state_mask:
+ *
+ * The state component bitmap. It indicates which xstate components are
+ * saved in either @state or @state_ptr. The bitmap initially describes
+ * @state, and describes @state_ptr once that area is in use.
+ */
+ u64 state_mask;
+
+ /*
+ * @state_ptr:
+ *
+ * Copy of all extended register states, in a dynamically allocated
+ * area, that we save and restore over context switches. While a task
+ * is using extended features, the registers themselves always hold the
+ * most current state. This copy is more recent than @state. If the task
+ * context-switches away, the registers get saved here and represent the xstate.
+ */
+ union fpregs_state *state_ptr;
+
/*
* @state:
*
- * In-memory copy of all FPU registers that we save/restore
- * over context switches. If the task is using the FPU then
- * the registers in the FPU are more recent than this state
- * copy. If the task context-switches away then they get
- * saved here and represent the FPU state.
+ * Copy of some extended register state that we save and restore
+ * over context switches. If a task uses the dynamically allocated
+ * area, @state_ptr, then that area holds a more recent state copy than
+ * this one. This copy follows the same attributes as described for @state_ptr.
*/
union fpregs_state state;
/*
diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h
index 6ce8350672c2..49020c745eb6 100644
--- a/arch/x86/include/asm/fpu/xstate.h
+++ b/arch/x86/include/asm/fpu/xstate.h
@@ -103,6 +103,9 @@ extern void __init update_regset_xstate_info(unsigned int size,
u64 xstate_mask);
void *get_xsave_addr(struct fpu *fpu, int xfeature_nr);
+int alloc_xstate_area(struct fpu *fpu, u64 mask, unsigned int *alloc_size);
+void free_xstate_area(struct fpu *fpu);
+
const void *get_xsave_field_ptr(int xfeature_nr);
int using_compacted_format(void);
int xfeature_size(int xfeature_nr);
diff --git a/arch/x86/include/asm/trace/fpu.h b/arch/x86/include/asm/trace/fpu.h
index 879b77792f94..bf88b3333873 100644
--- a/arch/x86/include/asm/trace/fpu.h
+++ b/arch/x86/include/asm/trace/fpu.h
@@ -89,6 +89,11 @@ DEFINE_EVENT(x86_fpu, x86_fpu_xstate_check_failed,
TP_ARGS(fpu)
);
+DEFINE_EVENT(x86_fpu, x86_fpu_xstate_alloc_failed,
+ TP_PROTO(struct fpu *fpu),
+ TP_ARGS(fpu)
+);
+
#undef TRACE_INCLUDE_PATH
#define TRACE_INCLUDE_PATH asm/trace/
#undef TRACE_INCLUDE_FILE
diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index 875620fdfe61..e25f7866800e 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -235,6 +235,9 @@ int fpu__copy(struct task_struct *dst, struct task_struct *src)
*/
memset(&dst_fpu->state.xsave, 0, fpu_kernel_xstate_default_size);
+ dst_fpu->state_mask = xfeatures_mask_all & ~xfeatures_mask_user_dynamic;
+ dst_fpu->state_ptr = NULL;
+
/*
* If the FPU registers are not current just memcpy() the state.
* Otherwise save current FPU registers directly into the child's FPU
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index 297eaefce589..7ec856668717 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -10,6 +10,7 @@
#include <linux/pkeys.h>
#include <linux/seq_file.h>
#include <linux/proc_fs.h>
+#include <linux/vmalloc.h>
#include <asm/fpu/api.h>
#include <asm/fpu/internal.h>
@@ -19,6 +20,7 @@
#include <asm/tlbflush.h>
#include <asm/cpufeature.h>
+#include <asm/trace/fpu.h>
/*
* Although we spell it out in here, the Processor Trace
@@ -71,6 +73,7 @@ static unsigned int xstate_offsets[XFEATURE_MAX] = { [ 0 ... XFEATURE_MAX - 1] =
static unsigned int xstate_sizes[XFEATURE_MAX] = { [ 0 ... XFEATURE_MAX - 1] = -1};
static unsigned int xstate_comp_offsets[XFEATURE_MAX] = { [ 0 ... XFEATURE_MAX - 1] = -1};
static unsigned int xstate_supervisor_only_offsets[XFEATURE_MAX] = { [ 0 ... XFEATURE_MAX - 1] = -1};
+static bool xstate_aligns[XFEATURE_MAX] = { [ 0 ... XFEATURE_MAX - 1] = false};
/*
* The XSAVE area of kernel can be in standard or compacted format;
@@ -130,6 +133,48 @@ static bool xfeature_is_supervisor(int xfeature_nr)
return ecx & 1;
}
+/*
+ * Return the size in bytes of an xstate area that holds the state components
+ * selected by @mask. An empty mask yields 0.
+ *
+ * A mask equal to the default set (all features minus the dynamic user states)
+ * or to the full feature set returns the precomputed
+ * fpu_kernel_xstate_default_size or fpu_kernel_xstate_max_size, respectively.
+ * Otherwise, in the non-compacted format the size is the recorded offset plus
+ * size of the highest-numbered component in @mask. In the compacted format the
+ * recorded compacted offset is used when @mask equals every enabled feature up
+ * to its highest bit; if not, the component sizes are summed up.
+ *
+ * Available once those arrays for the offset, size, and alignment info are set up,
+ * by setup_xstate_features().
+ *
+ * NOTE(review): @mask is presumably expected to be a subset of the enabled
+ * features; table entries of other features keep their initializer values.
+ */
+static unsigned int get_xstate_size(u64 mask)
+{
+ unsigned int size;
+ u64 xmask;
+ int i, nr;
+
+ if (!mask)
+ return 0;
+ else if (mask == (xfeatures_mask_all & ~xfeatures_mask_user_dynamic))
+ return fpu_kernel_xstate_default_size;
+ else if (mask == xfeatures_mask_all)
+ return fpu_kernel_xstate_max_size;
+
+ nr = fls64(mask) - 1; /* number of the highest feature bit set in @mask */
+
+ if (!using_compacted_format())
+ return xstate_offsets[nr] + xstate_sizes[nr];
+
+ xmask = BIT_ULL(nr + 1) - 1; /* all bits from 0 up to and including nr */
+
+ if (mask == (xmask & xfeatures_mask_all))
+ return xstate_comp_offsets[nr] + xstate_sizes[nr];
+
+ /*
+ * Calculate the size by summing up each state together, since no known
+ * size found with the xstate area format out of the given mask.
+ *
+ * Start from FXSAVE_SIZE + XSAVE_HDR_SIZE, then add the size of every
+ * component selected in @mask, first rounding the running size up to a
+ * 64-byte boundary for components flagged in xstate_aligns[].
+ */
+ for (size = FXSAVE_SIZE + XSAVE_HDR_SIZE, i = FIRST_EXTENDED_XFEATURE; i <= nr; i++) {
+ if (!(mask & BIT_ULL(i)))
+ continue;
+
+ if (xstate_aligns[i])
+ size = ALIGN(size, 64);
+ size += xstate_sizes[i];
+ }
+ return size;
+}
+
/*
* When executing XSAVEOPT (or other optimized XSAVE instructions), if
* a processor implementation detects that an FPU state component is still
@@ -270,10 +315,12 @@ static void __init setup_xstate_features(void)
xstate_offsets[XFEATURE_FP] = 0;
xstate_sizes[XFEATURE_FP] = offsetof(struct fxregs_state,
xmm_space);
+ xstate_aligns[XFEATURE_FP] = true;
xstate_offsets[XFEATURE_SSE] = xstate_sizes[XFEATURE_FP];
xstate_sizes[XFEATURE_SSE] = sizeof_field(struct fxregs_state,
xmm_space);
+ xstate_aligns[XFEATURE_SSE] = true;
for (i = FIRST_EXTENDED_XFEATURE; i < XFEATURE_MAX; i++) {
if (!xfeature_enabled(i))
@@ -291,6 +338,7 @@ static void __init setup_xstate_features(void)
continue;
xstate_offsets[i] = ebx;
+ xstate_aligns[i] = (ecx & 2) ? true : false;
/*
* In our xstate size checks, we assume that the highest-numbered
@@ -757,6 +805,9 @@ static bool is_supported_xstate_size(unsigned int test_xstate_size)
return false;
}
+/* The watched threshold size of dynamically allocated xstate area */
+#define XSTATE_AREA_MAX_BYTES (64 * 1024)
+
static int __init init_xstate_size(void)
{
/* Recompute the context size for enabled features: */
@@ -781,6 +832,14 @@ static int __init init_xstate_size(void)
if (!is_supported_xstate_size(fpu_kernel_xstate_default_size))
return -EINVAL;
+ /*
+ * If the maximum xstate size goes beyond the threshold, it is better to consider
+ * switching to a more scalable mechanism than the current one.
+ */
+ if (fpu_kernel_xstate_max_size > XSTATE_AREA_MAX_BYTES)
+ pr_warn("x86/fpu: xstate buffer too large (%u > %u)\n",
+ fpu_kernel_xstate_max_size, XSTATE_AREA_MAX_BYTES);
+
/*
* User space is always in standard format.
*/
@@ -871,6 +930,12 @@ void __init fpu__init_system_xstate(void)
if (err)
goto out_disable;
+ /*
+ * At this point, it passed the size sanity check. Have the init_task take
+ * the feature mask.
+ */
+ current->thread.fpu.state_mask = (xfeatures_mask_all & ~xfeatures_mask_user_dynamic);
+
/*
* Update info used for ptrace frames; use standard-format size and no
* supervisor xstates:
@@ -1090,6 +1155,62 @@ static inline bool xfeatures_mxcsr_quirk(u64 xfeatures)
return true;
}
+/*
+ * Release the dynamically allocated xstate area of @fpu, if any.
+ *
+ * The pointer is cleared afterwards so that a later alloc_xstate_area(), which
+ * vfree()s the previous fpu->state_ptr, or a repeated call to this function
+ * cannot act on a stale address. vfree(NULL) is a no-op, so no NULL check is
+ * needed here.
+ */
+void free_xstate_area(struct fpu *fpu)
+{
+	vfree(fpu->state_ptr);
+	fpu->state_ptr = NULL;
+}
+
+/*
+ * Allocate a new xstate area with a calculated size, based on the given bit value.
+ *
+ * @fpu:        FPU context whose dynamically allocated area is (re)allocated
+ * @mask:       state components to add on top of fpu->state_mask
+ * @alloc_size: optional; written with the new area size only when a new area
+ *              has actually been allocated
+ *
+ * Returns 0 when a new area was installed, when the size for the combined mask
+ * does not exceed the size for the current mask, or when the computed size
+ * fails the sanity check against fpu_kernel_xstate_max_size (a warning is
+ * issued in that case). Returns -ENOMEM when the allocation fails; @fpu is left
+ * untouched then.
+ *
+ * The previous area, if any, is freed; its contents are not copied over.
+ *
+ * No mechanism implemented yet to shrink or reclaim the xstate area on the fly,
+ * the need of which is subject to the real usage.
+ */
+int alloc_xstate_area(struct fpu *fpu, u64 mask, unsigned int *alloc_size)
+{
+	union fpregs_state *state_ptr;
+	unsigned int oldsz, newsz;
+	u64 state_mask;
+
+	state_mask = fpu->state_mask | mask;
+
+	oldsz = get_xstate_size(fpu->state_mask);
+	newsz = get_xstate_size(state_mask);
+
+	/* Nothing to do if the combined mask does not need a larger area. */
+	if (oldsz >= newsz)
+		return 0;
+
+	if (newsz > fpu_kernel_xstate_max_size) {
+		pr_warn_once("x86/fpu: state buffer too large (%u > %u bytes)\n",
+			     newsz, fpu_kernel_xstate_max_size);
+		XSTATE_WARN_ON(1);
+		return 0;
+	}
+
+	/*
+	 * We need 64B aligned pointer, but vmalloc() returns a page-aligned address.
+	 * vzalloc() additionally hands back zeroed memory, so no memset() is needed.
+	 */
+	state_ptr = vzalloc(newsz);
+	if (!state_ptr) {
+		trace_x86_fpu_xstate_alloc_failed(fpu);
+		return -ENOMEM;
+	}
+
+	if (using_compacted_format())
+		fpstate_init_xstate(&state_ptr->xsave, state_mask);
+
+	/*
+	 * As long as the register state is intact, save the xstate in the new area at
+	 * the next context copy/switch or potentially ptrace-driven xstate writing
+	 */
+
+	/* Drop the old area (vfree(NULL) is a no-op) and publish the new one. */
+	vfree(fpu->state_ptr);
+	fpu->state_ptr = state_ptr;
+	fpu->state_mask = state_mask;
+	if (alloc_size)
+		*alloc_size = newsz;
+	return 0;
+}
+
static void fill_gap(struct membuf *to, unsigned *last, unsigned offset)
{
if (*last >= offset)
--
2.17.1
next prev parent reply other threads:[~2020-11-19 23:38 UTC|newest]
Thread overview: 38+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-11-19 23:32 [PATCH v2 00/22] x86: Support Intel Advanced Matrix Extensions Chang S. Bae
2020-11-19 23:32 ` [PATCH v2 01/22] x86/fpu/xstate: Modify area init helper prototypes to access all the possible areas Chang S. Bae
2020-12-07 17:12 ` Borislav Petkov
2020-12-07 23:03 ` Bae, Chang Seok
2020-12-07 23:10 ` Borislav Petkov
2020-11-19 23:32 ` [PATCH v2 02/22] x86/fpu/xstate: Modify xstate copy " Chang S. Bae
2020-11-19 23:32 ` [PATCH v2 03/22] x86/fpu/xstate: Modify address finder " Chang S. Bae
2020-11-19 23:32 ` [PATCH v2 04/22] x86/fpu/xstate: Modify save and restore helper " Chang S. Bae
2020-11-19 23:32 ` [PATCH v2 05/22] x86/fpu/xstate: Introduce a new variable for dynamic user states Chang S. Bae
2020-11-19 23:32 ` [PATCH v2 06/22] x86/fpu/xstate: Outline dynamic xstate area size in the task context Chang S. Bae
2020-11-19 23:32 ` Chang S. Bae [this message]
2020-11-19 23:32 ` [PATCH v2 08/22] x86/fpu/xstate: Define the scope of the initial xstate data Chang S. Bae
2020-11-19 23:32 ` [PATCH v2 09/22] x86/fpu/xstate: Introduce wrapper functions for organizing xstate area access Chang S. Bae
2020-11-19 23:32 ` [PATCH v2 10/22] x86/fpu/xstate: Update xstate save function for supporting dynamic user xstate Chang S. Bae
2020-11-19 23:32 ` [PATCH v2 11/22] x86/fpu/xstate: Update xstate area address finder " Chang S. Bae
2020-11-19 23:32 ` [PATCH v2 12/22] x86/fpu/xstate: Update xstate context copy function for supporting dynamic area Chang S. Bae
2020-11-19 23:32 ` [PATCH v2 13/22] x86/fpu/xstate: Expand dynamic user state area on first use Chang S. Bae
2020-11-19 23:32 ` [PATCH v2 14/22] x86/fpu/xstate: Inherit dynamic user state when used in the parent Chang S. Bae
2020-11-20 5:08 ` Andy Lutomirski
2020-11-24 18:35 ` Len Brown
2020-11-19 23:32 ` [PATCH v2 15/22] x86/fpu/xstate: Support ptracer-induced xstate area expansion Chang S. Bae
2020-11-20 5:07 ` Andy Lutomirski
2020-11-24 18:22 ` Bae, Chang Seok
2020-11-24 18:33 ` Andy Lutomirski
2020-12-02 4:00 ` Bae, Chang Seok
2020-12-03 5:00 ` Andy Lutomirski
2020-12-08 19:12 ` Bae, Chang Seok
2020-11-19 23:32 ` [PATCH v2 16/22] x86/fpu/xstate: Extend the table for mapping xstate components with features Chang S. Bae
2020-11-19 23:32 ` [PATCH v2 17/22] x86/cpufeatures/amx: Enumerate Advanced Matrix Extension (AMX) feature bits Chang S. Bae
2020-11-19 23:32 ` [PATCH v2 18/22] x86/fpu/amx: Define AMX state components and have it used for boot-time checks Chang S. Bae
2020-11-19 23:32 ` [PATCH v2 19/22] x86/fpu/amx: Enable the AMX feature in 64-bit mode Chang S. Bae
2020-11-19 23:32 ` [PATCH v2 20/22] selftest/x86/amx: Include test cases for the AMX state management Chang S. Bae
2020-11-19 23:32 ` [PATCH v2 21/22] x86/fpu/xstate: Support dynamic user state in the signal handling path Chang S. Bae
2020-11-19 23:32 ` [PATCH v2 22/22] x86/fpu/xstate: Introduce boot-parameters for control some state component support Chang S. Bae
2020-11-20 5:02 ` Andy Lutomirski
2020-11-24 18:50 ` Len Brown
2020-11-24 23:41 ` Andy Lutomirski
2020-12-01 18:00 ` Len Brown
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20201119233257.2939-8-chang.seok.bae@intel.com \
--to=chang.seok.bae@intel.com \
--cc=bp@suse.de \
--cc=dave.hansen@intel.com \
--cc=jing2.liu@intel.com \
--cc=len.brown@intel.com \
--cc=linux-kernel@vger.kernel.org \
--cc=luto@kernel.org \
--cc=mingo@kernel.org \
--cc=ravi.v.shankar@intel.com \
--cc=tglx@linutronix.de \
--cc=x86@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).