kvm.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v10 02/28] x86/fpu/xstate: Modify the initialization helper to handle both static and dynamic buffers
       [not found] <20210825155413.19673-1-chang.seok.bae@intel.com>
@ 2021-08-25 15:53 ` Chang S. Bae
  2021-10-01 12:45   ` Thomas Gleixner
  2021-08-25 15:53 ` [PATCH v10 04/28] x86/fpu/xstate: Modify address finders " Chang S. Bae
                   ` (3 subsequent siblings)
  4 siblings, 1 reply; 18+ messages in thread
From: Chang S. Bae @ 2021-08-25 15:53 UTC (permalink / raw)
  To: bp, luto, tglx, mingo, x86
  Cc: len.brown, lenb, dave.hansen, thiago.macieira, jing2.liu,
	ravi.v.shankar, linux-kernel, chang.seok.bae, kvm

Have the function initializing the XSTATE buffer take a struct fpu *
pointer in preparation for dynamic state buffer support.

init_fpstate is a special case, which is indicated by a null pointer
parameter to fpstate_init().

Also, fpstate_init_xstate() now accepts the state component bitmap to
customize the compacted format.

No functional change.

Signed-off-by: Chang S. Bae <chang.seok.bae@intel.com>
Reviewed-by: Len Brown <len.brown@intel.com>
Cc: x86@kernel.org
Cc: linux-kernel@vger.kernel.org
Cc: kvm@vger.kernel.org
---
Changes from v5:
* Moved fpstate_init_xstate() back to the header (again).
* Massaged the changelog.

Changes from v4:
* Added a proper function description. (Borislav Petkov)
* Added the likely() statement as a null pointer is a special case.

Changes from v3:
* Updated the changelog. (Borislav Petkov)
* Updated the function comment to use kernel-doc style. (Borislav Petkov)

Changes from v2:
* Updated the changelog with task->fpu removed. (Borislav Petkov)
---
 arch/x86/include/asm/fpu/internal.h | 11 ++++++++++-
 arch/x86/kernel/fpu/core.c          | 28 +++++++++++++++++-----------
 arch/x86/kernel/fpu/init.c          |  2 +-
 arch/x86/kernel/fpu/xstate.c        |  3 +--
 arch/x86/kvm/x86.c                  |  2 +-
 5 files changed, 30 insertions(+), 16 deletions(-)

diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h
index 5a18694a89b2..c7a64e2806a9 100644
--- a/arch/x86/include/asm/fpu/internal.h
+++ b/arch/x86/include/asm/fpu/internal.h
@@ -80,7 +80,7 @@ static __always_inline __pure bool use_fxsr(void)
 
 extern union fpregs_state init_fpstate;
 
-extern void fpstate_init(union fpregs_state *state);
+extern void fpstate_init(struct fpu *fpu);
 #ifdef CONFIG_MATH_EMULATION
 extern void fpstate_init_soft(struct swregs_state *soft);
 #else
@@ -88,6 +88,15 @@ static inline void fpstate_init_soft(struct swregs_state *soft) {}
 #endif
 extern void save_fpregs_to_fpstate(struct fpu *fpu);
 
+static inline void fpstate_init_xstate(struct xregs_state *xsave, u64 mask)
+{
+	/*
+	 * XRSTORS requires these bits set in xcomp_bv, or it will
+	 * trigger #GP:
+	 */
+	xsave->header.xcomp_bv = XCOMP_BV_COMPACTED_FORMAT | mask;
+}
+
 /* Returns 0 or the negated trap number, which results in -EFAULT for #PF */
 #define user_insn(insn, output, input...)				\
 ({									\
diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index 7ada7bd03a32..c0098f8422de 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -203,15 +203,6 @@ void fpu_sync_fpstate(struct fpu *fpu)
 	fpregs_unlock();
 }
 
-static inline void fpstate_init_xstate(struct xregs_state *xsave)
-{
-	/*
-	 * XRSTORS requires these bits set in xcomp_bv, or it will
-	 * trigger #GP:
-	 */
-	xsave->header.xcomp_bv = XCOMP_BV_COMPACTED_FORMAT | xfeatures_mask_all;
-}
-
 static inline void fpstate_init_fxstate(struct fxregs_state *fx)
 {
 	fx->cwd = 0x37f;
@@ -229,8 +220,23 @@ static inline void fpstate_init_fstate(struct fregs_state *fp)
 	fp->fos = 0xffff0000u;
 }
 
-void fpstate_init(union fpregs_state *state)
+/**
+ *
+ * fpstate_init - initialize the xstate buffer
+ *
+ * If @fpu is NULL, initialize init_fpstate.
+ *
+ * @fpu:	A struct fpu * pointer
+ */
+void fpstate_init(struct fpu *fpu)
 {
+	union fpregs_state *state;
+
+	if (likely(fpu))
+		state = &fpu->state;
+	else
+		state = &init_fpstate;
+
 	if (!static_cpu_has(X86_FEATURE_FPU)) {
 		fpstate_init_soft(&state->soft);
 		return;
@@ -239,7 +245,7 @@ void fpstate_init(union fpregs_state *state)
 	memset(state, 0, fpu_kernel_xstate_size);
 
 	if (static_cpu_has(X86_FEATURE_XSAVES))
-		fpstate_init_xstate(&state->xsave);
+		fpstate_init_xstate(&state->xsave, xfeatures_mask_all);
 	if (static_cpu_has(X86_FEATURE_FXSR))
 		fpstate_init_fxstate(&state->fxsave);
 	else
diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c
index 64e29927cc32..e14c72bc8706 100644
--- a/arch/x86/kernel/fpu/init.c
+++ b/arch/x86/kernel/fpu/init.c
@@ -124,7 +124,7 @@ static void __init fpu__init_system_generic(void)
 	 * Set up the legacy init FPU context. (xstate init might overwrite this
 	 * with a more modern format, if the CPU supports it.)
 	 */
-	fpstate_init(&init_fpstate);
+	fpstate_init(NULL);
 
 	fpu__init_system_mxcsr();
 }
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index fc1d529547e6..0fed7fbcf2e8 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -395,8 +395,7 @@ static void __init setup_init_fpu_buf(void)
 	print_xstate_features();
 
 	if (boot_cpu_has(X86_FEATURE_XSAVES))
-		init_fpstate.xsave.header.xcomp_bv = XCOMP_BV_COMPACTED_FORMAT |
-						     xfeatures_mask_all;
+		fpstate_init_xstate(&init_fpstate.xsave, xfeatures_mask_all);
 
 	/*
 	 * Init all the features state with header.xfeatures being 0x0
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index e5d5c5ed7dd4..ce529ab233d7 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -10608,7 +10608,7 @@ static void fx_init(struct kvm_vcpu *vcpu)
 	if (!vcpu->arch.guest_fpu)
 		return;
 
-	fpstate_init(&vcpu->arch.guest_fpu->state);
+	fpstate_init(vcpu->arch.guest_fpu);
 	if (boot_cpu_has(X86_FEATURE_XSAVES))
 		vcpu->arch.guest_fpu->state.xsave.header.xcomp_bv =
 			host_xcr0 | XSTATE_COMPACTION_ENABLED;
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH v10 04/28] x86/fpu/xstate: Modify address finders to handle both static and dynamic buffers
       [not found] <20210825155413.19673-1-chang.seok.bae@intel.com>
  2021-08-25 15:53 ` [PATCH v10 02/28] x86/fpu/xstate: Modify the initialization helper to handle both static and dynamic buffers Chang S. Bae
@ 2021-08-25 15:53 ` Chang S. Bae
  2021-10-01 13:15   ` Thomas Gleixner
  2021-08-25 15:53 ` [PATCH v10 06/28] x86/fpu/xstate: Add new variables to indicate dynamic XSTATE buffer size Chang S. Bae
                   ` (2 subsequent siblings)
  4 siblings, 1 reply; 18+ messages in thread
From: Chang S. Bae @ 2021-08-25 15:53 UTC (permalink / raw)
  To: bp, luto, tglx, mingo, x86
  Cc: len.brown, lenb, dave.hansen, thiago.macieira, jing2.liu,
	ravi.v.shankar, linux-kernel, chang.seok.bae, kvm

Have all the functions finding XSTATE address take a struct fpu * pointer
in preparation for dynamic state buffer support.

init_fpstate is a special case, which is indicated by a null pointer
parameter to get_xsave_addr() and __raw_xsave_addr().

No functional change.

Signed-off-by: Chang S. Bae <chang.seok.bae@intel.com>
Reviewed-by: Len Brown <len.brown@intel.com>
Cc: x86@kernel.org
Cc: linux-kernel@vger.kernel.org
Cc: kvm@vger.kernel.org
---
Changes from v5:
* Adjusted some call sites for the new base.

Changes from v3:
* Updated the changelog. (Borislav Petkov)
* Updated the function comment to use kernel-doc style. (Borislav Petkov)

Changes from v2:
* Updated the changelog with task->fpu removed. (Borislav Petkov)

Changes from v1:
* Rebased on the upstream kernel (5.10)
---
 arch/x86/include/asm/fpu/xstate.h |  2 +-
 arch/x86/kernel/fpu/xstate.c      | 42 ++++++++++++++++++++++++-------
 arch/x86/kvm/x86.c                | 10 +++-----
 3 files changed, 38 insertions(+), 16 deletions(-)

diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h
index ede166e9d3f2..2451bccc6cac 100644
--- a/arch/x86/include/asm/fpu/xstate.h
+++ b/arch/x86/include/asm/fpu/xstate.h
@@ -134,7 +134,7 @@ extern u64 xstate_fx_sw_bytes[USER_XSTATE_FX_SW_WORDS];
 extern void __init update_regset_xstate_info(unsigned int size,
 					     u64 xstate_mask);
 
-void *get_xsave_addr(struct xregs_state *xsave, int xfeature_nr);
+void *get_xsave_addr(struct fpu *fpu, int xfeature_nr);
 int xfeature_size(int xfeature_nr);
 int copy_uabi_from_kernel_to_xstate(struct fpu *fpu, const void *kbuf);
 int copy_sigframe_from_user_to_xstate(struct fpu *fpu, const void __user *ubuf);
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index df39085e9d05..0a59df0c48e7 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -841,19 +841,34 @@ void fpu__resume_cpu(void)
 	}
 }
 
-/*
+/**
+ * __raw_xsave_addr - Find the address where the feature state is saved.
+ *
  * Given an xstate feature nr, calculate where in the xsave
  * buffer the state is.  Callers should ensure that the buffer
  * is valid.
+ *
+ * If @fpu is NULL, use init_fpstate.
+ *
+ * @fpu:	A struct fpu * pointer
+ *
+ * Return:	An address of the feature state in the buffer
  */
-static void *__raw_xsave_addr(struct xregs_state *xsave, int xfeature_nr)
+static void *__raw_xsave_addr(struct fpu *fpu, int xfeature_nr)
 {
+	void *xsave;
+
 	if (!xfeature_enabled(xfeature_nr)) {
 		WARN_ON_FPU(1);
 		return NULL;
 	}
 
-	return (void *)xsave + xstate_comp_offsets[xfeature_nr];
+	if (fpu)
+		xsave = &fpu->state.xsave;
+	else
+		xsave = &init_fpstate.xsave;
+
+	return xsave + xstate_comp_offsets[xfeature_nr];
 }
 /*
  * Given the xsave area and a state inside, this function returns the
@@ -866,15 +881,18 @@ static void *__raw_xsave_addr(struct xregs_state *xsave, int xfeature_nr)
  * this will return NULL.
  *
  * Inputs:
- *	xstate: the thread's storage area for all FPU data
+ *	fpu: the thread's FPU data to reference xstate buffer(s).
+ *	     (A null pointer parameter indicates init_fpstate.)
  *	xfeature_nr: state which is defined in xsave.h (e.g. XFEATURE_FP,
  *	XFEATURE_SSE, etc...)
  * Output:
  *	address of the state in the xsave area, or NULL if the
  *	field is not present in the xsave buffer.
  */
-void *get_xsave_addr(struct xregs_state *xsave, int xfeature_nr)
+void *get_xsave_addr(struct fpu *fpu, int xfeature_nr)
 {
+	struct xregs_state *xsave;
+
 	/*
 	 * Do we even *have* xsave state?
 	 */
@@ -887,6 +905,12 @@ void *get_xsave_addr(struct xregs_state *xsave, int xfeature_nr)
 	 */
 	WARN_ONCE(!(xfeatures_mask_all & BIT_ULL(xfeature_nr)),
 		  "get of unsupported state");
+
+	if (fpu)
+		xsave = &fpu->state.xsave;
+	else
+		xsave = &init_fpstate.xsave;
+
 	/*
 	 * This assumes the last 'xsave*' instruction to
 	 * have requested that 'xfeature_nr' be saved.
@@ -901,7 +925,7 @@ void *get_xsave_addr(struct xregs_state *xsave, int xfeature_nr)
 	if (!(xsave->header.xfeatures & BIT_ULL(xfeature_nr)))
 		return NULL;
 
-	return __raw_xsave_addr(xsave, xfeature_nr);
+	return __raw_xsave_addr(fpu, xfeature_nr);
 }
 EXPORT_SYMBOL_GPL(get_xsave_addr);
 
@@ -1061,8 +1085,8 @@ void copy_xstate_to_uabi_buf(struct membuf to, struct task_struct *tsk,
 			membuf_write(&to, &pkru, sizeof(pkru));
 		} else {
 			copy_feature(header.xfeatures & BIT_ULL(i), &to,
-				     __raw_xsave_addr(xsave, i),
-				     __raw_xsave_addr(xinit, i),
+				     __raw_xsave_addr(&tsk->thread.fpu, i),
+				     __raw_xsave_addr(NULL, i),
 				     xstate_sizes[i]);
 		}
 		/*
@@ -1129,7 +1153,7 @@ static int copy_uabi_to_xstate(struct fpu *fpu, const void *kbuf,
 		u64 mask = ((u64)1 << i);
 
 		if (hdr.xfeatures & mask) {
-			void *dst = __raw_xsave_addr(xsave, i);
+			void *dst = __raw_xsave_addr(fpu, i);
 
 			if (!dst)
 				continue;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index ce529ab233d7..9c7c59317e5e 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4726,7 +4726,7 @@ static void fill_xsave(u8 *dest, struct kvm_vcpu *vcpu)
 			memcpy(dest + offset, &vcpu->arch.pkru,
 			       sizeof(vcpu->arch.pkru));
 		} else {
-			src = get_xsave_addr(xsave, xfeature_nr);
+			src = get_xsave_addr(vcpu->arch.guest_fpu, xfeature_nr);
 			if (src)
 				memcpy(dest + offset, src, size);
 		}
@@ -4769,7 +4769,7 @@ static void load_xsave(struct kvm_vcpu *vcpu, u8 *src)
 			memcpy(&vcpu->arch.pkru, src + offset,
 			       sizeof(vcpu->arch.pkru));
 		} else {
-			void *dest = get_xsave_addr(xsave, xfeature_nr);
+			void *dest = get_xsave_addr(vcpu->arch.guest_fpu, xfeature_nr);
 
 			if (dest)
 				memcpy(dest, src + offset, size);
@@ -10840,12 +10840,10 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
 		 */
 		if (init_event)
 			kvm_put_guest_fpu(vcpu);
-		mpx_state_buffer = get_xsave_addr(&vcpu->arch.guest_fpu->state.xsave,
-					XFEATURE_BNDREGS);
+		mpx_state_buffer = get_xsave_addr(vcpu->arch.guest_fpu, XFEATURE_BNDREGS);
 		if (mpx_state_buffer)
 			memset(mpx_state_buffer, 0, sizeof(struct mpx_bndreg_state));
-		mpx_state_buffer = get_xsave_addr(&vcpu->arch.guest_fpu->state.xsave,
-					XFEATURE_BNDCSR);
+		mpx_state_buffer = get_xsave_addr(vcpu->arch.guest_fpu, XFEATURE_BNDCSR);
 		if (mpx_state_buffer)
 			memset(mpx_state_buffer, 0, sizeof(struct mpx_bndcsr));
 		if (init_event)
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH v10 06/28] x86/fpu/xstate: Add new variables to indicate dynamic XSTATE buffer size
       [not found] <20210825155413.19673-1-chang.seok.bae@intel.com>
  2021-08-25 15:53 ` [PATCH v10 02/28] x86/fpu/xstate: Modify the initialization helper to handle both static and dynamic buffers Chang S. Bae
  2021-08-25 15:53 ` [PATCH v10 04/28] x86/fpu/xstate: Modify address finders " Chang S. Bae
@ 2021-08-25 15:53 ` Chang S. Bae
  2021-10-01 13:32   ` Thomas Gleixner
  2021-08-25 15:53 ` [PATCH v10 08/28] x86/fpu/xstate: Convert the struct fpu 'state' field to a pointer Chang S. Bae
  2021-08-25 15:53 ` [PATCH v10 10/28] x86/fpu/xstate: Update the XSTATE save function to support dynamic states Chang S. Bae
  4 siblings, 1 reply; 18+ messages in thread
From: Chang S. Bae @ 2021-08-25 15:53 UTC (permalink / raw)
  To: bp, luto, tglx, mingo, x86
  Cc: len.brown, lenb, dave.hansen, thiago.macieira, jing2.liu,
	ravi.v.shankar, linux-kernel, chang.seok.bae, kvm

The XSTATE per-task buffer is in preparation to be dynamic for user states.
Introduce new size variables to indicate the minimum and maximum size of
the buffer. The value is determined at boot-time.

Instead of adding them as newly exported, introduce helper functions to
access them as well as the user buffer size.

No functional change. Those sizes have no difference, as the buffer is not
dynamic yet.

Signed-off-by: Chang S. Bae <chang.seok.bae@intel.com>
Reviewed-by: Len Brown <len.brown@intel.com>
Cc: x86@kernel.org
Cc: linux-kernel@vger.kernel.org
Cc: kvm@vger.kernel.org
---
Changes from v9:
* Remove access helpers. (Borislav Petkov)

Changes from v6:
* Massage the code comment.

Changes from v5:
* Made the new variables __ro_after_init for the new base code.
* Fixed the init_fpstate size for memset().

Changes from v3:
* Added as a new patch to add the variables along with new helpers.
  (Borislav Petkov)
---
 arch/x86/include/asm/fpu/xstate.h | 17 +++++++++++++
 arch/x86/include/asm/processor.h  | 10 +-------
 arch/x86/kernel/fpu/core.c        | 26 +++++++++++++------
 arch/x86/kernel/fpu/init.c        | 26 ++++++++-----------
 arch/x86/kernel/fpu/regset.c      |  2 +-
 arch/x86/kernel/fpu/signal.c      | 25 ++++++++++--------
 arch/x86/kernel/fpu/xstate.c      | 42 ++++++++++++++++++-------------
 arch/x86/kernel/process.c         |  7 ++++++
 arch/x86/kvm/x86.c                |  5 +++-
 9 files changed, 98 insertions(+), 62 deletions(-)

diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h
index bc4cba62906b..c4a0914b7717 100644
--- a/arch/x86/include/asm/fpu/xstate.h
+++ b/arch/x86/include/asm/fpu/xstate.h
@@ -136,6 +136,23 @@ extern u64 xstate_fx_sw_bytes[USER_XSTATE_FX_SW_WORDS];
 extern void __init update_regset_xstate_info(unsigned int size,
 					     u64 xstate_mask);
 
+/**
+ * struct fpu_xstate_buffer_config - xstate buffer configuration
+ * @max_size:			The CPUID-enumerated all-feature "maximum" size
+ *				for xstate per-task buffer.
+ * @min_size:			The size to fit into the statically-allocated
+ *				buffer. With dynamic states, this buffer no longer
+ *				contains all the enabled state components.
+ * @user_size:			The size of user-space buffer for signal and
+ *				ptrace frames, in the non-compacted format.
+ */
+struct fpu_xstate_buffer_config {
+	unsigned int min_size, max_size;
+	unsigned int user_size;
+};
+
+extern struct fpu_xstate_buffer_config fpu_buf_cfg;
+
 void *get_xsave_addr(struct fpu *fpu, int xfeature_nr);
 int xfeature_size(int xfeature_nr);
 int copy_uabi_from_kernel_to_xstate(struct fpu *fpu, const void *kbuf);
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index f3020c54e2cb..505f596d1046 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -459,9 +459,6 @@ DECLARE_PER_CPU(struct irq_stack *, hardirq_stack_ptr);
 DECLARE_PER_CPU(struct irq_stack *, softirq_stack_ptr);
 #endif	/* !X86_64 */
 
-extern unsigned int fpu_kernel_xstate_size;
-extern unsigned int fpu_user_xstate_size;
-
 struct perf_event;
 
 struct thread_struct {
@@ -536,12 +533,7 @@ struct thread_struct {
 };
 
 /* Whitelist the FPU state from the task_struct for hardened usercopy. */
-static inline void arch_thread_struct_whitelist(unsigned long *offset,
-						unsigned long *size)
-{
-	*offset = offsetof(struct thread_struct, fpu.state);
-	*size = fpu_kernel_xstate_size;
-}
+extern void arch_thread_struct_whitelist(unsigned long *offset, unsigned long *size);
 
 static inline void
 native_load_sp0(unsigned long sp0)
diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index c0098f8422de..62cc993a890a 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -231,21 +231,30 @@ static inline void fpstate_init_fstate(struct fregs_state *fp)
 void fpstate_init(struct fpu *fpu)
 {
 	union fpregs_state *state;
+	unsigned int size;
+	u64 mask;
 
-	if (likely(fpu))
+	if (likely(fpu)) {
 		state = &fpu->state;
-	else
+		/* The dynamic user states are not prepared yet. */
+		mask = xfeatures_mask_all & ~xfeatures_mask_user_dynamic;
+		size = fpu_buf_cfg.min_size;
+	} else {
 		state = &init_fpstate;
+		mask = xfeatures_mask_all;
+		size = sizeof(init_fpstate);
+	}
 
 	if (!static_cpu_has(X86_FEATURE_FPU)) {
 		fpstate_init_soft(&state->soft);
 		return;
 	}
 
-	memset(state, 0, fpu_kernel_xstate_size);
+	memset(state, 0, size);
 
 	if (static_cpu_has(X86_FEATURE_XSAVES))
-		fpstate_init_xstate(&state->xsave, xfeatures_mask_all);
+		fpstate_init_xstate(&state->xsave, mask);
+
 	if (static_cpu_has(X86_FEATURE_FXSR))
 		fpstate_init_fxstate(&state->fxsave);
 	else
@@ -268,8 +277,11 @@ int fpu_clone(struct task_struct *dst)
 	/*
 	 * Don't let 'init optimized' areas of the XSAVE area
 	 * leak into the child task:
+	 *
+	 * The child does not inherit the dynamic states. So,
+	 * the xstate buffer has the minimum size.
 	 */
-	memset(&dst_fpu->state.xsave, 0, fpu_kernel_xstate_size);
+	memset(&dst_fpu->state.xsave, 0, fpu_buf_cfg.min_size);
 
 	/*
 	 * If the FPU registers are not owned by current just memcpy() the
@@ -278,7 +290,7 @@ int fpu_clone(struct task_struct *dst)
 	 */
 	fpregs_lock();
 	if (test_thread_flag(TIF_NEED_FPU_LOAD))
-		memcpy(&dst_fpu->state, &src_fpu->state, fpu_kernel_xstate_size);
+		memcpy(&dst_fpu->state, &src_fpu->state, fpu_buf_cfg.min_size);
 
 	else
 		save_fpregs_to_fpstate(dst_fpu);
@@ -337,7 +349,7 @@ static inline void restore_fpregs_from_init_fpstate(u64 features_mask)
 static inline unsigned int init_fpstate_copy_size(void)
 {
 	if (!use_xsave())
-		return fpu_kernel_xstate_size;
+		return fpu_buf_cfg.min_size;
 
 	/* XSAVE(S) just needs the legacy and the xstate header part */
 	return sizeof(init_fpstate.xsave);
diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c
index e14c72bc8706..da7341f95008 100644
--- a/arch/x86/kernel/fpu/init.c
+++ b/arch/x86/kernel/fpu/init.c
@@ -129,15 +129,6 @@ static void __init fpu__init_system_generic(void)
 	fpu__init_system_mxcsr();
 }
 
-/*
- * Size of the FPU context state. All tasks in the system use the
- * same context size, regardless of what portion they use.
- * This is inherent to the XSAVE architecture which puts all state
- * components into a single, continuous memory block:
- */
-unsigned int fpu_kernel_xstate_size __ro_after_init;
-EXPORT_SYMBOL_GPL(fpu_kernel_xstate_size);
-
 /* Get alignment of the TYPE. */
 #define TYPE_ALIGN(TYPE) offsetof(struct { char x; TYPE test; }, test)
 
@@ -167,8 +158,10 @@ static void __init fpu__init_task_struct_size(void)
 	/*
 	 * Add back the dynamically-calculated register state
 	 * size.
+	 *
+	 * Use the minimum size as embedded in task_struct.
 	 */
-	task_size += fpu_kernel_xstate_size;
+	task_size += fpu_buf_cfg.min_size;
 
 	/*
 	 * We dynamically size 'struct fpu', so we require that
@@ -193,6 +186,7 @@ static void __init fpu__init_task_struct_size(void)
 static void __init fpu__init_system_xstate_size_legacy(void)
 {
 	static int on_boot_cpu __initdata = 1;
+	unsigned int xstate_size;
 
 	WARN_ON_FPU(!on_boot_cpu);
 	on_boot_cpu = 0;
@@ -203,17 +197,17 @@ static void __init fpu__init_system_xstate_size_legacy(void)
 	 */
 
 	if (!boot_cpu_has(X86_FEATURE_FPU)) {
-		fpu_kernel_xstate_size = sizeof(struct swregs_state);
+		xstate_size = sizeof(struct swregs_state);
 	} else {
 		if (boot_cpu_has(X86_FEATURE_FXSR))
-			fpu_kernel_xstate_size =
-				sizeof(struct fxregs_state);
+			xstate_size = sizeof(struct fxregs_state);
 		else
-			fpu_kernel_xstate_size =
-				sizeof(struct fregs_state);
+			xstate_size = sizeof(struct fregs_state);
 	}
 
-	fpu_user_xstate_size = fpu_kernel_xstate_size;
+	fpu_buf_cfg.min_size = xstate_size;
+	fpu_buf_cfg.max_size = xstate_size;
+	fpu_buf_cfg.user_size = xstate_size;
 }
 
 /* Legacy code to initialize eager fpu mode. */
diff --git a/arch/x86/kernel/fpu/regset.c b/arch/x86/kernel/fpu/regset.c
index 49dd307003ec..80ee64183c7d 100644
--- a/arch/x86/kernel/fpu/regset.c
+++ b/arch/x86/kernel/fpu/regset.c
@@ -149,7 +149,7 @@ int xstateregs_set(struct task_struct *target, const struct user_regset *regset,
 	/*
 	 * A whole standard-format XSAVE buffer is needed:
 	 */
-	if (pos != 0 || count != fpu_user_xstate_size)
+	if (pos != 0 || count != fpu_buf_cfg.user_size)
 		return -EFAULT;
 
 	if (!kbuf) {
diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c
index bec8c8046888..f5ec334c5a4e 100644
--- a/arch/x86/kernel/fpu/signal.c
+++ b/arch/x86/kernel/fpu/signal.c
@@ -36,7 +36,7 @@ static inline int check_xstate_in_sigframe(struct fxregs_state __user *fxbuf,
 	/* Check for the first magic field and other error scenarios. */
 	if (fx_sw->magic1 != FP_XSTATE_MAGIC1 ||
 	    fx_sw->xstate_size < min_xstate_size ||
-	    fx_sw->xstate_size > fpu_user_xstate_size ||
+	    fx_sw->xstate_size > fpu_buf_cfg.user_size ||
 	    fx_sw->xstate_size > fx_sw->extended_size)
 		goto setfx;
 
@@ -107,7 +107,7 @@ static inline int save_xstate_epilog(void __user *buf, int ia32_frame)
 		return err;
 
 	err |= __put_user(FP_XSTATE_MAGIC2,
-			  (__u32 __user *)(buf + fpu_user_xstate_size));
+			  (__u32 __user *)(buf + fpu_buf_cfg.user_size));
 
 	/*
 	 * Read the xfeatures which we copied (directly from the cpu or
@@ -144,7 +144,7 @@ static inline int copy_fpregs_to_sigframe(struct xregs_state __user *buf)
 	else
 		err = fnsave_to_user_sigframe((struct fregs_state __user *) buf);
 
-	if (unlikely(err) && __clear_user(buf, fpu_user_xstate_size))
+	if (unlikely(err) && __clear_user(buf, fpu_buf_cfg.user_size))
 		err = -EFAULT;
 	return err;
 }
@@ -205,7 +205,7 @@ int copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size)
 	fpregs_unlock();
 
 	if (ret) {
-		if (!fault_in_pages_writeable(buf_fx, fpu_user_xstate_size))
+		if (!fault_in_pages_writeable(buf_fx, fpu_buf_cfg.user_size))
 			goto retry;
 		return -EFAULT;
 	}
@@ -304,12 +304,12 @@ static int restore_fpregs_from_user(void __user *buf, u64 xrestore,
 static int __fpu_restore_sig(void __user *buf, void __user *buf_fx,
 			     bool ia32_fxstate)
 {
-	int state_size = fpu_kernel_xstate_size;
 	struct task_struct *tsk = current;
 	struct fpu *fpu = &tsk->thread.fpu;
 	struct user_i387_ia32_struct env;
 	u64 user_xfeatures = 0;
 	bool fx_only = false;
+	int state_size;
 	int ret;
 
 	if (use_xsave()) {
@@ -323,6 +323,8 @@ static int __fpu_restore_sig(void __user *buf, void __user *buf_fx,
 		state_size = fx_sw_user.xstate_size;
 		user_xfeatures = fx_sw_user.xfeatures;
 	} else {
+		/* The buffer cannot be dynamic without using XSAVE. */
+		state_size = fpu_buf_cfg.min_size;
 		user_xfeatures = XFEATURE_MASK_FPSSE;
 	}
 
@@ -418,8 +420,8 @@ static int __fpu_restore_sig(void __user *buf, void __user *buf_fx,
 }
 static inline int xstate_sigframe_size(void)
 {
-	return use_xsave() ? fpu_user_xstate_size + FP_XSTATE_MAGIC2_SIZE :
-			fpu_user_xstate_size;
+	return use_xsave() ? fpu_buf_cfg.user_size + FP_XSTATE_MAGIC2_SIZE :
+			fpu_buf_cfg.user_size;
 }
 
 /*
@@ -514,19 +516,20 @@ unsigned long fpu__get_fpstate_size(void)
  */
 void fpu__init_prepare_fx_sw_frame(void)
 {
-	int size = fpu_user_xstate_size + FP_XSTATE_MAGIC2_SIZE;
+	int ext_size = fpu_buf_cfg.user_size + FP_XSTATE_MAGIC2_SIZE;
+	int xstate_size = fpu_buf_cfg.user_size;
 
 	fx_sw_reserved.magic1 = FP_XSTATE_MAGIC1;
-	fx_sw_reserved.extended_size = size;
+	fx_sw_reserved.extended_size = ext_size;
 	fx_sw_reserved.xfeatures = xfeatures_mask_uabi();
-	fx_sw_reserved.xstate_size = fpu_user_xstate_size;
+	fx_sw_reserved.xstate_size = xstate_size;
 
 	if (IS_ENABLED(CONFIG_IA32_EMULATION) ||
 	    IS_ENABLED(CONFIG_X86_32)) {
 		int fsave_header_size = sizeof(struct fregs_state);
 
 		fx_sw_reserved_ia32 = fx_sw_reserved;
-		fx_sw_reserved_ia32.extended_size = size + fsave_header_size;
+		fx_sw_reserved_ia32.extended_size = ext_size + fsave_header_size;
 	}
 }
 
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index 6a658ef913bd..058dc9df6b86 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -77,12 +77,8 @@ static unsigned int xstate_comp_offsets[XFEATURE_MAX] __ro_after_init =
 static unsigned int xstate_supervisor_only_offsets[XFEATURE_MAX] __ro_after_init =
 	{ [ 0 ... XFEATURE_MAX - 1] = -1};
 
-/*
- * The XSAVE area of kernel can be in standard or compacted format;
- * it is always in standard format for user mode. This is the user
- * mode standard format size used for signal and ptrace frames.
- */
-unsigned int fpu_user_xstate_size __ro_after_init;
+struct fpu_xstate_buffer_config fpu_buf_cfg __ro_after_init;
+EXPORT_SYMBOL_GPL(fpu_buf_cfg);
 
 /*
  * Return whether the system supports a given xfeature.
@@ -595,7 +591,11 @@ static void do_extra_xstate_size_checks(void)
 		 */
 		paranoid_xstate_size += xfeature_size(i);
 	}
-	XSTATE_WARN_ON(paranoid_xstate_size != fpu_kernel_xstate_size);
+	/*
+	 * The size accounts for all the possible states reserved in the
+	 * per-task buffer.  Check against the maximum size.
+	 */
+	XSTATE_WARN_ON(paranoid_xstate_size != fpu_buf_cfg.max_size);
 }
 
 
@@ -690,21 +690,29 @@ static int __init init_xstate_size(void)
 	else
 		possible_xstate_size = xsave_size;
 
-	/* Ensure we have the space to store all enabled: */
-	if (!is_supported_xstate_size(possible_xstate_size))
-		return -EINVAL;
-
 	/*
-	 * The size is OK, we are definitely going to use xsave,
-	 * make it known to the world that we need more space.
+	 * The size accounts for all the possible states reserved in the
+	 * per-task buffer.  Set the maximum with this value.
 	 */
-	fpu_kernel_xstate_size = possible_xstate_size;
+	fpu_buf_cfg.max_size = possible_xstate_size;
+
+	/* Perform an extra check for the maximum size. */
 	do_extra_xstate_size_checks();
 
+	/*
+	 * Set the minimum to be the same as the maximum. The dynamic
+	 * user states are not supported yet.
+	 */
+	fpu_buf_cfg.min_size = possible_xstate_size;
+
+	/* Ensure the minimum size fits in the statically-allocated buffer: */
+	if (!is_supported_xstate_size(fpu_buf_cfg.min_size))
+		return -EINVAL;
+
 	/*
 	 * User space is always in standard format.
 	 */
-	fpu_user_xstate_size = xsave_size;
+	fpu_buf_cfg.user_size = xsave_size;
 	return 0;
 }
 
@@ -800,7 +808,7 @@ void __init fpu__init_system_xstate(void)
 	 * Update info used for ptrace frames; use standard-format size and no
 	 * supervisor xstates:
 	 */
-	update_regset_xstate_info(fpu_user_xstate_size, xfeatures_mask_uabi());
+	update_regset_xstate_info(fpu_buf_cfg.user_size, xfeatures_mask_uabi());
 
 	fpu__init_prepare_fx_sw_frame();
 	setup_init_fpu_buf();
@@ -820,7 +828,7 @@ void __init fpu__init_system_xstate(void)
 	print_xstate_offset_size();
 	pr_info("x86/fpu: Enabled xstate features 0x%llx, context size is %d bytes, using '%s' format.\n",
 		xfeatures_mask_all,
-		fpu_kernel_xstate_size,
+		fpu_buf_cfg.max_size,
 		boot_cpu_has(X86_FEATURE_XSAVES) ? "compacted" : "standard");
 	return;
 
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 1d9463e3096b..d1ca963cb8f7 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -90,6 +90,13 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
 	return fpu_clone(dst);
 }
 
+void arch_thread_struct_whitelist(unsigned long *offset, unsigned long *size)
+{
+	*offset = offsetof(struct thread_struct, fpu.state);
+	/* The buffer embedded in thread_struct has the minimum size. */
+	*size = fpu_buf_cfg.min_size;
+}
+
 /*
  * Free thread data structures etc..
  */
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 9c7c59317e5e..04e38196da79 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -9895,10 +9895,13 @@ static void kvm_save_current_fpu(struct fpu *fpu)
 	/*
 	 * If the target FPU state is not resident in the CPU registers, just
 	 * memcpy() from current, else save CPU state directly to the target.
+	 *
+	 * KVM does not support dynamic user states yet. Assume the buffer
+	 * always has the minimum size.
 	 */
 	if (test_thread_flag(TIF_NEED_FPU_LOAD))
 		memcpy(&fpu->state, &current->thread.fpu.state,
-		       fpu_kernel_xstate_size);
+		       fpu_buf_cfg.min_size);
 	else
 		save_fpregs_to_fpstate(fpu);
 }
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH v10 08/28] x86/fpu/xstate: Convert the struct fpu 'state' field to a pointer
       [not found] <20210825155413.19673-1-chang.seok.bae@intel.com>
                   ` (2 preceding siblings ...)
  2021-08-25 15:53 ` [PATCH v10 06/28] x86/fpu/xstate: Add new variables to indicate dynamic XSTATE buffer size Chang S. Bae
@ 2021-08-25 15:53 ` Chang S. Bae
  2021-08-25 15:53 ` [PATCH v10 10/28] x86/fpu/xstate: Update the XSTATE save function to support dynamic states Chang S. Bae
  4 siblings, 0 replies; 18+ messages in thread
From: Chang S. Bae @ 2021-08-25 15:53 UTC (permalink / raw)
  To: bp, luto, tglx, mingo, x86
  Cc: len.brown, lenb, dave.hansen, thiago.macieira, jing2.liu,
	ravi.v.shankar, linux-kernel, chang.seok.bae, kvm

The XSTATE per-task buffer is embedded into struct fpu. The field 'state'
represents the buffer. When the dynamic user state is in use, the buffer
may be dynamically allocated.

Convert the 'state' field to point either to the embedded buffer or to the
dynamically-allocated buffer. Also, add a new field to represent the
embedded buffer.

The initial task sets it before dealing with soft FPU. Make sure that every
FPU state has a valid pointer value on its creation.

No functional change.

Suggested-by: Borislav Petkov <bp@suse.de>
Signed-off-by: Chang S. Bae <chang.seok.bae@intel.com>
Reviewed-by: Len Brown <len.brown@intel.com>
Cc: x86@kernel.org
Cc: linux-kernel@vger.kernel.org
Cc: kvm@vger.kernel.org
---
Changes from v9:
* Update the code comment. (Borislav Petkov)

Changes from v5:
* Tightened up task size calculation (previously, it could over-calculate)
* Adjusted the changelog.

Changes from v4:
* Fixed KVM's user_fpu and guest_fpu to initialize the 'state' field correctly.
* Massaged the changelog.

Changes from v3:
* Added as a new patch to simplify the buffer access. (Borislav Petkov)
---
 arch/x86/include/asm/fpu/internal.h |  2 +-
 arch/x86/include/asm/fpu/types.h    | 31 +++++++++++++++++++++-------
 arch/x86/include/asm/trace/fpu.h    |  4 ++--
 arch/x86/kernel/fpu/core.c          | 32 +++++++++++++++--------------
 arch/x86/kernel/fpu/init.c          |  8 +++++---
 arch/x86/kernel/fpu/regset.c        | 24 +++++++++++-----------
 arch/x86/kernel/fpu/signal.c        | 24 +++++++++++-----------
 arch/x86/kernel/fpu/xstate.c        |  8 ++++----
 arch/x86/kernel/process.c           |  2 +-
 arch/x86/kvm/x86.c                  | 22 +++++++++++---------
 arch/x86/math-emu/fpu_aux.c         |  2 +-
 arch/x86/math-emu/fpu_entry.c       |  4 ++--
 arch/x86/math-emu/fpu_system.h      |  2 +-
 13 files changed, 94 insertions(+), 71 deletions(-)

diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h
index c7a64e2806a9..d2fc19c0e457 100644
--- a/arch/x86/include/asm/fpu/internal.h
+++ b/arch/x86/include/asm/fpu/internal.h
@@ -484,7 +484,7 @@ static inline void fpregs_restore_userregs(void)
 		 */
 		mask = xfeatures_mask_restore_user() |
 			xfeatures_mask_supervisor();
-		__restore_fpregs_from_fpstate(&fpu->state, mask);
+		__restore_fpregs_from_fpstate(fpu->state, mask);
 
 		fpregs_activate(fpu);
 		fpu->last_cpu = cpu;
diff --git a/arch/x86/include/asm/fpu/types.h b/arch/x86/include/asm/fpu/types.h
index f5a38a5f3ae1..ad5cbf922e30 100644
--- a/arch/x86/include/asm/fpu/types.h
+++ b/arch/x86/include/asm/fpu/types.h
@@ -339,15 +339,32 @@ struct fpu {
 	/*
 	 * @state:
 	 *
-	 * In-memory copy of all FPU registers that we save/restore
-	 * over context switches. If the task is using the FPU then
-	 * the registers in the FPU are more recent than this state
-	 * copy. If the task context-switches away then they get
-	 * saved here and represent the FPU state.
+	 * A pointer to indicate the in-memory copy of all FPU registers
+	 * that are saved/restored over context switches.
+	 *
+	 * Initially @state points to @__default_state. When dynamic states
+	 * get used, a memory is allocated for the larger state copy and
+	 * @state is updated to point to it. Then, the state in ->state
+	 * supersedes and invalidates the state in @__default_state.
+	 *
+	 * In general, if the task is using the FPU then the registers in
+	 * the FPU are more recent than the state copy. If the task
+	 * context-switches away then they get saved in ->state and
+	 * represent the FPU state.
+	 */
+	union fpregs_state		*state;
+
+	/*
+	 * @__default_state:
+	 *
+	 * Initial in-memory copy of all FPU registers that saved/restored
+	 * over context switches. When the task is switched to dynamic
+	 * states, this copy is replaced with the new in-memory copy in
+	 * ->state.
 	 */
-	union fpregs_state		state;
+	union fpregs_state		__default_state;
 	/*
-	 * WARNING: 'state' is dynamically-sized.  Do not put
+	 * WARNING: '__default_state' is dynamically-sized.  Do not put
 	 * anything after it here.
 	 */
 };
diff --git a/arch/x86/include/asm/trace/fpu.h b/arch/x86/include/asm/trace/fpu.h
index 879b77792f94..ef82f4824ce7 100644
--- a/arch/x86/include/asm/trace/fpu.h
+++ b/arch/x86/include/asm/trace/fpu.h
@@ -22,8 +22,8 @@ DECLARE_EVENT_CLASS(x86_fpu,
 		__entry->fpu		= fpu;
 		__entry->load_fpu	= test_thread_flag(TIF_NEED_FPU_LOAD);
 		if (boot_cpu_has(X86_FEATURE_OSXSAVE)) {
-			__entry->xfeatures = fpu->state.xsave.header.xfeatures;
-			__entry->xcomp_bv  = fpu->state.xsave.header.xcomp_bv;
+			__entry->xfeatures = fpu->state->xsave.header.xfeatures;
+			__entry->xcomp_bv  = fpu->state->xsave.header.xcomp_bv;
 		}
 	),
 	TP_printk("x86/fpu: %p load: %d xfeatures: %llx xcomp_bv: %llx",
diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index 62cc993a890a..6b55b8c651f6 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -99,19 +99,19 @@ EXPORT_SYMBOL(irq_fpu_usable);
 void save_fpregs_to_fpstate(struct fpu *fpu)
 {
 	if (likely(use_xsave())) {
-		os_xsave(&fpu->state.xsave);
+		os_xsave(&fpu->state->xsave);
 
 		/*
 		 * AVX512 state is tracked here because its use is
 		 * known to slow the max clock speed of the core.
 		 */
-		if (fpu->state.xsave.header.xfeatures & XFEATURE_MASK_AVX512)
+		if (fpu->state->xsave.header.xfeatures & XFEATURE_MASK_AVX512)
 			fpu->avx512_timestamp = jiffies;
 		return;
 	}
 
 	if (likely(use_fxsr())) {
-		fxsave(&fpu->state.fxsave);
+		fxsave(&fpu->state->fxsave);
 		return;
 	}
 
@@ -119,8 +119,8 @@ void save_fpregs_to_fpstate(struct fpu *fpu)
 	 * Legacy FPU register saving, FNSAVE always clears FPU registers,
 	 * so we have to reload them from the memory state.
 	 */
-	asm volatile("fnsave %[fp]; fwait" : [fp] "=m" (fpu->state.fsave));
-	frstor(&fpu->state.fsave);
+	asm volatile("fnsave %[fp]; fwait" : [fp] "=m" (fpu->state->fsave));
+	frstor(&fpu->state->fsave);
 }
 EXPORT_SYMBOL(save_fpregs_to_fpstate);
 
@@ -235,7 +235,7 @@ void fpstate_init(struct fpu *fpu)
 	u64 mask;
 
 	if (likely(fpu)) {
-		state = &fpu->state;
+		state = fpu->state;
 		/* The dynamic user states are not prepared yet. */
 		mask = xfeatures_mask_all & ~xfeatures_mask_user_dynamic;
 		size = fpu_buf_cfg.min_size;
@@ -274,6 +274,8 @@ int fpu_clone(struct task_struct *dst)
 	if (!cpu_feature_enabled(X86_FEATURE_FPU))
 		return 0;
 
+	dst_fpu->state = &dst_fpu->__default_state;
+
 	/*
 	 * Don't let 'init optimized' areas of the XSAVE area
 	 * leak into the child task:
@@ -281,7 +283,7 @@ int fpu_clone(struct task_struct *dst)
 	 * The child does not inherit the dynamic states. So,
 	 * the xstate buffer has the minimum size.
 	 */
-	memset(&dst_fpu->state.xsave, 0, fpu_buf_cfg.min_size);
+	memset(&dst_fpu->state->xsave, 0, fpu_buf_cfg.min_size);
 
 	/*
 	 * If the FPU registers are not owned by current just memcpy() the
@@ -290,7 +292,7 @@ int fpu_clone(struct task_struct *dst)
 	 */
 	fpregs_lock();
 	if (test_thread_flag(TIF_NEED_FPU_LOAD))
-		memcpy(&dst_fpu->state, &src_fpu->state, fpu_buf_cfg.min_size);
+		memcpy(dst_fpu->state, src_fpu->state, fpu_buf_cfg.min_size);
 
 	else
 		save_fpregs_to_fpstate(dst_fpu);
@@ -377,7 +379,7 @@ static void fpu_reset_fpstate(void)
 	 * user space as PKRU is eagerly written in switch_to() and
 	 * flush_thread().
 	 */
-	memcpy(&fpu->state, &init_fpstate, init_fpstate_copy_size());
+	memcpy(fpu->state, &init_fpstate, init_fpstate_copy_size());
 	set_thread_flag(TIF_NEED_FPU_LOAD);
 	fpregs_unlock();
 }
@@ -404,7 +406,7 @@ void fpu__clear_user_states(struct fpu *fpu)
 	 */
 	if (xfeatures_mask_supervisor() &&
 	    !fpregs_state_valid(fpu, smp_processor_id())) {
-		os_xrstor(&fpu->state.xsave, xfeatures_mask_supervisor());
+		os_xrstor(&fpu->state->xsave, xfeatures_mask_supervisor());
 	}
 
 	/* Reset user states in registers. */
@@ -486,11 +488,11 @@ int fpu__exception_code(struct fpu *fpu, int trap_nr)
 		 * fully reproduce the context of the exception.
 		 */
 		if (boot_cpu_has(X86_FEATURE_FXSR)) {
-			cwd = fpu->state.fxsave.cwd;
-			swd = fpu->state.fxsave.swd;
+			cwd = fpu->state->fxsave.cwd;
+			swd = fpu->state->fxsave.swd;
 		} else {
-			cwd = (unsigned short)fpu->state.fsave.cwd;
-			swd = (unsigned short)fpu->state.fsave.swd;
+			cwd = (unsigned short)fpu->state->fsave.cwd;
+			swd = (unsigned short)fpu->state->fsave.swd;
 		}
 
 		err = swd & ~cwd;
@@ -504,7 +506,7 @@ int fpu__exception_code(struct fpu *fpu, int trap_nr)
 		unsigned short mxcsr = MXCSR_DEFAULT;
 
 		if (boot_cpu_has(X86_FEATURE_XMM))
-			mxcsr = fpu->state.fxsave.mxcsr;
+			mxcsr = fpu->state->fxsave.mxcsr;
 
 		err = ~(mxcsr >> 7) & mxcsr;
 	}
diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c
index da7341f95008..cd1f3114f3ca 100644
--- a/arch/x86/kernel/fpu/init.c
+++ b/arch/x86/kernel/fpu/init.c
@@ -31,10 +31,12 @@ static void fpu__init_cpu_generic(void)
 		cr0 |= X86_CR0_EM;
 	write_cr0(cr0);
 
+	current->thread.fpu.state = &current->thread.fpu.__default_state;
+
 	/* Flush out any pending x87 state: */
 #ifdef CONFIG_MATH_EMULATION
 	if (!boot_cpu_has(X86_FEATURE_FPU))
-		fpstate_init_soft(&current->thread.fpu.state.soft);
+		fpstate_init_soft(&current->thread.fpu.state->soft);
 	else
 #endif
 		asm volatile ("fninit");
@@ -153,7 +155,7 @@ static void __init fpu__init_task_struct_size(void)
 	 * Subtract off the static size of the register state.
 	 * It potentially has a bunch of padding.
 	 */
-	task_size -= sizeof(((struct task_struct *)0)->thread.fpu.state);
+	task_size -= sizeof(((struct task_struct *)0)->thread.fpu.__default_state);
 
 	/*
 	 * Add back the dynamically-calculated register state
@@ -170,7 +172,7 @@ static void __init fpu__init_task_struct_size(void)
 	 * you hit a compile error here, check the structure to
 	 * see if something got added to the end.
 	 */
-	CHECK_MEMBER_AT_END_OF(struct fpu, state);
+	CHECK_MEMBER_AT_END_OF(struct fpu, __default_state);
 	CHECK_MEMBER_AT_END_OF(struct thread_struct, fpu);
 	CHECK_MEMBER_AT_END_OF(struct task_struct, thread);
 
diff --git a/arch/x86/kernel/fpu/regset.c b/arch/x86/kernel/fpu/regset.c
index 80ee64183c7d..7ea10f98c2b0 100644
--- a/arch/x86/kernel/fpu/regset.c
+++ b/arch/x86/kernel/fpu/regset.c
@@ -74,8 +74,8 @@ int xfpregs_get(struct task_struct *target, const struct user_regset *regset,
 	sync_fpstate(fpu);
 
 	if (!use_xsave()) {
-		return membuf_write(&to, &fpu->state.fxsave,
-				    sizeof(fpu->state.fxsave));
+		return membuf_write(&to, &fpu->state->fxsave,
+				    sizeof(fpu->state->fxsave));
 	}
 
 	copy_xstate_to_uabi_buf(to, target, XSTATE_COPY_FX);
@@ -110,15 +110,15 @@ int xfpregs_set(struct task_struct *target, const struct user_regset *regset,
 	fpu_force_restore(fpu);
 
 	/* Copy the state  */
-	memcpy(&fpu->state.fxsave, &newstate, sizeof(newstate));
+	memcpy(&fpu->state->fxsave, &newstate, sizeof(newstate));
 
 	/* Clear xmm8..15 */
-	BUILD_BUG_ON(sizeof(fpu->state.fxsave.xmm_space) != 16 * 16);
-	memset(&fpu->state.fxsave.xmm_space[8], 0, 8 * 16);
+	BUILD_BUG_ON(sizeof(fpu->state->fxsave.xmm_space) != 16 * 16);
+	memset(&fpu->state->fxsave.xmm_space[8], 0, 8 * 16);
 
 	/* Mark FP and SSE as in use when XSAVE is enabled */
 	if (use_xsave())
-		fpu->state.xsave.header.xfeatures |= XFEATURE_MASK_FPSSE;
+		fpu->state->xsave.header.xfeatures |= XFEATURE_MASK_FPSSE;
 
 	return 0;
 }
@@ -283,7 +283,7 @@ static void __convert_from_fxsr(struct user_i387_ia32_struct *env,
 void
 convert_from_fxsr(struct user_i387_ia32_struct *env, struct task_struct *tsk)
 {
-	__convert_from_fxsr(env, tsk, &tsk->thread.fpu.state.fxsave);
+	__convert_from_fxsr(env, tsk, &tsk->thread.fpu.state->fxsave);
 }
 
 void convert_to_fxsr(struct fxregs_state *fxsave,
@@ -326,7 +326,7 @@ int fpregs_get(struct task_struct *target, const struct user_regset *regset,
 		return fpregs_soft_get(target, regset, to);
 
 	if (!cpu_feature_enabled(X86_FEATURE_FXSR)) {
-		return membuf_write(&to, &fpu->state.fsave,
+		return membuf_write(&to, &fpu->state->fsave,
 				    sizeof(struct fregs_state));
 	}
 
@@ -337,7 +337,7 @@ int fpregs_get(struct task_struct *target, const struct user_regset *regset,
 		copy_xstate_to_uabi_buf(mb, target, XSTATE_COPY_FP);
 		fx = &fxsave;
 	} else {
-		fx = &fpu->state.fxsave;
+		fx = &fpu->state->fxsave;
 	}
 
 	__convert_from_fxsr(&env, target, fx);
@@ -366,16 +366,16 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset,
 	fpu_force_restore(fpu);
 
 	if (cpu_feature_enabled(X86_FEATURE_FXSR))
-		convert_to_fxsr(&fpu->state.fxsave, &env);
+		convert_to_fxsr(&fpu->state->fxsave, &env);
 	else
-		memcpy(&fpu->state.fsave, &env, sizeof(env));
+		memcpy(&fpu->state->fsave, &env, sizeof(env));
 
 	/*
 	 * Update the header bit in the xsave header, indicating the
 	 * presence of FP.
 	 */
 	if (cpu_feature_enabled(X86_FEATURE_XSAVE))
-		fpu->state.xsave.header.xfeatures |= XFEATURE_MASK_FP;
+		fpu->state->xsave.header.xfeatures |= XFEATURE_MASK_FP;
 
 	return 0;
 }
diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c
index f5ec334c5a4e..8b333b1a4d07 100644
--- a/arch/x86/kernel/fpu/signal.c
+++ b/arch/x86/kernel/fpu/signal.c
@@ -67,13 +67,13 @@ static inline int check_xstate_in_sigframe(struct fxregs_state __user *fxbuf,
 static inline int save_fsave_header(struct task_struct *tsk, void __user *buf)
 {
 	if (use_fxsr()) {
-		struct xregs_state *xsave = &tsk->thread.fpu.state.xsave;
+		struct xregs_state *xsave = &tsk->thread.fpu.state->xsave;
 		struct user_i387_ia32_struct env;
 		struct _fpstate_32 __user *fp = buf;
 
 		fpregs_lock();
 		if (!test_thread_flag(TIF_NEED_FPU_LOAD))
-			fxsave(&tsk->thread.fpu.state.fxsave);
+			fxsave(&tsk->thread.fpu.state->fxsave);
 		fpregs_unlock();
 
 		convert_from_fxsr(&env, tsk);
@@ -294,7 +294,7 @@ static int restore_fpregs_from_user(void __user *buf, u64 xrestore,
 	 * been restored from a user buffer directly.
 	 */
 	if (test_thread_flag(TIF_NEED_FPU_LOAD) && xfeatures_mask_supervisor())
-		os_xrstor(&fpu->state.xsave, xfeatures_mask_supervisor());
+		os_xrstor(&fpu->state->xsave, xfeatures_mask_supervisor());
 
 	fpregs_mark_activate();
 	fpregs_unlock();
@@ -365,7 +365,7 @@ static int __fpu_restore_sig(void __user *buf, void __user *buf_fx,
 		 * the right place in memory. It's ia32 mode. Shrug.
 		 */
 		if (xfeatures_mask_supervisor())
-			os_xsave(&fpu->state.xsave);
+			os_xsave(&fpu->state->xsave);
 		set_thread_flag(TIF_NEED_FPU_LOAD);
 	}
 	__fpu_invalidate_fpregs_state(fpu);
@@ -377,21 +377,21 @@ static int __fpu_restore_sig(void __user *buf, void __user *buf_fx,
 		if (ret)
 			return ret;
 	} else {
-		if (__copy_from_user(&fpu->state.fxsave, buf_fx,
-				     sizeof(fpu->state.fxsave)))
+		if (__copy_from_user(&fpu->state->fxsave, buf_fx,
+				     sizeof(fpu->state->fxsave)))
 			return -EFAULT;
 
 		/* Reject invalid MXCSR values. */
-		if (fpu->state.fxsave.mxcsr & ~mxcsr_feature_mask)
+		if (fpu->state->fxsave.mxcsr & ~mxcsr_feature_mask)
 			return -EINVAL;
 
 		/* Enforce XFEATURE_MASK_FPSSE when XSAVE is enabled */
 		if (use_xsave())
-			fpu->state.xsave.header.xfeatures |= XFEATURE_MASK_FPSSE;
+			fpu->state->xsave.header.xfeatures |= XFEATURE_MASK_FPSSE;
 	}
 
 	/* Fold the legacy FP storage */
-	convert_to_fxsr(&fpu->state.fxsave, &env);
+	convert_to_fxsr(&fpu->state->fxsave, &env);
 
 	fpregs_lock();
 	if (use_xsave()) {
@@ -406,10 +406,10 @@ static int __fpu_restore_sig(void __user *buf, void __user *buf_fx,
 		 */
 		u64 mask = user_xfeatures | xfeatures_mask_supervisor();
 
-		fpu->state.xsave.header.xfeatures &= mask;
-		ret = os_xrstor_safe(&fpu->state.xsave, xfeatures_mask_all);
+		fpu->state->xsave.header.xfeatures &= mask;
+		ret = os_xrstor_safe(&fpu->state->xsave, xfeatures_mask_all);
 	} else {
-		ret = fxrstor_safe(&fpu->state.fxsave);
+		ret = fxrstor_safe(&fpu->state->fxsave);
 	}
 
 	if (likely(!ret))
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index 2e474fbdc241..4496750208a8 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -882,7 +882,7 @@ static void *__raw_xsave_addr(struct fpu *fpu, int xfeature_nr)
 	}
 
 	if (fpu)
-		xsave = &fpu->state.xsave;
+		xsave = &fpu->state->xsave;
 	else
 		xsave = &init_fpstate.xsave;
 
@@ -925,7 +925,7 @@ void *get_xsave_addr(struct fpu *fpu, int xfeature_nr)
 		  "get of unsupported state");
 
 	if (fpu)
-		xsave = &fpu->state.xsave;
+		xsave = &fpu->state->xsave;
 	else
 		xsave = &init_fpstate.xsave;
 
@@ -1017,7 +1017,7 @@ void copy_xstate_to_uabi_buf(struct membuf to, struct task_struct *tsk,
 			     enum xstate_copy_mode copy_mode)
 {
 	const unsigned int off_mxcsr = offsetof(struct fxregs_state, mxcsr);
-	struct xregs_state *xsave = &tsk->thread.fpu.state.xsave;
+	struct xregs_state *xsave = &tsk->thread.fpu.state->xsave;
 	struct xregs_state *xinit = &init_fpstate.xsave;
 	struct xstate_header header;
 	unsigned int zerofrom;
@@ -1134,7 +1134,7 @@ static int copy_from_buffer(void *dst, unsigned int offset, unsigned int size,
 static int copy_uabi_to_xstate(struct fpu *fpu, const void *kbuf,
 			       const void __user *ubuf)
 {
-	struct xregs_state *xsave = &fpu->state.xsave;
+	struct xregs_state *xsave = &fpu->state->xsave;
 	unsigned int offset, size;
 	struct xstate_header hdr;
 	u64 mask;
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index d1ca963cb8f7..33f5d8d07367 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -92,7 +92,7 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
 
 void arch_thread_struct_whitelist(unsigned long *offset, unsigned long *size)
 {
-	*offset = offsetof(struct thread_struct, fpu.state);
+	*offset = offsetof(struct thread_struct, fpu.__default_state);
 	/* The buffer embedded in thread_struct has the minimum size. */
 	*size = fpu_buf_cfg.min_size;
 }
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 04e38196da79..74dde635df40 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4694,7 +4694,7 @@ static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu,
 
 static void fill_xsave(u8 *dest, struct kvm_vcpu *vcpu)
 {
-	struct xregs_state *xsave = &vcpu->arch.guest_fpu->state.xsave;
+	struct xregs_state *xsave = &vcpu->arch.guest_fpu->state->xsave;
 	u64 xstate_bv = xsave->header.xfeatures;
 	u64 valid;
 
@@ -4737,7 +4737,7 @@ static void fill_xsave(u8 *dest, struct kvm_vcpu *vcpu)
 
 static void load_xsave(struct kvm_vcpu *vcpu, u8 *src)
 {
-	struct xregs_state *xsave = &vcpu->arch.guest_fpu->state.xsave;
+	struct xregs_state *xsave = &vcpu->arch.guest_fpu->state->xsave;
 	u64 xstate_bv = *(u64 *)(src + XSAVE_HDR_OFFSET);
 	u64 valid;
 
@@ -4790,7 +4790,7 @@ static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu,
 		fill_xsave((u8 *) guest_xsave->region, vcpu);
 	} else {
 		memcpy(guest_xsave->region,
-			&vcpu->arch.guest_fpu->state.fxsave,
+			&vcpu->arch.guest_fpu->state->fxsave,
 			sizeof(struct fxregs_state));
 		*(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / sizeof(u32)] =
 			XFEATURE_MASK_FPSSE;
@@ -4824,7 +4824,7 @@ static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu,
 		if (xstate_bv & ~XFEATURE_MASK_FPSSE ||
 			mxcsr & ~mxcsr_feature_mask)
 			return -EINVAL;
-		memcpy(&vcpu->arch.guest_fpu->state.fxsave,
+		memcpy(&vcpu->arch.guest_fpu->state->fxsave,
 			guest_xsave->region, sizeof(struct fxregs_state));
 	}
 	return 0;
@@ -9900,7 +9900,7 @@ static void kvm_save_current_fpu(struct fpu *fpu)
 	 * always has the minimum size.
 	 */
 	if (test_thread_flag(TIF_NEED_FPU_LOAD))
-		memcpy(&fpu->state, &current->thread.fpu.state,
+		memcpy(fpu->state, current->thread.fpu.state,
 		       fpu_buf_cfg.min_size);
 	else
 		save_fpregs_to_fpstate(fpu);
@@ -9919,7 +9919,7 @@ static void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
 	 */
 	if (vcpu->arch.guest_fpu)
 		/* PKRU is separately restored in kvm_x86_ops.run. */
-		__restore_fpregs_from_fpstate(&vcpu->arch.guest_fpu->state,
+		__restore_fpregs_from_fpstate(vcpu->arch.guest_fpu->state,
 					~XFEATURE_MASK_PKRU);
 
 	fpregs_mark_activate();
@@ -9940,7 +9940,7 @@ static void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
 	if (vcpu->arch.guest_fpu)
 		kvm_save_current_fpu(vcpu->arch.guest_fpu);
 
-	restore_fpregs_from_fpstate(&vcpu->arch.user_fpu->state);
+	restore_fpregs_from_fpstate(vcpu->arch.user_fpu->state);
 
 	fpregs_mark_activate();
 	fpregs_unlock();
@@ -10529,7 +10529,7 @@ int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
 
 	vcpu_load(vcpu);
 
-	fxsave = &vcpu->arch.guest_fpu->state.fxsave;
+	fxsave = &vcpu->arch.guest_fpu->state->fxsave;
 	memcpy(fpu->fpr, fxsave->st_space, 128);
 	fpu->fcw = fxsave->cwd;
 	fpu->fsw = fxsave->swd;
@@ -10552,7 +10552,7 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
 
 	vcpu_load(vcpu);
 
-	fxsave = &vcpu->arch.guest_fpu->state.fxsave;
+	fxsave = &vcpu->arch.guest_fpu->state->fxsave;
 
 	memcpy(fxsave->st_space, fpu->fpr, 128);
 	fxsave->cwd = fpu->fcw;
@@ -10613,7 +10613,7 @@ static void fx_init(struct kvm_vcpu *vcpu)
 
 	fpstate_init(vcpu->arch.guest_fpu);
 	if (boot_cpu_has(X86_FEATURE_XSAVES))
-		vcpu->arch.guest_fpu->state.xsave.header.xcomp_bv =
+		vcpu->arch.guest_fpu->state->xsave.header.xcomp_bv =
 			host_xcr0 | XSTATE_COMPACTION_ENABLED;
 
 	/*
@@ -10693,6 +10693,7 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
 		pr_err("kvm: failed to allocate userspace's fpu\n");
 		goto free_emulate_ctxt;
 	}
+	vcpu->arch.user_fpu->state = &vcpu->arch.user_fpu->__default_state;
 
 	vcpu->arch.guest_fpu = kmem_cache_zalloc(x86_fpu_cache,
 						 GFP_KERNEL_ACCOUNT);
@@ -10700,6 +10701,7 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
 		pr_err("kvm: failed to allocate vcpu's fpu\n");
 		goto free_user_fpu;
 	}
+	vcpu->arch.guest_fpu->state = &vcpu->arch.guest_fpu->__default_state;
 	fx_init(vcpu);
 
 	vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu);
diff --git a/arch/x86/math-emu/fpu_aux.c b/arch/x86/math-emu/fpu_aux.c
index 034748459482..51432a73024c 100644
--- a/arch/x86/math-emu/fpu_aux.c
+++ b/arch/x86/math-emu/fpu_aux.c
@@ -53,7 +53,7 @@ void fpstate_init_soft(struct swregs_state *soft)
 
 void finit(void)
 {
-	fpstate_init_soft(&current->thread.fpu.state.soft);
+	fpstate_init_soft(&current->thread.fpu.state->soft);
 }
 
 /*
diff --git a/arch/x86/math-emu/fpu_entry.c b/arch/x86/math-emu/fpu_entry.c
index 8679a9d6c47f..6ba56632170e 100644
--- a/arch/x86/math-emu/fpu_entry.c
+++ b/arch/x86/math-emu/fpu_entry.c
@@ -640,7 +640,7 @@ int fpregs_soft_set(struct task_struct *target,
 		    unsigned int pos, unsigned int count,
 		    const void *kbuf, const void __user *ubuf)
 {
-	struct swregs_state *s387 = &target->thread.fpu.state.soft;
+	struct swregs_state *s387 = &target->thread.fpu.state->soft;
 	void *space = s387->st_space;
 	int ret;
 	int offset, other, i, tags, regnr, tag, newtop;
@@ -691,7 +691,7 @@ int fpregs_soft_get(struct task_struct *target,
 		    const struct user_regset *regset,
 		    struct membuf to)
 {
-	struct swregs_state *s387 = &target->thread.fpu.state.soft;
+	struct swregs_state *s387 = &target->thread.fpu.state->soft;
 	const void *space = s387->st_space;
 	int offset = (S387->ftop & 7) * 10, other = 80 - offset;
 
diff --git a/arch/x86/math-emu/fpu_system.h b/arch/x86/math-emu/fpu_system.h
index 9b41391867dc..a6291ddfdda6 100644
--- a/arch/x86/math-emu/fpu_system.h
+++ b/arch/x86/math-emu/fpu_system.h
@@ -73,7 +73,7 @@ static inline bool seg_writable(struct desc_struct *d)
 	return (d->type & SEG_TYPE_EXECUTE_MASK) == SEG_TYPE_WRITABLE;
 }
 
-#define I387			(&current->thread.fpu.state)
+#define I387			(current->thread.fpu.state)
 #define FPU_info		(I387->soft.info)
 
 #define FPU_CS			(*(unsigned short *) &(FPU_info->regs->cs))
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH v10 10/28] x86/fpu/xstate: Update the XSTATE save function to support dynamic states
       [not found] <20210825155413.19673-1-chang.seok.bae@intel.com>
                   ` (3 preceding siblings ...)
  2021-08-25 15:53 ` [PATCH v10 08/28] x86/fpu/xstate: Convert the struct fpu 'state' field to a pointer Chang S. Bae
@ 2021-08-25 15:53 ` Chang S. Bae
  2021-10-01 15:41   ` Thomas Gleixner
  4 siblings, 1 reply; 18+ messages in thread
From: Chang S. Bae @ 2021-08-25 15:53 UTC (permalink / raw)
  To: bp, luto, tglx, mingo, x86
  Cc: len.brown, lenb, dave.hansen, thiago.macieira, jing2.liu,
	ravi.v.shankar, linux-kernel, chang.seok.bae, kvm

Extend os_xsave() to receive a mask argument of which states to save, in
preparation for dynamic user state handling.

Update KVM to set a valid fpu->state_mask, so it can continue to share with
the core code.

Signed-off-by: Chang S. Bae <chang.seok.bae@intel.com>
Reviewed-by: Len Brown <len.brown@intel.com>
Cc: x86@kernel.org
Cc: linux-kernel@vger.kernel.org
Cc: kvm@vger.kernel.org
---
Changes from v5:
* Adjusted the changelog and code for the new base code.

Changes from v3:
* Updated the changelog. (Borislav Petkov)
* Made the code change more reviewable.

Changes from v2:
* Updated the changelog to clarify the KVM code changes.
---
 arch/x86/include/asm/fpu/internal.h | 3 +--
 arch/x86/kernel/fpu/core.c          | 2 +-
 arch/x86/kernel/fpu/signal.c        | 2 +-
 arch/x86/kvm/x86.c                  | 9 +++++++--
 4 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h
index d2fc19c0e457..263e349ff85a 100644
--- a/arch/x86/include/asm/fpu/internal.h
+++ b/arch/x86/include/asm/fpu/internal.h
@@ -298,9 +298,8 @@ static inline void os_xrstor_booting(struct xregs_state *xstate)
  * Uses either XSAVE or XSAVEOPT or XSAVES depending on the CPU features
  * and command line options. The choice is permanent until the next reboot.
  */
-static inline void os_xsave(struct xregs_state *xstate)
+static inline void os_xsave(struct xregs_state *xstate, u64 mask)
 {
-	u64 mask = xfeatures_mask_all;
 	u32 lmask = mask;
 	u32 hmask = mask >> 32;
 	int err;
diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index 2941d03912db..164e75c37dbb 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -99,7 +99,7 @@ EXPORT_SYMBOL(irq_fpu_usable);
 void save_fpregs_to_fpstate(struct fpu *fpu)
 {
 	if (likely(use_xsave())) {
-		os_xsave(&fpu->state->xsave);
+		os_xsave(&fpu->state->xsave, fpu->state_mask);
 
 		/*
 		 * AVX512 state is tracked here because its use is
diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c
index 8b333b1a4d07..fe2732db6d6b 100644
--- a/arch/x86/kernel/fpu/signal.c
+++ b/arch/x86/kernel/fpu/signal.c
@@ -365,7 +365,7 @@ static int __fpu_restore_sig(void __user *buf, void __user *buf_fx,
 		 * the right place in memory. It's ia32 mode. Shrug.
 		 */
 		if (xfeatures_mask_supervisor())
-			os_xsave(&fpu->state->xsave);
+			os_xsave(&fpu->state->xsave, fpu->state_mask);
 		set_thread_flag(TIF_NEED_FPU_LOAD);
 	}
 	__fpu_invalidate_fpregs_state(fpu);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 74dde635df40..7c46747f6865 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -9899,11 +9899,16 @@ static void kvm_save_current_fpu(struct fpu *fpu)
 	 * KVM does not support dynamic user states yet. Assume the buffer
 	 * always has the minimum size.
 	 */
-	if (test_thread_flag(TIF_NEED_FPU_LOAD))
+	if (test_thread_flag(TIF_NEED_FPU_LOAD)) {
 		memcpy(fpu->state, current->thread.fpu.state,
 		       fpu_buf_cfg.min_size);
-	else
+	} else {
+		struct fpu *src_fpu = &current->thread.fpu;
+
+		if (fpu->state_mask != src_fpu->state_mask)
+			fpu->state_mask = src_fpu->state_mask;
 		save_fpregs_to_fpstate(fpu);
+	}
 }
 
 /* Swap (qemu) user FPU context for the guest FPU context. */
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 18+ messages in thread

* Re: [PATCH v10 02/28] x86/fpu/xstate: Modify the initialization helper to handle both static and dynamic buffers
  2021-08-25 15:53 ` [PATCH v10 02/28] x86/fpu/xstate: Modify the initialization helper to handle both static and dynamic buffers Chang S. Bae
@ 2021-10-01 12:45   ` Thomas Gleixner
  2021-10-03 22:35     ` Bae, Chang Seok
  0 siblings, 1 reply; 18+ messages in thread
From: Thomas Gleixner @ 2021-10-01 12:45 UTC (permalink / raw)
  To: Chang S. Bae, bp, luto, mingo, x86
  Cc: len.brown, lenb, dave.hansen, thiago.macieira, jing2.liu,
	ravi.v.shankar, linux-kernel, chang.seok.bae, kvm

Chang,

On Wed, Aug 25 2021 at 08:53, Chang S. Bae wrote:
> Have the function initializing the XSTATE buffer take a struct fpu *
> pointer in preparation for dynamic state buffer support.
>
> init_fpstate is a special case, which is indicated by a null pointer
> parameter to fpstate_init().
>
> Also, fpstate_init_xstate() now accepts the state component bitmap to
> customize the compacted format.

That's not a changelog. Changelogs have to explain the WHY not the WHAT.

I can see the WHY when I look at the later changes, but that's not how
it works.

Also the subject of this patch is just wrong. It does not make the
functions handle dynamic buffers, it prepares them to add support for
that later.

> +static inline void fpstate_init_xstate(struct xregs_state *xsave, u64 mask)
> +{
> +	/*
> +	 * XRSTORS requires these bits set in xcomp_bv, or it will
> +	 * trigger #GP:
> +	 */
> +	xsave->header.xcomp_bv = XCOMP_BV_COMPACTED_FORMAT | mask;
> +}

This wants to be a separate cleanup patch which replaces the open coded
variant here:

> diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
> index fc1d529547e6..0fed7fbcf2e8 100644
> --- a/arch/x86/kernel/fpu/xstate.c
> +++ b/arch/x86/kernel/fpu/xstate.c
> @@ -395,8 +395,7 @@ static void __init setup_init_fpu_buf(void)
>  	print_xstate_features();
>  
>  	if (boot_cpu_has(X86_FEATURE_XSAVES))
> -		init_fpstate.xsave.header.xcomp_bv = XCOMP_BV_COMPACTED_FORMAT |
> -						     xfeatures_mask_all;
> +		fpstate_init_xstate(&init_fpstate.xsave, xfeatures_mask_all);

Thanks,

        tglx

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v10 04/28] x86/fpu/xstate: Modify address finders to handle both static and dynamic buffers
  2021-08-25 15:53 ` [PATCH v10 04/28] x86/fpu/xstate: Modify address finders " Chang S. Bae
@ 2021-10-01 13:15   ` Thomas Gleixner
  2021-10-03 22:35     ` Bae, Chang Seok
  0 siblings, 1 reply; 18+ messages in thread
From: Thomas Gleixner @ 2021-10-01 13:15 UTC (permalink / raw)
  To: Chang S. Bae, bp, luto, mingo, x86
  Cc: len.brown, lenb, dave.hansen, thiago.macieira, jing2.liu,
	ravi.v.shankar, linux-kernel, chang.seok.bae, kvm

Chang,

On Wed, Aug 25 2021 at 08:53, Chang S. Bae wrote:

> Have all the functions finding XSTATE address take a struct fpu * pointer
> in preparation for dynamic state buffer support.
>
> init_fpstate is a special case, which is indicated by a null pointer
> parameter to get_xsave_addr() and __raw_xsave_addr().

Same comment vs. subject. Prepare ...

> +	if (fpu)
> +		xsave = &fpu->state.xsave;
> +	else
> +		xsave = &init_fpstate.xsave;
> +
> +	return xsave + xstate_comp_offsets[xfeature_nr];

So you have the same conditionals and the same comments vs. that NULL
pointer oddity how many times now all over the place?

That can be completely avoided:

Patch 1:

-union fpregs_state init_fpstate __ro_after_init;
+static union fpregs_state init_fpstate __ro_after_init;
+struct fpu init_fpu = { .state = &init_fpstate } __ro_after_init;

and make all users of init_fpstate access it through init_fpu.

Patches 2..N which change arguments from fpregs_state to fpu:

-	fun(init_fpu->state);
+	fun(&init_fpu);

Patch M which adds state_mask:

@fpu__init_system_xstate()
+	init_fpu.state_mask = xfeatures_mask_all;

Hmm?

Thanks,

        tglx

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v10 06/28] x86/fpu/xstate: Add new variables to indicate dynamic XSTATE buffer size
  2021-08-25 15:53 ` [PATCH v10 06/28] x86/fpu/xstate: Add new variables to indicate dynamic XSTATE buffer size Chang S. Bae
@ 2021-10-01 13:32   ` Thomas Gleixner
  2021-10-03 22:36     ` Bae, Chang Seok
  0 siblings, 1 reply; 18+ messages in thread
From: Thomas Gleixner @ 2021-10-01 13:32 UTC (permalink / raw)
  To: Chang S. Bae, bp, luto, mingo, x86
  Cc: len.brown, lenb, dave.hansen, thiago.macieira, jing2.liu,
	ravi.v.shankar, linux-kernel, chang.seok.bae, kvm

On Wed, Aug 25 2021 at 08:53, Chang S. Bae wrote:
> +/**
> + * struct fpu_xstate_buffer_config - xstate buffer configuration
> + * @max_size:			The CPUID-enumerated all-feature "maximum" size
> + *				for xstate per-task buffer.
> + * @min_size:			The size to fit into the statically-allocated
> + *				buffer. With dynamic states, this buffer no longer
> + *				contains all the enabled state components.
> + * @user_size:			The size of user-space buffer for signal and
> + *				ptrace frames, in the non-compacted format.
> + */

>  void fpstate_init(struct fpu *fpu)
>  {
>  	union fpregs_state *state;
> +	unsigned int size;
> +	u64 mask;
>  
> -	if (likely(fpu))
> +	if (likely(fpu)) {
>  		state = &fpu->state;
> -	else
> +		/* The dynamic user states are not prepared yet. */
> +		mask = xfeatures_mask_all & ~xfeatures_mask_user_dynamic;

The patch ordering is really odd. Why aren't you adding

     fpu->state_mask
and
     fpu->state_size

first and initialize state_mask and state_size to the fixed mode and
then add the dynamic sizing on top?

>  	/*
>  	 * If the target FPU state is not resident in the CPU registers, just
>  	 * memcpy() from current, else save CPU state directly to the target.
> +	 *
> +	 * KVM does not support dynamic user states yet. Assume the buffer
> +	 * always has the minimum size.
>  	 */
>  	if (test_thread_flag(TIF_NEED_FPU_LOAD))
>  		memcpy(&fpu->state, &current->thread.fpu.state,
> -		       fpu_kernel_xstate_size);
> +		       fpu_buf_cfg.min_size);

Which completely avoids the export of fpu_buf_cfg for KVM because the
information is just available via struc fpu. As a bonus the export of
fpu_kernel_xstate_size can be removed as well.

Hmm?

Thanks,

        tglx

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v10 10/28] x86/fpu/xstate: Update the XSTATE save function to support dynamic states
  2021-08-25 15:53 ` [PATCH v10 10/28] x86/fpu/xstate: Update the XSTATE save function to support dynamic states Chang S. Bae
@ 2021-10-01 15:41   ` Thomas Gleixner
  2021-10-02 21:31     ` Thomas Gleixner
  0 siblings, 1 reply; 18+ messages in thread
From: Thomas Gleixner @ 2021-10-01 15:41 UTC (permalink / raw)
  To: Chang S. Bae, bp, luto, mingo, x86
  Cc: len.brown, lenb, dave.hansen, thiago.macieira, jing2.liu,
	ravi.v.shankar, linux-kernel, chang.seok.bae, kvm

On Wed, Aug 25 2021 at 08:53, Chang S. Bae wrote:
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 74dde635df40..7c46747f6865 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -9899,11 +9899,16 @@ static void kvm_save_current_fpu(struct fpu *fpu)
>  	 * KVM does not support dynamic user states yet. Assume the buffer
>  	 * always has the minimum size.
>  	 */
> -	if (test_thread_flag(TIF_NEED_FPU_LOAD))
> +	if (test_thread_flag(TIF_NEED_FPU_LOAD)) {
>  		memcpy(fpu->state, current->thread.fpu.state,
>  		       fpu_buf_cfg.min_size);

What happens with the rest of the state?

> -	else
> +	} else {
> +		struct fpu *src_fpu = &current->thread.fpu;
> +
> +		if (fpu->state_mask != src_fpu->state_mask)
> +			fpu->state_mask = src_fpu->state_mask;

What guarantees that the state size of @fpu is big enough when src_fpu
has dynamic features included?

>  		save_fpregs_to_fpstate(fpu);

Thanks,

        tglx

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v10 10/28] x86/fpu/xstate: Update the XSTATE save function to support dynamic states
  2021-10-01 15:41   ` Thomas Gleixner
@ 2021-10-02 21:31     ` Thomas Gleixner
  2021-10-02 22:54       ` Bae, Chang Seok
  2021-10-05  7:50       ` Paolo Bonzini
  0 siblings, 2 replies; 18+ messages in thread
From: Thomas Gleixner @ 2021-10-02 21:31 UTC (permalink / raw)
  To: Chang S. Bae, bp, luto, mingo, x86
  Cc: len.brown, lenb, dave.hansen, thiago.macieira, jing2.liu,
	ravi.v.shankar, linux-kernel, chang.seok.bae, kvm, Paolo Bonzini

On Fri, Oct 01 2021 at 17:41, Thomas Gleixner wrote:
> On Wed, Aug 25 2021 at 08:53, Chang S. Bae wrote:
>> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
>> index 74dde635df40..7c46747f6865 100644
>> --- a/arch/x86/kvm/x86.c
>> +++ b/arch/x86/kvm/x86.c
>> @@ -9899,11 +9899,16 @@ static void kvm_save_current_fpu(struct fpu *fpu)
>>  	 * KVM does not support dynamic user states yet. Assume the buffer
>>  	 * always has the minimum size.

I have to come back to this because that assumption is just broken.

create_vcpu()
   vcpu->user_fpu = alloc_default_fpu_size();
   vcpu->guest_fpu = alloc_default_fpu_size();

vcpu_task()
   get_amx_permission()
   use_amx()
     #NM
     alloc_larger_state()
   ...
   kvm_arch_vcpu_ioctl_run()
     kvm_arch_vcpu_ioctl_run()
       kvm_load_guest_fpu()
         kvm_save_current_fpu(vcpu->arch.user_fpu);
           save_fpregs_to_fpstate(fpu);         <- Out of bounds write

Adding a comment that KVM does not yet support dynamic user states does
not cut it, really.

Even if the above is unlikely, it is possible and has to be handled
correctly at the point where AMX support is enabled in the kernel
independent of guest support.

You have two options:

  1) Always allocate the large buffer size which is required to
     accomodate all possible features.

     Trivial, but waste of memory.

  2) Make the allocation dynamic which seems to be trivial to do in
     kvm_load_guest_fpu() at least for vcpu->user_fpu.

     The vcpu->guest_fpu handling can probably be postponed to the
     point where AMX is actually exposed to guests, but it's probably
     not the worst idea to think about the implications now.

Paolo, any opinions?

Thanks,

        tglx

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v10 10/28] x86/fpu/xstate: Update the XSTATE save function to support dynamic states
  2021-10-02 21:31     ` Thomas Gleixner
@ 2021-10-02 22:54       ` Bae, Chang Seok
  2021-10-05  8:16         ` Paolo Bonzini
  2021-10-05  7:50       ` Paolo Bonzini
  1 sibling, 1 reply; 18+ messages in thread
From: Bae, Chang Seok @ 2021-10-02 22:54 UTC (permalink / raw)
  To: Thomas Gleixner
  Cc: bp, Lutomirski, Andy, mingo, x86, Brown, Len, lenb, Hansen, Dave,
	Macieira, Thiago, Liu, Jing2, Shankar, Ravi V, linux-kernel, kvm,
	Paolo Bonzini

On Oct 2, 2021, at 14:31, Thomas Gleixner <tglx@linutronix.de> wrote:
> On Fri, Oct 01 2021 at 17:41, Thomas Gleixner wrote:
>> On Wed, Aug 25 2021 at 08:53, Chang S. Bae wrote:
>>> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
>>> index 74dde635df40..7c46747f6865 100644
>>> --- a/arch/x86/kvm/x86.c
>>> +++ b/arch/x86/kvm/x86.c
>>> @@ -9899,11 +9899,16 @@ static void kvm_save_current_fpu(struct fpu *fpu)
>>> 	 * KVM does not support dynamic user states yet. Assume the buffer
>>> 	 * always has the minimum size.
> 
> I have to come back to this because that assumption is just broken.
> 
> create_vcpu()
>   vcpu->user_fpu = alloc_default_fpu_size();
>   vcpu->guest_fpu = alloc_default_fpu_size();
> 
> vcpu_task()
>   get_amx_permission()
>   use_amx()
>     #NM
>     alloc_larger_state()
>   ...
>   kvm_arch_vcpu_ioctl_run()
>     kvm_arch_vcpu_ioctl_run()
>       kvm_load_guest_fpu()
>         kvm_save_current_fpu(vcpu->arch.user_fpu);
>           save_fpregs_to_fpstate(fpu);         <- Out of bounds write
> 
> Adding a comment that KVM does not yet support dynamic user states does
> not cut it, really.
> 
> Even if the above is unlikely, it is possible and has to be handled
> correctly at the point where AMX support is enabled in the kernel
> independent of guest support.

I see. 

> You have two options:
> 
>  1) Always allocate the large buffer size which is required to
>     accomodate all possible features.
> 
>     Trivial, but waste of memory.
> 
>  2) Make the allocation dynamic which seems to be trivial to do in
>     kvm_load_guest_fpu() at least for vcpu->user_fpu.
> 
>     The vcpu->guest_fpu handling can probably be postponed to the
>     point where AMX is actually exposed to guests, but it's probably
>     not the worst idea to think about the implications now.

FWIW, the proposed KVM patch for AMX looks to take (1) here [1] and
Paolo said [2]:

    Most guests will not need the whole xstate feature set.  So perhaps you 
    could set XFD to the host value | the guest value, trap #NM if the 
    host XFD is zero, and possibly reflect the exception to the guest's XFD 
    and XFD_ERR.

    In addition, loading the guest XFD MSRs should use the MSR autoload 
    feature (add_atomic_switch_msr).

And then I guess discussion goes on..

Thanks,
Chang

[1] https://lore.kernel.org/kvm/20210207154256.52850-4-jing2.liu@linux.intel.com/
[2] https://lore.kernel.org/kvm/ae5b0195-b04f-8eef-9e0d-2a46c761d2d5@redhat.com/


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v10 02/28] x86/fpu/xstate: Modify the initialization helper to handle both static and dynamic buffers
  2021-10-01 12:45   ` Thomas Gleixner
@ 2021-10-03 22:35     ` Bae, Chang Seok
  0 siblings, 0 replies; 18+ messages in thread
From: Bae, Chang Seok @ 2021-10-03 22:35 UTC (permalink / raw)
  To: Thomas Gleixner
  Cc: bp, Lutomirski, Andy, mingo, x86, Brown, Len, lenb, Hansen, Dave,
	Macieira, Thiago, Liu, Jing2, Shankar, Ravi V, linux-kernel, kvm

On Oct 1, 2021, at 05:45, Thomas Gleixner <tglx@linutronix.de> wrote:
> On Wed, Aug 25 2021 at 08:53, Chang S. Bae wrote:
>> Have the function initializing the XSTATE buffer take a struct fpu *
>> pointer in preparation for dynamic state buffer support.
>> 
>> init_fpstate is a special case, which is indicated by a null pointer
>> parameter to fpstate_init().
>> 
>> Also, fpstate_init_xstate() now accepts the state component bitmap to
>> customize the compacted format.
> 
> That's not a changelog. Changelogs have to explain the WHY not the WHAT.
> 
> I can see the WHY when I look at the later changes, but that's not how
> it works.

The same feedback was raised before [1]. I thought this changelog has been
settled down with Boris [2].

How about:

    “To prepare dynamic features, change fpstate_init()’s argument to a struct
     fpu * pointer instead of a struct fpregs_state * pointer.  A struct fpu
     will have new fields to handle dynamic features."

With fpstate_init_xstate() changes in a separate patch and defining init_fpu,
the last two sentences shall be removed.

> Also the subject of this patch is just wrong. It does not make the
> functions handle dynamic buffers, it prepares them to add support for
> that later.

How about “Prepare fpstate_init() to handle dynamic features"

>> +static inline void fpstate_init_xstate(struct xregs_state *xsave, u64 mask)
>> +{
>> +	/*
>> +	 * XRSTORS requires these bits set in xcomp_bv, or it will
>> +	 * trigger #GP:
>> +	 */
>> +	xsave->header.xcomp_bv = XCOMP_BV_COMPACTED_FORMAT | mask;
>> +}
> 
> This wants to be a separate cleanup patch which replaces the open coded
> variant here:

Okay, maybe the change becomes to be the new patch1.

>> diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
>> index fc1d529547e6..0fed7fbcf2e8 100644
>> --- a/arch/x86/kernel/fpu/xstate.c
>> +++ b/arch/x86/kernel/fpu/xstate.c
>> @@ -395,8 +395,7 @@ static void __init setup_init_fpu_buf(void)
>> 	print_xstate_features();
>> 
>> 	if (boot_cpu_has(X86_FEATURE_XSAVES))
>> -		init_fpstate.xsave.header.xcomp_bv = XCOMP_BV_COMPACTED_FORMAT |
>> -						     xfeatures_mask_all;
>> +		fpstate_init_xstate(&init_fpstate.xsave, xfeatures_mask_all);

Thanks,
Chang

[1] https://lore.kernel.org/lkml/20201207171251.GB16640@zn.tnic/
[2] https://lore.kernel.org/lkml/20210115124038.GA11337@zn.tnic/



^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v10 04/28] x86/fpu/xstate: Modify address finders to handle both static and dynamic buffers
  2021-10-01 13:15   ` Thomas Gleixner
@ 2021-10-03 22:35     ` Bae, Chang Seok
  2021-10-04 12:54       ` Thomas Gleixner
  0 siblings, 1 reply; 18+ messages in thread
From: Bae, Chang Seok @ 2021-10-03 22:35 UTC (permalink / raw)
  To: Thomas Gleixner
  Cc: bp, Lutomirski, Andy, mingo, x86, Brown, Len, lenb, Hansen, Dave,
	Macieira, Thiago, Liu, Jing2, Shankar, Ravi V, linux-kernel, kvm

On Oct 1, 2021, at 06:15, Thomas Gleixner <tglx@linutronix.de> wrote:
> On Wed, Aug 25 2021 at 08:53, Chang S. Bae wrote:
> 
>> Have all the functions finding XSTATE address take a struct fpu * pointer
>> in preparation for dynamic state buffer support.
>> 
>> init_fpstate is a special case, which is indicated by a null pointer
>> parameter to get_xsave_addr() and __raw_xsave_addr().
> 
> Same comment vs. subject. Prepare ...

How about:
    "Prepare address finders to handle dynamic features"

>> +	if (fpu)
>> +		xsave = &fpu->state.xsave;
>> +	else
>> +		xsave = &init_fpstate.xsave;
>> +
>> +	return xsave + xstate_comp_offsets[xfeature_nr];
> 
> So you have the same conditionals and the same comments vs. that NULL
> pointer oddity how many times now all over the place?
> 
> That can be completely avoided:
> 
> Patch 1:
> 
> -union fpregs_state init_fpstate __ro_after_init;
> +static union fpregs_state init_fpstate __ro_after_init;
> +struct fpu init_fpu = { .state = &init_fpstate } __ro_after_init;
> 
> and make all users of init_fpstate access it through init_fpu.
> 
> Patches 2..N which change arguments from fpregs_state to fpu:
> 
> -	fun(init_fpu->state);
> +	fun(&init_fpu);
> 
> Patch M which adds state_mask:
> 
> @fpu__init_system_xstate()
> +	init_fpu.state_mask = xfeatures_mask_all;
> 
> Hmm?

Okay, a NULL pointer is odd and it as an argument should be avoided. Defining
a separate struct fpu for the initial state can make every function expect a
valid struct fpu pointer.

I think that the patch set will have such order (once [1] is dropped out) of,
    - patch1 (new): a cleanup patch for fpstate_init_xstate() in patch1
    - patch2 (new): the above init_fpu goes into this, and 
    - patch3-5: changes arguments to fpu,
    - patch6 (new): finally patch 6 to add ->state_mask and ->state_size.
    …

Thanks,
Chang

[1] https://lore.kernel.org/lkml/20210825155413.19673-2-chang.seok.bae@intel.com/


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v10 06/28] x86/fpu/xstate: Add new variables to indicate dynamic XSTATE buffer size
  2021-10-01 13:32   ` Thomas Gleixner
@ 2021-10-03 22:36     ` Bae, Chang Seok
  0 siblings, 0 replies; 18+ messages in thread
From: Bae, Chang Seok @ 2021-10-03 22:36 UTC (permalink / raw)
  To: Thomas Gleixner
  Cc: bp, Lutomirski, Andy, mingo, x86, Brown, Len, lenb, Hansen, Dave,
	Macieira, Thiago, Liu, Jing2, Shankar, Ravi V, linux-kernel, kvm

On Oct 1, 2021, at 06:32, Thomas Gleixner <tglx@linutronix.de> wrote:
> On Wed, Aug 25 2021 at 08:53, Chang S. Bae wrote:
>> +/**
>> + * struct fpu_xstate_buffer_config - xstate buffer configuration
>> + * @max_size:			The CPUID-enumerated all-feature "maximum" size
>> + *				for xstate per-task buffer.
>> + * @min_size:			The size to fit into the statically-allocated
>> + *				buffer. With dynamic states, this buffer no longer
>> + *				contains all the enabled state components.
>> + * @user_size:			The size of user-space buffer for signal and
>> + *				ptrace frames, in the non-compacted format.
>> + */
> 
>> void fpstate_init(struct fpu *fpu)
>> {
>> 	union fpregs_state *state;
>> +	unsigned int size;
>> +	u64 mask;
>> 
>> -	if (likely(fpu))
>> +	if (likely(fpu)) {
>> 		state = &fpu->state;
>> -	else
>> +		/* The dynamic user states are not prepared yet. */
>> +		mask = xfeatures_mask_all & ~xfeatures_mask_user_dynamic;
> 
> The patch ordering is really odd. Why aren't you adding
> 
>     fpu->state_mask
> and
>     fpu->state_size
> 
> first and initialize state_mask and state_size to the fixed mode and
> then add the dynamic sizing on top?

I once considered the need of referencing fpu->state_size is not that much. At
runtime, the buffer re-allocation needs to calculate it. Then fpstate_init()
and the below are the ones that reference the size.

Maybe I was too conservative in adding this. I’m going to follow your
suggestion in a new patch.

>> 	/*
>> 	 * If the target FPU state is not resident in the CPU registers, just
>> 	 * memcpy() from current, else save CPU state directly to the target.
>> +	 *
>> +	 * KVM does not support dynamic user states yet. Assume the buffer
>> +	 * always has the minimum size.
>> 	 */
>> 	if (test_thread_flag(TIF_NEED_FPU_LOAD))
>> 		memcpy(&fpu->state, &current->thread.fpu.state,
>> -		       fpu_kernel_xstate_size);
>> +		       fpu_buf_cfg.min_size);
> 
> Which completely avoids the export of fpu_buf_cfg for KVM because the
> information is just available via struc fpu. As a bonus the export of
> fpu_kernel_xstate_size can be removed as well.
> 
> Hmm?

Yes, it is. But I suspect it needs to reference size values. KVM’s fpu data
can be allocated with the min size, instead of sizeof(struct fpu). KVM code
might want to know the max to support dynamic features, as similar to [1] or
so.

Thanks,
Chang

[1] https://lore.kernel.org/kvm/20210207154256.52850-4-jing2.liu@linux.intel.com/


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v10 04/28] x86/fpu/xstate: Modify address finders to handle both static and dynamic buffers
  2021-10-03 22:35     ` Bae, Chang Seok
@ 2021-10-04 12:54       ` Thomas Gleixner
  0 siblings, 0 replies; 18+ messages in thread
From: Thomas Gleixner @ 2021-10-04 12:54 UTC (permalink / raw)
  To: Bae, Chang Seok
  Cc: bp, Lutomirski, Andy, mingo, x86, Brown, Len, lenb, Hansen, Dave,
	Macieira, Thiago, Liu, Jing2, Shankar, Ravi V, linux-kernel, kvm

On Sun, Oct 03 2021 at 22:35, Chang Seok Bae wrote:
> On Oct 1, 2021, at 06:15, Thomas Gleixner <tglx@linutronix.de> wrote:
>
> Okay, a NULL pointer is odd and it as an argument should be avoided. Defining
> a separate struct fpu for the initial state can make every function expect a
> valid struct fpu pointer.
>
> I think that the patch set will have such order (once [1] is dropped out) of,
>     - patch1 (new): a cleanup patch for fpstate_init_xstate() in patch1
>     - patch2 (new): the above init_fpu goes into this, and 
>     - patch3-5: changes arguments to fpu,

So actually I sat down over the weekend and looked at that again. Adding this
to struct fpu is wrong. The size and features information belongs into
something like this:

struct fpstate {
	unsigned int		size;
        u64			xfeatures;

        union fpregs_state	regs;
};

Why? Simply because fpstate is the container for the dynamically sized
regs and that's where it semantically belongs.

While staring at that I just started to cleanup stuff all over the place
to make the integration of this simpler.

The patches are completely untested and have no changelogs yet, but if
you want a preview, I've uploaded a patch series to:

    https://tglx.de/~tglx/patches.tar

I'm still staring at some of the dynamic feature integrations, but this
is roughly where this should be heading.

Thanks,

        tglx

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v10 10/28] x86/fpu/xstate: Update the XSTATE save function to support dynamic states
  2021-10-02 21:31     ` Thomas Gleixner
  2021-10-02 22:54       ` Bae, Chang Seok
@ 2021-10-05  7:50       ` Paolo Bonzini
  2021-10-05  9:55         ` Thomas Gleixner
  1 sibling, 1 reply; 18+ messages in thread
From: Paolo Bonzini @ 2021-10-05  7:50 UTC (permalink / raw)
  To: Thomas Gleixner, Chang S. Bae, bp, luto, mingo, x86
  Cc: len.brown, lenb, dave.hansen, thiago.macieira, jing2.liu,
	ravi.v.shankar, linux-kernel, kvm

On 02/10/21 23:31, Thomas Gleixner wrote:
> You have two options:
> 
>    1) Always allocate the large buffer size which is required to
>       accomodate all possible features.
> 
>       Trivial, but waste of memory.
> 
>    2) Make the allocation dynamic which seems to be trivial to do in
>       kvm_load_guest_fpu() at least for vcpu->user_fpu.
> 
>       The vcpu->guest_fpu handling can probably be postponed to the
>       point where AMX is actually exposed to guests, but it's probably
>       not the worst idea to think about the implications now.
> 
> Paolo, any opinions?

Unless we're missing something, dynamic allocation should not be hard to 
do for both guest_fpu and user_fpu; either near the call sites of 
kvm_save_current_fpu, or in the function itself.  Basically adding 
something like

	struct kvm_fpu {
		struct fpu *state;
		unsigned size;
	} user_fpu, guest_fpu;

to struct kvm_vcpu.  Since the size can vary, it can be done simply with 
kzalloc instead of the x86_fpu_cache that KVM has now.

The only small complication is that kvm_save_current_fpu is called 
within fpregs_lock; the allocation has to be outside so that you can use 
GFP_KERNEL even on RT kernels.   If the code looks better with 
fpregs_lock moved within kvm_save_current_fpu, go ahead and do it like that.

Paolo


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v10 10/28] x86/fpu/xstate: Update the XSTATE save function to support dynamic states
  2021-10-02 22:54       ` Bae, Chang Seok
@ 2021-10-05  8:16         ` Paolo Bonzini
  0 siblings, 0 replies; 18+ messages in thread
From: Paolo Bonzini @ 2021-10-05  8:16 UTC (permalink / raw)
  To: Bae, Chang Seok, Thomas Gleixner
  Cc: bp, Lutomirski, Andy, mingo, x86, Brown, Len, lenb, Hansen, Dave,
	Macieira, Thiago, Liu, Jing2, Shankar, Ravi V, linux-kernel, kvm

On 03/10/21 00:54, Bae, Chang Seok wrote:
> FWIW, the proposed KVM patch for AMX looks to take (1) here [1] and
> Paolo said [2]:
> 
>      Most guests will not need the whole xstate feature set.  So perhaps you
>      could set XFD to the host value | the guest value, trap #NM if the
>      host XFD is zero, and possibly reflect the exception to the guest's XFD
                    ^^^^

(better: if the host XFD is nonzero, and the guest XCR0 includes any bit 
whose state is optional).

>      and XFD_ERR.

This comment is about letting arch/x86/kernel resize current->thread.fpu 
while the guest runs.  It's not necessary before KVM supports AMX, 
because KVM will never let a guest set XCR0[18] (__kvm_set_xcr).

Thomas instead was talking about allocation of vcpu->arch.guest_fpu and 
vcpu->arch.user_fpu.

For dynamic allocation in kvm_save_current_fpu, you can retrieve the 
XINUSE bitmask (XGETBV with RCX=1).  If it contains any bits that have 
optional state, you check if KVM's vcpu->arch.guest_fpu or 
vcpu->arch.user_fpu are already big enough, and if not do the reallocation.

If X86_FEATURE_XGETBV1 is not available, you will not need to resize. 
If XFD is supported but X86_FEATURE_XGETBV1 is not, you can just make 
kvm_arch_init fail with -ENODEV.  It's a nonsensical combination.

Thanks,

Paolo

>      In addition, loading the guest XFD MSRs should use the MSR autoload
>      feature (add_atomic_switch_msr).
> 
> And then I guess discussion goes on..


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v10 10/28] x86/fpu/xstate: Update the XSTATE save function to support dynamic states
  2021-10-05  7:50       ` Paolo Bonzini
@ 2021-10-05  9:55         ` Thomas Gleixner
  0 siblings, 0 replies; 18+ messages in thread
From: Thomas Gleixner @ 2021-10-05  9:55 UTC (permalink / raw)
  To: Paolo Bonzini, Chang S. Bae, bp, luto, mingo, x86
  Cc: len.brown, lenb, dave.hansen, thiago.macieira, jing2.liu,
	ravi.v.shankar, linux-kernel, kvm

On Tue, Oct 05 2021 at 09:50, Paolo Bonzini wrote:
> On 02/10/21 23:31, Thomas Gleixner wrote:
>> You have two options:
>> 
>>    1) Always allocate the large buffer size which is required to
>>       accomodate all possible features.
>> 
>>       Trivial, but waste of memory.
>> 
>>    2) Make the allocation dynamic which seems to be trivial to do in
>>       kvm_load_guest_fpu() at least for vcpu->user_fpu.
>> 
>>       The vcpu->guest_fpu handling can probably be postponed to the
>>       point where AMX is actually exposed to guests, but it's probably
>>       not the worst idea to think about the implications now.
>> 
>> Paolo, any opinions?
>
> Unless we're missing something, dynamic allocation should not be hard to 
> do for both guest_fpu and user_fpu; either near the call sites of 
> kvm_save_current_fpu, or in the function itself.  Basically adding 
> something like
>
> 	struct kvm_fpu {
> 		struct fpu *state;
> 		unsigned size;
> 	} user_fpu, guest_fpu;
>
> to struct kvm_vcpu.  Since the size can vary, it can be done simply with 
> kzalloc instead of the x86_fpu_cache that KVM has now.
>
> The only small complication is that kvm_save_current_fpu is called 
> within fpregs_lock; the allocation has to be outside so that you can use 
> GFP_KERNEL even on RT kernels.   If the code looks better with 
> fpregs_lock moved within kvm_save_current_fpu, go ahead and do it like that.

I'm reworking quite some of this already and with the new bits you don't
have to do anything in kvm_fpu because the size and allowed feature bits
are going to be part of fpu->fpstate.

Stay tuned.

Thanks,

        tglx

^ permalink raw reply	[flat|nested] 18+ messages in thread

end of thread, other threads:[~2021-10-05  9:56 UTC | newest]

Thread overview: 18+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
     [not found] <20210825155413.19673-1-chang.seok.bae@intel.com>
2021-08-25 15:53 ` [PATCH v10 02/28] x86/fpu/xstate: Modify the initialization helper to handle both static and dynamic buffers Chang S. Bae
2021-10-01 12:45   ` Thomas Gleixner
2021-10-03 22:35     ` Bae, Chang Seok
2021-08-25 15:53 ` [PATCH v10 04/28] x86/fpu/xstate: Modify address finders " Chang S. Bae
2021-10-01 13:15   ` Thomas Gleixner
2021-10-03 22:35     ` Bae, Chang Seok
2021-10-04 12:54       ` Thomas Gleixner
2021-08-25 15:53 ` [PATCH v10 06/28] x86/fpu/xstate: Add new variables to indicate dynamic XSTATE buffer size Chang S. Bae
2021-10-01 13:32   ` Thomas Gleixner
2021-10-03 22:36     ` Bae, Chang Seok
2021-08-25 15:53 ` [PATCH v10 08/28] x86/fpu/xstate: Convert the struct fpu 'state' field to a pointer Chang S. Bae
2021-08-25 15:53 ` [PATCH v10 10/28] x86/fpu/xstate: Update the XSTATE save function to support dynamic states Chang S. Bae
2021-10-01 15:41   ` Thomas Gleixner
2021-10-02 21:31     ` Thomas Gleixner
2021-10-02 22:54       ` Bae, Chang Seok
2021-10-05  8:16         ` Paolo Bonzini
2021-10-05  7:50       ` Paolo Bonzini
2021-10-05  9:55         ` Thomas Gleixner

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).